forked from pool/python-xmldiff
- added patches
fix 34b810f439
+ python-xmldiff-src-upgrades.patch
OBS-URL: https://build.opensuse.org/request/show/1010919
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-xmldiff?expand=0&rev=16
11544 lines
461 KiB
Diff
11544 lines
461 KiB
Diff
From 34b810f4394965aadeca31204e6b76eb023fd11a Mon Sep 17 00:00:00 2001
|
|
From: Jeremy Lavergne <jlavergne@shoobx.com>
|
|
Date: Tue, 4 Aug 2020 08:32:14 -0400
|
|
Subject: [PATCH] src upgrades (#64)
|
|
|
|
* black
|
|
|
|
* pyupgrade
|
|
|
|
* py3+
|
|
|
|
* py3.6+
|
|
|
|
* black
|
|
|
|
* update python version deps
|
|
|
|
* travis: drop py < 3.6, use bionic
|
|
|
|
* flake fixes
|
|
|
|
* more flake8 fixups
|
|
|
|
* fixup black command
|
|
|
|
* try skipping black on pypy3
|
|
|
|
* travis: try figuring out build type by env
|
|
---
|
|
.travis.yml | 14 +-
|
|
Makefile | 5 +-
|
|
docs/source/conf.py | 158 +-
|
|
docs/source/contributing.rst | 6 +-
|
|
setup.py | 75 +-
|
|
tests/test_diff.py | 913 +++++----
|
|
tests/test_formatting.py | 394 ++--
|
|
tests/test_main.py | 83 +-
|
|
tests/test_patch.py | 169 +-
|
|
tests/test_utils.py | 116 +-
|
|
tests/testing.py | 20 +-
|
|
xmldiff/_diff_match_patch_py2.py | 3105 ++++++++++++++++--------------
|
|
xmldiff/_diff_match_patch_py3.py | 3080 +++++++++++++++--------------
|
|
xmldiff/actions.py | 22 +-
|
|
xmldiff/diff.py | 98 +-
|
|
xmldiff/diff_match_patch.py | 1 +
|
|
xmldiff/formatting.py | 246 +--
|
|
xmldiff/main.py | 170 +-
|
|
xmldiff/patch.py | 17 +-
|
|
xmldiff/utils.py | 57 +-
|
|
20 files changed, 4506 insertions(+), 4243 deletions(-)
|
|
|
|
Index: xmldiff-2.4/.travis.yml
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/.travis.yml
|
|
+++ xmldiff-2.4/.travis.yml
|
|
@@ -4,17 +4,21 @@ language: python
|
|
matrix:
|
|
fast_finish: true
|
|
include:
|
|
- - python: 2.7
|
|
- - python: 3.5
|
|
- python: 3.6
|
|
+ env: MATRIX=py36
|
|
- python: 3.7
|
|
sudo: required
|
|
- dist: xenial
|
|
- - python: pypy
|
|
+ dist: bionic
|
|
+ env: MATRIX=py37
|
|
- python: pypy3
|
|
+ env: MATRIX=pypy3
|
|
+
|
|
+before_install:
|
|
+ - if [ $MATRIX != pypy3 ]; then pip install black; fi
|
|
+ - pip install coverage coveralls flake8 sphinx sphinx-argparse
|
|
|
|
install:
|
|
- - pip install . coverage coveralls flake8 sphinx sphinx-argparse
|
|
+ - pip install .
|
|
|
|
script:
|
|
- make flake
|
|
Index: xmldiff-2.4/Makefile
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/Makefile
|
|
+++ xmldiff-2.4/Makefile
|
|
@@ -5,7 +5,10 @@ dfm_source_3 := "https://raw.githubuserc
|
|
all: coverage flake
|
|
|
|
flake:
|
|
- flake8 tests xmldiff --exclude *diff_match_patch*.py
|
|
+ifneq (, $(shell which black))
|
|
+ black --check .
|
|
+endif
|
|
+ flake8 tests xmldiff --exclude *diff_match_patch*.py --ignore=E231,E501,W503
|
|
|
|
coverage:
|
|
coverage run setup.py test
|
|
Index: xmldiff-2.4/docs/source/conf.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/docs/source/conf.py
|
|
+++ xmldiff-2.4/docs/source/conf.py
|
|
@@ -1,4 +1,3 @@
|
|
-# -*- coding: utf-8 -*-
|
|
#
|
|
# xmldiff documentation build configuration file, created by
|
|
# sphinx-quickstart on Tue Sep 4 12:07:12 2018.
|
|
@@ -18,49 +17,49 @@ import os
|
|
# If extensions (or modules to document with autodoc) are in another directory,
|
|
# add these directories to sys.path here. If the directory is relative to the
|
|
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
|
-#sys.path.insert(0, os.path.abspath('.'))
|
|
+# sys.path.insert(0, os.path.abspath('.'))
|
|
|
|
# -- General configuration ------------------------------------------------
|
|
|
|
# If your documentation needs a minimal Sphinx version, state it here.
|
|
-#needs_sphinx = '1.0'
|
|
+# needs_sphinx = '1.0'
|
|
|
|
# Add any Sphinx extension module names here, as strings. They can be
|
|
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
|
# ones.
|
|
extensions = [
|
|
- 'sphinx.ext.doctest',
|
|
- 'sphinx.ext.coverage',
|
|
- 'sphinxarg.ext',
|
|
+ "sphinx.ext.doctest",
|
|
+ "sphinx.ext.coverage",
|
|
+ "sphinxarg.ext",
|
|
]
|
|
|
|
# Add any paths that contain templates here, relative to this directory.
|
|
-templates_path = ['_templates']
|
|
+templates_path = ["_templates"]
|
|
|
|
# The suffix(es) of source filenames.
|
|
# You can specify multiple suffix as a list of string:
|
|
# source_suffix = ['.rst', '.md']
|
|
-source_suffix = '.rst'
|
|
+source_suffix = ".rst"
|
|
|
|
# The encoding of source files.
|
|
-#source_encoding = 'utf-8-sig'
|
|
+# source_encoding = 'utf-8-sig'
|
|
|
|
# The master toctree document.
|
|
-master_doc = 'index'
|
|
+master_doc = "index"
|
|
|
|
# General information about the project.
|
|
-project = u'xmldiff'
|
|
-copyright = u'2018, Lennart Regebro'
|
|
-author = u'Lennart Regebro'
|
|
+project = "xmldiff"
|
|
+copyright = "2018, Lennart Regebro"
|
|
+author = "Lennart Regebro"
|
|
|
|
# The version info for the project you're documenting, acts as replacement for
|
|
# |version| and |release|, also used in various other places throughout the
|
|
# built documents.
|
|
#
|
|
# The short X.Y version.
|
|
-#version = u'2.0'
|
|
+# version = u'2.0'
|
|
# The full version, including alpha/beta/rc tags.
|
|
-#release = u'2.0'
|
|
+# release = u'2.0'
|
|
|
|
# The language for content autogenerated by Sphinx. Refer to documentation
|
|
# for a list of supported languages.
|
|
@@ -71,9 +70,9 @@ language = None
|
|
|
|
# There are two options for replacing |today|: either, you set today to some
|
|
# non-false value, then it is used:
|
|
-#today = ''
|
|
+# today = ''
|
|
# Else, today_fmt is used as the format for a strftime call.
|
|
-#today_fmt = '%B %d, %Y'
|
|
+# today_fmt = '%B %d, %Y'
|
|
|
|
# List of patterns, relative to source directory, that match files and
|
|
# directories to ignore when looking for source files.
|
|
@@ -81,27 +80,27 @@ exclude_patterns = []
|
|
|
|
# The reST default role (used for this markup: `text`) to use for all
|
|
# documents.
|
|
-#default_role = None
|
|
+# default_role = None
|
|
|
|
# If true, '()' will be appended to :func: etc. cross-reference text.
|
|
-#add_function_parentheses = True
|
|
+# add_function_parentheses = True
|
|
|
|
# If true, the current module name will be prepended to all description
|
|
# unit titles (such as .. function::).
|
|
-#add_module_names = True
|
|
+# add_module_names = True
|
|
|
|
# If true, sectionauthor and moduleauthor directives will be shown in the
|
|
# output. They are ignored by default.
|
|
-#show_authors = False
|
|
+# show_authors = False
|
|
|
|
# The name of the Pygments (syntax highlighting) style to use.
|
|
-pygments_style = 'sphinx'
|
|
+pygments_style = "sphinx"
|
|
|
|
# A list of ignored prefixes for module index sorting.
|
|
-#modindex_common_prefix = []
|
|
+# modindex_common_prefix = []
|
|
|
|
# If true, keep warnings as "system message" paragraphs in the built documents.
|
|
-#keep_warnings = False
|
|
+# keep_warnings = False
|
|
|
|
# If true, `todo` and `todoList` produce output, else they produce nothing.
|
|
todo_include_todos = False
|
|
@@ -111,156 +110,149 @@ todo_include_todos = False
|
|
|
|
# The theme to use for HTML and HTML Help pages. See the documentation for
|
|
# a list of builtin themes.
|
|
-html_theme = 'alabaster'
|
|
+html_theme = "alabaster"
|
|
|
|
# Theme options are theme-specific and customize the look and feel of a theme
|
|
# further. For a list of options available for each theme, see the
|
|
# documentation.
|
|
-#html_theme_options = {}
|
|
+# html_theme_options = {}
|
|
|
|
# Add any paths that contain custom themes here, relative to this directory.
|
|
-#html_theme_path = []
|
|
+# html_theme_path = []
|
|
|
|
# The name for this set of Sphinx documents. If None, it defaults to
|
|
# "<project> v<release> documentation".
|
|
-#html_title = None
|
|
+# html_title = None
|
|
|
|
# A shorter title for the navigation bar. Default is the same as html_title.
|
|
-#html_short_title = None
|
|
+# html_short_title = None
|
|
|
|
# The name of an image file (relative to this directory) to place at the top
|
|
# of the sidebar.
|
|
-#html_logo = None
|
|
+# html_logo = None
|
|
|
|
# The name of an image file (relative to this directory) to use as a favicon of
|
|
# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
|
# pixels large.
|
|
-#html_favicon = None
|
|
+# html_favicon = None
|
|
|
|
# Add any paths that contain custom static files (such as style sheets) here,
|
|
# relative to this directory. They are copied after the builtin static files,
|
|
# so a file named "default.css" will overwrite the builtin "default.css".
|
|
-html_static_path = ['static']
|
|
+html_static_path = ["static"]
|
|
|
|
# Add any extra paths that contain custom files (such as robots.txt or
|
|
# .htaccess) here, relative to this directory. These files are copied
|
|
# directly to the root of the documentation.
|
|
-#html_extra_path = []
|
|
+# html_extra_path = []
|
|
|
|
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
|
# using the given strftime format.
|
|
-#html_last_updated_fmt = '%b %d, %Y'
|
|
+# html_last_updated_fmt = '%b %d, %Y'
|
|
|
|
# If true, SmartyPants will be used to convert quotes and dashes to
|
|
# typographically correct entities.
|
|
-#html_use_smartypants = True
|
|
+# html_use_smartypants = True
|
|
|
|
# Custom sidebar templates, maps document names to template names.
|
|
-#html_sidebars = {}
|
|
+# html_sidebars = {}
|
|
|
|
# Additional templates that should be rendered to pages, maps page names to
|
|
# template names.
|
|
-#html_additional_pages = {}
|
|
+# html_additional_pages = {}
|
|
|
|
# If false, no module index is generated.
|
|
-#html_domain_indices = True
|
|
+# html_domain_indices = True
|
|
|
|
# If false, no index is generated.
|
|
-#html_use_index = True
|
|
+# html_use_index = True
|
|
|
|
# If true, the index is split into individual pages for each letter.
|
|
-#html_split_index = False
|
|
+# html_split_index = False
|
|
|
|
# If true, links to the reST sources are added to the pages.
|
|
-#html_show_sourcelink = True
|
|
+# html_show_sourcelink = True
|
|
|
|
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
|
-#html_show_sphinx = True
|
|
+# html_show_sphinx = True
|
|
|
|
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
|
-#html_show_copyright = True
|
|
+# html_show_copyright = True
|
|
|
|
# If true, an OpenSearch description file will be output, and all pages will
|
|
# contain a <link> tag referring to it. The value of this option must be the
|
|
# base URL from which the finished HTML is served.
|
|
-#html_use_opensearch = ''
|
|
+# html_use_opensearch = ''
|
|
|
|
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
|
-#html_file_suffix = None
|
|
+# html_file_suffix = None
|
|
|
|
# Language to be used for generating the HTML full-text search index.
|
|
# Sphinx supports the following languages:
|
|
# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
|
|
# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
|
|
-#html_search_language = 'en'
|
|
+# html_search_language = 'en'
|
|
|
|
# A dictionary with options for the search language support, empty by default.
|
|
# Now only 'ja' uses this config value
|
|
-#html_search_options = {'type': 'default'}
|
|
+# html_search_options = {'type': 'default'}
|
|
|
|
# The name of a javascript file (relative to the configuration directory) that
|
|
# implements a search results scorer. If empty, the default will be used.
|
|
-#html_search_scorer = 'scorer.js'
|
|
+# html_search_scorer = 'scorer.js'
|
|
|
|
# Output file base name for HTML help builder.
|
|
-htmlhelp_basename = 'xmldiffdoc'
|
|
+htmlhelp_basename = "xmldiffdoc"
|
|
|
|
# -- Options for LaTeX output ---------------------------------------------
|
|
|
|
latex_elements = {
|
|
-# The paper size ('letterpaper' or 'a4paper').
|
|
-#'papersize': 'letterpaper',
|
|
-
|
|
-# The font size ('10pt', '11pt' or '12pt').
|
|
-#'pointsize': '10pt',
|
|
-
|
|
-# Additional stuff for the LaTeX preamble.
|
|
-#'preamble': '',
|
|
-
|
|
-# Latex figure (float) alignment
|
|
-#'figure_align': 'htbp',
|
|
+ # The paper size ('letterpaper' or 'a4paper').
|
|
+ #'papersize': 'letterpaper',
|
|
+ # The font size ('10pt', '11pt' or '12pt').
|
|
+ #'pointsize': '10pt',
|
|
+ # Additional stuff for the LaTeX preamble.
|
|
+ #'preamble': '',
|
|
+ # Latex figure (float) alignment
|
|
+ #'figure_align': 'htbp',
|
|
}
|
|
|
|
# Grouping the document tree into LaTeX files. List of tuples
|
|
# (source start file, target name, title,
|
|
# author, documentclass [howto, manual, or own class]).
|
|
latex_documents = [
|
|
- (master_doc, 'xmldiff.tex', u'xmldiff Documentation',
|
|
- u'Lennart Regebro', 'manual'),
|
|
+ (master_doc, "xmldiff.tex", "xmldiff Documentation", "Lennart Regebro", "manual"),
|
|
]
|
|
|
|
# The name of an image file (relative to this directory) to place at the top of
|
|
# the title page.
|
|
-#latex_logo = None
|
|
+# latex_logo = None
|
|
|
|
# For "manual" documents, if this is true, then toplevel headings are parts,
|
|
# not chapters.
|
|
-#latex_use_parts = False
|
|
+# latex_use_parts = False
|
|
|
|
# If true, show page references after internal links.
|
|
-#latex_show_pagerefs = False
|
|
+# latex_show_pagerefs = False
|
|
|
|
# If true, show URL addresses after external links.
|
|
-#latex_show_urls = False
|
|
+# latex_show_urls = False
|
|
|
|
# Documents to append as an appendix to all manuals.
|
|
-#latex_appendices = []
|
|
+# latex_appendices = []
|
|
|
|
# If false, no module index is generated.
|
|
-#latex_domain_indices = True
|
|
+# latex_domain_indices = True
|
|
|
|
|
|
# -- Options for manual page output ---------------------------------------
|
|
|
|
# One entry per manual page. List of tuples
|
|
# (source start file, name, description, authors, manual section).
|
|
-man_pages = [
|
|
- (master_doc, 'xmldiff', u'xmldiff Documentation',
|
|
- [author], 1)
|
|
-]
|
|
+man_pages = [(master_doc, "xmldiff", "xmldiff Documentation", [author], 1)]
|
|
|
|
# If true, show URL addresses after external links.
|
|
-#man_show_urls = False
|
|
+# man_show_urls = False
|
|
|
|
|
|
# -- Options for Texinfo output -------------------------------------------
|
|
@@ -269,19 +261,25 @@ man_pages = [
|
|
# (source start file, target name, title, author,
|
|
# dir menu entry, description, category)
|
|
texinfo_documents = [
|
|
- (master_doc, 'xmldiff', u'xmldiff Documentation',
|
|
- author, 'xmldiff', 'One line description of project.',
|
|
- 'Miscellaneous'),
|
|
+ (
|
|
+ master_doc,
|
|
+ "xmldiff",
|
|
+ "xmldiff Documentation",
|
|
+ author,
|
|
+ "xmldiff",
|
|
+ "One line description of project.",
|
|
+ "Miscellaneous",
|
|
+ ),
|
|
]
|
|
|
|
# Documents to append as an appendix to all manuals.
|
|
-#texinfo_appendices = []
|
|
+# texinfo_appendices = []
|
|
|
|
# If false, no module index is generated.
|
|
-#texinfo_domain_indices = True
|
|
+# texinfo_domain_indices = True
|
|
|
|
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
|
-#texinfo_show_urls = 'footnote'
|
|
+# texinfo_show_urls = 'footnote'
|
|
|
|
# If true, do not generate a @detailmenu in the "Top" node's menu.
|
|
-#texinfo_no_detailmenu = False
|
|
+# texinfo_no_detailmenu = False
|
|
Index: xmldiff-2.4/docs/source/contributing.rst
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/docs/source/contributing.rst
|
|
+++ xmldiff-2.4/docs/source/contributing.rst
|
|
@@ -15,9 +15,9 @@ Setting Up a Development Environment
|
|
|
|
To set up a development environment you need a github account, git, and
|
|
of course Python with pip installed. You also should have the Python tools
|
|
-``coverage`` and ``flake8`` installed::
|
|
+``black``, ``coverage``, and ``flake8`` installed::
|
|
|
|
- pip install coverage flake8
|
|
+ pip install black coverage flake8
|
|
|
|
Then you need to clone the repository, and install it's dependencies::
|
|
|
|
@@ -44,8 +44,6 @@ The following test runners/commands are
|
|
|
|
* ``python setup.py test``
|
|
|
|
- * ``nosetests``
|
|
-
|
|
* ``pytest``
|
|
|
|
There is no support for ``tox`` to run test under different Python versions.
|
|
Index: xmldiff-2.4/setup.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/setup.py
|
|
+++ xmldiff-2.4/setup.py
|
|
@@ -1,49 +1,45 @@
|
|
-from io import open
|
|
from setuptools import setup, find_packages
|
|
|
|
-version = '2.4'
|
|
+version = "2.5.dev0"
|
|
|
|
-with open('README.rst', 'rt', encoding='utf8') as readme:
|
|
+with open("README.rst", encoding="utf8") as readme:
|
|
description = readme.read()
|
|
|
|
-with open('CHANGES.rst', 'rt', encoding='utf8') as changes:
|
|
+with open("CHANGES.rst", encoding="utf8") as changes:
|
|
history = changes.read()
|
|
|
|
|
|
-setup(name='xmldiff',
|
|
- version=version,
|
|
- description="Creates diffs of XML files",
|
|
- long_description=description + '\n' + history,
|
|
- # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
|
|
- classifiers=['Development Status :: 5 - Production/Stable',
|
|
- 'Topic :: Text Processing :: Markup :: XML',
|
|
- 'Operating System :: OS Independent',
|
|
- 'Programming Language :: Python :: 2',
|
|
- 'Programming Language :: Python :: 2.7',
|
|
- 'Programming Language :: Python :: 3',
|
|
- 'Programming Language :: Python :: 3.5',
|
|
- 'Programming Language :: Python :: 3.6',
|
|
- 'Programming Language :: Python :: 3.7',
|
|
- 'License :: OSI Approved :: MIT License',
|
|
- ],
|
|
- keywords='xml html diff',
|
|
- author='Lennart Regebro',
|
|
- author_email='lregebro@shoobx.com',
|
|
- url='https://github.com/Shoobx/xmldiff',
|
|
- license='MIT',
|
|
- packages=find_packages(exclude=['doc', 'tests']),
|
|
- include_package_data=True,
|
|
- zip_safe=False,
|
|
- install_requires=[
|
|
- 'setuptools',
|
|
- 'lxml>=3.1.0',
|
|
- 'six',
|
|
- ],
|
|
- test_suite='tests',
|
|
- entry_points={
|
|
- 'console_scripts': [
|
|
- 'xmldiff = xmldiff.main:diff_command',
|
|
- 'xmlpatch = xmldiff.main:patch_command',
|
|
- ],
|
|
- },
|
|
+setup(
|
|
+ name="xmldiff",
|
|
+ version=version,
|
|
+ description="Creates diffs of XML files",
|
|
+ long_description=description + "\n" + history,
|
|
+ # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
|
|
+ classifiers=[
|
|
+ "Development Status :: 5 - Production/Stable",
|
|
+ "Topic :: Text Processing :: Markup :: XML",
|
|
+ "Operating System :: OS Independent",
|
|
+ "Programming Language :: Python :: 3",
|
|
+ "Programming Language :: Python :: 3.6",
|
|
+ "Programming Language :: Python :: 3.7",
|
|
+ "Programming Language :: Python :: 3.8",
|
|
+ "License :: OSI Approved :: MIT License",
|
|
+ ],
|
|
+ keywords="xml html diff",
|
|
+ author="Lennart Regebro",
|
|
+ author_email="lregebro@shoobx.com",
|
|
+ url="https://github.com/Shoobx/xmldiff",
|
|
+ license="MIT",
|
|
+ packages=find_packages(exclude=["doc", "tests"]),
|
|
+ include_package_data=True,
|
|
+ zip_safe=False,
|
|
+ install_requires=["setuptools", "lxml>=3.1.0", "six",],
|
|
+ test_suite="tests",
|
|
+ entry_points={
|
|
+ "console_scripts": [
|
|
+ "xmldiff = xmldiff.main:diff_command",
|
|
+ "xmlpatch = xmldiff.main:patch_command",
|
|
+ ],
|
|
+ },
|
|
)
|
|
+
|
|
Index: xmldiff-2.4/tests/test_diff.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/tests/test_diff.py
|
|
+++ xmldiff-2.4/tests/test_diff.py
|
|
@@ -1,14 +1,22 @@
|
|
import os
|
|
import unittest
|
|
|
|
-from io import open
|
|
from lxml import etree
|
|
from xmldiff import utils
|
|
from xmldiff.diff import Differ
|
|
-from xmldiff.actions import (UpdateTextIn, InsertNode, MoveNode,
|
|
- DeleteNode, UpdateAttrib, InsertAttrib,
|
|
- RenameAttrib, DeleteAttrib, UpdateTextAfter,
|
|
- RenameNode, InsertComment)
|
|
+from xmldiff.actions import (
|
|
+ UpdateTextIn,
|
|
+ InsertNode,
|
|
+ MoveNode,
|
|
+ DeleteNode,
|
|
+ UpdateAttrib,
|
|
+ InsertAttrib,
|
|
+ RenameAttrib,
|
|
+ DeleteAttrib,
|
|
+ UpdateTextAfter,
|
|
+ RenameNode,
|
|
+ InsertComment,
|
|
+)
|
|
|
|
from .testing import compare_elements
|
|
|
|
@@ -16,20 +24,13 @@ from .testing import compare_elements
|
|
def dedent(string):
|
|
"""Remove the maximum common indent of the lines making up the string."""
|
|
lines = string.splitlines()
|
|
- indent = min(
|
|
- len(line) - len(line.lstrip())
|
|
- for line in lines
|
|
- if line
|
|
- )
|
|
- return "\n".join(
|
|
- line[indent:] if line else line
|
|
- for line in lines
|
|
- )
|
|
+ indent = min(len(line) - len(line.lstrip()) for line in lines if line)
|
|
+ return "\n".join(line[indent:] if line else line for line in lines)
|
|
|
|
|
|
class APITests(unittest.TestCase):
|
|
- left = u"<document><p>Text</p><p>More</p></document>"
|
|
- right = u"<document><p>Tokst</p><p>More</p></document>"
|
|
+ left = "<document><p>Text</p><p>More</p></document>"
|
|
+ right = "<document><p>Tokst</p><p>More</p></document>"
|
|
lefttree = etree.fromstring(left)
|
|
righttree = etree.fromstring(right)
|
|
differ = Differ()
|
|
@@ -107,9 +108,8 @@ class APITests(unittest.TestCase):
|
|
|
|
|
|
class NodeRatioTests(unittest.TestCase):
|
|
-
|
|
def test_compare_equal(self):
|
|
- xml = u"""<document>
|
|
+ xml = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section xml:id="oldfirst" ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -127,8 +127,10 @@ class NodeRatioTests(unittest.TestCase):
|
|
|
|
# Every node in these trees should get a 1.0 leaf_ratio,
|
|
# and if it has children, 1.0 child_ration, else None
|
|
- for left, right in zip(utils.post_order_traverse(differ.left),
|
|
- utils.post_order_traverse(differ.right)):
|
|
+ for left, right in zip(
|
|
+ utils.post_order_traverse(differ.left),
|
|
+ utils.post_order_traverse(differ.right),
|
|
+ ):
|
|
self.assertEqual(differ.leaf_ratio(left, right), 1.0)
|
|
if left.getchildren():
|
|
self.assertEqual(differ.child_ratio(left, right), 1.0)
|
|
@@ -136,7 +138,7 @@ class NodeRatioTests(unittest.TestCase):
|
|
self.assertIsNone(differ.child_ratio(left, right))
|
|
|
|
def test_compare_different_leafs(self):
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="2" single-ref="2">
|
|
<para>This doesn't match at all</para>
|
|
@@ -151,7 +153,7 @@ class NodeRatioTests(unittest.TestCase):
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>Completely different from before</para>
|
|
@@ -172,26 +174,24 @@ class NodeRatioTests(unittest.TestCase):
|
|
|
|
# Make some choice comparisons here
|
|
# These node are exactly the same
|
|
- left = lefttree.xpath('/document/story/section[3]/para')[0]
|
|
- right = righttree.xpath('/document/story/section[3]/para')[0]
|
|
+ left = lefttree.xpath("/document/story/section[3]/para")[0]
|
|
+ right = righttree.xpath("/document/story/section[3]/para")[0]
|
|
|
|
self.assertEqual(differ.leaf_ratio(left, right), 1.0)
|
|
|
|
# These nodes have slightly different text, but no children
|
|
- left = lefttree.xpath('/document/story/section[2]/para')[0]
|
|
- right = righttree.xpath('/document/story/section[2]/para')[0]
|
|
+ left = lefttree.xpath("/document/story/section[2]/para")[0]
|
|
+ right = righttree.xpath("/document/story/section[2]/para")[0]
|
|
|
|
- self.assertAlmostEqual(differ.leaf_ratio(left, right),
|
|
- 0.75)
|
|
+ self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.75)
|
|
|
|
# These nodes should not be very similar
|
|
- left = lefttree.xpath('/document/story/section[1]/para')[0]
|
|
- right = righttree.xpath('/document/story/section[1]/para')[0]
|
|
- self.assertAlmostEqual(differ.leaf_ratio(left, right),
|
|
- 0.45614035087719)
|
|
+ left = lefttree.xpath("/document/story/section[1]/para")[0]
|
|
+ right = righttree.xpath("/document/story/section[1]/para")[0]
|
|
+ self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.45614035087719)
|
|
|
|
def test_compare_different_nodes(self):
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="2" single-ref="2">
|
|
<para>First paragraph</para>
|
|
@@ -207,7 +207,7 @@ class NodeRatioTests(unittest.TestCase):
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="2" single-ref="2">
|
|
<para>First paragraph</para>
|
|
@@ -230,28 +230,28 @@ class NodeRatioTests(unittest.TestCase):
|
|
# Make some choice comparisons here. leaf_ratio will always be 1.0,
|
|
# as these leafs have the same attributes and no text, even though
|
|
# attributes may be in different order.
|
|
- left = differ.left.xpath('/document/story/section[1]')[0]
|
|
- right = differ.right.xpath('/document/story/section[1]')[0]
|
|
+ left = differ.left.xpath("/document/story/section[1]")[0]
|
|
+ right = differ.right.xpath("/document/story/section[1]")[0]
|
|
|
|
self.assertEqual(differ.leaf_ratio(left, right), 1.0)
|
|
# Only one of two matches:
|
|
self.assertEqual(differ.child_ratio(left, right), 0.5)
|
|
|
|
- left = differ.left.xpath('/document/story/section[2]')[0]
|
|
- right = differ.right.xpath('/document/story/section[2]')[0]
|
|
+ left = differ.left.xpath("/document/story/section[2]")[0]
|
|
+ right = differ.right.xpath("/document/story/section[2]")[0]
|
|
|
|
self.assertEqual(differ.leaf_ratio(left, right), 1.0)
|
|
# Only one of two matches:
|
|
self.assertEqual(differ.child_ratio(left, right), 0.5)
|
|
|
|
# These nodes should not be very similar
|
|
- left = differ.left.xpath('/document/story/section[3]')[0]
|
|
- right = differ.right.xpath('/document/story/section[3]')[0]
|
|
+ left = differ.left.xpath("/document/story/section[3]")[0]
|
|
+ right = differ.right.xpath("/document/story/section[3]")[0]
|
|
self.assertEqual(differ.leaf_ratio(left, right), 1.0)
|
|
self.assertEqual(differ.child_ratio(left, right), 1.0)
|
|
|
|
def test_compare_with_xmlid(self):
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section xml:id="oldfirst" ref="1" single-ref="1">
|
|
<para>First paragraph</para>
|
|
@@ -267,7 +267,7 @@ class NodeRatioTests(unittest.TestCase):
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section xml:id="newfirst" ref="1" single-ref="1">
|
|
<para>First paragraph</para>
|
|
@@ -289,8 +289,8 @@ class NodeRatioTests(unittest.TestCase):
|
|
|
|
# Make some choice comparisons here.
|
|
|
|
- left = differ.left.xpath('/document/story/section[1]')[0]
|
|
- right = differ.right.xpath('/document/story/section[1]')[0]
|
|
+ left = differ.left.xpath("/document/story/section[1]")[0]
|
|
+ right = differ.right.xpath("/document/story/section[1]")[0]
|
|
|
|
# These are very similar
|
|
self.assertEqual(differ.leaf_ratio(left, right), 0.9)
|
|
@@ -300,8 +300,8 @@ class NodeRatioTests(unittest.TestCase):
|
|
self.assertEqual(differ.node_ratio(left, right), 0)
|
|
|
|
# Here's the ones with the same id:
|
|
- left = differ.left.xpath('/document/story/section[1]')[0]
|
|
- right = differ.right.xpath('/document/story/section[2]')[0]
|
|
+ left = differ.left.xpath("/document/story/section[1]")[0]
|
|
+ right = differ.right.xpath("/document/story/section[2]")[0]
|
|
|
|
# Only one out of two children in common
|
|
self.assertEqual(differ.child_ratio(left, right), 0.5)
|
|
@@ -310,8 +310,8 @@ class NodeRatioTests(unittest.TestCase):
|
|
|
|
# The last ones are completely similar, but only one
|
|
# has an xml:id, so they do not match.
|
|
- left = differ.left.xpath('/document/story/section[3]')[0]
|
|
- right = differ.right.xpath('/document/story/section[3]')[0]
|
|
+ left = differ.left.xpath("/document/story/section[3]")[0]
|
|
+ right = differ.right.xpath("/document/story/section[3]")[0]
|
|
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.81818181818)
|
|
self.assertEqual(differ.child_ratio(left, right), 1.0)
|
|
self.assertEqual(differ.node_ratio(left, right), 0)
|
|
@@ -319,7 +319,8 @@ class NodeRatioTests(unittest.TestCase):
|
|
def test_compare_with_uniqueattrs(self):
|
|
# `uniqueattrs` can be pairs of (tag, attribute) as well as just string
|
|
# attributes.
|
|
- left = dedent(u"""\
|
|
+ left = dedent(
|
|
+ """\
|
|
<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section name="oldfirst" ref="1" single-ref="1">
|
|
@@ -334,9 +335,11 @@ class NodeRatioTests(unittest.TestCase):
|
|
</section>
|
|
</story>
|
|
</document>
|
|
- """)
|
|
+ """
|
|
+ )
|
|
|
|
- right = dedent(u"""\
|
|
+ right = dedent(
|
|
+ """\
|
|
<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section name="newfirst" ref="1" single-ref="1">
|
|
@@ -355,19 +358,22 @@ class NodeRatioTests(unittest.TestCase):
|
|
</subsection>
|
|
</story>
|
|
</document>
|
|
- """)
|
|
+ """
|
|
+ )
|
|
|
|
- differ = Differ(uniqueattrs=[
|
|
- ('section', 'name'),
|
|
- '{http://www.w3.org/XML/1998/namespace}id'
|
|
- ])
|
|
+ differ = Differ(
|
|
+ uniqueattrs=[
|
|
+ ("section", "name"),
|
|
+ "{http://www.w3.org/XML/1998/namespace}id",
|
|
+ ]
|
|
+ )
|
|
differ.set_trees(etree.fromstring(left), etree.fromstring(right))
|
|
differ.match()
|
|
|
|
# Make some choice comparisons here.
|
|
|
|
- left = differ.left.xpath('/document/story/section[1]')[0]
|
|
- right = differ.right.xpath('/document/story/section[1]')[0]
|
|
+ left = differ.left.xpath("/document/story/section[1]")[0]
|
|
+ right = differ.right.xpath("/document/story/section[1]")[0]
|
|
|
|
# These are very similar
|
|
self.assertEqual(differ.leaf_ratio(left, right), 0.90625)
|
|
@@ -377,8 +383,8 @@ class NodeRatioTests(unittest.TestCase):
|
|
self.assertEqual(differ.node_ratio(left, right), 0)
|
|
|
|
# Here's the ones with the same tag and name attribute:
|
|
- left = differ.left.xpath('/document/story/section[1]')[0]
|
|
- right = differ.right.xpath('/document/story/section[2]')[0]
|
|
+ left = differ.left.xpath("/document/story/section[1]")[0]
|
|
+ right = differ.right.xpath("/document/story/section[2]")[0]
|
|
|
|
# Only one out of two children in common
|
|
self.assertEqual(differ.child_ratio(left, right), 0)
|
|
@@ -387,29 +393,29 @@ class NodeRatioTests(unittest.TestCase):
|
|
|
|
# The last ones are completely similar, but only one
|
|
# has an name, so they do not match.
|
|
- left = differ.left.xpath('/document/story/section[3]')[0]
|
|
- right = differ.right.xpath('/document/story/section[3]')[0]
|
|
+ left = differ.left.xpath("/document/story/section[3]")[0]
|
|
+ right = differ.right.xpath("/document/story/section[3]")[0]
|
|
self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.78260869565)
|
|
self.assertEqual(differ.child_ratio(left, right), 1.0)
|
|
self.assertEqual(differ.node_ratio(left, right), 0)
|
|
|
|
# Now these are structurally similar, have the same name, but
|
|
# one of them is not a section, so the uniqueattr does not match
|
|
- left = differ.left.xpath('/document/story/section[1]')[0]
|
|
- right = differ.right.xpath('/document/story/subsection[1]')[0]
|
|
+ left = differ.left.xpath("/document/story/section[1]")[0]
|
|
+ right = differ.right.xpath("/document/story/subsection[1]")[0]
|
|
self.assertAlmostEqual(differ.leaf_ratio(left, right), 1.0)
|
|
self.assertEqual(differ.child_ratio(left, right), 0.5)
|
|
self.assertAlmostEqual(differ.node_ratio(left, right), 0.75)
|
|
|
|
def test_compare_node_rename(self):
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<para>First paragraph</para>
|
|
<para attr="value">Second paragraph</para>
|
|
<para attr="value">Third paragraph</para>
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<section>First paragraph</section>
|
|
<section attr="something else">Second paragraph</section>
|
|
<section attr="something else">A different text</section>
|
|
@@ -421,38 +427,35 @@ class NodeRatioTests(unittest.TestCase):
|
|
differ.match()
|
|
|
|
# Make some choice comparisons here.
|
|
- left = differ.left.xpath('/document/para[1]')[0]
|
|
- right = differ.right.xpath('/document/section[1]')[0]
|
|
+ left = differ.left.xpath("/document/para[1]")[0]
|
|
+ right = differ.right.xpath("/document/section[1]")[0]
|
|
|
|
# These have different tags, but should still match
|
|
- self.assertEqual(differ.leaf_ratio(left, right),
|
|
- 1.0)
|
|
+ self.assertEqual(differ.leaf_ratio(left, right), 1.0)
|
|
|
|
# These have different tags, and different attribute value,
|
|
# but still similar enough
|
|
- left = differ.left.xpath('/document/para[2]')[0]
|
|
- right = differ.right.xpath('/document/section[2]')[0]
|
|
+ left = differ.left.xpath("/document/para[2]")[0]
|
|
+ right = differ.right.xpath("/document/section[2]")[0]
|
|
|
|
# These have different tags, but should still match
|
|
- self.assertAlmostEqual(differ.leaf_ratio(left, right),
|
|
- 0.76190476190476)
|
|
+ self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.76190476190476)
|
|
|
|
# These have different tags, and different attribute value,
|
|
# but still similar enough
|
|
- left = differ.left.xpath('/document/para[3]')[0]
|
|
- right = differ.right.xpath('/document/section[3]')[0]
|
|
+ left = differ.left.xpath("/document/para[3]")[0]
|
|
+ right = differ.right.xpath("/document/section[3]")[0]
|
|
|
|
# These are too different
|
|
- self.assertAlmostEqual(differ.leaf_ratio(left, right),
|
|
- 0.45161290322580)
|
|
+ self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.45161290322580)
|
|
|
|
def test_compare_namespaces(self):
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<foo:para xmlns:foo="someuri">First paragraph</foo:para>
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<foo:para xmlns:foo="otheruri">First paragraph</foo:para>
|
|
</document>
|
|
"""
|
|
@@ -462,46 +465,46 @@ class NodeRatioTests(unittest.TestCase):
|
|
differ.match()
|
|
|
|
# Make some choice comparisons here.
|
|
- left = differ.left.xpath('/document/foo:para[1]',
|
|
- namespaces={'foo': 'someuri'})[0]
|
|
- right = differ.right.xpath('/document/foo:para[1]',
|
|
- namespaces={'foo': 'otheruri'})[0]
|
|
+ left = differ.left.xpath(
|
|
+ "/document/foo:para[1]", namespaces={"foo": "someuri"}
|
|
+ )[0]
|
|
+ right = differ.right.xpath(
|
|
+ "/document/foo:para[1]", namespaces={"foo": "otheruri"}
|
|
+ )[0]
|
|
|
|
# These have different namespaces, but should still match
|
|
- self.assertEqual(differ.leaf_ratio(left, right),
|
|
- 1.0)
|
|
+ self.assertEqual(differ.leaf_ratio(left, right), 1.0)
|
|
|
|
def test_different_ratio_modes(self):
|
|
- node1 = etree.Element('para')
|
|
+ node1 = etree.Element("para")
|
|
node1.text = "This doesn't match at all"
|
|
- node2 = etree.Element('para')
|
|
+ node2 = etree.Element("para")
|
|
node2.text = "It's completely different"
|
|
- node3 = etree.Element('para')
|
|
+ node3 = etree.Element("para")
|
|
node3.text = "Completely different from before"
|
|
|
|
# These texts are very different
|
|
- differ = Differ(ratio_mode='accurate')
|
|
+ differ = Differ(ratio_mode="accurate")
|
|
self.assertAlmostEqual(differ.leaf_ratio(node1, node2), 0.24)
|
|
# However, the quick_ratio doesn't catch that, and think they match
|
|
- differ = Differ(ratio_mode='fast')
|
|
+ differ = Differ(ratio_mode="fast")
|
|
self.assertAlmostEqual(differ.leaf_ratio(node1, node2), 0.64)
|
|
# It still realizes these sentences are different, though.
|
|
- differ = Differ(ratio_mode='fast')
|
|
+ differ = Differ(ratio_mode="fast")
|
|
self.assertAlmostEqual(differ.leaf_ratio(node1, node3), 0.4561403508)
|
|
# Faster thinks the first two are the same!
|
|
- differ = Differ(ratio_mode='faster')
|
|
+ differ = Differ(ratio_mode="faster")
|
|
self.assertAlmostEqual(differ.leaf_ratio(node1, node2), 1.0)
|
|
# And that the third is almost the same
|
|
- differ = Differ(ratio_mode='faster')
|
|
+ differ = Differ(ratio_mode="faster")
|
|
self.assertAlmostEqual(differ.leaf_ratio(node1, node3), 0.8771929824)
|
|
|
|
# Invalid modes raise error:
|
|
with self.assertRaises(ValueError):
|
|
- differ = Differ(ratio_mode='allezlebleus')
|
|
+ differ = Differ(ratio_mode="allezlebleus")
|
|
|
|
|
|
class MatchTests(unittest.TestCase):
|
|
-
|
|
def _match(self, left, right):
|
|
left_tree = etree.fromstring(left)
|
|
right_tree = etree.fromstring(right)
|
|
@@ -513,7 +516,7 @@ class MatchTests(unittest.TestCase):
|
|
return [(lpath(item[0]), rpath(item[1])) for item in matches]
|
|
|
|
def test_same_tree(self):
|
|
- xml = u"""<document>
|
|
+ xml = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section xml:id="oldfirst" ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -537,7 +540,7 @@ class MatchTests(unittest.TestCase):
|
|
# If the number of similar attributes are few it works fine, the
|
|
# differing content of the ref="3" section means it's detected to
|
|
# be an insert.
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -551,7 +554,7 @@ class MatchTests(unittest.TestCase):
|
|
|
|
# We even detect that the first section is an insert without
|
|
# xmlid, but that's less reliable.
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>New paragraph</para>
|
|
@@ -567,27 +570,24 @@ class MatchTests(unittest.TestCase):
|
|
"""
|
|
|
|
result = self._match(left, right)
|
|
- self.assertEqual(result, [
|
|
- ('/document/story/section[1]/para',
|
|
- '/document/story/section[2]/para'),
|
|
- ('/document/story/section[1]',
|
|
- '/document/story/section[2]'),
|
|
- ('/document/story/section[2]/para',
|
|
- '/document/story/section[3]/para'),
|
|
- ('/document/story/section[2]',
|
|
- '/document/story/section[3]'),
|
|
- ('/document/story',
|
|
- '/document/story'),
|
|
- ('/document',
|
|
- '/document')
|
|
- ])
|
|
+ self.assertEqual(
|
|
+ result,
|
|
+ [
|
|
+ ("/document/story/section[1]/para", "/document/story/section[2]/para"),
|
|
+ ("/document/story/section[1]", "/document/story/section[2]"),
|
|
+ ("/document/story/section[2]/para", "/document/story/section[3]/para"),
|
|
+ ("/document/story/section[2]", "/document/story/section[3]"),
|
|
+ ("/document/story", "/document/story"),
|
|
+ ("/document", "/document"),
|
|
+ ],
|
|
+ )
|
|
|
|
def test_with_xmlid(self):
|
|
# This first section contains attributes that are similar (and longer
|
|
# than the content text. That would trick the matcher into matching
|
|
# the oldfirst and the newfirst section to match, except that we
|
|
# this time also have xml:id's, and they trump everything else!
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3" xml:id="oldfirst"
|
|
description="This is to trick the differ">
|
|
@@ -605,7 +605,7 @@ class MatchTests(unittest.TestCase):
|
|
|
|
# We even detect that the first section is an insert without
|
|
# xmlid, but that's less reliable.
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3" xml:id="newfirst"
|
|
description="This is to trick the differ">
|
|
@@ -625,28 +625,23 @@ class MatchTests(unittest.TestCase):
|
|
"""
|
|
|
|
result = self._match(left, right)
|
|
- self.assertEqual(result, [
|
|
- ('/document/story/section[1]/para',
|
|
- '/document/story/section[2]/para'),
|
|
- ('/document/story/section[1]',
|
|
- '/document/story/section[2]'),
|
|
- ('/document/story/section[2]/para',
|
|
- '/document/story/section[3]/para'),
|
|
- ('/document/story/section[2]',
|
|
- '/document/story/section[3]'),
|
|
- ('/document/story/section[3]/para',
|
|
- '/document/story/section[4]/para'),
|
|
- ('/document/story/section[3]',
|
|
- '/document/story/section[4]'),
|
|
- ('/document/story',
|
|
- '/document/story'),
|
|
- ('/document',
|
|
- '/document')
|
|
- ])
|
|
+ self.assertEqual(
|
|
+ result,
|
|
+ [
|
|
+ ("/document/story/section[1]/para", "/document/story/section[2]/para"),
|
|
+ ("/document/story/section[1]", "/document/story/section[2]"),
|
|
+ ("/document/story/section[2]/para", "/document/story/section[3]/para"),
|
|
+ ("/document/story/section[2]", "/document/story/section[3]"),
|
|
+ ("/document/story/section[3]/para", "/document/story/section[4]/para"),
|
|
+ ("/document/story/section[3]", "/document/story/section[4]"),
|
|
+ ("/document/story", "/document/story"),
|
|
+ ("/document", "/document"),
|
|
+ ],
|
|
+ )
|
|
|
|
def test_change_attribs(self):
|
|
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section xml:id="oldfirst" ref="3" single-ref="3">
|
|
<para>First</para>
|
|
@@ -658,7 +653,7 @@ class MatchTests(unittest.TestCase):
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section xml:id="oldfirst" ref="4" single-ref="4">
|
|
<para>First</para>
|
|
@@ -672,23 +667,20 @@ class MatchTests(unittest.TestCase):
|
|
# It matches everything straight, which means the attrib changes
|
|
# should become updates, which makes sense.
|
|
result = self._match(left, right)
|
|
- self.assertEqual(result, [
|
|
- ('/document/story/section[1]/para',
|
|
- '/document/story/section[1]/para'),
|
|
- ('/document/story/section[1]',
|
|
- '/document/story/section[1]'),
|
|
- ('/document/story/section[2]/para',
|
|
- '/document/story/section[2]/para'),
|
|
- ('/document/story/section[2]',
|
|
- '/document/story/section[2]'),
|
|
- ('/document/story',
|
|
- '/document/story'),
|
|
- ('/document',
|
|
- '/document')
|
|
- ])
|
|
+ self.assertEqual(
|
|
+ result,
|
|
+ [
|
|
+ ("/document/story/section[1]/para", "/document/story/section[1]/para"),
|
|
+ ("/document/story/section[1]", "/document/story/section[1]"),
|
|
+ ("/document/story/section[2]/para", "/document/story/section[2]/para"),
|
|
+ ("/document/story/section[2]", "/document/story/section[2]"),
|
|
+ ("/document/story", "/document/story"),
|
|
+ ("/document", "/document"),
|
|
+ ],
|
|
+ )
|
|
|
|
def test_move_paragraph(self):
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -701,7 +693,7 @@ class MatchTests(unittest.TestCase):
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -714,18 +706,27 @@ class MatchTests(unittest.TestCase):
|
|
</document>
|
|
"""
|
|
result = self._match(left, right)
|
|
- self.assertEqual(result, [
|
|
- ('/document/story/section[1]/para[1]',
|
|
- '/document/story/section[1]/para'),
|
|
- ('/document/story/section[1]/para[2]',
|
|
- '/document/story/section[2]/para[1]'),
|
|
- ('/document/story/section[1]', '/document/story/section[1]'),
|
|
- ('/document/story/section[2]/para',
|
|
- '/document/story/section[2]/para[2]'),
|
|
- ('/document/story/section[2]', '/document/story/section[2]'),
|
|
- ('/document/story', '/document/story'),
|
|
- ('/document', '/document')
|
|
- ])
|
|
+ self.assertEqual(
|
|
+ result,
|
|
+ [
|
|
+ (
|
|
+ "/document/story/section[1]/para[1]",
|
|
+ "/document/story/section[1]/para",
|
|
+ ),
|
|
+ (
|
|
+ "/document/story/section[1]/para[2]",
|
|
+ "/document/story/section[2]/para[1]",
|
|
+ ),
|
|
+ ("/document/story/section[1]", "/document/story/section[1]"),
|
|
+ (
|
|
+ "/document/story/section[2]/para",
|
|
+ "/document/story/section[2]/para[2]",
|
|
+ ),
|
|
+ ("/document/story/section[2]", "/document/story/section[2]"),
|
|
+ ("/document/story", "/document/story"),
|
|
+ ("/document", "/document"),
|
|
+ ],
|
|
+ )
|
|
|
|
def test_match_complex_text(self):
|
|
left = """<wrap id="1533728456.41"><para>
|
|
@@ -769,50 +770,50 @@ class MatchTests(unittest.TestCase):
|
|
</para></wrap>"""
|
|
|
|
result = self._match(left, right)
|
|
- self.assertEqual(result, [
|
|
- ('/wrap/para/b', '/wrap/para/b'),
|
|
- ('/wrap/para', '/wrap/para'),
|
|
- ('/wrap', '/wrap')
|
|
- ])
|
|
+ self.assertEqual(
|
|
+ result,
|
|
+ [
|
|
+ ("/wrap/para/b", "/wrap/para/b"),
|
|
+ ("/wrap/para", "/wrap/para"),
|
|
+ ("/wrap", "/wrap"),
|
|
+ ],
|
|
+ )
|
|
|
|
def test_match_insert_node(self):
|
|
- left = u'''<document title="insert-node">
|
|
+ left = """<document title="insert-node">
|
|
<story id="id">
|
|
|
|
</story>
|
|
</document>
|
|
-'''
|
|
- right = u'''<document title="insert-node">
|
|
+"""
|
|
+ right = """<document title="insert-node">
|
|
<story id="id">
|
|
|
|
<h1>Inserted <i>Node</i></h1>
|
|
|
|
</story>
|
|
-</document>'''
|
|
+</document>"""
|
|
result = self._match(left, right)
|
|
- self.assertEqual(result, [
|
|
- ('/document/story', '/document/story'),
|
|
- ('/document', '/document'),
|
|
- ])
|
|
+ self.assertEqual(
|
|
+ result,
|
|
+ [("/document/story", "/document/story"), ("/document", "/document"),],
|
|
+ )
|
|
|
|
def test_entirely_different(self):
|
|
- left = u'''<document title="insert-node">
|
|
+ left = """<document title="insert-node">
|
|
<story id="id">
|
|
|
|
</story>
|
|
</document>
|
|
-'''
|
|
- right = u'''<document title="something else">
|
|
+"""
|
|
+ right = """<document title="something else">
|
|
<h1>Inserted <i>Node</i></h1>
|
|
-</document>'''
|
|
+</document>"""
|
|
result = self._match(left, right)
|
|
- self.assertEqual(result, [
|
|
- ('/document', '/document'),
|
|
- ])
|
|
+ self.assertEqual(result, [("/document", "/document"),])
|
|
|
|
|
|
class FastMatchTests(unittest.TestCase):
|
|
-
|
|
def _match(self, left, right, fast_match):
|
|
left_tree = etree.fromstring(left)
|
|
right_tree = etree.fromstring(right)
|
|
@@ -824,7 +825,7 @@ class FastMatchTests(unittest.TestCase):
|
|
return [(lpath(item[0]), rpath(item[1])) for item in matches]
|
|
|
|
def test_move_paragraph(self):
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -837,7 +838,7 @@ class FastMatchTests(unittest.TestCase):
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -858,7 +859,7 @@ class FastMatchTests(unittest.TestCase):
|
|
def test_move_children(self):
|
|
# Here the paragraphs are all so similar that that each paragraph
|
|
# will match any other.
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -869,7 +870,7 @@ class FastMatchTests(unittest.TestCase):
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>Second paragraph</para>
|
|
@@ -882,31 +883,31 @@ class FastMatchTests(unittest.TestCase):
|
|
# The slow match will match the nodes that match *best*, so it will
|
|
# find that paragraphs have moved around.
|
|
slow_result = sorted(self._match(left, right, False))
|
|
- self.assertEqual(slow_result, [
|
|
- ('/document', '/document'),
|
|
- ('/document/story', '/document/story'),
|
|
- ('/document/story/section', '/document/story/section'),
|
|
- ('/document/story/section/para[1]',
|
|
- '/document/story/section/para[3]'),
|
|
- ('/document/story/section/para[2]',
|
|
- '/document/story/section/para[1]'),
|
|
- ('/document/story/section/para[3]',
|
|
- '/document/story/section/para[2]')
|
|
- ])
|
|
+ self.assertEqual(
|
|
+ slow_result,
|
|
+ [
|
|
+ ("/document", "/document"),
|
|
+ ("/document/story", "/document/story"),
|
|
+ ("/document/story/section", "/document/story/section"),
|
|
+ ("/document/story/section/para[1]", "/document/story/section/para[3]"),
|
|
+ ("/document/story/section/para[2]", "/document/story/section/para[1]"),
|
|
+ ("/document/story/section/para[3]", "/document/story/section/para[2]"),
|
|
+ ],
|
|
+ )
|
|
|
|
# But the fast match will just pick any that matches.
|
|
fast_result = sorted(self._match(left, right, True))
|
|
- self.assertEqual(fast_result, [
|
|
- ('/document', '/document'),
|
|
- ('/document/story', '/document/story'),
|
|
- ('/document/story/section', '/document/story/section'),
|
|
- ('/document/story/section/para[1]',
|
|
- '/document/story/section/para[1]'),
|
|
- ('/document/story/section/para[2]',
|
|
- '/document/story/section/para[2]'),
|
|
- ('/document/story/section/para[3]',
|
|
- '/document/story/section/para[3]')
|
|
- ])
|
|
+ self.assertEqual(
|
|
+ fast_result,
|
|
+ [
|
|
+ ("/document", "/document"),
|
|
+ ("/document/story", "/document/story"),
|
|
+ ("/document/story/section", "/document/story/section"),
|
|
+ ("/document/story/section/para[1]", "/document/story/section/para[1]"),
|
|
+ ("/document/story/section/para[2]", "/document/story/section/para[2]"),
|
|
+ ("/document/story/section/para[3]", "/document/story/section/para[3]"),
|
|
+ ],
|
|
+ )
|
|
|
|
|
|
class UpdateNodeTests(unittest.TestCase):
|
|
@@ -926,7 +927,7 @@ class UpdateNodeTests(unittest.TestCase)
|
|
return steps
|
|
|
|
def test_same_tree(self):
|
|
- xml = u"""<document>
|
|
+ xml = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section xml:id="oldfirst" ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -942,24 +943,28 @@ class UpdateNodeTests(unittest.TestCase)
|
|
self.assertEqual(result, [])
|
|
|
|
def test_attribute_changes(self):
|
|
- left = u"""<root><node attr1="ohyeah" attr2="ohno" attr3="maybe" """\
|
|
- u"""attr0="del">The contained text</node>And a tail!</root>"""
|
|
+ left = (
|
|
+ """<root><node attr1="ohyeah" attr2="ohno" attr3="maybe" """
|
|
+ """attr0="del">The contained text</node>And a tail!</root>"""
|
|
+ )
|
|
|
|
- right = u"""<root><node attr4="ohyeah" attr2="uhhuh" attr3="maybe" """\
|
|
- u"""attr5="new">The new text</node>Also a tail!</root>"""
|
|
+ right = (
|
|
+ """<root><node attr4="ohyeah" attr2="uhhuh" attr3="maybe" """
|
|
+ """attr5="new">The new text</node>Also a tail!</root>"""
|
|
+ )
|
|
|
|
result = self._match(left, right)
|
|
|
|
self.assertEqual(
|
|
result,
|
|
[
|
|
- UpdateAttrib('/root/node[1]', 'attr2', 'uhhuh'),
|
|
- RenameAttrib('/root/node[1]', 'attr1', 'attr4'),
|
|
- InsertAttrib('/root/node[1]', 'attr5', 'new'),
|
|
- DeleteAttrib('/root/node[1]', 'attr0'),
|
|
- UpdateTextIn('/root/node[1]', 'The new text'),
|
|
- UpdateTextAfter('/root/node[1]', 'Also a tail!'),
|
|
- ]
|
|
+ UpdateAttrib("/root/node[1]", "attr2", "uhhuh"),
|
|
+ RenameAttrib("/root/node[1]", "attr1", "attr4"),
|
|
+ InsertAttrib("/root/node[1]", "attr5", "new"),
|
|
+ DeleteAttrib("/root/node[1]", "attr0"),
|
|
+ UpdateTextIn("/root/node[1]", "The new text"),
|
|
+ UpdateTextAfter("/root/node[1]", "Also a tail!"),
|
|
+ ],
|
|
)
|
|
|
|
|
|
@@ -978,7 +983,7 @@ class AlignChildrenTests(unittest.TestCa
|
|
return steps
|
|
|
|
def test_same_tree(self):
|
|
- xml = u"""<document>
|
|
+ xml = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section xml:id="oldfirst" ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -994,7 +999,7 @@ class AlignChildrenTests(unittest.TestCa
|
|
self.assertEqual(result, [])
|
|
|
|
def test_move_paragraph(self):
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -1007,7 +1012,7 @@ class AlignChildrenTests(unittest.TestCa
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -1024,7 +1029,7 @@ class AlignChildrenTests(unittest.TestCa
|
|
self.assertEqual(result, [])
|
|
|
|
def test_move_children(self):
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -1035,7 +1040,7 @@ class AlignChildrenTests(unittest.TestCa
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>Second paragraph</para>
|
|
@@ -1046,9 +1051,14 @@ class AlignChildrenTests(unittest.TestCa
|
|
</document>
|
|
"""
|
|
result = self._align(left, right)
|
|
- self.assertEqual(result,
|
|
- [MoveNode('/document/story/section/para[1]',
|
|
- '/document/story/section[1]', 2)])
|
|
+ self.assertEqual(
|
|
+ result,
|
|
+ [
|
|
+ MoveNode(
|
|
+ "/document/story/section/para[1]", "/document/story/section[1]", 2
|
|
+ )
|
|
+ ],
|
|
+ )
|
|
|
|
|
|
class DiffTests(unittest.TestCase):
|
|
@@ -1065,7 +1075,7 @@ class DiffTests(unittest.TestCase):
|
|
return editscript
|
|
|
|
def test_process(self):
|
|
- left = u"""<document>
|
|
+ left = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -1079,7 +1089,7 @@ class DiffTests(unittest.TestCase):
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document>
|
|
+ right = """<document>
|
|
<story firstPageTemplate="FirstPage">
|
|
<section ref="3" single-ref="3">
|
|
<para>First paragraph</para>
|
|
@@ -1096,17 +1106,19 @@ class DiffTests(unittest.TestCase):
|
|
self.assertEqual(
|
|
result,
|
|
[
|
|
- InsertNode('/document/story[1]', 'section', 1),
|
|
- InsertAttrib('/document/story/section[2]', 'ref', '4'),
|
|
- InsertAttrib('/document/story/section[2]', 'single-ref', '4'),
|
|
- MoveNode('/document/story/section[1]/para[3]',
|
|
- '/document/story/section[2]', 0),
|
|
- InsertNode('/document/story/section[2]', 'para', 1),
|
|
- UpdateTextIn('/document/story/section[2]/para[2]',
|
|
- 'Fourth paragraph'),
|
|
- DeleteNode('/document/story/deleteme/para[1]'),
|
|
- DeleteNode('/document/story/deleteme[1]'),
|
|
- ]
|
|
+ InsertNode("/document/story[1]", "section", 1),
|
|
+ InsertAttrib("/document/story/section[2]", "ref", "4"),
|
|
+ InsertAttrib("/document/story/section[2]", "single-ref", "4"),
|
|
+ MoveNode(
|
|
+ "/document/story/section[1]/para[3]",
|
|
+ "/document/story/section[2]",
|
|
+ 0,
|
|
+ ),
|
|
+ InsertNode("/document/story/section[2]", "para", 1),
|
|
+ UpdateTextIn("/document/story/section[2]/para[2]", "Fourth paragraph"),
|
|
+ DeleteNode("/document/story/deleteme/para[1]"),
|
|
+ DeleteNode("/document/story/deleteme[1]"),
|
|
+ ],
|
|
)
|
|
|
|
def test_needs_align(self):
|
|
@@ -1116,34 +1128,36 @@ class DiffTests(unittest.TestCase):
|
|
self.assertEqual(
|
|
result,
|
|
[
|
|
- MoveNode('/root/n[1]', '/root[1]', 1),
|
|
- MoveNode('/root/n[2]/p[2]', '/root/n[1]', 0),
|
|
- ]
|
|
+ MoveNode("/root/n[1]", "/root[1]", 1),
|
|
+ MoveNode("/root/n[2]/p[2]", "/root/n[1]", 0),
|
|
+ ],
|
|
)
|
|
|
|
def test_no_root_match(self):
|
|
- left = '<root attr="val"><root><n><p>1</p><p>2</p><p>3</p></n>'\
|
|
- '<n><p>4</p></n></root></root>'
|
|
- right = '<root><n><p>2</p><p>4</p></n><n><p>1</p><p>3</p></n></root>'
|
|
+ left = (
|
|
+ '<root attr="val"><root><n><p>1</p><p>2</p><p>3</p></n>'
|
|
+ "<n><p>4</p></n></root></root>"
|
|
+ )
|
|
+ right = "<root><n><p>2</p><p>4</p></n><n><p>1</p><p>3</p></n></root>"
|
|
result = self._diff(left, right)
|
|
self.assertEqual(
|
|
result,
|
|
[
|
|
- DeleteAttrib('/root[1]', 'attr'),
|
|
- MoveNode('/root/root/n[2]', '/root[1]', 0),
|
|
- MoveNode('/root/root/n[1]', '/root[1]', 1),
|
|
- MoveNode('/root/n[2]/p[2]', '/root/n[1]', 0),
|
|
- DeleteNode('/root/root[1]')
|
|
- ]
|
|
+ DeleteAttrib("/root[1]", "attr"),
|
|
+ MoveNode("/root/root/n[2]", "/root[1]", 0),
|
|
+ MoveNode("/root/root/n[1]", "/root[1]", 1),
|
|
+ MoveNode("/root/n[2]/p[2]", "/root/n[1]", 0),
|
|
+ DeleteNode("/root/root[1]"),
|
|
+ ],
|
|
)
|
|
|
|
def test_rmldoc(self):
|
|
here = os.path.split(__file__)[0]
|
|
- lfile = os.path.join(here, 'test_data', 'rmldoc.left.xml')
|
|
- rfile = os.path.join(here, 'test_data', 'rmldoc.right.xml')
|
|
- with open(lfile, 'rt', encoding='utf8') as infile:
|
|
+ lfile = os.path.join(here, "test_data", "rmldoc.left.xml")
|
|
+ rfile = os.path.join(here, "test_data", "rmldoc.right.xml")
|
|
+ with open(lfile, encoding="utf8") as infile:
|
|
left = infile.read()
|
|
- with open(rfile, 'rt', encoding='utf8') as infile:
|
|
+ with open(rfile, encoding="utf8") as infile:
|
|
right = infile.read()
|
|
|
|
result = self._diff(left, right)
|
|
@@ -1151,240 +1165,197 @@ class DiffTests(unittest.TestCase):
|
|
result,
|
|
[
|
|
InsertNode(
|
|
- '/document/story[1]',
|
|
- '{http://namespaces.shoobx.com/application}section',
|
|
- 4),
|
|
- InsertAttrib(
|
|
- '/document/story/app:section[4]', 'hidden', 'false'),
|
|
- InsertAttrib(
|
|
- '/document/story/app:section[4]', 'name', 'sign'),
|
|
- InsertAttrib(
|
|
- '/document/story/app:section[4]', 'ref', '3'),
|
|
- InsertAttrib(
|
|
- '/document/story/app:section[4]', 'removed', 'false'),
|
|
- InsertAttrib(
|
|
- '/document/story/app:section[4]', 'single-ref', '3'),
|
|
+ "/document/story[1]",
|
|
+ "{http://namespaces.shoobx.com/application}section",
|
|
+ 4,
|
|
+ ),
|
|
+ InsertAttrib("/document/story/app:section[4]", "hidden", "false"),
|
|
+ InsertAttrib("/document/story/app:section[4]", "name", "sign"),
|
|
+ InsertAttrib("/document/story/app:section[4]", "ref", "3"),
|
|
+ InsertAttrib("/document/story/app:section[4]", "removed", "false"),
|
|
+ InsertAttrib("/document/story/app:section[4]", "single-ref", "3"),
|
|
InsertAttrib(
|
|
- '/document/story/app:section[4]', 'title', 'Signing Bonus'),
|
|
- UpdateAttrib('/document/story/app:section[5]', 'ref', '4'),
|
|
- UpdateAttrib(
|
|
- '/document/story/app:section[5]', 'single-ref', '4'),
|
|
- UpdateAttrib('/document/story/app:section[6]', 'ref', '5'),
|
|
- UpdateAttrib(
|
|
- '/document/story/app:section[6]', 'single-ref', '5'),
|
|
- UpdateAttrib('/document/story/app:section[7]', 'ref', '6'),
|
|
- UpdateAttrib(
|
|
- '/document/story/app:section[7]', 'single-ref', '6'),
|
|
- UpdateAttrib('/document/story/app:section[8]', 'ref', '7'),
|
|
- UpdateAttrib(
|
|
- '/document/story/app:section[8]', 'single-ref', '7'),
|
|
- UpdateAttrib('/document/story/app:section[9]', 'ref', '8'),
|
|
- UpdateAttrib(
|
|
- '/document/story/app:section[9]', 'single-ref', '8'),
|
|
- UpdateAttrib('/document/story/app:section[10]', 'ref', '9'),
|
|
- UpdateAttrib(
|
|
- '/document/story/app:section[10]', 'single-ref', '9'),
|
|
- UpdateAttrib('/document/story/app:section[11]', 'ref', '10'),
|
|
- UpdateAttrib(
|
|
- '/document/story/app:section[11]', 'single-ref', '10'),
|
|
- UpdateAttrib('/document/story/app:section[12]', 'ref', '11'),
|
|
- UpdateAttrib(
|
|
- '/document/story/app:section[12]', 'single-ref', '11'),
|
|
- UpdateAttrib('/document/story/app:section[14]', 'ref', '12'),
|
|
- UpdateAttrib(
|
|
- '/document/story/app:section[14]', 'single-ref', '12'),
|
|
+ "/document/story/app:section[4]", "title", "Signing Bonus"
|
|
+ ),
|
|
+ UpdateAttrib("/document/story/app:section[5]", "ref", "4"),
|
|
+ UpdateAttrib("/document/story/app:section[5]", "single-ref", "4"),
|
|
+ UpdateAttrib("/document/story/app:section[6]", "ref", "5"),
|
|
+ UpdateAttrib("/document/story/app:section[6]", "single-ref", "5"),
|
|
+ UpdateAttrib("/document/story/app:section[7]", "ref", "6"),
|
|
+ UpdateAttrib("/document/story/app:section[7]", "single-ref", "6"),
|
|
+ UpdateAttrib("/document/story/app:section[8]", "ref", "7"),
|
|
+ UpdateAttrib("/document/story/app:section[8]", "single-ref", "7"),
|
|
+ UpdateAttrib("/document/story/app:section[9]", "ref", "8"),
|
|
+ UpdateAttrib("/document/story/app:section[9]", "single-ref", "8"),
|
|
+ UpdateAttrib("/document/story/app:section[10]", "ref", "9"),
|
|
+ UpdateAttrib("/document/story/app:section[10]", "single-ref", "9"),
|
|
+ UpdateAttrib("/document/story/app:section[11]", "ref", "10"),
|
|
+ UpdateAttrib("/document/story/app:section[11]", "single-ref", "10"),
|
|
+ UpdateAttrib("/document/story/app:section[12]", "ref", "11"),
|
|
+ UpdateAttrib("/document/story/app:section[12]", "single-ref", "11"),
|
|
+ UpdateAttrib("/document/story/app:section[14]", "ref", "12"),
|
|
+ UpdateAttrib("/document/story/app:section[14]", "single-ref", "12"),
|
|
InsertNode(
|
|
- '/document/story/app:section[4]',
|
|
- '{http://namespaces.shoobx.com/application}term',
|
|
- 0),
|
|
- InsertAttrib(
|
|
- '/document/story/app:section[4]/app:term[1]', 'name',
|
|
- 'sign_bonus'),
|
|
+ "/document/story/app:section[4]",
|
|
+ "{http://namespaces.shoobx.com/application}term",
|
|
+ 0,
|
|
+ ),
|
|
InsertAttrib(
|
|
- '/document/story/app:section[4]/app:term[1]', 'set', 'ol'),
|
|
- InsertNode('/document/story/app:section[4]', 'para', 1),
|
|
+ "/document/story/app:section[4]/app:term[1]", "name", "sign_bonus"
|
|
+ ),
|
|
+ InsertAttrib("/document/story/app:section[4]/app:term[1]", "set", "ol"),
|
|
+ InsertNode("/document/story/app:section[4]", "para", 1),
|
|
UpdateTextIn(
|
|
- '/document/story/app:section[1]/para[2]/'
|
|
- 'app:placeholder[1]',
|
|
- 'consectetur'),
|
|
+ "/document/story/app:section[1]/para[2]/" "app:placeholder[1]",
|
|
+ "consectetur",
|
|
+ ),
|
|
InsertNode(
|
|
- '/document/story/app:section[4]/para[1]',
|
|
- '{http://namespaces.shoobx.com/application}ref',
|
|
- 0),
|
|
+ "/document/story/app:section[4]/para[1]",
|
|
+ "{http://namespaces.shoobx.com/application}ref",
|
|
+ 0,
|
|
+ ),
|
|
InsertAttrib(
|
|
- '/document/story/app:section[4]/para/app:ref[1]', 'name',
|
|
- 'sign'),
|
|
+ "/document/story/app:section[4]/para/app:ref[1]", "name", "sign"
|
|
+ ),
|
|
InsertAttrib(
|
|
- '/document/story/app:section[4]/para/app:ref[1]',
|
|
- '{http://namespaces.shoobx.com/preview}body',
|
|
- '<Ref>'),
|
|
- UpdateTextIn(
|
|
- '/document/story/app:section[4]/para/app:ref[1]', '3'),
|
|
- UpdateTextAfter(
|
|
- '/document/story/app:section[4]/para/app:ref[1]', 'eu'),
|
|
- InsertNode('/document/story/app:section[4]/para[1]', 'u', 1),
|
|
+ "/document/story/app:section[4]/para/app:ref[1]",
|
|
+ "{http://namespaces.shoobx.com/preview}body",
|
|
+ "<Ref>",
|
|
+ ),
|
|
+ UpdateTextIn("/document/story/app:section[4]/para/app:ref[1]", "3"),
|
|
+ UpdateTextAfter("/document/story/app:section[4]/para/app:ref[1]", "eu"),
|
|
+ InsertNode("/document/story/app:section[4]/para[1]", "u", 1),
|
|
UpdateTextAfter(
|
|
- '/document/story/app:section[4]/para/u[1]',
|
|
- 'ntum augue.\n\nAliquam nec tortor diam. Ph'),
|
|
+ "/document/story/app:section[4]/para/u[1]",
|
|
+ "ntum augue.\n\nAliquam nec tortor diam. Ph",
|
|
+ ),
|
|
InsertNode(
|
|
- '/document/story/app:section[4]/para[1]',
|
|
- '{http://namespaces.shoobx.com/application}placeholder',
|
|
- 2),
|
|
+ "/document/story/app:section[4]/para[1]",
|
|
+ "{http://namespaces.shoobx.com/application}placeholder",
|
|
+ 2,
|
|
+ ),
|
|
InsertAttrib(
|
|
- '/document/story/app:section[4]/para/app:placeholder[1]',
|
|
- 'field',
|
|
- 'ol.sign_bonus_include_amt'),
|
|
+ "/document/story/app:section[4]/para/app:placeholder[1]",
|
|
+ "field",
|
|
+ "ol.sign_bonus_include_amt",
|
|
+ ),
|
|
InsertAttrib(
|
|
- '/document/story/app:section[4]/para/app:placeholder[1]',
|
|
- 'missing',
|
|
- 'Signing Bonus Amount'),
|
|
+ "/document/story/app:section[4]/para/app:placeholder[1]",
|
|
+ "missing",
|
|
+ "Signing Bonus Amount",
|
|
+ ),
|
|
UpdateTextAfter(
|
|
- '/document/story/app:section[4]/para/app:placeholder[1]',
|
|
- 'asellus congue accumsan tempor. Donec vel risus se'
|
|
+ "/document/story/app:section[4]/para/app:placeholder[1]",
|
|
+ "asellus congue accumsan tempor. Donec vel risus se",
|
|
),
|
|
+ UpdateTextIn("/document/story/app:section[5]/para/app:ref[1]", "4"),
|
|
+ UpdateTextIn("/document/story/app:section[6]/para/app:ref[1]", "5"),
|
|
+ UpdateTextIn("/document/story/app:section[7]/para/app:ref[1]", "6"),
|
|
+ UpdateTextIn("/document/story/app:section[8]/para/app:ref[1]", "7"),
|
|
+ UpdateTextIn("/document/story/app:section[9]/para/app:ref[1]", "8"),
|
|
+ UpdateTextIn("/document/story/app:section[10]/para/app:ref[1]", "9"),
|
|
+ UpdateTextIn("/document/story/app:section[11]/para/app:ref[1]", "10"),
|
|
+ UpdateTextIn("/document/story/app:section[12]/para/app:ref[1]", "11"),
|
|
+ InsertNode("/document/story/app:section[4]/para/u[1]", "b", 0),
|
|
UpdateTextIn(
|
|
- '/document/story/app:section[5]/para/app:ref[1]',
|
|
- '4'),
|
|
- UpdateTextIn(
|
|
- '/document/story/app:section[6]/para/app:ref[1]',
|
|
- '5'),
|
|
- UpdateTextIn(
|
|
- '/document/story/app:section[7]/para/app:ref[1]',
|
|
- '6'),
|
|
- UpdateTextIn(
|
|
- '/document/story/app:section[8]/para/app:ref[1]',
|
|
- '7'),
|
|
- UpdateTextIn(
|
|
- '/document/story/app:section[9]/para/app:ref[1]',
|
|
- '8'),
|
|
- UpdateTextIn(
|
|
- '/document/story/app:section[10]/para/app:ref[1]',
|
|
- '9'),
|
|
- UpdateTextIn(
|
|
- '/document/story/app:section[11]/para/app:ref[1]',
|
|
- '10'),
|
|
- UpdateTextIn(
|
|
- '/document/story/app:section[12]/para/app:ref[1]',
|
|
- '11'),
|
|
- InsertNode('/document/story/app:section[4]/para/u[1]', 'b', 0),
|
|
- UpdateTextIn(
|
|
- '/document/story/app:section[4]/para/u/b[1]',
|
|
- 'ger nec ferme'),
|
|
- ]
|
|
+ "/document/story/app:section[4]/para/u/b[1]", "ger nec ferme"
|
|
+ ),
|
|
+ ],
|
|
)
|
|
|
|
def test_sbt_template(self):
|
|
here = os.path.split(__file__)[0]
|
|
- lfile = os.path.join(here, 'test_data', 'sbt_template.left.xml')
|
|
- rfile = os.path.join(here, 'test_data', 'sbt_template.right.xml')
|
|
- with open(lfile, 'rt', encoding='utf8') as infile:
|
|
+ lfile = os.path.join(here, "test_data", "sbt_template.left.xml")
|
|
+ rfile = os.path.join(here, "test_data", "sbt_template.right.xml")
|
|
+ with open(lfile, encoding="utf8") as infile:
|
|
left = infile.read()
|
|
- with open(rfile, 'rt', encoding='utf8') as infile:
|
|
+ with open(rfile, encoding="utf8") as infile:
|
|
right = infile.read()
|
|
|
|
result = self._diff(left, right)
|
|
|
|
- # Most lines get too long and flake8 complains because of this part:
|
|
- bm_bm_bm = '/metal:block/metal:block/metal:block'
|
|
+ bm_bm_bm = "/metal:block/metal:block/metal:block"
|
|
self.assertEqual(
|
|
result,
|
|
[
|
|
InsertNode(
|
|
- bm_bm_bm + '[1]',
|
|
- '{http://namespaces.shoobx.com/application}section',
|
|
- 0),
|
|
- InsertAttrib(
|
|
- bm_bm_bm + '/app:section[1]',
|
|
- 'allowCustom',
|
|
- 'False'),
|
|
- InsertAttrib(
|
|
- bm_bm_bm + '/app:section[1]',
|
|
- 'hidden',
|
|
- "advisor.payment_type == 'none'"),
|
|
- InsertAttrib(
|
|
- bm_bm_bm + '/app:section[1]',
|
|
- 'name',
|
|
- 'payment'),
|
|
+ bm_bm_bm + "[1]",
|
|
+ "{http://namespaces.shoobx.com/application}section",
|
|
+ 0,
|
|
+ ),
|
|
+ InsertAttrib(bm_bm_bm + "/app:section[1]", "allowCustom", "False"),
|
|
InsertAttrib(
|
|
- bm_bm_bm + '/app:section[1]',
|
|
- 'title',
|
|
- 'Payment'),
|
|
+ bm_bm_bm + "/app:section[1]",
|
|
+ "hidden",
|
|
+ "advisor.payment_type == 'none'",
|
|
+ ),
|
|
+ InsertAttrib(bm_bm_bm + "/app:section[1]", "name", "payment"),
|
|
+ InsertAttrib(bm_bm_bm + "/app:section[1]", "title", "Payment"),
|
|
InsertNode(
|
|
- bm_bm_bm + '/app:section[1]',
|
|
- '{http://xml.zope.org/namespaces/tal}if',
|
|
- 0),
|
|
+ bm_bm_bm + "/app:section[1]",
|
|
+ "{http://xml.zope.org/namespaces/tal}if",
|
|
+ 0,
|
|
+ ),
|
|
InsertAttrib(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[1]',
|
|
- 'condition',
|
|
- "python: advisor.payment_type == 'stock_award'"),
|
|
+ bm_bm_bm + "/app:section[1]/tal:if[1]",
|
|
+ "condition",
|
|
+ "python: advisor.payment_type == 'stock_award'",
|
|
+ ),
|
|
InsertNode(
|
|
- bm_bm_bm + '/app:section[1]',
|
|
- '{http://xml.zope.org/namespaces/tal}if',
|
|
- 1),
|
|
+ bm_bm_bm + "/app:section[1]",
|
|
+ "{http://xml.zope.org/namespaces/tal}if",
|
|
+ 1,
|
|
+ ),
|
|
InsertAttrib(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[2]',
|
|
- 'condition',
|
|
- "python: advisor.payment_type == 'cash'"),
|
|
+ bm_bm_bm + "/app:section[1]/tal:if[2]",
|
|
+ "condition",
|
|
+ "python: advisor.payment_type == 'cash'",
|
|
+ ),
|
|
InsertNode(
|
|
- bm_bm_bm + '/app:section[1]',
|
|
- '{http://xml.zope.org/namespaces/tal}if',
|
|
- 2),
|
|
+ bm_bm_bm + "/app:section[1]",
|
|
+ "{http://xml.zope.org/namespaces/tal}if",
|
|
+ 2,
|
|
+ ),
|
|
InsertAttrib(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[3]',
|
|
- 'condition',
|
|
- "python: advisor.payment_type == 'stock_award_and_cash'"),
|
|
- InsertNode(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[1]',
|
|
- 'para',
|
|
- 0),
|
|
- UpdateTextIn(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[1]/para[1]',
|
|
- '\n A '),
|
|
- InsertNode(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[2]',
|
|
- 'para',
|
|
- 0),
|
|
+ bm_bm_bm + "/app:section[1]/tal:if[3]",
|
|
+ "condition",
|
|
+ "python: advisor.payment_type == 'stock_award_and_cash'",
|
|
+ ),
|
|
+ InsertNode(bm_bm_bm + "/app:section[1]/tal:if[1]", "para", 0),
|
|
UpdateTextIn(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[2]/para[1]',
|
|
- '\n More text for diffing purposes\n '),
|
|
- InsertNode(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[3]',
|
|
- 'para',
|
|
- 0),
|
|
+ bm_bm_bm + "/app:section[1]/tal:if[1]/para[1]", "\n A "
|
|
+ ),
|
|
+ InsertNode(bm_bm_bm + "/app:section[1]/tal:if[2]", "para", 0),
|
|
UpdateTextIn(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[3]/para[1]',
|
|
- '\n Lorem hipster ipso facto\n '),
|
|
- InsertNode(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[1]/para[1]',
|
|
- 'i',
|
|
- 0),
|
|
+ bm_bm_bm + "/app:section[1]/tal:if[2]/para[1]",
|
|
+ "\n More text for diffing purposes\n ",
|
|
+ ),
|
|
+ InsertNode(bm_bm_bm + "/app:section[1]/tal:if[3]", "para", 0),
|
|
UpdateTextIn(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[1]/para/i[1]',
|
|
- 'whole'),
|
|
+ bm_bm_bm + "/app:section[1]/tal:if[3]/para[1]",
|
|
+ "\n Lorem hipster ipso facto\n ",
|
|
+ ),
|
|
+ InsertNode(bm_bm_bm + "/app:section[1]/tal:if[1]/para[1]", "i", 0),
|
|
+ UpdateTextIn(bm_bm_bm + "/app:section[1]/tal:if[1]/para/i[1]", "whole"),
|
|
UpdateTextAfter(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[1]/para/i[1]',
|
|
- ' load of formatted text and '),
|
|
- InsertNode(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[1]/para[1]',
|
|
- 'br',
|
|
- 1),
|
|
+ bm_bm_bm + "/app:section[1]/tal:if[1]/para/i[1]",
|
|
+ " load of formatted text and ",
|
|
+ ),
|
|
+ InsertNode(bm_bm_bm + "/app:section[1]/tal:if[1]/para[1]", "br", 1),
|
|
UpdateTextAfter(
|
|
- bm_bm_bm + '/app:section[1]/tal:if[1]/para/br[1]',
|
|
- ' other stuff.\n '),
|
|
- DeleteNode(
|
|
- bm_bm_bm + '/app:section[2]/tal:if/para/b[1]'),
|
|
- DeleteNode(
|
|
- bm_bm_bm + '/app:section[2]/tal:if/para[1]'),
|
|
- DeleteNode(
|
|
- bm_bm_bm + '/app:section[2]/tal:if[1]'),
|
|
- DeleteNode(
|
|
- bm_bm_bm + '/app:section[2]')
|
|
- ]
|
|
+ bm_bm_bm + "/app:section[1]/tal:if[1]/para/br[1]",
|
|
+ " other stuff.\n ",
|
|
+ ),
|
|
+ DeleteNode(bm_bm_bm + "/app:section[2]/tal:if/para/b[1]"),
|
|
+ DeleteNode(bm_bm_bm + "/app:section[2]/tal:if/para[1]"),
|
|
+ DeleteNode(bm_bm_bm + "/app:section[2]/tal:if[1]"),
|
|
+ DeleteNode(bm_bm_bm + "/app:section[2]"),
|
|
+ ],
|
|
)
|
|
|
|
def test_namespace(self):
|
|
# Test changing nodes and attributes with namespaces
|
|
- left = u"""<document xmlns:app="someuri">
|
|
+ left = """<document xmlns:app="someuri">
|
|
<story app:foo="FirstPage">
|
|
<app:section>
|
|
<foo:para xmlns:foo="otheruri">Lorem ipsum dolor sit amet,
|
|
@@ -1407,7 +1378,7 @@ class DiffTests(unittest.TestCase):
|
|
</document>
|
|
"""
|
|
|
|
- right = u"""<document xmlns:app="someuri">
|
|
+ right = """<document xmlns:app="someuri">
|
|
<story app:foo="FirstPage">
|
|
<app:section>
|
|
<app:para>Lorem ipsum dolor sit amet,
|
|
@@ -1433,17 +1404,17 @@ class DiffTests(unittest.TestCase):
|
|
self.assertEqual(
|
|
result,
|
|
[
|
|
- RenameNode(
|
|
- '/document/story/app:section/foo:para[1]',
|
|
- '{someuri}para'),
|
|
+ RenameNode("/document/story/app:section/foo:para[1]", "{someuri}para"),
|
|
InsertAttrib(
|
|
- '/document/story/app:section/app:para[3]',
|
|
- '{someuri}attrib', 'value'),
|
|
- ]
|
|
+ "/document/story/app:section/app:para[3]",
|
|
+ "{someuri}attrib",
|
|
+ "value",
|
|
+ ),
|
|
+ ],
|
|
)
|
|
|
|
def test_multiple_tag_deletes(self):
|
|
- left = u"""<document title="delte-node-ul">
|
|
+ left = """<document title="delte-node-ul">
|
|
<story id="id">
|
|
|
|
<ul>
|
|
@@ -1455,7 +1426,7 @@ class DiffTests(unittest.TestCase):
|
|
</story>
|
|
</document>"""
|
|
|
|
- right = u"""<document title="delte-node-ul">
|
|
+ right = """<document title="delte-node-ul">
|
|
<story id="id">
|
|
</story>
|
|
</document>"""
|
|
@@ -1463,23 +1434,21 @@ class DiffTests(unittest.TestCase):
|
|
result = self._diff(left, right)
|
|
self.assertEqual(
|
|
result,
|
|
- [UpdateTextIn('/document/story[1]', '\n '),
|
|
- DeleteNode('/document/story/ul/li[3]'),
|
|
- DeleteNode('/document/story/ul/li[2]'),
|
|
- DeleteNode('/document/story/ul/li[1]'),
|
|
- DeleteNode('/document/story/ul[1]'),
|
|
- ]
|
|
+ [
|
|
+ UpdateTextIn("/document/story[1]", "\n "),
|
|
+ DeleteNode("/document/story/ul/li[3]"),
|
|
+ DeleteNode("/document/story/ul/li[2]"),
|
|
+ DeleteNode("/document/story/ul/li[1]"),
|
|
+ DeleteNode("/document/story/ul[1]"),
|
|
+ ],
|
|
)
|
|
|
|
def test_insert_comment(self):
|
|
- left = u"<doc><body>Something</body></doc>"
|
|
- right = u"<doc><!-- New comment! --><body>Something</body></doc>"
|
|
+ left = "<doc><body>Something</body></doc>"
|
|
+ right = "<doc><!-- New comment! --><body>Something</body></doc>"
|
|
|
|
result = self._diff(left, right)
|
|
- self.assertEqual(
|
|
- result,
|
|
- [InsertComment('/doc[1]', 0, ' New comment! ')]
|
|
- )
|
|
+ self.assertEqual(result, [InsertComment("/doc[1]", 0, " New comment! ")])
|
|
|
|
def test_issue_21_default_namespaces(self):
|
|
# When you have a default namespace you get "*" instead of the
|
|
@@ -1488,4 +1457,4 @@ class DiffTests(unittest.TestCase):
|
|
left = '<tag xmlns="ns">old</tag>'
|
|
right = '<tag xmlns="ns">new</tag>'
|
|
result = self._diff(left, right)
|
|
- self.assertEqual(result[0].node, '/*[1]')
|
|
+ self.assertEqual(result[0].node, "/*[1]")
|
|
Index: xmldiff-2.4/tests/test_formatting.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/tests/test_formatting.py
|
|
+++ xmldiff-2.4/tests/test_formatting.py
|
|
@@ -1,4 +1,3 @@
|
|
-# -*- coding: UTF-8 -*-
|
|
import os
|
|
import sys
|
|
import unittest
|
|
@@ -8,89 +7,80 @@ from xmldiff import formatting, main, ac
|
|
|
|
from .testing import generate_filebased_cases
|
|
|
|
-START = u'<document xmlns:diff="http://namespaces.shoobx.com/diff"><node'
|
|
-END = u'</node></document>'
|
|
+START = '<document xmlns:diff="http://namespaces.shoobx.com/diff"><node'
|
|
+END = "</node></document>"
|
|
|
|
|
|
class PlaceholderMakerTests(unittest.TestCase):
|
|
-
|
|
def test_get_placeholder(self):
|
|
replacer = formatting.PlaceholderMaker()
|
|
# Get a placeholder:
|
|
- ph = replacer.get_placeholder(
|
|
- etree.Element('tag'), formatting.T_OPEN, None)
|
|
- self.assertEqual(ph, u'\ue005')
|
|
+ ph = replacer.get_placeholder(etree.Element("tag"), formatting.T_OPEN, None)
|
|
+ self.assertEqual(ph, "\ue005")
|
|
# Do it again:
|
|
- ph = replacer.get_placeholder(
|
|
- etree.Element('tag'), formatting.T_OPEN, None)
|
|
- self.assertEqual(ph, u'\ue005')
|
|
+ ph = replacer.get_placeholder(etree.Element("tag"), formatting.T_OPEN, None)
|
|
+ self.assertEqual(ph, "\ue005")
|
|
# Get another one
|
|
- ph = replacer.get_placeholder(
|
|
- etree.Element('tag'), formatting.T_CLOSE, ph)
|
|
- self.assertEqual(ph, u'\ue006')
|
|
+ ph = replacer.get_placeholder(etree.Element("tag"), formatting.T_CLOSE, ph)
|
|
+ self.assertEqual(ph, "\ue006")
|
|
|
|
def test_do_element(self):
|
|
- replacer = formatting.PlaceholderMaker(['p'], ['b'])
|
|
+ replacer = formatting.PlaceholderMaker(["p"], ["b"])
|
|
|
|
# Formatting tags get replaced, and the content remains
|
|
- text = u'<p>This is a tag with <b>formatted</b> text.</p>'
|
|
+ text = "<p>This is a tag with <b>formatted</b> text.</p>"
|
|
element = etree.fromstring(text)
|
|
replacer.do_element(element)
|
|
|
|
self.assertEqual(
|
|
etree.tounicode(element),
|
|
- u'<p>This is a tag with \ue006formatted\ue005 text.</p>')
|
|
+ "<p>This is a tag with \ue006formatted\ue005 text.</p>",
|
|
+ )
|
|
|
|
replacer.undo_element(element)
|
|
self.assertEqual(etree.tounicode(element), text)
|
|
|
|
# Non formatting tags get replaced with content
|
|
- text = u'<p>This is a tag with <foo>formatted</foo> text.</p>'
|
|
+ text = "<p>This is a tag with <foo>formatted</foo> text.</p>"
|
|
element = etree.fromstring(text)
|
|
replacer.do_element(element)
|
|
result = etree.tounicode(element)
|
|
- self.assertEqual(
|
|
- result,
|
|
- u'<p>This is a tag with \ue007 text.</p>')
|
|
+ self.assertEqual(result, "<p>This is a tag with \ue007 text.</p>")
|
|
|
|
# Single formatting tags still get two placeholders.
|
|
- text = u'<p>This is a <b/> with <foo/> text.</p>'
|
|
+ text = "<p>This is a <b/> with <foo/> text.</p>"
|
|
element = etree.fromstring(text)
|
|
replacer.do_element(element)
|
|
result = etree.tounicode(element)
|
|
- self.assertEqual(
|
|
- result,
|
|
- u'<p>This is a \ue009\ue008 with \ue00a text.</p>')
|
|
+ self.assertEqual(result, "<p>This is a \ue009\ue008 with \ue00a text.</p>")
|
|
|
|
def test_do_undo_element(self):
|
|
- replacer = formatting.PlaceholderMaker(['p'], ['b'])
|
|
+ replacer = formatting.PlaceholderMaker(["p"], ["b"])
|
|
|
|
# Formatting tags get replaced, and the content remains
|
|
- text = u'<p>This <is/> a <f>tag</f> with <b>formatted</b> text.</p>'
|
|
+ text = "<p>This <is/> a <f>tag</f> with <b>formatted</b> text.</p>"
|
|
element = etree.fromstring(text)
|
|
replacer.do_element(element)
|
|
|
|
self.assertEqual(
|
|
- element.text,
|
|
- u'This \ue005 a \ue006 with \ue008formatted'
|
|
- u'\ue007 text.')
|
|
+ element.text, "This \ue005 a \ue006 with \ue008formatted" "\ue007 text."
|
|
+ )
|
|
|
|
replacer.undo_element(element)
|
|
result = etree.tounicode(element)
|
|
self.assertEqual(result, text)
|
|
|
|
def test_do_undo_element_double_format(self):
|
|
- replacer = formatting.PlaceholderMaker(['p'], ['b', 'u'])
|
|
+ replacer = formatting.PlaceholderMaker(["p"], ["b", "u"])
|
|
|
|
# Formatting tags get replaced, and the content remains
|
|
- text = u'<p>This is <u>doubly <b>formatted</b></u> text.</p>'
|
|
+ text = "<p>This is <u>doubly <b>formatted</b></u> text.</p>"
|
|
element = etree.fromstring(text)
|
|
replacer.do_element(element)
|
|
|
|
self.assertEqual(
|
|
- element.text,
|
|
- u'This is \ue006doubly \ue008formatted\ue007'
|
|
- u'\ue005 text.')
|
|
+ element.text, "This is \ue006doubly \ue008formatted\ue007" "\ue005 text."
|
|
+ )
|
|
|
|
replacer.undo_element(element)
|
|
result = etree.tounicode(element)
|
|
@@ -98,7 +88,7 @@ class PlaceholderMakerTests(unittest.Tes
|
|
|
|
def test_rml_bug(self):
|
|
etree.register_namespace(formatting.DIFF_PREFIX, formatting.DIFF_NS)
|
|
- before_diff = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
|
|
+ before_diff = """<document xmlns:diff="http://namespaces.shoobx.com/diff">
|
|
<section>
|
|
<para>
|
|
<ref>4</ref>.
|
|
@@ -109,9 +99,10 @@ class PlaceholderMakerTests(unittest.Tes
|
|
</document>"""
|
|
tree = etree.fromstring(before_diff)
|
|
replacer = formatting.PlaceholderMaker(
|
|
- text_tags=('para',), formatting_tags=('b', 'u', 'i',))
|
|
+ text_tags=("para",), formatting_tags=("b", "u", "i",)
|
|
+ )
|
|
replacer.do_tree(tree)
|
|
- after_diff = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
|
|
+ after_diff = """<document xmlns:diff="http://namespaces.shoobx.com/diff">
|
|
<section>
|
|
<para>
|
|
<insert>\ue005</insert>.
|
|
@@ -122,15 +113,13 @@ class PlaceholderMakerTests(unittest.Tes
|
|
</document>"""
|
|
|
|
# The diff formatting will find some text to insert.
|
|
- delete_attrib = u'{%s}delete-format' % formatting.DIFF_NS
|
|
- replacer.placeholder2tag[u'\ue006'
|
|
- ].element.attrib[delete_attrib] = ''
|
|
- replacer.placeholder2tag[u'\ue007'
|
|
- ].element.attrib[delete_attrib] = ''
|
|
+ delete_attrib = "{%s}delete-format" % formatting.DIFF_NS
|
|
+ replacer.placeholder2tag["\ue006"].element.attrib[delete_attrib] = ""
|
|
+ replacer.placeholder2tag["\ue007"].element.attrib[delete_attrib] = ""
|
|
tree = etree.fromstring(after_diff)
|
|
replacer.undo_tree(tree)
|
|
result = etree.tounicode(tree)
|
|
- expected = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
|
|
+ expected = """<document xmlns:diff="http://namespaces.shoobx.com/diff">
|
|
<section>
|
|
<para>
|
|
<insert><ref>4</ref></insert>.
|
|
@@ -150,18 +139,17 @@ class PlaceholderMakerTests(unittest.Tes
|
|
# This is the last character of the Private use area
|
|
formatting.PLACEHOLDER_START = 0xF8FF
|
|
|
|
- replacer = formatting.PlaceholderMaker(['p'], ['b'])
|
|
+ replacer = formatting.PlaceholderMaker(["p"], ["b"])
|
|
|
|
# Formatting tags get replaced, and the content remains
|
|
- text = u'<p>This <is/> a <f>tag</f> with <b>some</b> text.</p>'
|
|
+ text = "<p>This <is/> a <f>tag</f> with <b>some</b> text.</p>"
|
|
element = etree.fromstring(text)
|
|
replacer.do_element(element)
|
|
|
|
#
|
|
self.assertEqual(
|
|
- element.text,
|
|
- u'This \uf904 a \uf905 with \uf907some'
|
|
- u'\uf906 text.')
|
|
+ element.text, "This \uf904 a \uf905 with \uf907some" "\uf906 text."
|
|
+ )
|
|
|
|
try:
|
|
# If this is a wide build, also test what happens if we
|
|
@@ -169,18 +157,19 @@ class PlaceholderMakerTests(unittest.Tes
|
|
# (On narrow builds this will give an error)
|
|
formatting.PLACEHOLDER_START = 0xFFFF
|
|
|
|
- replacer = formatting.PlaceholderMaker(['p'], ['b'])
|
|
+ replacer = formatting.PlaceholderMaker(["p"], ["b"])
|
|
|
|
# Formatting tags get replaced, and the content remains
|
|
- text = u'<p>This <is/> a <f>tag</f> with <b>some</b> text.</p>'
|
|
+ text = "<p>This <is/> a <f>tag</f> with <b>some</b> text.</p>"
|
|
element = etree.fromstring(text)
|
|
replacer.do_element(element)
|
|
|
|
# This should raise an error on a narrow build
|
|
self.assertEqual(
|
|
element.text,
|
|
- u'This \U00010004 a \U00010005 with \U00010007some'
|
|
- u'\U00010006 text.')
|
|
+ "This \U00010004 a \U00010005 with \U00010007some"
|
|
+ "\U00010006 text.",
|
|
+ )
|
|
except ValueError:
|
|
if sys.maxunicode > 0x10000:
|
|
# This is a wide build, we should NOT get an error
|
|
@@ -192,229 +181,224 @@ class PlaceholderMakerTests(unittest.Tes
|
|
|
|
|
|
class XMLFormatTests(unittest.TestCase):
|
|
-
|
|
def _format_test(self, left, action, expected):
|
|
formatter = formatting.XMLFormatter(pretty_print=False)
|
|
result = formatter.format([action], etree.fromstring(left))
|
|
self.assertEqual(result, expected)
|
|
|
|
def test_incorrect_xpaths(self):
|
|
- left = u'<document><node a="v"/><node>Text</node></document>'
|
|
- expected = START + u' diff:delete-attr="a">Text' + END
|
|
+ left = '<document><node a="v"/><node>Text</node></document>'
|
|
+ expected = START + ' diff:delete-attr="a">Text' + END
|
|
|
|
with self.assertRaises(ValueError):
|
|
- action = actions.DeleteAttrib('/document/node', 'a')
|
|
+ action = actions.DeleteAttrib("/document/node", "a")
|
|
self._format_test(left, action, expected)
|
|
|
|
with self.assertRaises(ValueError):
|
|
- action = actions.DeleteAttrib('/document/ummagumma', 'a')
|
|
+ action = actions.DeleteAttrib("/document/ummagumma", "a")
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_del_attr(self):
|
|
- left = u'<document><node a="v">Text</node></document>'
|
|
- action = actions.DeleteAttrib('/document/node', 'a')
|
|
- expected = START + u' diff:delete-attr="a">Text' + END
|
|
+ left = '<document><node a="v">Text</node></document>'
|
|
+ action = actions.DeleteAttrib("/document/node", "a")
|
|
+ expected = START + ' diff:delete-attr="a">Text' + END
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_del_node(self):
|
|
- left = u'<document><node attr="val">Text</node></document>'
|
|
- action = actions.DeleteNode('/document/node')
|
|
- expected = START + u' attr="val" diff:delete="">Text' + END
|
|
+ left = '<document><node attr="val">Text</node></document>'
|
|
+ action = actions.DeleteNode("/document/node")
|
|
+ expected = START + ' attr="val" diff:delete="">Text' + END
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_del_text(self):
|
|
- left = u'<document><node attr="val">Text</node></document>'
|
|
- action = actions.UpdateTextIn('/document/node', None)
|
|
- expected = START + u' attr="val"><diff:delete>Text</diff:delete>' + END
|
|
+ left = '<document><node attr="val">Text</node></document>'
|
|
+ action = actions.UpdateTextIn("/document/node", None)
|
|
+ expected = START + ' attr="val"><diff:delete>Text</diff:delete>' + END
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_insert_attr(self):
|
|
- left = u'<document><node>We need more text</node></document>'
|
|
- action = actions.InsertAttrib('/document/node', 'attr', 'val')
|
|
- expected = START + u' attr="val" diff:add-attr="attr">'\
|
|
- u'We need more text' + END
|
|
+ left = "<document><node>We need more text</node></document>"
|
|
+ action = actions.InsertAttrib("/document/node", "attr", "val")
|
|
+ expected = START + ' attr="val" diff:add-attr="attr">' "We need more text" + END
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_insert_node(self):
|
|
- left = u'<document></document>'
|
|
- action = actions.InsertNode('/document', 'node', 0)
|
|
- expected = START + u' diff:insert=""/></document>'
|
|
+ left = "<document></document>"
|
|
+ action = actions.InsertNode("/document", "node", 0)
|
|
+ expected = START + ' diff:insert=""/></document>'
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_move_attr(self):
|
|
# The library currently only uses move attr for when attributes are
|
|
# renamed:
|
|
- left = u'<document><node attr="val">Text</node></document>'
|
|
- action = actions.RenameAttrib('/document/node', 'attr', 'bottr')
|
|
- expected = START + u' bottr="val" diff:rename-attr="attr:bottr"'\
|
|
- u'>Text' + END
|
|
+ left = '<document><node attr="val">Text</node></document>'
|
|
+ action = actions.RenameAttrib("/document/node", "attr", "bottr")
|
|
+ expected = START + ' bottr="val" diff:rename-attr="attr:bottr"' ">Text" + END
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_move_node(self):
|
|
# Move 1 down
|
|
- left = u'<document><node id="1" /><node id="2" /></document>'
|
|
- action = actions.MoveNode('/document/node[1]', '/document', 1)
|
|
- expected = START + u' id="1" diff:delete=""/><node id="2"/><node '\
|
|
- u'id="1" diff:insert=""/></document>'
|
|
+ left = '<document><node id="1" /><node id="2" /></document>'
|
|
+ action = actions.MoveNode("/document/node[1]", "/document", 1)
|
|
+ expected = (
|
|
+ START + ' id="1" diff:delete=""/><node id="2"/><node '
|
|
+ 'id="1" diff:insert=""/></document>'
|
|
+ )
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
# Move 2 up (same result, different diff)
|
|
- left = u'<document><node id="1" /><node id="2" /></document>'
|
|
- action = actions.MoveNode('/document/node[2]', '/document', 0)
|
|
- expected = START + u' id="2" diff:insert=""/><node id="1"/><node '\
|
|
- u'id="2" diff:delete=""/></document>'
|
|
+ left = '<document><node id="1" /><node id="2" /></document>'
|
|
+ action = actions.MoveNode("/document/node[2]", "/document", 0)
|
|
+ expected = (
|
|
+ START + ' id="2" diff:insert=""/><node id="1"/><node '
|
|
+ 'id="2" diff:delete=""/></document>'
|
|
+ )
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_rename_node(self):
|
|
- left = u'<document><node><para>Content</para>Tail</node></document>'
|
|
- action = actions.RenameNode('/document/node[1]/para[1]', 'newtag')
|
|
- expected = START + u'><newtag diff:rename="para">Content'\
|
|
- '</newtag>Tail' + END
|
|
+ left = "<document><node><para>Content</para>Tail</node></document>"
|
|
+ action = actions.RenameNode("/document/node[1]/para[1]", "newtag")
|
|
+ expected = START + '><newtag diff:rename="para">Content' "</newtag>Tail" + END
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_update_attr(self):
|
|
- left = u'<document><node attr="val"/></document>'
|
|
- action = actions.UpdateAttrib('/document/node', 'attr', 'newval')
|
|
- expected = START + u' attr="newval" diff:update-attr="attr:val"/>'\
|
|
- u'</document>'
|
|
+ left = '<document><node attr="val"/></document>'
|
|
+ action = actions.UpdateAttrib("/document/node", "attr", "newval")
|
|
+ expected = START + ' attr="newval" diff:update-attr="attr:val"/>' "</document>"
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_update_text_in(self):
|
|
- left = u'<document><node attr="val"/></document>'
|
|
- action = actions.UpdateTextIn('/document/node', 'Text')
|
|
- expected = START + u' attr="val"><diff:insert>Text</diff:insert>' + END
|
|
+ left = '<document><node attr="val"/></document>'
|
|
+ action = actions.UpdateTextIn("/document/node", "Text")
|
|
+ expected = START + ' attr="val"><diff:insert>Text</diff:insert>' + END
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
- left = u'<document><node>This is a bit of text, right' + END
|
|
- action = actions.UpdateTextIn('/document/node',
|
|
- 'Also a bit of text, rick')
|
|
- expected = START + u'><diff:delete>This is</diff:delete><diff:insert>'\
|
|
- u'Also</diff:insert> a bit of text, ri<diff:delete>ght'\
|
|
- u'</diff:delete><diff:insert>ck</diff:insert>' + END
|
|
+ left = "<document><node>This is a bit of text, right" + END
|
|
+ action = actions.UpdateTextIn("/document/node", "Also a bit of text, rick")
|
|
+ expected = (
|
|
+ START + "><diff:delete>This is</diff:delete><diff:insert>"
|
|
+ "Also</diff:insert> a bit of text, ri<diff:delete>ght"
|
|
+ "</diff:delete><diff:insert>ck</diff:insert>" + END
|
|
+ )
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_update_text_after_1(self):
|
|
- left = u'<document><node/><node/></document>'
|
|
- action = actions.UpdateTextAfter('/document/node[1]', 'Text')
|
|
- expected = START + u'/><diff:insert>Text</diff:insert>'\
|
|
- u'<node/></document>'
|
|
+ left = "<document><node/><node/></document>"
|
|
+ action = actions.UpdateTextAfter("/document/node[1]", "Text")
|
|
+ expected = START + "/><diff:insert>Text</diff:insert>" "<node/></document>"
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
def test_update_text_after_2(self):
|
|
- left = u'<document><node/>This is a bit of text, right</document>'
|
|
- action = actions.UpdateTextAfter('/document/node',
|
|
- 'Also a bit of text, rick')
|
|
- expected = START + u'/><diff:delete>This is</diff:delete>'\
|
|
- u'<diff:insert>Also</diff:insert> a bit of text, ri<diff:delete>'\
|
|
- u'ght</diff:delete><diff:insert>ck</diff:insert></document>'
|
|
+ left = "<document><node/>This is a bit of text, right</document>"
|
|
+ action = actions.UpdateTextAfter("/document/node", "Also a bit of text, rick")
|
|
+ expected = (
|
|
+ START + "/><diff:delete>This is</diff:delete>"
|
|
+ "<diff:insert>Also</diff:insert> a bit of text, ri<diff:delete>"
|
|
+ "ght</diff:delete><diff:insert>ck</diff:insert></document>"
|
|
+ )
|
|
|
|
self._format_test(left, action, expected)
|
|
|
|
|
|
class DiffFormatTests(unittest.TestCase):
|
|
-
|
|
def _format_test(self, action, expected):
|
|
formatter = formatting.DiffFormatter()
|
|
result = formatter.format([action], None)
|
|
self.assertEqual(result, expected)
|
|
|
|
def test_del_attr(self):
|
|
- action = actions.DeleteAttrib('/document/node', 'a')
|
|
- expected = '[delete-attribute, /document/node, a]'
|
|
+ action = actions.DeleteAttrib("/document/node", "a")
|
|
+ expected = "[delete-attribute, /document/node, a]"
|
|
self._format_test(action, expected)
|
|
|
|
def test_del_node(self):
|
|
- action = actions.DeleteNode('/document/node')
|
|
- expected = '[delete, /document/node]'
|
|
+ action = actions.DeleteNode("/document/node")
|
|
+ expected = "[delete, /document/node]"
|
|
self._format_test(action, expected)
|
|
|
|
def test_del_text(self):
|
|
- action = actions.UpdateTextIn('/document/node', None)
|
|
- expected = '[update-text, /document/node, null]'
|
|
+ action = actions.UpdateTextIn("/document/node", None)
|
|
+ expected = "[update-text, /document/node, null]"
|
|
self._format_test(action, expected)
|
|
|
|
def test_insert_attr(self):
|
|
- action = actions.InsertAttrib('/document/node', 'attr', 'val')
|
|
+ action = actions.InsertAttrib("/document/node", "attr", "val")
|
|
expected = '[insert-attribute, /document/node, attr, "val"]'
|
|
self._format_test(action, expected)
|
|
|
|
def test_insert_node(self):
|
|
- action = actions.InsertNode('/document', 'node', 0)
|
|
- expected = '[insert, /document, node, 0]'
|
|
+ action = actions.InsertNode("/document", "node", 0)
|
|
+ expected = "[insert, /document, node, 0]"
|
|
self._format_test(action, expected)
|
|
|
|
def test_rename_attr(self):
|
|
- action = actions.RenameAttrib('/document/node', 'attr', 'bottr')
|
|
- expected = '[rename-attribute, /document/node, attr, bottr]'
|
|
+ action = actions.RenameAttrib("/document/node", "attr", "bottr")
|
|
+ expected = "[rename-attribute, /document/node, attr, bottr]"
|
|
self._format_test(action, expected)
|
|
|
|
def test_move_node(self):
|
|
# Move 1 down
|
|
- action = actions.MoveNode('/document/node[1]', '/document', 1)
|
|
- expected = '[move, /document/node[1], /document, 1]'
|
|
+ action = actions.MoveNode("/document/node[1]", "/document", 1)
|
|
+ expected = "[move, /document/node[1], /document, 1]"
|
|
self._format_test(action, expected)
|
|
|
|
# Move 2 up (same result, different diff)
|
|
- action = actions.MoveNode('/document/node[2]', '/document', 0)
|
|
- expected = '[move, /document/node[2], /document, 0]'
|
|
+ action = actions.MoveNode("/document/node[2]", "/document", 0)
|
|
+ expected = "[move, /document/node[2], /document, 0]"
|
|
|
|
self._format_test(action, expected)
|
|
|
|
def test_rename_node(self):
|
|
# Move 1 down
|
|
- action = actions.RenameNode('/document/node[1]', 'newtag')
|
|
- expected = '[rename, /document/node[1], newtag]'
|
|
+ action = actions.RenameNode("/document/node[1]", "newtag")
|
|
+ expected = "[rename, /document/node[1], newtag]"
|
|
self._format_test(action, expected)
|
|
|
|
# Move 2 up (same result, different diff)
|
|
- action = actions.MoveNode('/document/node[2]', '/document', 0)
|
|
- expected = '[move, /document/node[2], /document, 0]'
|
|
+ action = actions.MoveNode("/document/node[2]", "/document", 0)
|
|
+ expected = "[move, /document/node[2], /document, 0]"
|
|
|
|
self._format_test(action, expected)
|
|
|
|
def test_update_attr(self):
|
|
- action = actions.UpdateAttrib('/document/node', 'attr', 'newval')
|
|
+ action = actions.UpdateAttrib("/document/node", "attr", "newval")
|
|
expected = '[update-attribute, /document/node, attr, "newval"]'
|
|
self._format_test(action, expected)
|
|
|
|
def test_update_text_in(self):
|
|
- action = actions.UpdateTextIn('/document/node', 'Text')
|
|
+ action = actions.UpdateTextIn("/document/node", "Text")
|
|
expected = '[update-text, /document/node, "Text"]'
|
|
self._format_test(action, expected)
|
|
|
|
- action = actions.UpdateTextIn('/document/node',
|
|
- 'Also a bit of text, "rick"')
|
|
- expected = '[update-text, /document/node, '\
|
|
- u'"Also a bit of text, \\"rick\\""]'
|
|
+ action = actions.UpdateTextIn("/document/node", 'Also a bit of text, "rick"')
|
|
+ expected = "[update-text, /document/node, " '"Also a bit of text, \\"rick\\""]'
|
|
self._format_test(action, expected)
|
|
|
|
def test_update_text_after_1(self):
|
|
- action = actions.UpdateTextAfter('/document/node[1]', 'Text')
|
|
+ action = actions.UpdateTextAfter("/document/node[1]", "Text")
|
|
expected = '[update-text-after, /document/node[1], "Text"]'
|
|
self._format_test(action, expected)
|
|
|
|
def test_update_text_after_2(self):
|
|
- action = actions.UpdateTextAfter('/document/node',
|
|
- 'Also a bit of text, rick')
|
|
- expected = '[update-text-after, /document/node, '\
|
|
- u'"Also a bit of text, rick"]'
|
|
+ action = actions.UpdateTextAfter("/document/node", "Also a bit of text, rick")
|
|
+ expected = "[update-text-after, /document/node, " '"Also a bit of text, rick"]'
|
|
self._format_test(action, expected)
|
|
|
|
def test_insert_comment(self):
|
|
- action = actions.InsertComment('/document/node', 2, 'Commentary')
|
|
+ action = actions.InsertComment("/document/node", 2, "Commentary")
|
|
expected = '[insert-comment, /document/node, 2, "Commentary"]'
|
|
self._format_test(action, expected)
|
|
|
|
@@ -430,97 +414,95 @@ class XmlDiffFormatTests(unittest.TestCa
|
|
self.assertEqual(result, expected)
|
|
|
|
def test_del_attr(self):
|
|
- action = actions.DeleteAttrib('/document/node', 'a')
|
|
- expected = '[remove, /document/node/@a]'
|
|
+ action = actions.DeleteAttrib("/document/node", "a")
|
|
+ expected = "[remove, /document/node/@a]"
|
|
self._format_test(action, expected)
|
|
|
|
def test_del_node(self):
|
|
- action = actions.DeleteNode('/document/node')
|
|
- expected = '[remove, /document/node]'
|
|
+ action = actions.DeleteNode("/document/node")
|
|
+ expected = "[remove, /document/node]"
|
|
self._format_test(action, expected)
|
|
|
|
def test_del_text(self):
|
|
- action = actions.UpdateTextIn('/document/node', None)
|
|
- expected = '[update, /document/node/text()[1], null]'
|
|
+ action = actions.UpdateTextIn("/document/node", None)
|
|
+ expected = "[update, /document/node/text()[1], null]"
|
|
self._format_test(action, expected)
|
|
|
|
def test_insert_attr(self):
|
|
- action = actions.InsertAttrib('/document/node', 'attr', 'val')
|
|
- expected = '[insert, /document/node, \n<@attr>\nval\n</@attr>]'
|
|
+ action = actions.InsertAttrib("/document/node", "attr", "val")
|
|
+ expected = "[insert, /document/node, \n<@attr>\nval\n</@attr>]"
|
|
self._format_test(action, expected)
|
|
|
|
def test_insert_node(self):
|
|
- action = actions.InsertNode('/document', 'node', 0)
|
|
- expected = '[insert-first, /document, \n<node/>]'
|
|
+ action = actions.InsertNode("/document", "node", 0)
|
|
+ expected = "[insert-first, /document, \n<node/>]"
|
|
self._format_test(action, expected)
|
|
|
|
def test_rename_node(self):
|
|
# Move 1 down
|
|
- action = actions.RenameNode('/document/node[1]', 'newtag')
|
|
- expected = '[rename, /document/node[1], newtag]'
|
|
+ action = actions.RenameNode("/document/node[1]", "newtag")
|
|
+ expected = "[rename, /document/node[1], newtag]"
|
|
self._format_test(action, expected)
|
|
|
|
# Move 2 up (same result, different diff)
|
|
- action = actions.MoveNode('/document/node[2]', '/document', 0)
|
|
- expected = '[move-first, /document/node[2], /document]'
|
|
+ action = actions.MoveNode("/document/node[2]", "/document", 0)
|
|
+ expected = "[move-first, /document/node[2], /document]"
|
|
self._format_test(action, expected)
|
|
|
|
def test_update_attr(self):
|
|
- action = actions.UpdateAttrib('/document/node', 'attr', 'newval')
|
|
+ action = actions.UpdateAttrib("/document/node", "attr", "newval")
|
|
expected = '[update, /document/node/@attr, "newval"]'
|
|
self._format_test(action, expected)
|
|
|
|
def test_update_text_in(self):
|
|
- action = actions.UpdateTextIn('/document/node', 'Text')
|
|
+ action = actions.UpdateTextIn("/document/node", "Text")
|
|
expected = '[update, /document/node/text()[1], "Text"]'
|
|
self._format_test(action, expected)
|
|
|
|
- action = actions.UpdateTextIn('/document/node',
|
|
- 'Also a bit of text, "rick"')
|
|
- expected = '[update, /document/node/text()[1], '\
|
|
- u'"Also a bit of text, \\"rick\\""]'
|
|
+ action = actions.UpdateTextIn("/document/node", 'Also a bit of text, "rick"')
|
|
+ expected = (
|
|
+ "[update, /document/node/text()[1], " '"Also a bit of text, \\"rick\\""]'
|
|
+ )
|
|
self._format_test(action, expected)
|
|
|
|
def test_update_text_after_1(self):
|
|
- action = actions.UpdateTextAfter('/document/node[1]', 'Text')
|
|
+ action = actions.UpdateTextAfter("/document/node[1]", "Text")
|
|
expected = '[update, /document/node[1]/text()[2], "Text"]'
|
|
self._format_test(action, expected)
|
|
|
|
def test_update_text_after_2(self):
|
|
- action = actions.UpdateTextAfter('/document/node',
|
|
- 'Also a bit of text, rick')
|
|
- expected = '[update, /document/node/text()[2], '\
|
|
- u'"Also a bit of text, rick"]'
|
|
+ action = actions.UpdateTextAfter("/document/node", "Also a bit of text, rick")
|
|
+ expected = "[update, /document/node/text()[2], " '"Also a bit of text, rick"]'
|
|
self._format_test(action, expected)
|
|
|
|
def test_all_actions(self):
|
|
here = os.path.split(__file__)[0]
|
|
- lfile = os.path.join(here, 'test_data', 'all_actions.left.xml')
|
|
- rfile = os.path.join(here, 'test_data', 'all_actions.right.xml')
|
|
+ lfile = os.path.join(here, "test_data", "all_actions.left.xml")
|
|
+ rfile = os.path.join(here, "test_data", "all_actions.right.xml")
|
|
|
|
formatter = formatting.XmlDiffFormatter()
|
|
result = main.diff_files(lfile, rfile, formatter=formatter)
|
|
expected = (
|
|
- u'[move-after, /document/node[2], /document/tag[1]]\n'
|
|
- u'[insert-comment, /document[1], 0, Insert a new comment ]\n'
|
|
- u'[update, /document/node[1]/@name, "was updated"]\n'
|
|
- u'[remove, /document/node[1]/@attribute]\n'
|
|
- u'[insert, /document/node[1], \n'
|
|
- u'<@newtribute>\n'
|
|
- u'renamed\n'
|
|
- u'</@newtribute>]\n'
|
|
- u'[insert, /document/node[1], \n'
|
|
- u'<@this>\n'
|
|
- u'is new\n'
|
|
- u'</@this>]\n'
|
|
- u'[remove, /document/node[1]/@attr]\n'
|
|
- u'[update, /document/node[1]/text()[1], "\\n Modified\\n "]\n'
|
|
- u'[update, /document/node[1]/text()[2], "\\n '
|
|
- u'New tail content\\n "]\n'
|
|
- u'[rename, /document/node[2], nod]\n'
|
|
- u'[insert-after, /document/tail[1], \n'
|
|
- u'<new/>]\n'
|
|
- u'[remove, /document/tail[1]]'
|
|
+ "[move-after, /document/node[2], /document/tag[1]]\n"
|
|
+ "[insert-comment, /document[1], 0, Insert a new comment ]\n"
|
|
+ '[update, /document/node[1]/@name, "was updated"]\n'
|
|
+ "[remove, /document/node[1]/@attribute]\n"
|
|
+ "[insert, /document/node[1], \n"
|
|
+ "<@newtribute>\n"
|
|
+ "renamed\n"
|
|
+ "</@newtribute>]\n"
|
|
+ "[insert, /document/node[1], \n"
|
|
+ "<@this>\n"
|
|
+ "is new\n"
|
|
+ "</@this>]\n"
|
|
+ "[remove, /document/node[1]/@attr]\n"
|
|
+ '[update, /document/node[1]/text()[1], "\\n Modified\\n "]\n'
|
|
+ '[update, /document/node[1]/text()[2], "\\n '
|
|
+ 'New tail content\\n "]\n'
|
|
+ "[rename, /document/node[2], nod]\n"
|
|
+ "[insert-after, /document/tail[1], \n"
|
|
+ "<new/>]\n"
|
|
+ "[remove, /document/tail[1]]"
|
|
)
|
|
self.assertEqual(result, expected)
|
|
|
|
@@ -537,12 +519,14 @@ class FormatterFileTests(unittest.TestCa
|
|
class XMLFormatterFileTests(FormatterFileTests):
|
|
|
|
# The XMLFormatter has no text or formatting tags, so
|
|
- formatter = formatting.XMLFormatter(pretty_print=False,
|
|
- normalize=formatting.WS_TEXT)
|
|
+ formatter = formatting.XMLFormatter(
|
|
+ pretty_print=False, normalize=formatting.WS_TEXT
|
|
+ )
|
|
|
|
|
|
# Also test the bits that handle text tags:
|
|
|
|
+
|
|
class HTMLFormatterFileTests(FormatterFileTests):
|
|
|
|
# We use a few tags for the placeholder tests.
|
|
@@ -551,15 +535,27 @@ class HTMLFormatterFileTests(FormatterFi
|
|
formatter = formatting.XMLFormatter(
|
|
normalize=formatting.WS_BOTH,
|
|
pretty_print=True,
|
|
- text_tags=('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'),
|
|
- formatting_tags=('b', 'u', 'i', 'strike', 'em', 'super',
|
|
- 'sup', 'sub', 'link', 'a', 'span'))
|
|
+ text_tags=("p", "h1", "h2", "h3", "h4", "h5", "h6", "li"),
|
|
+ formatting_tags=(
|
|
+ "b",
|
|
+ "u",
|
|
+ "i",
|
|
+ "strike",
|
|
+ "em",
|
|
+ "super",
|
|
+ "sup",
|
|
+ "sub",
|
|
+ "link",
|
|
+ "a",
|
|
+ "span",
|
|
+ ),
|
|
+ )
|
|
|
|
|
|
# Add tests that use no placeholder replacement (ie plain XML)
|
|
-data_dir = os.path.join(os.path.dirname(__file__), 'test_data')
|
|
+data_dir = os.path.join(os.path.dirname(__file__), "test_data")
|
|
generate_filebased_cases(data_dir, XMLFormatterFileTests)
|
|
|
|
# Add tests that use placeholder replacement (ie HTML)
|
|
-data_dir = os.path.join(os.path.dirname(__file__), 'test_data')
|
|
-generate_filebased_cases(data_dir, HTMLFormatterFileTests, suffix='html')
|
|
+data_dir = os.path.join(os.path.dirname(__file__), "test_data")
|
|
+generate_filebased_cases(data_dir, HTMLFormatterFileTests, suffix="html")
|
|
Index: xmldiff-2.4/tests/test_main.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/tests/test_main.py
|
|
+++ xmldiff-2.4/tests/test_main.py
|
|
@@ -7,44 +7,42 @@ from lxml import etree
|
|
from xmldiff import main, formatting
|
|
|
|
CURDIR = os.path.split(__file__)[0]
|
|
-LEFT_FILE = os.path.join(CURDIR, 'test_data', 'rmldoc.left.xml')
|
|
-RIGHT_FILE = os.path.join(CURDIR, 'test_data', 'rmldoc.right.xml')
|
|
-EXPECTED_FILE = os.path.join(CURDIR, 'test_data', 'rmldoc.expected.xml')
|
|
+LEFT_FILE = os.path.join(CURDIR, "test_data", "rmldoc.left.xml")
|
|
+RIGHT_FILE = os.path.join(CURDIR, "test_data", "rmldoc.right.xml")
|
|
+EXPECTED_FILE = os.path.join(CURDIR, "test_data", "rmldoc.expected.xml")
|
|
|
|
|
|
class MainAPITests(unittest.TestCase):
|
|
-
|
|
def test_api_diff_files(self):
|
|
# diff_files can take filenames
|
|
result1 = main.diff_files(LEFT_FILE, RIGHT_FILE)
|
|
|
|
# Or open file streams:
|
|
- with open(LEFT_FILE, 'rb') as linfile:
|
|
- with open(RIGHT_FILE, 'rb') as rinfile:
|
|
+ with open(LEFT_FILE, "rb") as linfile:
|
|
+ with open(RIGHT_FILE, "rb") as rinfile:
|
|
result2 = main.diff_files(linfile, rinfile)
|
|
|
|
self.assertEqual(result1, result2)
|
|
|
|
# Give something else, and it fails:
|
|
with self.assertRaises(IOError):
|
|
- main.diff_files('<xml1/>', '<xml2/>')
|
|
+ main.diff_files("<xml1/>", "<xml2/>")
|
|
|
|
def test_api_diff_texts(self):
|
|
# diff_text can take bytes
|
|
- with open(LEFT_FILE, 'rb') as linfile:
|
|
- with open(RIGHT_FILE, 'rb') as rinfile:
|
|
+ with open(LEFT_FILE, "rb") as linfile:
|
|
+ with open(RIGHT_FILE, "rb") as rinfile:
|
|
left = linfile.read()
|
|
right = rinfile.read()
|
|
result1 = main.diff_texts(left, right)
|
|
|
|
# And unicode
|
|
- result2 = main.diff_texts(left.decode('utf8'),
|
|
- right.decode('utf8'))
|
|
+ result2 = main.diff_texts(left.decode("utf8"), right.decode("utf8"))
|
|
|
|
self.assertEqual(result1, result2)
|
|
|
|
- with open(LEFT_FILE, 'rb') as infile:
|
|
- with open(RIGHT_FILE, 'rb') as infile:
|
|
+ with open(LEFT_FILE, "rb") as infile:
|
|
+ with open(RIGHT_FILE, "rb") as infile:
|
|
# Give something else, and it fails:
|
|
with self.assertRaises(ValueError):
|
|
main.diff_texts(infile, infile)
|
|
@@ -72,7 +70,6 @@ class MainAPITests(unittest.TestCase):
|
|
|
|
|
|
class MainCLITests(unittest.TestCase):
|
|
-
|
|
def call_run(self, args, command=main.diff_command):
|
|
output = six.StringIO()
|
|
errors = six.StringIO()
|
|
@@ -97,75 +94,75 @@ class MainCLITests(unittest.TestCase):
|
|
|
|
def test_diff_cli_simple(self):
|
|
curdir = os.path.dirname(__file__)
|
|
- filepath = os.path.join(curdir, 'test_data')
|
|
- file1 = os.path.join(filepath, 'insert-node.left.html')
|
|
- file2 = os.path.join(filepath, 'insert-node.right.html')
|
|
+ filepath = os.path.join(curdir, "test_data")
|
|
+ file1 = os.path.join(filepath, "insert-node.left.html")
|
|
+ file2 = os.path.join(filepath, "insert-node.right.html")
|
|
|
|
output, errors = self.call_run([file1, file2])
|
|
self.assertEqual(len(output.splitlines()), 3)
|
|
# This should default to the diff formatter:
|
|
- self.assertEqual(output[0], '[')
|
|
+ self.assertEqual(output[0], "[")
|
|
|
|
def test_diff_cli_args(self):
|
|
curdir = os.path.dirname(__file__)
|
|
- filepath = os.path.join(curdir, 'test_data')
|
|
- file1 = os.path.join(filepath, 'insert-node.left.html')
|
|
- file2 = os.path.join(filepath, 'insert-node.right.html')
|
|
+ filepath = os.path.join(curdir, "test_data")
|
|
+ file1 = os.path.join(filepath, "insert-node.left.html")
|
|
+ file2 = os.path.join(filepath, "insert-node.right.html")
|
|
|
|
# Select a formatter:
|
|
- output, errors = self.call_run([file1, file2, '--formatter', 'xml'])
|
|
+ output, errors = self.call_run([file1, file2, "--formatter", "xml"])
|
|
# It gives a very compact output
|
|
self.assertEqual(len(output.splitlines()), 1)
|
|
# Now it's XML
|
|
- self.assertEqual(output[0], '<')
|
|
+ self.assertEqual(output[0], "<")
|
|
|
|
# Don't strip the whitespace keeps the formatting from the source:
|
|
- output, errors = self.call_run([file1, file2, '--keep-whitespace',
|
|
- '--formatter', 'xml'])
|
|
+ output, errors = self.call_run(
|
|
+ [file1, file2, "--keep-whitespace", "--formatter", "xml"]
|
|
+ )
|
|
self.assertEqual(len(output.splitlines()), 5)
|
|
|
|
# And stripping and pretty printing gives a longer readable output
|
|
- output, errors = self.call_run([file1, file2, '--pretty-print',
|
|
- '--formatter', 'xml'])
|
|
+ output, errors = self.call_run(
|
|
+ [file1, file2, "--pretty-print", "--formatter", "xml"]
|
|
+ )
|
|
self.assertEqual(len(output.splitlines()), 6)
|
|
|
|
# The default output gives three lines for three actions
|
|
- output, errors = self.call_run([file1, file2, '--ratio-mode', 'fast'])
|
|
+ output, errors = self.call_run([file1, file2, "--ratio-mode", "fast"])
|
|
self.assertEqual(len(output.splitlines()), 3)
|
|
|
|
# 'fast' is default, so it's the same output
|
|
- output2, errors = self.call_run([file1, file2, '--ratio-mode', 'fast'])
|
|
+ output2, errors = self.call_run([file1, file2, "--ratio-mode", "fast"])
|
|
self.assertEqual(output, output2)
|
|
|
|
# Accurate is the same in this case, although sometimes it isn't
|
|
- output2, errors = self.call_run([file1, file2, '--ratio-mode',
|
|
- 'accurate'])
|
|
+ output2, errors = self.call_run([file1, file2, "--ratio-mode", "accurate"])
|
|
self.assertEqual(output, output2)
|
|
|
|
# But "faster" gives six actions instead of three
|
|
- output, errors = self.call_run([file1, file2, '--ratio-mode',
|
|
- 'faster'])
|
|
+ output, errors = self.call_run([file1, file2, "--ratio-mode", "faster"])
|
|
self.assertEqual(len(output.splitlines()), 6)
|
|
|
|
# You can specify unique attributes:
|
|
- output, errors = self.call_run([file1, file2, '--unique-attributes',
|
|
- 'id,foo,frotz'])
|
|
+ output, errors = self.call_run(
|
|
+ [file1, file2, "--unique-attributes", "id,foo,frotz"]
|
|
+ )
|
|
self.assertEqual(len(output.splitlines()), 3)
|
|
|
|
# Or none
|
|
- output, errors = self.call_run([file1, file2, '--unique-attributes'])
|
|
+ output, errors = self.call_run([file1, file2, "--unique-attributes"])
|
|
self.assertEqual(len(output.splitlines()), 3)
|
|
|
|
def test_patch_cli_simple(self):
|
|
curdir = os.path.dirname(__file__)
|
|
- filepath = os.path.join(curdir, 'test_data')
|
|
- patchfile = os.path.join(filepath, 'insert-node.diff')
|
|
- xmlfile = os.path.join(filepath, 'insert-node.left.html')
|
|
+ filepath = os.path.join(curdir, "test_data")
|
|
+ patchfile = os.path.join(filepath, "insert-node.diff")
|
|
+ xmlfile = os.path.join(filepath, "insert-node.left.html")
|
|
|
|
- output, errors = self.call_run([patchfile, xmlfile],
|
|
- command=main.patch_command)
|
|
+ output, errors = self.call_run([patchfile, xmlfile], command=main.patch_command)
|
|
|
|
- expectedfile = os.path.join(filepath, 'insert-node.right.html')
|
|
- with open(expectedfile, 'rt') as f:
|
|
+ expectedfile = os.path.join(filepath, "insert-node.right.html")
|
|
+ with open(expectedfile) as f:
|
|
expected = f.read()
|
|
self.assertEqual(output, expected)
|
|
Index: xmldiff-2.4/tests/test_patch.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/tests/test_patch.py
|
|
+++ xmldiff-2.4/tests/test_patch.py
|
|
@@ -5,10 +5,19 @@ from lxml import etree
|
|
from xmldiff.formatting import DiffFormatter, WS_NONE
|
|
from xmldiff.main import diff_trees, diff_texts, patch_text, patch_file
|
|
from xmldiff.patch import Patcher, DiffParser
|
|
-from xmldiff.actions import (UpdateTextIn, InsertNode, MoveNode,
|
|
- DeleteNode, UpdateAttrib, InsertAttrib,
|
|
- RenameAttrib, DeleteAttrib, UpdateTextAfter,
|
|
- RenameNode, InsertComment)
|
|
+from xmldiff.actions import (
|
|
+ UpdateTextIn,
|
|
+ InsertNode,
|
|
+ MoveNode,
|
|
+ DeleteNode,
|
|
+ UpdateAttrib,
|
|
+ InsertAttrib,
|
|
+ RenameAttrib,
|
|
+ DeleteAttrib,
|
|
+ UpdateTextAfter,
|
|
+ RenameNode,
|
|
+ InsertComment,
|
|
+)
|
|
|
|
from .testing import compare_elements
|
|
|
|
@@ -23,67 +32,84 @@ class PatcherTests(unittest.TestCase):
|
|
self.assertEqual(etree.tounicode(tree), end)
|
|
|
|
def test_delete_node(self):
|
|
- self._test('<root><deleteme/></root>',
|
|
- DeleteNode('/root/deleteme'),
|
|
- '<root/>')
|
|
+ self._test("<root><deleteme/></root>", DeleteNode("/root/deleteme"), "<root/>")
|
|
|
|
def test_insert_node(self):
|
|
- self._test('<root><anode/></root>',
|
|
- InsertNode('/root/anode', 'newnode', 0),
|
|
- '<root><anode><newnode/></anode></root>')
|
|
+ self._test(
|
|
+ "<root><anode/></root>",
|
|
+ InsertNode("/root/anode", "newnode", 0),
|
|
+ "<root><anode><newnode/></anode></root>",
|
|
+ )
|
|
|
|
def test_rename_node(self):
|
|
- self._test('<root><oldname/></root>',
|
|
- RenameNode('/root/oldname', 'newname'),
|
|
- '<root><newname/></root>')
|
|
+ self._test(
|
|
+ "<root><oldname/></root>",
|
|
+ RenameNode("/root/oldname", "newname"),
|
|
+ "<root><newname/></root>",
|
|
+ )
|
|
|
|
def test_move_node(self):
|
|
- self._test('<root><anode><moveme/></anode></root>',
|
|
- MoveNode('/root/anode/moveme', '/root', 1),
|
|
- '<root><anode/><moveme/></root>')
|
|
+ self._test(
|
|
+ "<root><anode><moveme/></anode></root>",
|
|
+ MoveNode("/root/anode/moveme", "/root", 1),
|
|
+ "<root><anode/><moveme/></root>",
|
|
+ )
|
|
|
|
def test_update_text_in(self):
|
|
- self._test('<root><anode/></root>',
|
|
- UpdateTextIn('/root/anode', 'New text'),
|
|
- '<root><anode>New text</anode></root>')
|
|
+ self._test(
|
|
+ "<root><anode/></root>",
|
|
+ UpdateTextIn("/root/anode", "New text"),
|
|
+ "<root><anode>New text</anode></root>",
|
|
+ )
|
|
|
|
def test_update_text_after(self):
|
|
- self._test('<root><anode/></root>',
|
|
- UpdateTextAfter('/root/anode', 'New text'),
|
|
- '<root><anode/>New text</root>')
|
|
+ self._test(
|
|
+ "<root><anode/></root>",
|
|
+ UpdateTextAfter("/root/anode", "New text"),
|
|
+ "<root><anode/>New text</root>",
|
|
+ )
|
|
|
|
def test_update_attrib(self):
|
|
- self._test('<root><anode attrib="oldvalue" /></root>',
|
|
- UpdateAttrib('/root/anode', 'attrib', 'newvalue'),
|
|
- '<root><anode attrib="newvalue"/></root>')
|
|
+ self._test(
|
|
+ '<root><anode attrib="oldvalue" /></root>',
|
|
+ UpdateAttrib("/root/anode", "attrib", "newvalue"),
|
|
+ '<root><anode attrib="newvalue"/></root>',
|
|
+ )
|
|
|
|
def test_delete_attrib(self):
|
|
- self._test('<root><anode attrib="oldvalue" /></root>',
|
|
- DeleteAttrib('/root/anode', 'attrib'),
|
|
- '<root><anode/></root>')
|
|
+ self._test(
|
|
+ '<root><anode attrib="oldvalue" /></root>',
|
|
+ DeleteAttrib("/root/anode", "attrib"),
|
|
+ "<root><anode/></root>",
|
|
+ )
|
|
|
|
def test_insert_attrib(self):
|
|
- self._test('<root><anode/></root>',
|
|
- InsertAttrib('/root/anode', 'attrib', 'value'),
|
|
- '<root><anode attrib="value"/></root>')
|
|
+ self._test(
|
|
+ "<root><anode/></root>",
|
|
+ InsertAttrib("/root/anode", "attrib", "value"),
|
|
+ '<root><anode attrib="value"/></root>',
|
|
+ )
|
|
|
|
def test_rename_attrib(self):
|
|
- self._test('<root><anode oldname="value"/></root>',
|
|
- RenameAttrib('/root/anode', 'oldname', 'newname'),
|
|
- '<root><anode newname="value"/></root>')
|
|
+ self._test(
|
|
+ '<root><anode oldname="value"/></root>',
|
|
+ RenameAttrib("/root/anode", "oldname", "newname"),
|
|
+ '<root><anode newname="value"/></root>',
|
|
+ )
|
|
|
|
def test_insert_comment(self):
|
|
- self._test('<root><anode/></root>',
|
|
- InsertComment('/root', 1, "This is a new comment"),
|
|
- '<root><anode/><!--This is a new comment--></root>')
|
|
+ self._test(
|
|
+ "<root><anode/></root>",
|
|
+ InsertComment("/root", 1, "This is a new comment"),
|
|
+ "<root><anode/><!--This is a new comment--></root>",
|
|
+ )
|
|
|
|
|
|
class DiffPatch(unittest.TestCase):
|
|
-
|
|
def test_diff_patch(self):
|
|
here = os.path.split(__file__)[0]
|
|
- lfile = os.path.join(here, 'test_data', 'all_actions.left.xml')
|
|
- rfile = os.path.join(here, 'test_data', 'all_actions.right.xml')
|
|
+ lfile = os.path.join(here, "test_data", "all_actions.left.xml")
|
|
+ rfile = os.path.join(here, "test_data", "all_actions.right.xml")
|
|
|
|
left = etree.parse(lfile)
|
|
right = etree.parse(rfile)
|
|
@@ -96,7 +122,7 @@ class DiffPatch(unittest.TestCase):
|
|
compare_elements(result.getroot(), right.getroot())
|
|
|
|
|
|
-TEST_DIFF = '''[delete, node]
|
|
+TEST_DIFF = """[delete, node]
|
|
[insert, target, tag, 0]
|
|
[rename, node, tag]
|
|
[move, node, target, 0]
|
|
@@ -107,67 +133,61 @@ TEST_DIFF = '''[delete, node]
|
|
[insert-attribute, node, name, "value"]
|
|
[rename-attribute, node, oldname, newname]
|
|
[insert-comment, target, 0, "text"]
|
|
-'''
|
|
+"""
|
|
|
|
|
|
class ParserTests(unittest.TestCase):
|
|
-
|
|
def test_make_action(self):
|
|
parser = DiffParser()
|
|
|
|
- self.assertEqual(
|
|
- parser.make_action('[delete, node]'),
|
|
- DeleteNode('node')
|
|
- )
|
|
+ self.assertEqual(parser.make_action("[delete, node]"), DeleteNode("node"))
|
|
|
|
self.assertEqual(
|
|
- parser.make_action('[insert, target, tag, 0]'),
|
|
- InsertNode('target', 'tag', 0)
|
|
+ parser.make_action("[insert, target, tag, 0]"),
|
|
+ InsertNode("target", "tag", 0),
|
|
)
|
|
|
|
self.assertEqual(
|
|
- parser.make_action('[rename, node, tag]'),
|
|
- RenameNode('node', 'tag')
|
|
+ parser.make_action("[rename, node, tag]"), RenameNode("node", "tag")
|
|
)
|
|
|
|
self.assertEqual(
|
|
- parser.make_action('[move, node, target, 0]'),
|
|
- MoveNode('node', 'target', 0)
|
|
+ parser.make_action("[move, node, target, 0]"), MoveNode("node", "target", 0)
|
|
)
|
|
|
|
self.assertEqual(
|
|
parser.make_action('[update-text, node, "text"]'),
|
|
- UpdateTextIn('node', 'text')
|
|
+ UpdateTextIn("node", "text"),
|
|
)
|
|
|
|
self.assertEqual(
|
|
parser.make_action('[update-text-after, node, "text"]'),
|
|
- UpdateTextAfter('node', 'text')
|
|
+ UpdateTextAfter("node", "text"),
|
|
)
|
|
|
|
self.assertEqual(
|
|
parser.make_action('[update-attribute, node, name, "value"]'),
|
|
- UpdateAttrib('node', 'name', 'value')
|
|
+ UpdateAttrib("node", "name", "value"),
|
|
)
|
|
|
|
self.assertEqual(
|
|
- parser.make_action('[delete-attribute, node, name]'),
|
|
- DeleteAttrib('node', 'name')
|
|
+ parser.make_action("[delete-attribute, node, name]"),
|
|
+ DeleteAttrib("node", "name"),
|
|
)
|
|
|
|
self.assertEqual(
|
|
parser.make_action('[insert-attribute, node, name, "value"]'),
|
|
- InsertAttrib('node', 'name', 'value')
|
|
+ InsertAttrib("node", "name", "value"),
|
|
)
|
|
|
|
self.assertEqual(
|
|
- parser.make_action('[rename-attribute, node, oldname, newname]'),
|
|
- RenameAttrib('node', 'oldname', 'newname')
|
|
+ parser.make_action("[rename-attribute, node, oldname, newname]"),
|
|
+ RenameAttrib("node", "oldname", "newname"),
|
|
)
|
|
|
|
self.assertEqual(
|
|
parser.make_action('[insert-comment, target, 0, "text"]'),
|
|
- InsertComment('target', 0, 'text')
|
|
+ InsertComment("target", 0, "text"),
|
|
)
|
|
|
|
def test_parse(self):
|
|
@@ -180,43 +200,42 @@ class ParserTests(unittest.TestCase):
|
|
parser = DiffParser()
|
|
|
|
# Empty file, nothing happens
|
|
- actions = list(parser.parse(''))
|
|
+ actions = list(parser.parse(""))
|
|
self.assertEqual(actions, [])
|
|
|
|
# Not a diff raises error
|
|
with self.assertRaises(ValueError):
|
|
- actions = list(parser.parse('Not a diff'))
|
|
+ actions = list(parser.parse("Not a diff"))
|
|
|
|
# It should handle lines that have been broken, say in an email
|
|
actions = list(parser.parse('[insert-comment, target,\n 0, "text"]'))
|
|
- self.assertEqual(actions, [InsertComment('target', 0, 'text')])
|
|
+ self.assertEqual(actions, [InsertComment("target", 0, "text")])
|
|
|
|
# It should not handle broken files
|
|
with self.assertRaises(ValueError):
|
|
- actions = list(parser.parse('[insert-comment, target,\n'))
|
|
+ actions = list(parser.parse("[insert-comment, target,\n"))
|
|
|
|
def test_diff_patch(self):
|
|
here = os.path.split(__file__)[0]
|
|
- lfile = os.path.join(here, 'test_data', 'all_actions.left.xml')
|
|
- rfile = os.path.join(here, 'test_data', 'all_actions.right.xml')
|
|
+ lfile = os.path.join(here, "test_data", "all_actions.left.xml")
|
|
+ rfile = os.path.join(here, "test_data", "all_actions.right.xml")
|
|
with open(lfile) as f:
|
|
left = f.read()
|
|
with open(rfile) as f:
|
|
right = f.read()
|
|
|
|
- diff = diff_texts(left, right,
|
|
- formatter=DiffFormatter(normalize=WS_NONE))
|
|
+ diff = diff_texts(left, right, formatter=DiffFormatter(normalize=WS_NONE))
|
|
result = patch_text(diff, left)
|
|
compare_elements(etree.fromstring(result), etree.fromstring(right))
|
|
|
|
def test_patch_stream(self):
|
|
- here = os.path.join(os.path.split(__file__)[0], 'test_data')
|
|
- xmlfile = os.path.join(here, 'insert-node.left.html')
|
|
- patchfile = os.path.join(here, 'insert-node.diff')
|
|
+ here = os.path.join(os.path.split(__file__)[0], "test_data")
|
|
+ xmlfile = os.path.join(here, "insert-node.left.html")
|
|
+ patchfile = os.path.join(here, "insert-node.diff")
|
|
result = patch_file(patchfile, xmlfile)
|
|
|
|
- expectedfile = os.path.join(here, 'insert-node.right.html')
|
|
- with open(expectedfile, 'rt') as f:
|
|
+ expectedfile = os.path.join(here, "insert-node.right.html")
|
|
+ with open(expectedfile) as f:
|
|
expected = f.read()
|
|
# lxml.etree.parse() will strip ending whitespace
|
|
self.assertEqual(result, expected.rstrip())
|
|
Index: xmldiff-2.4/tests/test_utils.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/tests/test_utils.py
|
|
+++ xmldiff-2.4/tests/test_utils.py
|
|
@@ -5,9 +5,8 @@ from xmldiff import utils
|
|
|
|
|
|
class TraverseTests(unittest.TestCase):
|
|
-
|
|
def test_post_order(self):
|
|
- xml = u'''<document>
|
|
+ xml = """<document>
|
|
<story firstPageTemplate='FirstPage'>
|
|
<section xml:id='oldfirst' ref='3' single-ref='3'>
|
|
<para>First paragraph</para>
|
|
@@ -17,19 +16,24 @@ class TraverseTests(unittest.TestCase):
|
|
</section>
|
|
</story>
|
|
</document>
|
|
-'''
|
|
+"""
|
|
root = etree.fromstring(xml)
|
|
tree = root.getroottree()
|
|
res = [tree.getpath(x) for x in utils.post_order_traverse(root)]
|
|
- self.assertEqual(res, ['/document/story/section[1]/para',
|
|
- '/document/story/section[1]',
|
|
- '/document/story/section[2]/para',
|
|
- '/document/story/section[2]',
|
|
- '/document/story',
|
|
- '/document'])
|
|
+ self.assertEqual(
|
|
+ res,
|
|
+ [
|
|
+ "/document/story/section[1]/para",
|
|
+ "/document/story/section[1]",
|
|
+ "/document/story/section[2]/para",
|
|
+ "/document/story/section[2]",
|
|
+ "/document/story",
|
|
+ "/document",
|
|
+ ],
|
|
+ )
|
|
|
|
def test_reverse_post_order(self):
|
|
- xml = u'''<document>
|
|
+ xml = """<document>
|
|
<story firstPageTemplate='FirstPage'>
|
|
<section xml:id='oldfirst' ref='3' single-ref='3'>
|
|
<para>First paragraph</para>
|
|
@@ -39,20 +43,24 @@ class TraverseTests(unittest.TestCase):
|
|
</section>
|
|
</story>
|
|
</document>
|
|
-'''
|
|
+"""
|
|
root = etree.fromstring(xml)
|
|
tree = root.getroottree()
|
|
- res = [tree.getpath(x) for x in
|
|
- utils.reverse_post_order_traverse(root)]
|
|
- self.assertEqual(res, ['/document/story/section[2]/para',
|
|
- '/document/story/section[2]',
|
|
- '/document/story/section[1]/para',
|
|
- '/document/story/section[1]',
|
|
- '/document/story',
|
|
- '/document'])
|
|
+ res = [tree.getpath(x) for x in utils.reverse_post_order_traverse(root)]
|
|
+ self.assertEqual(
|
|
+ res,
|
|
+ [
|
|
+ "/document/story/section[2]/para",
|
|
+ "/document/story/section[2]",
|
|
+ "/document/story/section[1]/para",
|
|
+ "/document/story/section[1]",
|
|
+ "/document/story",
|
|
+ "/document",
|
|
+ ],
|
|
+ )
|
|
|
|
def test_breadth_first(self):
|
|
- xml = u'''<document>
|
|
+ xml = """<document>
|
|
<story>
|
|
<section>
|
|
<para>First <i>paragraph</i></para>
|
|
@@ -69,68 +77,70 @@ class TraverseTests(unittest.TestCase):
|
|
</section>
|
|
</story>
|
|
</document>
|
|
-'''
|
|
+"""
|
|
root = etree.fromstring(xml)
|
|
tree = root.getroottree()
|
|
res = [tree.getpath(x) for x in utils.breadth_first_traverse(root)]
|
|
- self.assertEqual(res, ['/document',
|
|
- '/document/story[1]',
|
|
- '/document/story[2]',
|
|
- '/document/story[1]/section[1]',
|
|
- '/document/story[1]/section[2]',
|
|
- '/document/story[2]/section',
|
|
- '/document/story[1]/section[1]/para[1]',
|
|
- '/document/story[1]/section[1]/para[2]',
|
|
- '/document/story[1]/section[2]/para[1]',
|
|
- '/document/story[1]/section[2]/para[2]',
|
|
- '/document/story[2]/section/para',
|
|
- '/document/story[1]/section[1]/para[1]/i',
|
|
- '/document/story[1]/section[2]/para[2]/b',
|
|
- ])
|
|
+ self.assertEqual(
|
|
+ res,
|
|
+ [
|
|
+ "/document",
|
|
+ "/document/story[1]",
|
|
+ "/document/story[2]",
|
|
+ "/document/story[1]/section[1]",
|
|
+ "/document/story[1]/section[2]",
|
|
+ "/document/story[2]/section",
|
|
+ "/document/story[1]/section[1]/para[1]",
|
|
+ "/document/story[1]/section[1]/para[2]",
|
|
+ "/document/story[1]/section[2]/para[1]",
|
|
+ "/document/story[1]/section[2]/para[2]",
|
|
+ "/document/story[2]/section/para",
|
|
+ "/document/story[1]/section[1]/para[1]/i",
|
|
+ "/document/story[1]/section[2]/para[2]/b",
|
|
+ ],
|
|
+ )
|
|
|
|
|
|
class LongestCommonSubsequenceTests(unittest.TestCase):
|
|
-
|
|
def _diff(self, left, right, result):
|
|
res = []
|
|
for x, y in utils.longest_common_subsequence(left, right):
|
|
self.assertEqual(left[x], right[y])
|
|
res.append(left[x])
|
|
|
|
- self.assertEqual(''.join(res), result)
|
|
+ self.assertEqual("".join(res), result)
|
|
|
|
def test_lcs(self):
|
|
|
|
- self._diff('ABCDEF', 'ABCDEF', 'ABCDEF')
|
|
+ self._diff("ABCDEF", "ABCDEF", "ABCDEF")
|
|
|
|
- self._diff('ABCDEF', 'GHIJKL', '')
|
|
+ self._diff("ABCDEF", "GHIJKL", "")
|
|
|
|
- self._diff('ABCDEF', 'ACDQRB', 'ACD')
|
|
+ self._diff("ABCDEF", "ACDQRB", "ACD")
|
|
|
|
- self._diff('CXCDEFX', 'CDEFX', 'CDEFX')
|
|
+ self._diff("CXCDEFX", "CDEFX", "CDEFX")
|
|
|
|
- self._diff('HUMAN', 'CHIMPANZEE', 'HMAN')
|
|
+ self._diff("HUMAN", "CHIMPANZEE", "HMAN")
|
|
|
|
- self._diff('ABCDEF', 'A', 'A')
|
|
+ self._diff("ABCDEF", "A", "A")
|
|
|
|
- self._diff('123AAAAAAAAA', '123BBBBBBBBB', '123')
|
|
+ self._diff("123AAAAAAAAA", "123BBBBBBBBB", "123")
|
|
|
|
- self._diff('AAAAAAAAA123', 'BBBBBBBBB123', '123')
|
|
+ self._diff("AAAAAAAAA123", "BBBBBBBBB123", "123")
|
|
|
|
- self._diff('ABCDE1', '1FGHIJK', '1')
|
|
+ self._diff("ABCDE1", "1FGHIJK", "1")
|
|
|
|
# There are several correct options here, make sure that doesn't
|
|
# confuse it, we want just one, and don't care which.
|
|
- self._diff('HORSEBACK', 'SNOWFLAKE', 'SAK')
|
|
+ self._diff("HORSEBACK", "SNOWFLAKE", "SAK")
|
|
|
|
# Empty sequences:
|
|
- self._diff('', '', '')
|
|
+ self._diff("", "", "")
|
|
|
|
|
|
class MakeAsciiTreeTests(unittest.TestCase):
|
|
-
|
|
def test_make_ascii_tree(self):
|
|
- xml = u'''<document xmlns:diff="http://namespaces.shoobx.com/diff">
|
|
+ xml = """<document xmlns:diff="http://namespaces.shoobx.com/diff">
|
|
<story firstPageTemplate='FirstPage'>
|
|
<section xml:id='oldfirst' ref='3' single-ref='3'>
|
|
<para diff:delete="">First paragraph</para>
|
|
@@ -140,11 +150,11 @@ class MakeAsciiTreeTests(unittest.TestCa
|
|
</section>
|
|
</story>
|
|
</document>
|
|
-'''
|
|
+"""
|
|
root = etree.fromstring(xml)
|
|
tree = utils.make_ascii_tree(root)
|
|
self.assertEqual(
|
|
tree,
|
|
- ' document \n story \n section \n para (delete)\n'
|
|
- ' section \n para \n diff:insert '
|
|
+ " document \n story \n section \n para (delete)\n"
|
|
+ " section \n para \n diff:insert ",
|
|
)
|
|
Index: xmldiff-2.4/tests/testing.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/tests/testing.py
|
|
+++ xmldiff-2.4/tests/testing.py
|
|
@@ -1,39 +1,37 @@
|
|
import os
|
|
|
|
-from io import open
|
|
-
|
|
|
|
def make_case_function(left_filename):
|
|
- right_filename = left_filename.replace('.left.', '.right.')
|
|
- expected_filename = left_filename.replace('.left.', '.expected.')
|
|
+ right_filename = left_filename.replace(".left.", ".right.")
|
|
+ expected_filename = left_filename.replace(".left.", ".expected.")
|
|
|
|
def test(self):
|
|
- with open(expected_filename, 'rt', encoding='utf8') as input_file:
|
|
+ with open(expected_filename, encoding="utf8") as input_file:
|
|
expected_xml = input_file.read()
|
|
|
|
try:
|
|
result_xml = self.process(left_filename, right_filename)
|
|
except Exception as err:
|
|
- if u'.err' not in left_filename:
|
|
+ if ".err" not in left_filename:
|
|
raise
|
|
- result_xml = u'%s: %s' % (err.__class__.__name__, err)
|
|
+ result_xml = f"{err.__class__.__name__}: {err}"
|
|
|
|
self.assertEqual(expected_xml.strip(), result_xml.strip())
|
|
|
|
return test
|
|
|
|
|
|
-def generate_filebased_cases(data_dir, test_class, suffix='xml', ignore=()):
|
|
+def generate_filebased_cases(data_dir, test_class, suffix="xml", ignore=()):
|
|
for left_filename in os.listdir(data_dir):
|
|
- if not left_filename.endswith('.left.' + suffix):
|
|
+ if not left_filename.endswith(".left." + suffix):
|
|
continue
|
|
if left_filename in ignore:
|
|
continue
|
|
|
|
left_filename = os.path.join(data_dir, left_filename)
|
|
test_function = make_case_function(left_filename)
|
|
- function_name = os.path.split(left_filename)[-1].replace('.', '-')
|
|
- test_name = 'test_' + function_name
|
|
+ function_name = os.path.split(left_filename)[-1].replace(".", "-")
|
|
+ test_name = "test_" + function_name
|
|
setattr(test_class, test_name, test_function)
|
|
|
|
|
|
Index: xmldiff-2.4/xmldiff/_diff_match_patch_py2.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/xmldiff/_diff_match_patch_py2.py
|
|
+++ xmldiff-2.4/xmldiff/_diff_match_patch_py2.py
|
|
@@ -1,6 +1,5 @@
|
|
#!/usr/bin/python2.4
|
|
|
|
-from __future__ import division
|
|
|
|
"""Diff Match and Patch
|
|
Copyright 2018 The diff-match-patch Authors.
|
|
@@ -25,7 +24,7 @@ Computes the difference between two text
|
|
Applies the patch onto another text, allowing for errors.
|
|
"""
|
|
|
|
-__author__ = 'fraser@google.com (Neil Fraser)'
|
|
+__author__ = "fraser@google.com (Neil Fraser)"
|
|
|
|
import re
|
|
import sys
|
|
@@ -34,51 +33,51 @@ import urllib
|
|
|
|
|
|
class diff_match_patch:
|
|
- """Class containing the diff, match and patch methods.
|
|
+ """Class containing the diff, match and patch methods.
|
|
|
|
Also contains the behaviour settings.
|
|
"""
|
|
|
|
- def __init__(self):
|
|
- """Inits a diff_match_patch object with default settings.
|
|
+ def __init__(self):
|
|
+ """Inits a diff_match_patch object with default settings.
|
|
Redefine these in your program to override the defaults.
|
|
"""
|
|
|
|
- # Number of seconds to map a diff before giving up (0 for infinity).
|
|
- self.Diff_Timeout = 1.0
|
|
- # Cost of an empty edit operation in terms of edit characters.
|
|
- self.Diff_EditCost = 4
|
|
- # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
|
|
- self.Match_Threshold = 0.5
|
|
- # How far to search for a match (0 = exact location, 1000+ = broad match).
|
|
- # A match this many characters away from the expected location will add
|
|
- # 1.0 to the score (0.0 is a perfect match).
|
|
- self.Match_Distance = 1000
|
|
- # When deleting a large block of text (over ~64 characters), how close do
|
|
- # the contents have to be to match the expected contents. (0.0 = perfection,
|
|
- # 1.0 = very loose). Note that Match_Threshold controls how closely the
|
|
- # end points of a delete need to match.
|
|
- self.Patch_DeleteThreshold = 0.5
|
|
- # Chunk size for context length.
|
|
- self.Patch_Margin = 4
|
|
-
|
|
- # The number of bits in an int.
|
|
- # Python has no maximum, thus to disable patch splitting set to 0.
|
|
- # However to avoid long patches in certain pathological cases, use 32.
|
|
- # Multiple short patches (using native ints) are much faster than long ones.
|
|
- self.Match_MaxBits = 32
|
|
-
|
|
- # DIFF FUNCTIONS
|
|
-
|
|
- # The data structure representing a diff is an array of tuples:
|
|
- # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
|
|
- # which means: delete "Hello", add "Goodbye" and keep " world."
|
|
- DIFF_DELETE = -1
|
|
- DIFF_INSERT = 1
|
|
- DIFF_EQUAL = 0
|
|
+ # Number of seconds to map a diff before giving up (0 for infinity).
|
|
+ self.Diff_Timeout = 1.0
|
|
+ # Cost of an empty edit operation in terms of edit characters.
|
|
+ self.Diff_EditCost = 4
|
|
+ # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
|
|
+ self.Match_Threshold = 0.5
|
|
+ # How far to search for a match (0 = exact location, 1000+ = broad match).
|
|
+ # A match this many characters away from the expected location will add
|
|
+ # 1.0 to the score (0.0 is a perfect match).
|
|
+ self.Match_Distance = 1000
|
|
+ # When deleting a large block of text (over ~64 characters), how close do
|
|
+ # the contents have to be to match the expected contents. (0.0 = perfection,
|
|
+ # 1.0 = very loose). Note that Match_Threshold controls how closely the
|
|
+ # end points of a delete need to match.
|
|
+ self.Patch_DeleteThreshold = 0.5
|
|
+ # Chunk size for context length.
|
|
+ self.Patch_Margin = 4
|
|
+
|
|
+ # The number of bits in an int.
|
|
+ # Python has no maximum, thus to disable patch splitting set to 0.
|
|
+ # However to avoid long patches in certain pathological cases, use 32.
|
|
+ # Multiple short patches (using native ints) are much faster than long ones.
|
|
+ self.Match_MaxBits = 32
|
|
+
|
|
+ # DIFF FUNCTIONS
|
|
+
|
|
+ # The data structure representing a diff is an array of tuples:
|
|
+ # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
|
|
+ # which means: delete "Hello", add "Goodbye" and keep " world."
|
|
+ DIFF_DELETE = -1
|
|
+ DIFF_INSERT = 1
|
|
+ DIFF_EQUAL = 0
|
|
|
|
- def diff_main(self, text1, text2, checklines=True, deadline=None):
|
|
- """Find the differences between two texts. Simplifies the problem by
|
|
+ def diff_main(self, text1, text2, checklines=True, deadline=None):
|
|
+ """Find the differences between two texts. Simplifies the problem by
|
|
stripping any common prefix or suffix off the texts before diffing.
|
|
|
|
Args:
|
|
@@ -93,52 +92,52 @@ class diff_match_patch:
|
|
Returns:
|
|
Array of changes.
|
|
"""
|
|
- # Set a deadline by which time the diff must be complete.
|
|
- if deadline == None:
|
|
- # Unlike in most languages, Python counts time in seconds.
|
|
- if self.Diff_Timeout <= 0:
|
|
- deadline = sys.maxint
|
|
- else:
|
|
- deadline = time.time() + self.Diff_Timeout
|
|
-
|
|
- # Check for null inputs.
|
|
- if text1 == None or text2 == None:
|
|
- raise ValueError("Null inputs. (diff_main)")
|
|
-
|
|
- # Check for equality (speedup).
|
|
- if text1 == text2:
|
|
- if text1:
|
|
- return [(self.DIFF_EQUAL, text1)]
|
|
- return []
|
|
-
|
|
- # Trim off common prefix (speedup).
|
|
- commonlength = self.diff_commonPrefix(text1, text2)
|
|
- commonprefix = text1[:commonlength]
|
|
- text1 = text1[commonlength:]
|
|
- text2 = text2[commonlength:]
|
|
-
|
|
- # Trim off common suffix (speedup).
|
|
- commonlength = self.diff_commonSuffix(text1, text2)
|
|
- if commonlength == 0:
|
|
- commonsuffix = ''
|
|
- else:
|
|
- commonsuffix = text1[-commonlength:]
|
|
- text1 = text1[:-commonlength]
|
|
- text2 = text2[:-commonlength]
|
|
-
|
|
- # Compute the diff on the middle block.
|
|
- diffs = self.diff_compute(text1, text2, checklines, deadline)
|
|
-
|
|
- # Restore the prefix and suffix.
|
|
- if commonprefix:
|
|
- diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
|
|
- if commonsuffix:
|
|
- diffs.append((self.DIFF_EQUAL, commonsuffix))
|
|
- self.diff_cleanupMerge(diffs)
|
|
- return diffs
|
|
+ # Set a deadline by which time the diff must be complete.
|
|
+ if deadline == None:
|
|
+ # Unlike in most languages, Python counts time in seconds.
|
|
+ if self.Diff_Timeout <= 0:
|
|
+ deadline = sys.maxint
|
|
+ else:
|
|
+ deadline = time.time() + self.Diff_Timeout
|
|
+
|
|
+ # Check for null inputs.
|
|
+ if text1 == None or text2 == None:
|
|
+ raise ValueError("Null inputs. (diff_main)")
|
|
+
|
|
+ # Check for equality (speedup).
|
|
+ if text1 == text2:
|
|
+ if text1:
|
|
+ return [(self.DIFF_EQUAL, text1)]
|
|
+ return []
|
|
+
|
|
+ # Trim off common prefix (speedup).
|
|
+ commonlength = self.diff_commonPrefix(text1, text2)
|
|
+ commonprefix = text1[:commonlength]
|
|
+ text1 = text1[commonlength:]
|
|
+ text2 = text2[commonlength:]
|
|
+
|
|
+ # Trim off common suffix (speedup).
|
|
+ commonlength = self.diff_commonSuffix(text1, text2)
|
|
+ if commonlength == 0:
|
|
+ commonsuffix = ""
|
|
+ else:
|
|
+ commonsuffix = text1[-commonlength:]
|
|
+ text1 = text1[:-commonlength]
|
|
+ text2 = text2[:-commonlength]
|
|
+
|
|
+ # Compute the diff on the middle block.
|
|
+ diffs = self.diff_compute(text1, text2, checklines, deadline)
|
|
+
|
|
+ # Restore the prefix and suffix.
|
|
+ if commonprefix:
|
|
+ diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
|
|
+ if commonsuffix:
|
|
+ diffs.append((self.DIFF_EQUAL, commonsuffix))
|
|
+ self.diff_cleanupMerge(diffs)
|
|
+ return diffs
|
|
|
|
- def diff_compute(self, text1, text2, checklines, deadline):
|
|
- """Find the differences between two texts. Assumes that the texts do not
|
|
+ def diff_compute(self, text1, text2, checklines, deadline):
|
|
+ """Find the differences between two texts. Assumes that the texts do not
|
|
have any common prefix or suffix.
|
|
|
|
Args:
|
|
@@ -152,52 +151,55 @@ class diff_match_patch:
|
|
Returns:
|
|
Array of changes.
|
|
"""
|
|
- if not text1:
|
|
- # Just add some text (speedup).
|
|
- return [(self.DIFF_INSERT, text2)]
|
|
-
|
|
- if not text2:
|
|
- # Just delete some text (speedup).
|
|
- return [(self.DIFF_DELETE, text1)]
|
|
-
|
|
- if len(text1) > len(text2):
|
|
- (longtext, shorttext) = (text1, text2)
|
|
- else:
|
|
- (shorttext, longtext) = (text1, text2)
|
|
- i = longtext.find(shorttext)
|
|
- if i != -1:
|
|
- # Shorter text is inside the longer text (speedup).
|
|
- diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
|
|
- (self.DIFF_INSERT, longtext[i + len(shorttext):])]
|
|
- # Swap insertions for deletions if diff is reversed.
|
|
- if len(text1) > len(text2):
|
|
- diffs[0] = (self.DIFF_DELETE, diffs[0][1])
|
|
- diffs[2] = (self.DIFF_DELETE, diffs[2][1])
|
|
- return diffs
|
|
-
|
|
- if len(shorttext) == 1:
|
|
- # Single character string.
|
|
- # After the previous speedup, the character can't be an equality.
|
|
- return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
|
|
-
|
|
- # Check to see if the problem can be split in two.
|
|
- hm = self.diff_halfMatch(text1, text2)
|
|
- if hm:
|
|
- # A half-match was found, sort out the return data.
|
|
- (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
|
|
- # Send both pairs off for separate processing.
|
|
- diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)
|
|
- diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)
|
|
- # Merge the results.
|
|
- return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
|
|
+ if not text1:
|
|
+ # Just add some text (speedup).
|
|
+ return [(self.DIFF_INSERT, text2)]
|
|
+
|
|
+ if not text2:
|
|
+ # Just delete some text (speedup).
|
|
+ return [(self.DIFF_DELETE, text1)]
|
|
|
|
- if checklines and len(text1) > 100 and len(text2) > 100:
|
|
- return self.diff_lineMode(text1, text2, deadline)
|
|
+ if len(text1) > len(text2):
|
|
+ (longtext, shorttext) = (text1, text2)
|
|
+ else:
|
|
+ (shorttext, longtext) = (text1, text2)
|
|
+ i = longtext.find(shorttext)
|
|
+ if i != -1:
|
|
+ # Shorter text is inside the longer text (speedup).
|
|
+ diffs = [
|
|
+ (self.DIFF_INSERT, longtext[:i]),
|
|
+ (self.DIFF_EQUAL, shorttext),
|
|
+ (self.DIFF_INSERT, longtext[i + len(shorttext) :]),
|
|
+ ]
|
|
+ # Swap insertions for deletions if diff is reversed.
|
|
+ if len(text1) > len(text2):
|
|
+ diffs[0] = (self.DIFF_DELETE, diffs[0][1])
|
|
+ diffs[2] = (self.DIFF_DELETE, diffs[2][1])
|
|
+ return diffs
|
|
+
|
|
+ if len(shorttext) == 1:
|
|
+ # Single character string.
|
|
+ # After the previous speedup, the character can't be an equality.
|
|
+ return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
|
|
+
|
|
+ # Check to see if the problem can be split in two.
|
|
+ hm = self.diff_halfMatch(text1, text2)
|
|
+ if hm:
|
|
+ # A half-match was found, sort out the return data.
|
|
+ (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
|
|
+ # Send both pairs off for separate processing.
|
|
+ diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)
|
|
+ diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)
|
|
+ # Merge the results.
|
|
+ return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
|
|
+
|
|
+ if checklines and len(text1) > 100 and len(text2) > 100:
|
|
+ return self.diff_lineMode(text1, text2, deadline)
|
|
|
|
- return self.diff_bisect(text1, text2, deadline)
|
|
+ return self.diff_bisect(text1, text2, deadline)
|
|
|
|
- def diff_lineMode(self, text1, text2, deadline):
|
|
- """Do a quick line-level diff on both strings, then rediff the parts for
|
|
+ def diff_lineMode(self, text1, text2, deadline):
|
|
+ """Do a quick line-level diff on both strings, then rediff the parts for
|
|
greater accuracy.
|
|
This speedup can produce non-minimal diffs.
|
|
|
|
@@ -210,51 +212,51 @@ class diff_match_patch:
|
|
Array of changes.
|
|
"""
|
|
|
|
- # Scan the text on a line-by-line basis first.
|
|
- (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
|
|
+ # Scan the text on a line-by-line basis first.
|
|
+ (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
|
|
|
|
- diffs = self.diff_main(text1, text2, False, deadline)
|
|
+ diffs = self.diff_main(text1, text2, False, deadline)
|
|
|
|
- # Convert the diff back to original text.
|
|
- self.diff_charsToLines(diffs, linearray)
|
|
- # Eliminate freak matches (e.g. blank lines)
|
|
- self.diff_cleanupSemantic(diffs)
|
|
-
|
|
- # Rediff any replacement blocks, this time character-by-character.
|
|
- # Add a dummy entry at the end.
|
|
- diffs.append((self.DIFF_EQUAL, ''))
|
|
- pointer = 0
|
|
- count_delete = 0
|
|
- count_insert = 0
|
|
- text_delete = ''
|
|
- text_insert = ''
|
|
- while pointer < len(diffs):
|
|
- if diffs[pointer][0] == self.DIFF_INSERT:
|
|
- count_insert += 1
|
|
- text_insert += diffs[pointer][1]
|
|
- elif diffs[pointer][0] == self.DIFF_DELETE:
|
|
- count_delete += 1
|
|
- text_delete += diffs[pointer][1]
|
|
- elif diffs[pointer][0] == self.DIFF_EQUAL:
|
|
- # Upon reaching an equality, check for prior redundancies.
|
|
- if count_delete >= 1 and count_insert >= 1:
|
|
- # Delete the offending records and add the merged ones.
|
|
- subDiff = self.diff_main(text_delete, text_insert, False, deadline)
|
|
- diffs[pointer - count_delete - count_insert : pointer] = subDiff
|
|
- pointer = pointer - count_delete - count_insert + len(subDiff)
|
|
- count_insert = 0
|
|
+ # Convert the diff back to original text.
|
|
+ self.diff_charsToLines(diffs, linearray)
|
|
+ # Eliminate freak matches (e.g. blank lines)
|
|
+ self.diff_cleanupSemantic(diffs)
|
|
+
|
|
+ # Rediff any replacement blocks, this time character-by-character.
|
|
+ # Add a dummy entry at the end.
|
|
+ diffs.append((self.DIFF_EQUAL, ""))
|
|
+ pointer = 0
|
|
count_delete = 0
|
|
- text_delete = ''
|
|
- text_insert = ''
|
|
+ count_insert = 0
|
|
+ text_delete = ""
|
|
+ text_insert = ""
|
|
+ while pointer < len(diffs):
|
|
+ if diffs[pointer][0] == self.DIFF_INSERT:
|
|
+ count_insert += 1
|
|
+ text_insert += diffs[pointer][1]
|
|
+ elif diffs[pointer][0] == self.DIFF_DELETE:
|
|
+ count_delete += 1
|
|
+ text_delete += diffs[pointer][1]
|
|
+ elif diffs[pointer][0] == self.DIFF_EQUAL:
|
|
+ # Upon reaching an equality, check for prior redundancies.
|
|
+ if count_delete >= 1 and count_insert >= 1:
|
|
+ # Delete the offending records and add the merged ones.
|
|
+ subDiff = self.diff_main(text_delete, text_insert, False, deadline)
|
|
+ diffs[pointer - count_delete - count_insert : pointer] = subDiff
|
|
+ pointer = pointer - count_delete - count_insert + len(subDiff)
|
|
+ count_insert = 0
|
|
+ count_delete = 0
|
|
+ text_delete = ""
|
|
+ text_insert = ""
|
|
|
|
- pointer += 1
|
|
+ pointer += 1
|
|
|
|
- diffs.pop() # Remove the dummy entry at the end.
|
|
+ diffs.pop() # Remove the dummy entry at the end.
|
|
|
|
- return diffs
|
|
+ return diffs
|
|
|
|
- def diff_bisect(self, text1, text2, deadline):
|
|
- """Find the 'middle snake' of a diff, split the problem in two
|
|
+ def diff_bisect(self, text1, text2, deadline):
|
|
+ """Find the 'middle snake' of a diff, split the problem in two
|
|
and return the recursively constructed diff.
|
|
See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
|
|
|
|
@@ -267,96 +269,98 @@ class diff_match_patch:
|
|
Array of diff tuples.
|
|
"""
|
|
|
|
- # Cache the text lengths to prevent multiple calls.
|
|
- text1_length = len(text1)
|
|
- text2_length = len(text2)
|
|
- max_d = (text1_length + text2_length + 1) // 2
|
|
- v_offset = max_d
|
|
- v_length = 2 * max_d
|
|
- v1 = [-1] * v_length
|
|
- v1[v_offset + 1] = 0
|
|
- v2 = v1[:]
|
|
- delta = text1_length - text2_length
|
|
- # If the total number of characters is odd, then the front path will
|
|
- # collide with the reverse path.
|
|
- front = (delta % 2 != 0)
|
|
- # Offsets for start and end of k loop.
|
|
- # Prevents mapping of space beyond the grid.
|
|
- k1start = 0
|
|
- k1end = 0
|
|
- k2start = 0
|
|
- k2end = 0
|
|
- for d in xrange(max_d):
|
|
- # Bail out if deadline is reached.
|
|
- if time.time() > deadline:
|
|
- break
|
|
-
|
|
- # Walk the front path one step.
|
|
- for k1 in xrange(-d + k1start, d + 1 - k1end, 2):
|
|
- k1_offset = v_offset + k1
|
|
- if k1 == -d or (k1 != d and
|
|
- v1[k1_offset - 1] < v1[k1_offset + 1]):
|
|
- x1 = v1[k1_offset + 1]
|
|
- else:
|
|
- x1 = v1[k1_offset - 1] + 1
|
|
- y1 = x1 - k1
|
|
- while (x1 < text1_length and y1 < text2_length and
|
|
- text1[x1] == text2[y1]):
|
|
- x1 += 1
|
|
- y1 += 1
|
|
- v1[k1_offset] = x1
|
|
- if x1 > text1_length:
|
|
- # Ran off the right of the graph.
|
|
- k1end += 2
|
|
- elif y1 > text2_length:
|
|
- # Ran off the bottom of the graph.
|
|
- k1start += 2
|
|
- elif front:
|
|
- k2_offset = v_offset + delta - k1
|
|
- if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:
|
|
- # Mirror x2 onto top-left coordinate system.
|
|
- x2 = text1_length - v2[k2_offset]
|
|
- if x1 >= x2:
|
|
- # Overlap detected.
|
|
- return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
|
|
-
|
|
- # Walk the reverse path one step.
|
|
- for k2 in xrange(-d + k2start, d + 1 - k2end, 2):
|
|
- k2_offset = v_offset + k2
|
|
- if k2 == -d or (k2 != d and
|
|
- v2[k2_offset - 1] < v2[k2_offset + 1]):
|
|
- x2 = v2[k2_offset + 1]
|
|
- else:
|
|
- x2 = v2[k2_offset - 1] + 1
|
|
- y2 = x2 - k2
|
|
- while (x2 < text1_length and y2 < text2_length and
|
|
- text1[-x2 - 1] == text2[-y2 - 1]):
|
|
- x2 += 1
|
|
- y2 += 1
|
|
- v2[k2_offset] = x2
|
|
- if x2 > text1_length:
|
|
- # Ran off the left of the graph.
|
|
- k2end += 2
|
|
- elif y2 > text2_length:
|
|
- # Ran off the top of the graph.
|
|
- k2start += 2
|
|
- elif not front:
|
|
- k1_offset = v_offset + delta - k2
|
|
- if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:
|
|
- x1 = v1[k1_offset]
|
|
- y1 = v_offset + x1 - k1_offset
|
|
- # Mirror x2 onto top-left coordinate system.
|
|
- x2 = text1_length - x2
|
|
- if x1 >= x2:
|
|
- # Overlap detected.
|
|
- return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
|
|
-
|
|
- # Diff took too long and hit the deadline or
|
|
- # number of diffs equals number of characters, no commonality at all.
|
|
- return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
|
|
+ # Cache the text lengths to prevent multiple calls.
|
|
+ text1_length = len(text1)
|
|
+ text2_length = len(text2)
|
|
+ max_d = (text1_length + text2_length + 1) // 2
|
|
+ v_offset = max_d
|
|
+ v_length = 2 * max_d
|
|
+ v1 = [-1] * v_length
|
|
+ v1[v_offset + 1] = 0
|
|
+ v2 = v1[:]
|
|
+ delta = text1_length - text2_length
|
|
+ # If the total number of characters is odd, then the front path will
|
|
+ # collide with the reverse path.
|
|
+ front = delta % 2 != 0
|
|
+ # Offsets for start and end of k loop.
|
|
+ # Prevents mapping of space beyond the grid.
|
|
+ k1start = 0
|
|
+ k1end = 0
|
|
+ k2start = 0
|
|
+ k2end = 0
|
|
+ for d in xrange(max_d):
|
|
+ # Bail out if deadline is reached.
|
|
+ if time.time() > deadline:
|
|
+ break
|
|
+
|
|
+ # Walk the front path one step.
|
|
+ for k1 in xrange(-d + k1start, d + 1 - k1end, 2):
|
|
+ k1_offset = v_offset + k1
|
|
+ if k1 == -d or (k1 != d and v1[k1_offset - 1] < v1[k1_offset + 1]):
|
|
+ x1 = v1[k1_offset + 1]
|
|
+ else:
|
|
+ x1 = v1[k1_offset - 1] + 1
|
|
+ y1 = x1 - k1
|
|
+ while (
|
|
+ x1 < text1_length and y1 < text2_length and text1[x1] == text2[y1]
|
|
+ ):
|
|
+ x1 += 1
|
|
+ y1 += 1
|
|
+ v1[k1_offset] = x1
|
|
+ if x1 > text1_length:
|
|
+ # Ran off the right of the graph.
|
|
+ k1end += 2
|
|
+ elif y1 > text2_length:
|
|
+ # Ran off the bottom of the graph.
|
|
+ k1start += 2
|
|
+ elif front:
|
|
+ k2_offset = v_offset + delta - k1
|
|
+ if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:
|
|
+ # Mirror x2 onto top-left coordinate system.
|
|
+ x2 = text1_length - v2[k2_offset]
|
|
+ if x1 >= x2:
|
|
+ # Overlap detected.
|
|
+ return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
|
|
+
|
|
+ # Walk the reverse path one step.
|
|
+ for k2 in xrange(-d + k2start, d + 1 - k2end, 2):
|
|
+ k2_offset = v_offset + k2
|
|
+ if k2 == -d or (k2 != d and v2[k2_offset - 1] < v2[k2_offset + 1]):
|
|
+ x2 = v2[k2_offset + 1]
|
|
+ else:
|
|
+ x2 = v2[k2_offset - 1] + 1
|
|
+ y2 = x2 - k2
|
|
+ while (
|
|
+ x2 < text1_length
|
|
+ and y2 < text2_length
|
|
+ and text1[-x2 - 1] == text2[-y2 - 1]
|
|
+ ):
|
|
+ x2 += 1
|
|
+ y2 += 1
|
|
+ v2[k2_offset] = x2
|
|
+ if x2 > text1_length:
|
|
+ # Ran off the left of the graph.
|
|
+ k2end += 2
|
|
+ elif y2 > text2_length:
|
|
+ # Ran off the top of the graph.
|
|
+ k2start += 2
|
|
+ elif not front:
|
|
+ k1_offset = v_offset + delta - k2
|
|
+ if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:
|
|
+ x1 = v1[k1_offset]
|
|
+ y1 = v_offset + x1 - k1_offset
|
|
+ # Mirror x2 onto top-left coordinate system.
|
|
+ x2 = text1_length - x2
|
|
+ if x1 >= x2:
|
|
+ # Overlap detected.
|
|
+ return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
|
|
+
|
|
+ # Diff took too long and hit the deadline or
|
|
+ # number of diffs equals number of characters, no commonality at all.
|
|
+ return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
|
|
|
|
- def diff_bisectSplit(self, text1, text2, x, y, deadline):
|
|
- """Given the location of the 'middle snake', split the diff in two parts
|
|
+ def diff_bisectSplit(self, text1, text2, x, y, deadline):
|
|
+ """Given the location of the 'middle snake', split the diff in two parts
|
|
and recurse.
|
|
|
|
Args:
|
|
@@ -369,19 +373,19 @@ class diff_match_patch:
|
|
Returns:
|
|
Array of diff tuples.
|
|
"""
|
|
- text1a = text1[:x]
|
|
- text2a = text2[:y]
|
|
- text1b = text1[x:]
|
|
- text2b = text2[y:]
|
|
-
|
|
- # Compute both diffs serially.
|
|
- diffs = self.diff_main(text1a, text2a, False, deadline)
|
|
- diffsb = self.diff_main(text1b, text2b, False, deadline)
|
|
+ text1a = text1[:x]
|
|
+ text2a = text2[:y]
|
|
+ text1b = text1[x:]
|
|
+ text2b = text2[y:]
|
|
+
|
|
+ # Compute both diffs serially.
|
|
+ diffs = self.diff_main(text1a, text2a, False, deadline)
|
|
+ diffsb = self.diff_main(text1b, text2b, False, deadline)
|
|
|
|
- return diffs + diffsb
|
|
+ return diffs + diffsb
|
|
|
|
- def diff_linesToChars(self, text1, text2):
|
|
- """Split two texts into an array of strings. Reduce the texts to a string
|
|
+ def diff_linesToChars(self, text1, text2):
|
|
+ """Split two texts into an array of strings. Reduce the texts to a string
|
|
of hashes where each Unicode character represents one line.
|
|
|
|
Args:
|
|
@@ -393,15 +397,15 @@ class diff_match_patch:
|
|
the array of unique strings. The zeroth element of the array of unique
|
|
strings is intentionally blank.
|
|
"""
|
|
- lineArray = [] # e.g. lineArray[4] == "Hello\n"
|
|
- lineHash = {} # e.g. lineHash["Hello\n"] == 4
|
|
+ lineArray = [] # e.g. lineArray[4] == "Hello\n"
|
|
+ lineHash = {} # e.g. lineHash["Hello\n"] == 4
|
|
|
|
- # "\x00" is a valid character, but various debuggers don't like it.
|
|
- # So we'll insert a junk entry to avoid generating a null character.
|
|
- lineArray.append('')
|
|
+ # "\x00" is a valid character, but various debuggers don't like it.
|
|
+ # So we'll insert a junk entry to avoid generating a null character.
|
|
+ lineArray.append("")
|
|
|
|
- def diff_linesToCharsMunge(text):
|
|
- """Split a text into an array of strings. Reduce the texts to a string
|
|
+ def diff_linesToCharsMunge(text):
|
|
+ """Split a text into an array of strings. Reduce the texts to a string
|
|
of hashes where each Unicode character represents one line.
|
|
Modifies linearray and linehash through being a closure.
|
|
|
|
@@ -411,54 +415,54 @@ class diff_match_patch:
|
|
Returns:
|
|
Encoded string.
|
|
"""
|
|
- chars = []
|
|
- # Walk the text, pulling out a substring for each line.
|
|
- # text.split('\n') would would temporarily double our memory footprint.
|
|
- # Modifying text would create many large strings to garbage collect.
|
|
- lineStart = 0
|
|
- lineEnd = -1
|
|
- while lineEnd < len(text) - 1:
|
|
- lineEnd = text.find('\n', lineStart)
|
|
- if lineEnd == -1:
|
|
- lineEnd = len(text) - 1
|
|
- line = text[lineStart:lineEnd + 1]
|
|
-
|
|
- if line in lineHash:
|
|
- chars.append(unichr(lineHash[line]))
|
|
- else:
|
|
- if len(lineArray) == maxLines:
|
|
- # Bail out at 65535 because unichr(65536) throws.
|
|
- line = text[lineStart:]
|
|
- lineEnd = len(text)
|
|
- lineArray.append(line)
|
|
- lineHash[line] = len(lineArray) - 1
|
|
- chars.append(unichr(len(lineArray) - 1))
|
|
- lineStart = lineEnd + 1
|
|
- return "".join(chars)
|
|
-
|
|
- # Allocate 2/3rds of the space for text1, the rest for text2.
|
|
- maxLines = 40000
|
|
- chars1 = diff_linesToCharsMunge(text1)
|
|
- maxLines = 65535
|
|
- chars2 = diff_linesToCharsMunge(text2)
|
|
- return (chars1, chars2, lineArray)
|
|
+ chars = []
|
|
+ # Walk the text, pulling out a substring for each line.
|
|
+ # text.split('\n') would would temporarily double our memory footprint.
|
|
+ # Modifying text would create many large strings to garbage collect.
|
|
+ lineStart = 0
|
|
+ lineEnd = -1
|
|
+ while lineEnd < len(text) - 1:
|
|
+ lineEnd = text.find("\n", lineStart)
|
|
+ if lineEnd == -1:
|
|
+ lineEnd = len(text) - 1
|
|
+ line = text[lineStart : lineEnd + 1]
|
|
+
|
|
+ if line in lineHash:
|
|
+ chars.append(unichr(lineHash[line]))
|
|
+ else:
|
|
+ if len(lineArray) == maxLines:
|
|
+ # Bail out at 65535 because unichr(65536) throws.
|
|
+ line = text[lineStart:]
|
|
+ lineEnd = len(text)
|
|
+ lineArray.append(line)
|
|
+ lineHash[line] = len(lineArray) - 1
|
|
+ chars.append(unichr(len(lineArray) - 1))
|
|
+ lineStart = lineEnd + 1
|
|
+ return "".join(chars)
|
|
+
|
|
+ # Allocate 2/3rds of the space for text1, the rest for text2.
|
|
+ maxLines = 40000
|
|
+ chars1 = diff_linesToCharsMunge(text1)
|
|
+ maxLines = 65535
|
|
+ chars2 = diff_linesToCharsMunge(text2)
|
|
+ return (chars1, chars2, lineArray)
|
|
|
|
- def diff_charsToLines(self, diffs, lineArray):
|
|
- """Rehydrate the text in a diff from a string of line hashes to real lines
|
|
+ def diff_charsToLines(self, diffs, lineArray):
|
|
+ """Rehydrate the text in a diff from a string of line hashes to real lines
|
|
of text.
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
lineArray: Array of unique strings.
|
|
"""
|
|
- for i in xrange(len(diffs)):
|
|
- text = []
|
|
- for char in diffs[i][1]:
|
|
- text.append(lineArray[ord(char)])
|
|
- diffs[i] = (diffs[i][0], "".join(text))
|
|
+ for i in xrange(len(diffs)):
|
|
+ text = []
|
|
+ for char in diffs[i][1]:
|
|
+ text.append(lineArray[ord(char)])
|
|
+ diffs[i] = (diffs[i][0], "".join(text))
|
|
|
|
- def diff_commonPrefix(self, text1, text2):
|
|
- """Determine the common prefix of two strings.
|
|
+ def diff_commonPrefix(self, text1, text2):
|
|
+ """Determine the common prefix of two strings.
|
|
|
|
Args:
|
|
text1: First string.
|
|
@@ -467,26 +471,26 @@ class diff_match_patch:
|
|
Returns:
|
|
The number of characters common to the start of each string.
|
|
"""
|
|
- # Quick check for common null cases.
|
|
- if not text1 or not text2 or text1[0] != text2[0]:
|
|
- return 0
|
|
- # Binary search.
|
|
- # Performance analysis: https://neil.fraser.name/news/2007/10/09/
|
|
- pointermin = 0
|
|
- pointermax = min(len(text1), len(text2))
|
|
- pointermid = pointermax
|
|
- pointerstart = 0
|
|
- while pointermin < pointermid:
|
|
- if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
|
|
- pointermin = pointermid
|
|
- pointerstart = pointermin
|
|
- else:
|
|
- pointermax = pointermid
|
|
- pointermid = (pointermax - pointermin) // 2 + pointermin
|
|
- return pointermid
|
|
+ # Quick check for common null cases.
|
|
+ if not text1 or not text2 or text1[0] != text2[0]:
|
|
+ return 0
|
|
+ # Binary search.
|
|
+ # Performance analysis: https://neil.fraser.name/news/2007/10/09/
|
|
+ pointermin = 0
|
|
+ pointermax = min(len(text1), len(text2))
|
|
+ pointermid = pointermax
|
|
+ pointerstart = 0
|
|
+ while pointermin < pointermid:
|
|
+ if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
|
|
+ pointermin = pointermid
|
|
+ pointerstart = pointermin
|
|
+ else:
|
|
+ pointermax = pointermid
|
|
+ pointermid = (pointermax - pointermin) // 2 + pointermin
|
|
+ return pointermid
|
|
|
|
- def diff_commonSuffix(self, text1, text2):
|
|
- """Determine the common suffix of two strings.
|
|
+ def diff_commonSuffix(self, text1, text2):
|
|
+ """Determine the common suffix of two strings.
|
|
|
|
Args:
|
|
text1: First string.
|
|
@@ -495,27 +499,29 @@ class diff_match_patch:
|
|
Returns:
|
|
The number of characters common to the end of each string.
|
|
"""
|
|
- # Quick check for common null cases.
|
|
- if not text1 or not text2 or text1[-1] != text2[-1]:
|
|
- return 0
|
|
- # Binary search.
|
|
- # Performance analysis: https://neil.fraser.name/news/2007/10/09/
|
|
- pointermin = 0
|
|
- pointermax = min(len(text1), len(text2))
|
|
- pointermid = pointermax
|
|
- pointerend = 0
|
|
- while pointermin < pointermid:
|
|
- if (text1[-pointermid:len(text1) - pointerend] ==
|
|
- text2[-pointermid:len(text2) - pointerend]):
|
|
- pointermin = pointermid
|
|
- pointerend = pointermin
|
|
- else:
|
|
- pointermax = pointermid
|
|
- pointermid = (pointermax - pointermin) // 2 + pointermin
|
|
- return pointermid
|
|
+ # Quick check for common null cases.
|
|
+ if not text1 or not text2 or text1[-1] != text2[-1]:
|
|
+ return 0
|
|
+ # Binary search.
|
|
+ # Performance analysis: https://neil.fraser.name/news/2007/10/09/
|
|
+ pointermin = 0
|
|
+ pointermax = min(len(text1), len(text2))
|
|
+ pointermid = pointermax
|
|
+ pointerend = 0
|
|
+ while pointermin < pointermid:
|
|
+ if (
|
|
+ text1[-pointermid : len(text1) - pointerend]
|
|
+ == text2[-pointermid : len(text2) - pointerend]
|
|
+ ):
|
|
+ pointermin = pointermid
|
|
+ pointerend = pointermin
|
|
+ else:
|
|
+ pointermax = pointermid
|
|
+ pointermid = (pointermax - pointermin) // 2 + pointermin
|
|
+ return pointermid
|
|
|
|
- def diff_commonOverlap(self, text1, text2):
|
|
- """Determine if the suffix of one string is the prefix of another.
|
|
+ def diff_commonOverlap(self, text1, text2):
|
|
+ """Determine if the suffix of one string is the prefix of another.
|
|
|
|
Args:
|
|
text1 First string.
|
|
@@ -525,39 +531,39 @@ class diff_match_patch:
|
|
The number of characters common to the end of the first
|
|
string and the start of the second string.
|
|
"""
|
|
- # Cache the text lengths to prevent multiple calls.
|
|
- text1_length = len(text1)
|
|
- text2_length = len(text2)
|
|
- # Eliminate the null case.
|
|
- if text1_length == 0 or text2_length == 0:
|
|
- return 0
|
|
- # Truncate the longer string.
|
|
- if text1_length > text2_length:
|
|
- text1 = text1[-text2_length:]
|
|
- elif text1_length < text2_length:
|
|
- text2 = text2[:text1_length]
|
|
- text_length = min(text1_length, text2_length)
|
|
- # Quick check for the worst case.
|
|
- if text1 == text2:
|
|
- return text_length
|
|
-
|
|
- # Start by looking for a single character match
|
|
- # and increase length until no match is found.
|
|
- # Performance analysis: https://neil.fraser.name/news/2010/11/04/
|
|
- best = 0
|
|
- length = 1
|
|
- while True:
|
|
- pattern = text1[-length:]
|
|
- found = text2.find(pattern)
|
|
- if found == -1:
|
|
- return best
|
|
- length += found
|
|
- if found == 0 or text1[-length:] == text2[:length]:
|
|
- best = length
|
|
- length += 1
|
|
+ # Cache the text lengths to prevent multiple calls.
|
|
+ text1_length = len(text1)
|
|
+ text2_length = len(text2)
|
|
+ # Eliminate the null case.
|
|
+ if text1_length == 0 or text2_length == 0:
|
|
+ return 0
|
|
+ # Truncate the longer string.
|
|
+ if text1_length > text2_length:
|
|
+ text1 = text1[-text2_length:]
|
|
+ elif text1_length < text2_length:
|
|
+ text2 = text2[:text1_length]
|
|
+ text_length = min(text1_length, text2_length)
|
|
+ # Quick check for the worst case.
|
|
+ if text1 == text2:
|
|
+ return text_length
|
|
+
|
|
+ # Start by looking for a single character match
|
|
+ # and increase length until no match is found.
|
|
+ # Performance analysis: https://neil.fraser.name/news/2010/11/04/
|
|
+ best = 0
|
|
+ length = 1
|
|
+ while True:
|
|
+ pattern = text1[-length:]
|
|
+ found = text2.find(pattern)
|
|
+ if found == -1:
|
|
+ return best
|
|
+ length += found
|
|
+ if found == 0 or text1[-length:] == text2[:length]:
|
|
+ best = length
|
|
+ length += 1
|
|
|
|
- def diff_halfMatch(self, text1, text2):
|
|
- """Do the two texts share a substring which is at least half the length of
|
|
+ def diff_halfMatch(self, text1, text2):
|
|
+ """Do the two texts share a substring which is at least half the length of
|
|
the longer text?
|
|
This speedup can produce non-minimal diffs.
|
|
|
|
@@ -570,18 +576,18 @@ class diff_match_patch:
|
|
the prefix of text2, the suffix of text2 and the common middle. Or None
|
|
if there was no match.
|
|
"""
|
|
- if self.Diff_Timeout <= 0:
|
|
- # Don't risk returning a non-optimal diff if we have unlimited time.
|
|
- return None
|
|
- if len(text1) > len(text2):
|
|
- (longtext, shorttext) = (text1, text2)
|
|
- else:
|
|
- (shorttext, longtext) = (text1, text2)
|
|
- if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):
|
|
- return None # Pointless.
|
|
+ if self.Diff_Timeout <= 0:
|
|
+ # Don't risk returning a non-optimal diff if we have unlimited time.
|
|
+ return None
|
|
+ if len(text1) > len(text2):
|
|
+ (longtext, shorttext) = (text1, text2)
|
|
+ else:
|
|
+ (shorttext, longtext) = (text1, text2)
|
|
+ if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):
|
|
+ return None # Pointless.
|
|
|
|
- def diff_halfMatchI(longtext, shorttext, i):
|
|
- """Does a substring of shorttext exist within longtext such that the
|
|
+ def diff_halfMatchI(longtext, shorttext, i):
|
|
+ """Does a substring of shorttext exist within longtext such that the
|
|
substring is at least half the length of longtext?
|
|
Closure, but does not reference any external variables.
|
|
|
|
@@ -595,148 +601,181 @@ class diff_match_patch:
|
|
longtext, the prefix of shorttext, the suffix of shorttext and the
|
|
common middle. Or None if there was no match.
|
|
"""
|
|
- seed = longtext[i:i + len(longtext) // 4]
|
|
- best_common = ''
|
|
- j = shorttext.find(seed)
|
|
- while j != -1:
|
|
- prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
|
|
- suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
|
|
- if len(best_common) < suffixLength + prefixLength:
|
|
- best_common = (shorttext[j - suffixLength:j] +
|
|
- shorttext[j:j + prefixLength])
|
|
- best_longtext_a = longtext[:i - suffixLength]
|
|
- best_longtext_b = longtext[i + prefixLength:]
|
|
- best_shorttext_a = shorttext[:j - suffixLength]
|
|
- best_shorttext_b = shorttext[j + prefixLength:]
|
|
- j = shorttext.find(seed, j + 1)
|
|
-
|
|
- if len(best_common) * 2 >= len(longtext):
|
|
- return (best_longtext_a, best_longtext_b,
|
|
- best_shorttext_a, best_shorttext_b, best_common)
|
|
- else:
|
|
- return None
|
|
-
|
|
- # First check if the second quarter is the seed for a half-match.
|
|
- hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)
|
|
- # Check again based on the third quarter.
|
|
- hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)
|
|
- if not hm1 and not hm2:
|
|
- return None
|
|
- elif not hm2:
|
|
- hm = hm1
|
|
- elif not hm1:
|
|
- hm = hm2
|
|
- else:
|
|
- # Both matched. Select the longest.
|
|
- if len(hm1[4]) > len(hm2[4]):
|
|
- hm = hm1
|
|
- else:
|
|
- hm = hm2
|
|
-
|
|
- # A half-match was found, sort out the return data.
|
|
- if len(text1) > len(text2):
|
|
- (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
|
|
- else:
|
|
- (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
|
|
- return (text1_a, text1_b, text2_a, text2_b, mid_common)
|
|
+ seed = longtext[i : i + len(longtext) // 4]
|
|
+ best_common = ""
|
|
+ j = shorttext.find(seed)
|
|
+ while j != -1:
|
|
+ prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
|
|
+ suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
|
|
+ if len(best_common) < suffixLength + prefixLength:
|
|
+ best_common = (
|
|
+ shorttext[j - suffixLength : j]
|
|
+ + shorttext[j : j + prefixLength]
|
|
+ )
|
|
+ best_longtext_a = longtext[: i - suffixLength]
|
|
+ best_longtext_b = longtext[i + prefixLength :]
|
|
+ best_shorttext_a = shorttext[: j - suffixLength]
|
|
+ best_shorttext_b = shorttext[j + prefixLength :]
|
|
+ j = shorttext.find(seed, j + 1)
|
|
+
|
|
+ if len(best_common) * 2 >= len(longtext):
|
|
+ return (
|
|
+ best_longtext_a,
|
|
+ best_longtext_b,
|
|
+ best_shorttext_a,
|
|
+ best_shorttext_b,
|
|
+ best_common,
|
|
+ )
|
|
+ else:
|
|
+ return None
|
|
+
|
|
+ # First check if the second quarter is the seed for a half-match.
|
|
+ hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)
|
|
+ # Check again based on the third quarter.
|
|
+ hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)
|
|
+ if not hm1 and not hm2:
|
|
+ return None
|
|
+ elif not hm2:
|
|
+ hm = hm1
|
|
+ elif not hm1:
|
|
+ hm = hm2
|
|
+ else:
|
|
+ # Both matched. Select the longest.
|
|
+ if len(hm1[4]) > len(hm2[4]):
|
|
+ hm = hm1
|
|
+ else:
|
|
+ hm = hm2
|
|
|
|
- def diff_cleanupSemantic(self, diffs):
|
|
- """Reduce the number of edits by eliminating semantically trivial
|
|
+ # A half-match was found, sort out the return data.
|
|
+ if len(text1) > len(text2):
|
|
+ (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
|
|
+ else:
|
|
+ (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
|
|
+ return (text1_a, text1_b, text2_a, text2_b, mid_common)
|
|
+
|
|
+ def diff_cleanupSemantic(self, diffs):
|
|
+ """Reduce the number of edits by eliminating semantically trivial
|
|
equalities.
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
"""
|
|
- changes = False
|
|
- equalities = [] # Stack of indices where equalities are found.
|
|
- lastEquality = None # Always equal to diffs[equalities[-1]][1]
|
|
- pointer = 0 # Index of current position.
|
|
- # Number of chars that changed prior to the equality.
|
|
- length_insertions1, length_deletions1 = 0, 0
|
|
- # Number of chars that changed after the equality.
|
|
- length_insertions2, length_deletions2 = 0, 0
|
|
- while pointer < len(diffs):
|
|
- if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
|
|
- equalities.append(pointer)
|
|
- length_insertions1, length_insertions2 = length_insertions2, 0
|
|
- length_deletions1, length_deletions2 = length_deletions2, 0
|
|
- lastEquality = diffs[pointer][1]
|
|
- else: # An insertion or deletion.
|
|
- if diffs[pointer][0] == self.DIFF_INSERT:
|
|
- length_insertions2 += len(diffs[pointer][1])
|
|
- else:
|
|
- length_deletions2 += len(diffs[pointer][1])
|
|
- # Eliminate an equality that is smaller or equal to the edits on both
|
|
- # sides of it.
|
|
- if (lastEquality and (len(lastEquality) <=
|
|
- max(length_insertions1, length_deletions1)) and
|
|
- (len(lastEquality) <= max(length_insertions2, length_deletions2))):
|
|
- # Duplicate record.
|
|
- diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
|
|
- # Change second copy to insert.
|
|
- diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
|
|
- diffs[equalities[-1] + 1][1])
|
|
- # Throw away the equality we just deleted.
|
|
- equalities.pop()
|
|
- # Throw away the previous equality (it needs to be reevaluated).
|
|
- if len(equalities):
|
|
- equalities.pop()
|
|
- if len(equalities):
|
|
- pointer = equalities[-1]
|
|
- else:
|
|
- pointer = -1
|
|
- # Reset the counters.
|
|
- length_insertions1, length_deletions1 = 0, 0
|
|
- length_insertions2, length_deletions2 = 0, 0
|
|
- lastEquality = None
|
|
- changes = True
|
|
- pointer += 1
|
|
-
|
|
- # Normalize the diff.
|
|
- if changes:
|
|
- self.diff_cleanupMerge(diffs)
|
|
- self.diff_cleanupSemanticLossless(diffs)
|
|
-
|
|
- # Find any overlaps between deletions and insertions.
|
|
- # e.g: <del>abcxxx</del><ins>xxxdef</ins>
|
|
- # -> <del>abc</del>xxx<ins>def</ins>
|
|
- # e.g: <del>xxxabc</del><ins>defxxx</ins>
|
|
- # -> <ins>def</ins>xxx<del>abc</del>
|
|
- # Only extract an overlap if it is as big as the edit ahead or behind it.
|
|
- pointer = 1
|
|
- while pointer < len(diffs):
|
|
- if (diffs[pointer - 1][0] == self.DIFF_DELETE and
|
|
- diffs[pointer][0] == self.DIFF_INSERT):
|
|
- deletion = diffs[pointer - 1][1]
|
|
- insertion = diffs[pointer][1]
|
|
- overlap_length1 = self.diff_commonOverlap(deletion, insertion)
|
|
- overlap_length2 = self.diff_commonOverlap(insertion, deletion)
|
|
- if overlap_length1 >= overlap_length2:
|
|
- if (overlap_length1 >= len(deletion) / 2.0 or
|
|
- overlap_length1 >= len(insertion) / 2.0):
|
|
- # Overlap found. Insert an equality and trim the surrounding edits.
|
|
- diffs.insert(pointer, (self.DIFF_EQUAL,
|
|
- insertion[:overlap_length1]))
|
|
- diffs[pointer - 1] = (self.DIFF_DELETE,
|
|
- deletion[:len(deletion) - overlap_length1])
|
|
- diffs[pointer + 1] = (self.DIFF_INSERT,
|
|
- insertion[overlap_length1:])
|
|
+ changes = False
|
|
+ equalities = [] # Stack of indices where equalities are found.
|
|
+ lastEquality = None # Always equal to diffs[equalities[-1]][1]
|
|
+ pointer = 0 # Index of current position.
|
|
+ # Number of chars that changed prior to the equality.
|
|
+ length_insertions1, length_deletions1 = 0, 0
|
|
+ # Number of chars that changed after the equality.
|
|
+ length_insertions2, length_deletions2 = 0, 0
|
|
+ while pointer < len(diffs):
|
|
+ if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
|
|
+ equalities.append(pointer)
|
|
+ length_insertions1, length_insertions2 = length_insertions2, 0
|
|
+ length_deletions1, length_deletions2 = length_deletions2, 0
|
|
+ lastEquality = diffs[pointer][1]
|
|
+ else: # An insertion or deletion.
|
|
+ if diffs[pointer][0] == self.DIFF_INSERT:
|
|
+ length_insertions2 += len(diffs[pointer][1])
|
|
+ else:
|
|
+ length_deletions2 += len(diffs[pointer][1])
|
|
+ # Eliminate an equality that is smaller or equal to the edits on both
|
|
+ # sides of it.
|
|
+ if (
|
|
+ lastEquality
|
|
+ and (
|
|
+ len(lastEquality) <= max(length_insertions1, length_deletions1)
|
|
+ )
|
|
+ and (
|
|
+ len(lastEquality) <= max(length_insertions2, length_deletions2)
|
|
+ )
|
|
+ ):
|
|
+ # Duplicate record.
|
|
+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
|
|
+ # Change second copy to insert.
|
|
+ diffs[equalities[-1] + 1] = (
|
|
+ self.DIFF_INSERT,
|
|
+ diffs[equalities[-1] + 1][1],
|
|
+ )
|
|
+ # Throw away the equality we just deleted.
|
|
+ equalities.pop()
|
|
+ # Throw away the previous equality (it needs to be reevaluated).
|
|
+ if len(equalities):
|
|
+ equalities.pop()
|
|
+ if len(equalities):
|
|
+ pointer = equalities[-1]
|
|
+ else:
|
|
+ pointer = -1
|
|
+ # Reset the counters.
|
|
+ length_insertions1, length_deletions1 = 0, 0
|
|
+ length_insertions2, length_deletions2 = 0, 0
|
|
+ lastEquality = None
|
|
+ changes = True
|
|
pointer += 1
|
|
- else:
|
|
- if (overlap_length2 >= len(deletion) / 2.0 or
|
|
- overlap_length2 >= len(insertion) / 2.0):
|
|
- # Reverse overlap found.
|
|
- # Insert an equality and swap and trim the surrounding edits.
|
|
- diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]))
|
|
- diffs[pointer - 1] = (self.DIFF_INSERT,
|
|
- insertion[:len(insertion) - overlap_length2])
|
|
- diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:])
|
|
+
|
|
+ # Normalize the diff.
|
|
+ if changes:
|
|
+ self.diff_cleanupMerge(diffs)
|
|
+ self.diff_cleanupSemanticLossless(diffs)
|
|
+
|
|
+ # Find any overlaps between deletions and insertions.
|
|
+ # e.g: <del>abcxxx</del><ins>xxxdef</ins>
|
|
+ # -> <del>abc</del>xxx<ins>def</ins>
|
|
+ # e.g: <del>xxxabc</del><ins>defxxx</ins>
|
|
+ # -> <ins>def</ins>xxx<del>abc</del>
|
|
+ # Only extract an overlap if it is as big as the edit ahead or behind it.
|
|
+ pointer = 1
|
|
+ while pointer < len(diffs):
|
|
+ if (
|
|
+ diffs[pointer - 1][0] == self.DIFF_DELETE
|
|
+ and diffs[pointer][0] == self.DIFF_INSERT
|
|
+ ):
|
|
+ deletion = diffs[pointer - 1][1]
|
|
+ insertion = diffs[pointer][1]
|
|
+ overlap_length1 = self.diff_commonOverlap(deletion, insertion)
|
|
+ overlap_length2 = self.diff_commonOverlap(insertion, deletion)
|
|
+ if overlap_length1 >= overlap_length2:
|
|
+ if (
|
|
+ overlap_length1 >= len(deletion) / 2.0
|
|
+ or overlap_length1 >= len(insertion) / 2.0
|
|
+ ):
|
|
+ # Overlap found. Insert an equality and trim the surrounding edits.
|
|
+ diffs.insert(
|
|
+ pointer, (self.DIFF_EQUAL, insertion[:overlap_length1])
|
|
+ )
|
|
+ diffs[pointer - 1] = (
|
|
+ self.DIFF_DELETE,
|
|
+ deletion[: len(deletion) - overlap_length1],
|
|
+ )
|
|
+ diffs[pointer + 1] = (
|
|
+ self.DIFF_INSERT,
|
|
+ insertion[overlap_length1:],
|
|
+ )
|
|
+ pointer += 1
|
|
+ else:
|
|
+ if (
|
|
+ overlap_length2 >= len(deletion) / 2.0
|
|
+ or overlap_length2 >= len(insertion) / 2.0
|
|
+ ):
|
|
+ # Reverse overlap found.
|
|
+ # Insert an equality and swap and trim the surrounding edits.
|
|
+ diffs.insert(
|
|
+ pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])
|
|
+ )
|
|
+ diffs[pointer - 1] = (
|
|
+ self.DIFF_INSERT,
|
|
+ insertion[: len(insertion) - overlap_length2],
|
|
+ )
|
|
+ diffs[pointer + 1] = (
|
|
+ self.DIFF_DELETE,
|
|
+ deletion[overlap_length2:],
|
|
+ )
|
|
+ pointer += 1
|
|
+ pointer += 1
|
|
pointer += 1
|
|
- pointer += 1
|
|
- pointer += 1
|
|
|
|
- def diff_cleanupSemanticLossless(self, diffs):
|
|
- """Look for single edits surrounded on both sides by equalities
|
|
+ def diff_cleanupSemanticLossless(self, diffs):
|
|
+ """Look for single edits surrounded on both sides by equalities
|
|
which can be shifted sideways to align the edit to a word boundary.
|
|
e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
|
|
|
|
@@ -744,8 +783,8 @@ class diff_match_patch:
|
|
diffs: Array of diff tuples.
|
|
"""
|
|
|
|
- def diff_cleanupSemanticScore(one, two):
|
|
- """Given two strings, compute a score representing whether the
|
|
+ def diff_cleanupSemanticScore(one, two):
|
|
+ """Given two strings, compute a score representing whether the
|
|
internal boundary falls on logical boundaries.
|
|
Scores range from 6 (best) to 0 (worst).
|
|
Closure, but does not reference any external variables.
|
|
@@ -757,277 +796,306 @@ class diff_match_patch:
|
|
Returns:
|
|
The score.
|
|
"""
|
|
- if not one or not two:
|
|
- # Edges are the best.
|
|
- return 6
|
|
-
|
|
- # Each port of this function behaves slightly differently due to
|
|
- # subtle differences in each language's definition of things like
|
|
- # 'whitespace'. Since this function's purpose is largely cosmetic,
|
|
- # the choice has been made to use each language's native features
|
|
- # rather than force total conformity.
|
|
- char1 = one[-1]
|
|
- char2 = two[0]
|
|
- nonAlphaNumeric1 = not char1.isalnum()
|
|
- nonAlphaNumeric2 = not char2.isalnum()
|
|
- whitespace1 = nonAlphaNumeric1 and char1.isspace()
|
|
- whitespace2 = nonAlphaNumeric2 and char2.isspace()
|
|
- lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")
|
|
- lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")
|
|
- blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)
|
|
- blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)
|
|
-
|
|
- if blankLine1 or blankLine2:
|
|
- # Five points for blank lines.
|
|
- return 5
|
|
- elif lineBreak1 or lineBreak2:
|
|
- # Four points for line breaks.
|
|
- return 4
|
|
- elif nonAlphaNumeric1 and not whitespace1 and whitespace2:
|
|
- # Three points for end of sentences.
|
|
- return 3
|
|
- elif whitespace1 or whitespace2:
|
|
- # Two points for whitespace.
|
|
- return 2
|
|
- elif nonAlphaNumeric1 or nonAlphaNumeric2:
|
|
- # One point for non-alphanumeric.
|
|
- return 1
|
|
- return 0
|
|
-
|
|
- pointer = 1
|
|
- # Intentionally ignore the first and last element (don't need checking).
|
|
- while pointer < len(diffs) - 1:
|
|
- if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
|
|
- diffs[pointer + 1][0] == self.DIFF_EQUAL):
|
|
- # This is a single edit surrounded by equalities.
|
|
- equality1 = diffs[pointer - 1][1]
|
|
- edit = diffs[pointer][1]
|
|
- equality2 = diffs[pointer + 1][1]
|
|
-
|
|
- # First, shift the edit as far left as possible.
|
|
- commonOffset = self.diff_commonSuffix(equality1, edit)
|
|
- if commonOffset:
|
|
- commonString = edit[-commonOffset:]
|
|
- equality1 = equality1[:-commonOffset]
|
|
- edit = commonString + edit[:-commonOffset]
|
|
- equality2 = commonString + equality2
|
|
-
|
|
- # Second, step character by character right, looking for the best fit.
|
|
- bestEquality1 = equality1
|
|
- bestEdit = edit
|
|
- bestEquality2 = equality2
|
|
- bestScore = (diff_cleanupSemanticScore(equality1, edit) +
|
|
- diff_cleanupSemanticScore(edit, equality2))
|
|
- while edit and equality2 and edit[0] == equality2[0]:
|
|
- equality1 += edit[0]
|
|
- edit = edit[1:] + equality2[0]
|
|
- equality2 = equality2[1:]
|
|
- score = (diff_cleanupSemanticScore(equality1, edit) +
|
|
- diff_cleanupSemanticScore(edit, equality2))
|
|
- # The >= encourages trailing rather than leading whitespace on edits.
|
|
- if score >= bestScore:
|
|
- bestScore = score
|
|
- bestEquality1 = equality1
|
|
- bestEdit = edit
|
|
- bestEquality2 = equality2
|
|
-
|
|
- if diffs[pointer - 1][1] != bestEquality1:
|
|
- # We have an improvement, save it back to the diff.
|
|
- if bestEquality1:
|
|
- diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
|
|
- else:
|
|
- del diffs[pointer - 1]
|
|
- pointer -= 1
|
|
- diffs[pointer] = (diffs[pointer][0], bestEdit)
|
|
- if bestEquality2:
|
|
- diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
|
|
- else:
|
|
- del diffs[pointer + 1]
|
|
- pointer -= 1
|
|
- pointer += 1
|
|
-
|
|
- # Define some regex patterns for matching boundaries.
|
|
- BLANKLINEEND = re.compile(r"\n\r?\n$")
|
|
- BLANKLINESTART = re.compile(r"^\r?\n\r?\n")
|
|
+ if not one or not two:
|
|
+ # Edges are the best.
|
|
+ return 6
|
|
+
|
|
+ # Each port of this function behaves slightly differently due to
|
|
+ # subtle differences in each language's definition of things like
|
|
+ # 'whitespace'. Since this function's purpose is largely cosmetic,
|
|
+ # the choice has been made to use each language's native features
|
|
+ # rather than force total conformity.
|
|
+ char1 = one[-1]
|
|
+ char2 = two[0]
|
|
+ nonAlphaNumeric1 = not char1.isalnum()
|
|
+ nonAlphaNumeric2 = not char2.isalnum()
|
|
+ whitespace1 = nonAlphaNumeric1 and char1.isspace()
|
|
+ whitespace2 = nonAlphaNumeric2 and char2.isspace()
|
|
+ lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")
|
|
+ lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")
|
|
+ blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)
|
|
+ blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)
|
|
+
|
|
+ if blankLine1 or blankLine2:
|
|
+ # Five points for blank lines.
|
|
+ return 5
|
|
+ elif lineBreak1 or lineBreak2:
|
|
+ # Four points for line breaks.
|
|
+ return 4
|
|
+ elif nonAlphaNumeric1 and not whitespace1 and whitespace2:
|
|
+ # Three points for end of sentences.
|
|
+ return 3
|
|
+ elif whitespace1 or whitespace2:
|
|
+ # Two points for whitespace.
|
|
+ return 2
|
|
+ elif nonAlphaNumeric1 or nonAlphaNumeric2:
|
|
+ # One point for non-alphanumeric.
|
|
+ return 1
|
|
+ return 0
|
|
+
|
|
+ pointer = 1
|
|
+ # Intentionally ignore the first and last element (don't need checking).
|
|
+ while pointer < len(diffs) - 1:
|
|
+ if (
|
|
+ diffs[pointer - 1][0] == self.DIFF_EQUAL
|
|
+ and diffs[pointer + 1][0] == self.DIFF_EQUAL
|
|
+ ):
|
|
+ # This is a single edit surrounded by equalities.
|
|
+ equality1 = diffs[pointer - 1][1]
|
|
+ edit = diffs[pointer][1]
|
|
+ equality2 = diffs[pointer + 1][1]
|
|
+
|
|
+ # First, shift the edit as far left as possible.
|
|
+ commonOffset = self.diff_commonSuffix(equality1, edit)
|
|
+ if commonOffset:
|
|
+ commonString = edit[-commonOffset:]
|
|
+ equality1 = equality1[:-commonOffset]
|
|
+ edit = commonString + edit[:-commonOffset]
|
|
+ equality2 = commonString + equality2
|
|
+
|
|
+ # Second, step character by character right, looking for the best fit.
|
|
+ bestEquality1 = equality1
|
|
+ bestEdit = edit
|
|
+ bestEquality2 = equality2
|
|
+ bestScore = diff_cleanupSemanticScore(
|
|
+ equality1, edit
|
|
+ ) + diff_cleanupSemanticScore(edit, equality2)
|
|
+ while edit and equality2 and edit[0] == equality2[0]:
|
|
+ equality1 += edit[0]
|
|
+ edit = edit[1:] + equality2[0]
|
|
+ equality2 = equality2[1:]
|
|
+ score = diff_cleanupSemanticScore(
|
|
+ equality1, edit
|
|
+ ) + diff_cleanupSemanticScore(edit, equality2)
|
|
+ # The >= encourages trailing rather than leading whitespace on edits.
|
|
+ if score >= bestScore:
|
|
+ bestScore = score
|
|
+ bestEquality1 = equality1
|
|
+ bestEdit = edit
|
|
+ bestEquality2 = equality2
|
|
+
|
|
+ if diffs[pointer - 1][1] != bestEquality1:
|
|
+ # We have an improvement, save it back to the diff.
|
|
+ if bestEquality1:
|
|
+ diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
|
|
+ else:
|
|
+ del diffs[pointer - 1]
|
|
+ pointer -= 1
|
|
+ diffs[pointer] = (diffs[pointer][0], bestEdit)
|
|
+ if bestEquality2:
|
|
+ diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
|
|
+ else:
|
|
+ del diffs[pointer + 1]
|
|
+ pointer -= 1
|
|
+ pointer += 1
|
|
+
|
|
+ # Define some regex patterns for matching boundaries.
|
|
+ BLANKLINEEND = re.compile(r"\n\r?\n$")
|
|
+ BLANKLINESTART = re.compile(r"^\r?\n\r?\n")
|
|
|
|
- def diff_cleanupEfficiency(self, diffs):
|
|
- """Reduce the number of edits by eliminating operationally trivial
|
|
+ def diff_cleanupEfficiency(self, diffs):
|
|
+ """Reduce the number of edits by eliminating operationally trivial
|
|
equalities.
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
"""
|
|
- changes = False
|
|
- equalities = [] # Stack of indices where equalities are found.
|
|
- lastEquality = None # Always equal to diffs[equalities[-1]][1]
|
|
- pointer = 0 # Index of current position.
|
|
- pre_ins = False # Is there an insertion operation before the last equality.
|
|
- pre_del = False # Is there a deletion operation before the last equality.
|
|
- post_ins = False # Is there an insertion operation after the last equality.
|
|
- post_del = False # Is there a deletion operation after the last equality.
|
|
- while pointer < len(diffs):
|
|
- if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
|
|
- if (len(diffs[pointer][1]) < self.Diff_EditCost and
|
|
- (post_ins or post_del)):
|
|
- # Candidate found.
|
|
- equalities.append(pointer)
|
|
- pre_ins = post_ins
|
|
- pre_del = post_del
|
|
- lastEquality = diffs[pointer][1]
|
|
- else:
|
|
- # Not a candidate, and can never become one.
|
|
- equalities = []
|
|
- lastEquality = None
|
|
-
|
|
- post_ins = post_del = False
|
|
- else: # An insertion or deletion.
|
|
- if diffs[pointer][0] == self.DIFF_DELETE:
|
|
- post_del = True
|
|
- else:
|
|
- post_ins = True
|
|
-
|
|
- # Five types to be split:
|
|
- # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
|
|
- # <ins>A</ins>X<ins>C</ins><del>D</del>
|
|
- # <ins>A</ins><del>B</del>X<ins>C</ins>
|
|
- # <ins>A</del>X<ins>C</ins><del>D</del>
|
|
- # <ins>A</ins><del>B</del>X<del>C</del>
|
|
-
|
|
- if lastEquality and ((pre_ins and pre_del and post_ins and post_del) or
|
|
- ((len(lastEquality) < self.Diff_EditCost / 2) and
|
|
- (pre_ins + pre_del + post_ins + post_del) == 3)):
|
|
- # Duplicate record.
|
|
- diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
|
|
- # Change second copy to insert.
|
|
- diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
|
|
- diffs[equalities[-1] + 1][1])
|
|
- equalities.pop() # Throw away the equality we just deleted.
|
|
- lastEquality = None
|
|
- if pre_ins and pre_del:
|
|
- # No changes made which could affect previous entry, keep going.
|
|
- post_ins = post_del = True
|
|
- equalities = []
|
|
- else:
|
|
- if len(equalities):
|
|
- equalities.pop() # Throw away the previous equality.
|
|
- if len(equalities):
|
|
- pointer = equalities[-1]
|
|
- else:
|
|
- pointer = -1
|
|
- post_ins = post_del = False
|
|
- changes = True
|
|
- pointer += 1
|
|
+ changes = False
|
|
+ equalities = [] # Stack of indices where equalities are found.
|
|
+ lastEquality = None # Always equal to diffs[equalities[-1]][1]
|
|
+ pointer = 0 # Index of current position.
|
|
+ pre_ins = False # Is there an insertion operation before the last equality.
|
|
+ pre_del = False # Is there a deletion operation before the last equality.
|
|
+ post_ins = False # Is there an insertion operation after the last equality.
|
|
+ post_del = False # Is there a deletion operation after the last equality.
|
|
+ while pointer < len(diffs):
|
|
+ if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
|
|
+ if len(diffs[pointer][1]) < self.Diff_EditCost and (
|
|
+ post_ins or post_del
|
|
+ ):
|
|
+ # Candidate found.
|
|
+ equalities.append(pointer)
|
|
+ pre_ins = post_ins
|
|
+ pre_del = post_del
|
|
+ lastEquality = diffs[pointer][1]
|
|
+ else:
|
|
+ # Not a candidate, and can never become one.
|
|
+ equalities = []
|
|
+ lastEquality = None
|
|
+
|
|
+ post_ins = post_del = False
|
|
+ else: # An insertion or deletion.
|
|
+ if diffs[pointer][0] == self.DIFF_DELETE:
|
|
+ post_del = True
|
|
+ else:
|
|
+ post_ins = True
|
|
+
|
|
+ # Five types to be split:
|
|
+ # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
|
|
+ # <ins>A</ins>X<ins>C</ins><del>D</del>
|
|
+ # <ins>A</ins><del>B</del>X<ins>C</ins>
|
|
+ # <ins>A</del>X<ins>C</ins><del>D</del>
|
|
+ # <ins>A</ins><del>B</del>X<del>C</del>
|
|
+
|
|
+ if lastEquality and (
|
|
+ (pre_ins and pre_del and post_ins and post_del)
|
|
+ or (
|
|
+ (len(lastEquality) < self.Diff_EditCost / 2)
|
|
+ and (pre_ins + pre_del + post_ins + post_del) == 3
|
|
+ )
|
|
+ ):
|
|
+ # Duplicate record.
|
|
+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
|
|
+ # Change second copy to insert.
|
|
+ diffs[equalities[-1] + 1] = (
|
|
+ self.DIFF_INSERT,
|
|
+ diffs[equalities[-1] + 1][1],
|
|
+ )
|
|
+ equalities.pop() # Throw away the equality we just deleted.
|
|
+ lastEquality = None
|
|
+ if pre_ins and pre_del:
|
|
+ # No changes made which could affect previous entry, keep going.
|
|
+ post_ins = post_del = True
|
|
+ equalities = []
|
|
+ else:
|
|
+ if len(equalities):
|
|
+ equalities.pop() # Throw away the previous equality.
|
|
+ if len(equalities):
|
|
+ pointer = equalities[-1]
|
|
+ else:
|
|
+ pointer = -1
|
|
+ post_ins = post_del = False
|
|
+ changes = True
|
|
+ pointer += 1
|
|
|
|
- if changes:
|
|
- self.diff_cleanupMerge(diffs)
|
|
+ if changes:
|
|
+ self.diff_cleanupMerge(diffs)
|
|
|
|
- def diff_cleanupMerge(self, diffs):
|
|
- """Reorder and merge like edit sections. Merge equalities.
|
|
+ def diff_cleanupMerge(self, diffs):
|
|
+ """Reorder and merge like edit sections. Merge equalities.
|
|
Any edit section can move as long as it doesn't cross an equality.
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
"""
|
|
- diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end.
|
|
- pointer = 0
|
|
- count_delete = 0
|
|
- count_insert = 0
|
|
- text_delete = ''
|
|
- text_insert = ''
|
|
- while pointer < len(diffs):
|
|
- if diffs[pointer][0] == self.DIFF_INSERT:
|
|
- count_insert += 1
|
|
- text_insert += diffs[pointer][1]
|
|
- pointer += 1
|
|
- elif diffs[pointer][0] == self.DIFF_DELETE:
|
|
- count_delete += 1
|
|
- text_delete += diffs[pointer][1]
|
|
- pointer += 1
|
|
- elif diffs[pointer][0] == self.DIFF_EQUAL:
|
|
- # Upon reaching an equality, check for prior redundancies.
|
|
- if count_delete + count_insert > 1:
|
|
- if count_delete != 0 and count_insert != 0:
|
|
- # Factor out any common prefixies.
|
|
- commonlength = self.diff_commonPrefix(text_insert, text_delete)
|
|
- if commonlength != 0:
|
|
- x = pointer - count_delete - count_insert - 1
|
|
- if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
|
|
- diffs[x] = (diffs[x][0], diffs[x][1] +
|
|
- text_insert[:commonlength])
|
|
- else:
|
|
- diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
|
|
- pointer += 1
|
|
- text_insert = text_insert[commonlength:]
|
|
- text_delete = text_delete[commonlength:]
|
|
- # Factor out any common suffixes.
|
|
- commonlength = self.diff_commonSuffix(text_insert, text_delete)
|
|
- if commonlength != 0:
|
|
- diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
|
|
- diffs[pointer][1])
|
|
- text_insert = text_insert[:-commonlength]
|
|
- text_delete = text_delete[:-commonlength]
|
|
- # Delete the offending records and add the merged ones.
|
|
- new_ops = []
|
|
- if len(text_delete) != 0:
|
|
- new_ops.append((self.DIFF_DELETE, text_delete))
|
|
- if len(text_insert) != 0:
|
|
- new_ops.append((self.DIFF_INSERT, text_insert))
|
|
- pointer -= count_delete + count_insert
|
|
- diffs[pointer : pointer + count_delete + count_insert] = new_ops
|
|
- pointer += len(new_ops) + 1
|
|
- elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
|
|
- # Merge this equality with the previous one.
|
|
- diffs[pointer - 1] = (diffs[pointer - 1][0],
|
|
- diffs[pointer - 1][1] + diffs[pointer][1])
|
|
- del diffs[pointer]
|
|
- else:
|
|
- pointer += 1
|
|
-
|
|
- count_insert = 0
|
|
+ diffs.append((self.DIFF_EQUAL, "")) # Add a dummy entry at the end.
|
|
+ pointer = 0
|
|
count_delete = 0
|
|
- text_delete = ''
|
|
- text_insert = ''
|
|
-
|
|
- if diffs[-1][1] == '':
|
|
- diffs.pop() # Remove the dummy entry at the end.
|
|
+ count_insert = 0
|
|
+ text_delete = ""
|
|
+ text_insert = ""
|
|
+ while pointer < len(diffs):
|
|
+ if diffs[pointer][0] == self.DIFF_INSERT:
|
|
+ count_insert += 1
|
|
+ text_insert += diffs[pointer][1]
|
|
+ pointer += 1
|
|
+ elif diffs[pointer][0] == self.DIFF_DELETE:
|
|
+ count_delete += 1
|
|
+ text_delete += diffs[pointer][1]
|
|
+ pointer += 1
|
|
+ elif diffs[pointer][0] == self.DIFF_EQUAL:
|
|
+ # Upon reaching an equality, check for prior redundancies.
|
|
+ if count_delete + count_insert > 1:
|
|
+ if count_delete != 0 and count_insert != 0:
|
|
+ # Factor out any common prefixies.
|
|
+ commonlength = self.diff_commonPrefix(text_insert, text_delete)
|
|
+ if commonlength != 0:
|
|
+ x = pointer - count_delete - count_insert - 1
|
|
+ if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
|
|
+ diffs[x] = (
|
|
+ diffs[x][0],
|
|
+ diffs[x][1] + text_insert[:commonlength],
|
|
+ )
|
|
+ else:
|
|
+ diffs.insert(
|
|
+ 0, (self.DIFF_EQUAL, text_insert[:commonlength])
|
|
+ )
|
|
+ pointer += 1
|
|
+ text_insert = text_insert[commonlength:]
|
|
+ text_delete = text_delete[commonlength:]
|
|
+ # Factor out any common suffixes.
|
|
+ commonlength = self.diff_commonSuffix(text_insert, text_delete)
|
|
+ if commonlength != 0:
|
|
+ diffs[pointer] = (
|
|
+ diffs[pointer][0],
|
|
+ text_insert[-commonlength:] + diffs[pointer][1],
|
|
+ )
|
|
+ text_insert = text_insert[:-commonlength]
|
|
+ text_delete = text_delete[:-commonlength]
|
|
+ # Delete the offending records and add the merged ones.
|
|
+ new_ops = []
|
|
+ if len(text_delete) != 0:
|
|
+ new_ops.append((self.DIFF_DELETE, text_delete))
|
|
+ if len(text_insert) != 0:
|
|
+ new_ops.append((self.DIFF_INSERT, text_insert))
|
|
+ pointer -= count_delete + count_insert
|
|
+ diffs[pointer : pointer + count_delete + count_insert] = new_ops
|
|
+ pointer += len(new_ops) + 1
|
|
+ elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
|
|
+ # Merge this equality with the previous one.
|
|
+ diffs[pointer - 1] = (
|
|
+ diffs[pointer - 1][0],
|
|
+ diffs[pointer - 1][1] + diffs[pointer][1],
|
|
+ )
|
|
+ del diffs[pointer]
|
|
+ else:
|
|
+ pointer += 1
|
|
+
|
|
+ count_insert = 0
|
|
+ count_delete = 0
|
|
+ text_delete = ""
|
|
+ text_insert = ""
|
|
+
|
|
+ if diffs[-1][1] == "":
|
|
+ diffs.pop() # Remove the dummy entry at the end.
|
|
+
|
|
+ # Second pass: look for single edits surrounded on both sides by equalities
|
|
+ # which can be shifted sideways to eliminate an equality.
|
|
+ # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
|
|
+ changes = False
|
|
+ pointer = 1
|
|
+ # Intentionally ignore the first and last element (don't need checking).
|
|
+ while pointer < len(diffs) - 1:
|
|
+ if (
|
|
+ diffs[pointer - 1][0] == self.DIFF_EQUAL
|
|
+ and diffs[pointer + 1][0] == self.DIFF_EQUAL
|
|
+ ):
|
|
+ # This is a single edit surrounded by equalities.
|
|
+ if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
|
|
+ # Shift the edit over the previous equality.
|
|
+ if diffs[pointer - 1][1] != "":
|
|
+ diffs[pointer] = (
|
|
+ diffs[pointer][0],
|
|
+ diffs[pointer - 1][1]
|
|
+ + diffs[pointer][1][: -len(diffs[pointer - 1][1])],
|
|
+ )
|
|
+ diffs[pointer + 1] = (
|
|
+ diffs[pointer + 1][0],
|
|
+ diffs[pointer - 1][1] + diffs[pointer + 1][1],
|
|
+ )
|
|
+ del diffs[pointer - 1]
|
|
+ changes = True
|
|
+ elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
|
|
+ # Shift the edit over the next equality.
|
|
+ diffs[pointer - 1] = (
|
|
+ diffs[pointer - 1][0],
|
|
+ diffs[pointer - 1][1] + diffs[pointer + 1][1],
|
|
+ )
|
|
+ diffs[pointer] = (
|
|
+ diffs[pointer][0],
|
|
+ diffs[pointer][1][len(diffs[pointer + 1][1]) :]
|
|
+ + diffs[pointer + 1][1],
|
|
+ )
|
|
+ del diffs[pointer + 1]
|
|
+ changes = True
|
|
+ pointer += 1
|
|
|
|
- # Second pass: look for single edits surrounded on both sides by equalities
|
|
- # which can be shifted sideways to eliminate an equality.
|
|
- # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
|
|
- changes = False
|
|
- pointer = 1
|
|
- # Intentionally ignore the first and last element (don't need checking).
|
|
- while pointer < len(diffs) - 1:
|
|
- if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
|
|
- diffs[pointer + 1][0] == self.DIFF_EQUAL):
|
|
- # This is a single edit surrounded by equalities.
|
|
- if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
|
|
- # Shift the edit over the previous equality.
|
|
- if diffs[pointer - 1][1] != "":
|
|
- diffs[pointer] = (diffs[pointer][0],
|
|
- diffs[pointer - 1][1] +
|
|
- diffs[pointer][1][:-len(diffs[pointer - 1][1])])
|
|
- diffs[pointer + 1] = (diffs[pointer + 1][0],
|
|
- diffs[pointer - 1][1] + diffs[pointer + 1][1])
|
|
- del diffs[pointer - 1]
|
|
- changes = True
|
|
- elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
|
|
- # Shift the edit over the next equality.
|
|
- diffs[pointer - 1] = (diffs[pointer - 1][0],
|
|
- diffs[pointer - 1][1] + diffs[pointer + 1][1])
|
|
- diffs[pointer] = (diffs[pointer][0],
|
|
- diffs[pointer][1][len(diffs[pointer + 1][1]):] +
|
|
- diffs[pointer + 1][1])
|
|
- del diffs[pointer + 1]
|
|
- changes = True
|
|
- pointer += 1
|
|
-
|
|
- # If shifts were made, the diff needs reordering and another shift sweep.
|
|
- if changes:
|
|
- self.diff_cleanupMerge(diffs)
|
|
+ # If shifts were made, the diff needs reordering and another shift sweep.
|
|
+ if changes:
|
|
+ self.diff_cleanupMerge(diffs)
|
|
|
|
- def diff_xIndex(self, diffs, loc):
|
|
- """loc is a location in text1, compute and return the equivalent location
|
|
+ def diff_xIndex(self, diffs, loc):
|
|
+ """loc is a location in text1, compute and return the equivalent location
|
|
in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8
|
|
|
|
Args:
|
|
@@ -1037,29 +1105,29 @@ class diff_match_patch:
|
|
Returns:
|
|
Location within text2.
|
|
"""
|
|
- chars1 = 0
|
|
- chars2 = 0
|
|
- last_chars1 = 0
|
|
- last_chars2 = 0
|
|
- for x in xrange(len(diffs)):
|
|
- (op, text) = diffs[x]
|
|
- if op != self.DIFF_INSERT: # Equality or deletion.
|
|
- chars1 += len(text)
|
|
- if op != self.DIFF_DELETE: # Equality or insertion.
|
|
- chars2 += len(text)
|
|
- if chars1 > loc: # Overshot the location.
|
|
- break
|
|
- last_chars1 = chars1
|
|
- last_chars2 = chars2
|
|
-
|
|
- if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
|
|
- # The location was deleted.
|
|
- return last_chars2
|
|
- # Add the remaining len(character).
|
|
- return last_chars2 + (loc - last_chars1)
|
|
+ chars1 = 0
|
|
+ chars2 = 0
|
|
+ last_chars1 = 0
|
|
+ last_chars2 = 0
|
|
+ for x in xrange(len(diffs)):
|
|
+ (op, text) = diffs[x]
|
|
+ if op != self.DIFF_INSERT: # Equality or deletion.
|
|
+ chars1 += len(text)
|
|
+ if op != self.DIFF_DELETE: # Equality or insertion.
|
|
+ chars2 += len(text)
|
|
+ if chars1 > loc: # Overshot the location.
|
|
+ break
|
|
+ last_chars1 = chars1
|
|
+ last_chars2 = chars2
|
|
+
|
|
+ if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
|
|
+ # The location was deleted.
|
|
+ return last_chars2
|
|
+ # Add the remaining len(character).
|
|
+ return last_chars2 + (loc - last_chars1)
|
|
|
|
- def diff_prettyHtml(self, diffs):
|
|
- """Convert a diff array into a pretty HTML report.
|
|
+ def diff_prettyHtml(self, diffs):
|
|
+ """Convert a diff array into a pretty HTML report.
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
@@ -1067,20 +1135,24 @@ class diff_match_patch:
|
|
Returns:
|
|
HTML representation.
|
|
"""
|
|
- html = []
|
|
- for (op, data) in diffs:
|
|
- text = (data.replace("&", "&").replace("<", "<")
|
|
- .replace(">", ">").replace("\n", "¶<br>"))
|
|
- if op == self.DIFF_INSERT:
|
|
- html.append("<ins style=\"background:#e6ffe6;\">%s</ins>" % text)
|
|
- elif op == self.DIFF_DELETE:
|
|
- html.append("<del style=\"background:#ffe6e6;\">%s</del>" % text)
|
|
- elif op == self.DIFF_EQUAL:
|
|
- html.append("<span>%s</span>" % text)
|
|
- return "".join(html)
|
|
+ html = []
|
|
+ for (op, data) in diffs:
|
|
+ text = (
|
|
+ data.replace("&", "&")
|
|
+ .replace("<", "<")
|
|
+ .replace(">", ">")
|
|
+ .replace("\n", "¶<br>")
|
|
+ )
|
|
+ if op == self.DIFF_INSERT:
|
|
+ html.append('<ins style="background:#e6ffe6;">%s</ins>' % text)
|
|
+ elif op == self.DIFF_DELETE:
|
|
+ html.append('<del style="background:#ffe6e6;">%s</del>' % text)
|
|
+ elif op == self.DIFF_EQUAL:
|
|
+ html.append("<span>%s</span>" % text)
|
|
+ return "".join(html)
|
|
|
|
- def diff_text1(self, diffs):
|
|
- """Compute and return the source text (all equalities and deletions).
|
|
+ def diff_text1(self, diffs):
|
|
+ """Compute and return the source text (all equalities and deletions).
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
@@ -1088,14 +1160,14 @@ class diff_match_patch:
|
|
Returns:
|
|
Source text.
|
|
"""
|
|
- text = []
|
|
- for (op, data) in diffs:
|
|
- if op != self.DIFF_INSERT:
|
|
- text.append(data)
|
|
- return "".join(text)
|
|
+ text = []
|
|
+ for (op, data) in diffs:
|
|
+ if op != self.DIFF_INSERT:
|
|
+ text.append(data)
|
|
+ return "".join(text)
|
|
|
|
- def diff_text2(self, diffs):
|
|
- """Compute and return the destination text (all equalities and insertions).
|
|
+ def diff_text2(self, diffs):
|
|
+ """Compute and return the destination text (all equalities and insertions).
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
@@ -1103,14 +1175,14 @@ class diff_match_patch:
|
|
Returns:
|
|
Destination text.
|
|
"""
|
|
- text = []
|
|
- for (op, data) in diffs:
|
|
- if op != self.DIFF_DELETE:
|
|
- text.append(data)
|
|
- return "".join(text)
|
|
+ text = []
|
|
+ for (op, data) in diffs:
|
|
+ if op != self.DIFF_DELETE:
|
|
+ text.append(data)
|
|
+ return "".join(text)
|
|
|
|
- def diff_levenshtein(self, diffs):
|
|
- """Compute the Levenshtein distance; the number of inserted, deleted or
|
|
+ def diff_levenshtein(self, diffs):
|
|
+ """Compute the Levenshtein distance; the number of inserted, deleted or
|
|
substituted characters.
|
|
|
|
Args:
|
|
@@ -1119,24 +1191,24 @@ class diff_match_patch:
|
|
Returns:
|
|
Number of changes.
|
|
"""
|
|
- levenshtein = 0
|
|
- insertions = 0
|
|
- deletions = 0
|
|
- for (op, data) in diffs:
|
|
- if op == self.DIFF_INSERT:
|
|
- insertions += len(data)
|
|
- elif op == self.DIFF_DELETE:
|
|
- deletions += len(data)
|
|
- elif op == self.DIFF_EQUAL:
|
|
- # A deletion and an insertion is one substitution.
|
|
- levenshtein += max(insertions, deletions)
|
|
+ levenshtein = 0
|
|
insertions = 0
|
|
deletions = 0
|
|
- levenshtein += max(insertions, deletions)
|
|
- return levenshtein
|
|
+ for (op, data) in diffs:
|
|
+ if op == self.DIFF_INSERT:
|
|
+ insertions += len(data)
|
|
+ elif op == self.DIFF_DELETE:
|
|
+ deletions += len(data)
|
|
+ elif op == self.DIFF_EQUAL:
|
|
+ # A deletion and an insertion is one substitution.
|
|
+ levenshtein += max(insertions, deletions)
|
|
+ insertions = 0
|
|
+ deletions = 0
|
|
+ levenshtein += max(insertions, deletions)
|
|
+ return levenshtein
|
|
|
|
- def diff_toDelta(self, diffs):
|
|
- """Crush the diff into an encoded string which describes the operations
|
|
+ def diff_toDelta(self, diffs):
|
|
+ """Crush the diff into an encoded string which describes the operations
|
|
required to transform text1 into text2.
|
|
E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
|
|
Operations are tab-separated. Inserted text is escaped using %xx notation.
|
|
@@ -1147,20 +1219,20 @@ class diff_match_patch:
|
|
Returns:
|
|
Delta text.
|
|
"""
|
|
- text = []
|
|
- for (op, data) in diffs:
|
|
- if op == self.DIFF_INSERT:
|
|
- # High ascii will raise UnicodeDecodeError. Use Unicode instead.
|
|
- data = data.encode("utf-8")
|
|
- text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
|
|
- elif op == self.DIFF_DELETE:
|
|
- text.append("-%d" % len(data))
|
|
- elif op == self.DIFF_EQUAL:
|
|
- text.append("=%d" % len(data))
|
|
- return "\t".join(text)
|
|
+ text = []
|
|
+ for (op, data) in diffs:
|
|
+ if op == self.DIFF_INSERT:
|
|
+ # High ascii will raise UnicodeDecodeError. Use Unicode instead.
|
|
+ data = data.encode("utf-8")
|
|
+ text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
|
|
+ elif op == self.DIFF_DELETE:
|
|
+ text.append("-%d" % len(data))
|
|
+ elif op == self.DIFF_EQUAL:
|
|
+ text.append("=%d" % len(data))
|
|
+ return "\t".join(text)
|
|
|
|
- def diff_fromDelta(self, text1, delta):
|
|
- """Given the original text1, and an encoded string which describes the
|
|
+ def diff_fromDelta(self, text1, delta):
|
|
+ """Given the original text1, and an encoded string which describes the
|
|
operations required to transform text1 into text2, compute the full diff.
|
|
|
|
Args:
|
|
@@ -1173,50 +1245,52 @@ class diff_match_patch:
|
|
Raises:
|
|
ValueError: If invalid input.
|
|
"""
|
|
- if type(delta) == unicode:
|
|
- # Deltas should be composed of a subset of ascii chars, Unicode not
|
|
- # required. If this encode raises UnicodeEncodeError, delta is invalid.
|
|
- delta = delta.encode("ascii")
|
|
- diffs = []
|
|
- pointer = 0 # Cursor in text1
|
|
- tokens = delta.split("\t")
|
|
- for token in tokens:
|
|
- if token == "":
|
|
- # Blank tokens are ok (from a trailing \t).
|
|
- continue
|
|
- # Each token begins with a one character parameter which specifies the
|
|
- # operation of this token (delete, insert, equality).
|
|
- param = token[1:]
|
|
- if token[0] == "+":
|
|
- param = urllib.unquote(param).decode("utf-8")
|
|
- diffs.append((self.DIFF_INSERT, param))
|
|
- elif token[0] == "-" or token[0] == "=":
|
|
- try:
|
|
- n = int(param)
|
|
- except ValueError:
|
|
- raise ValueError("Invalid number in diff_fromDelta: " + param)
|
|
- if n < 0:
|
|
- raise ValueError("Negative number in diff_fromDelta: " + param)
|
|
- text = text1[pointer : pointer + n]
|
|
- pointer += n
|
|
- if token[0] == "=":
|
|
- diffs.append((self.DIFF_EQUAL, text))
|
|
- else:
|
|
- diffs.append((self.DIFF_DELETE, text))
|
|
- else:
|
|
- # Anything else is an error.
|
|
- raise ValueError("Invalid diff operation in diff_fromDelta: " +
|
|
- token[0])
|
|
- if pointer != len(text1):
|
|
- raise ValueError(
|
|
- "Delta length (%d) does not equal source text length (%d)." %
|
|
- (pointer, len(text1)))
|
|
- return diffs
|
|
+ if type(delta) == unicode:
|
|
+ # Deltas should be composed of a subset of ascii chars, Unicode not
|
|
+ # required. If this encode raises UnicodeEncodeError, delta is invalid.
|
|
+ delta = delta.encode("ascii")
|
|
+ diffs = []
|
|
+ pointer = 0 # Cursor in text1
|
|
+ tokens = delta.split("\t")
|
|
+ for token in tokens:
|
|
+ if token == "":
|
|
+ # Blank tokens are ok (from a trailing \t).
|
|
+ continue
|
|
+ # Each token begins with a one character parameter which specifies the
|
|
+ # operation of this token (delete, insert, equality).
|
|
+ param = token[1:]
|
|
+ if token[0] == "+":
|
|
+ param = urllib.unquote(param).decode("utf-8")
|
|
+ diffs.append((self.DIFF_INSERT, param))
|
|
+ elif token[0] == "-" or token[0] == "=":
|
|
+ try:
|
|
+ n = int(param)
|
|
+ except ValueError:
|
|
+ raise ValueError("Invalid number in diff_fromDelta: " + param)
|
|
+ if n < 0:
|
|
+ raise ValueError("Negative number in diff_fromDelta: " + param)
|
|
+ text = text1[pointer : pointer + n]
|
|
+ pointer += n
|
|
+ if token[0] == "=":
|
|
+ diffs.append((self.DIFF_EQUAL, text))
|
|
+ else:
|
|
+ diffs.append((self.DIFF_DELETE, text))
|
|
+ else:
|
|
+ # Anything else is an error.
|
|
+ raise ValueError(
|
|
+ "Invalid diff operation in diff_fromDelta: " + token[0]
|
|
+ )
|
|
+ if pointer != len(text1):
|
|
+ raise ValueError(
|
|
+ "Delta length (%d) does not equal source text length (%d)."
|
|
+ % (pointer, len(text1))
|
|
+ )
|
|
+ return diffs
|
|
|
|
- # MATCH FUNCTIONS
|
|
+ # MATCH FUNCTIONS
|
|
|
|
- def match_main(self, text, pattern, loc):
|
|
- """Locate the best instance of 'pattern' in 'text' near 'loc'.
|
|
+ def match_main(self, text, pattern, loc):
|
|
+ """Locate the best instance of 'pattern' in 'text' near 'loc'.
|
|
|
|
Args:
|
|
text: The text to search.
|
|
@@ -1226,27 +1300,27 @@ class diff_match_patch:
|
|
Returns:
|
|
Best match index or -1.
|
|
"""
|
|
- # Check for null inputs.
|
|
- if text == None or pattern == None:
|
|
- raise ValueError("Null inputs. (match_main)")
|
|
-
|
|
- loc = max(0, min(loc, len(text)))
|
|
- if text == pattern:
|
|
- # Shortcut (potentially not guaranteed by the algorithm)
|
|
- return 0
|
|
- elif not text:
|
|
- # Nothing to match.
|
|
- return -1
|
|
- elif text[loc:loc + len(pattern)] == pattern:
|
|
- # Perfect match at the perfect spot! (Includes case of null pattern)
|
|
- return loc
|
|
- else:
|
|
- # Do a fuzzy compare.
|
|
- match = self.match_bitap(text, pattern, loc)
|
|
- return match
|
|
+ # Check for null inputs.
|
|
+ if text == None or pattern == None:
|
|
+ raise ValueError("Null inputs. (match_main)")
|
|
+
|
|
+ loc = max(0, min(loc, len(text)))
|
|
+ if text == pattern:
|
|
+ # Shortcut (potentially not guaranteed by the algorithm)
|
|
+ return 0
|
|
+ elif not text:
|
|
+ # Nothing to match.
|
|
+ return -1
|
|
+ elif text[loc : loc + len(pattern)] == pattern:
|
|
+ # Perfect match at the perfect spot! (Includes case of null pattern)
|
|
+ return loc
|
|
+ else:
|
|
+ # Do a fuzzy compare.
|
|
+ match = self.match_bitap(text, pattern, loc)
|
|
+ return match
|
|
|
|
- def match_bitap(self, text, pattern, loc):
|
|
- """Locate the best instance of 'pattern' in 'text' near 'loc' using the
|
|
+ def match_bitap(self, text, pattern, loc):
|
|
+ """Locate the best instance of 'pattern' in 'text' near 'loc' using the
|
|
Bitap algorithm.
|
|
|
|
Args:
|
|
@@ -1257,15 +1331,15 @@ class diff_match_patch:
|
|
Returns:
|
|
Best match index or -1.
|
|
"""
|
|
- # Python doesn't have a maxint limit, so ignore this check.
|
|
- #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
|
|
- # raise ValueError("Pattern too long for this application.")
|
|
+ # Python doesn't have a maxint limit, so ignore this check.
|
|
+ # if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
|
|
+ # raise ValueError("Pattern too long for this application.")
|
|
|
|
- # Initialise the alphabet.
|
|
- s = self.match_alphabet(pattern)
|
|
+ # Initialise the alphabet.
|
|
+ s = self.match_alphabet(pattern)
|
|
|
|
- def match_bitapScore(e, x):
|
|
- """Compute and return the score for a match with e errors and x location.
|
|
+ def match_bitapScore(e, x):
|
|
+ """Compute and return the score for a match with e errors and x location.
|
|
Accesses loc and pattern through being a closure.
|
|
|
|
Args:
|
|
@@ -1275,84 +1349,87 @@ class diff_match_patch:
|
|
Returns:
|
|
Overall score for match (0.0 = good, 1.0 = bad).
|
|
"""
|
|
- accuracy = float(e) / len(pattern)
|
|
- proximity = abs(loc - x)
|
|
- if not self.Match_Distance:
|
|
- # Dodge divide by zero error.
|
|
- return proximity and 1.0 or accuracy
|
|
- return accuracy + (proximity / float(self.Match_Distance))
|
|
-
|
|
- # Highest score beyond which we give up.
|
|
- score_threshold = self.Match_Threshold
|
|
- # Is there a nearby exact match? (speedup)
|
|
- best_loc = text.find(pattern, loc)
|
|
- if best_loc != -1:
|
|
- score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
|
|
- # What about in the other direction? (speedup)
|
|
- best_loc = text.rfind(pattern, loc + len(pattern))
|
|
- if best_loc != -1:
|
|
- score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
|
|
-
|
|
- # Initialise the bit arrays.
|
|
- matchmask = 1 << (len(pattern) - 1)
|
|
- best_loc = -1
|
|
-
|
|
- bin_max = len(pattern) + len(text)
|
|
- # Empty initialization added to appease pychecker.
|
|
- last_rd = None
|
|
- for d in xrange(len(pattern)):
|
|
- # Scan for the best match each iteration allows for one more error.
|
|
- # Run a binary search to determine how far from 'loc' we can stray at
|
|
- # this error level.
|
|
- bin_min = 0
|
|
- bin_mid = bin_max
|
|
- while bin_min < bin_mid:
|
|
- if match_bitapScore(d, loc + bin_mid) <= score_threshold:
|
|
- bin_min = bin_mid
|
|
- else:
|
|
- bin_max = bin_mid
|
|
- bin_mid = (bin_max - bin_min) // 2 + bin_min
|
|
-
|
|
- # Use the result from this iteration as the maximum for the next.
|
|
- bin_max = bin_mid
|
|
- start = max(1, loc - bin_mid + 1)
|
|
- finish = min(loc + bin_mid, len(text)) + len(pattern)
|
|
-
|
|
- rd = [0] * (finish + 2)
|
|
- rd[finish + 1] = (1 << d) - 1
|
|
- for j in xrange(finish, start - 1, -1):
|
|
- if len(text) <= j - 1:
|
|
- # Out of range.
|
|
- charMatch = 0
|
|
- else:
|
|
- charMatch = s.get(text[j - 1], 0)
|
|
- if d == 0: # First pass: exact match.
|
|
- rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
|
|
- else: # Subsequent passes: fuzzy match.
|
|
- rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | (
|
|
- ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
|
|
- if rd[j] & matchmask:
|
|
- score = match_bitapScore(d, j - 1)
|
|
- # This match will almost certainly be better than any existing match.
|
|
- # But check anyway.
|
|
- if score <= score_threshold:
|
|
- # Told you so.
|
|
- score_threshold = score
|
|
- best_loc = j - 1
|
|
- if best_loc > loc:
|
|
- # When passing loc, don't exceed our current distance from loc.
|
|
- start = max(1, 2 * loc - best_loc)
|
|
- else:
|
|
- # Already passed loc, downhill from here on in.
|
|
- break
|
|
- # No hope for a (better) match at greater error levels.
|
|
- if match_bitapScore(d + 1, loc) > score_threshold:
|
|
- break
|
|
- last_rd = rd
|
|
- return best_loc
|
|
+ accuracy = float(e) / len(pattern)
|
|
+ proximity = abs(loc - x)
|
|
+ if not self.Match_Distance:
|
|
+ # Dodge divide by zero error.
|
|
+ return proximity and 1.0 or accuracy
|
|
+ return accuracy + (proximity / float(self.Match_Distance))
|
|
+
|
|
+ # Highest score beyond which we give up.
|
|
+ score_threshold = self.Match_Threshold
|
|
+ # Is there a nearby exact match? (speedup)
|
|
+ best_loc = text.find(pattern, loc)
|
|
+ if best_loc != -1:
|
|
+ score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
|
|
+ # What about in the other direction? (speedup)
|
|
+ best_loc = text.rfind(pattern, loc + len(pattern))
|
|
+ if best_loc != -1:
|
|
+ score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
|
|
+
|
|
+ # Initialise the bit arrays.
|
|
+ matchmask = 1 << (len(pattern) - 1)
|
|
+ best_loc = -1
|
|
+
|
|
+ bin_max = len(pattern) + len(text)
|
|
+ # Empty initialization added to appease pychecker.
|
|
+ last_rd = None
|
|
+ for d in xrange(len(pattern)):
|
|
+ # Scan for the best match each iteration allows for one more error.
|
|
+ # Run a binary search to determine how far from 'loc' we can stray at
|
|
+ # this error level.
|
|
+ bin_min = 0
|
|
+ bin_mid = bin_max
|
|
+ while bin_min < bin_mid:
|
|
+ if match_bitapScore(d, loc + bin_mid) <= score_threshold:
|
|
+ bin_min = bin_mid
|
|
+ else:
|
|
+ bin_max = bin_mid
|
|
+ bin_mid = (bin_max - bin_min) // 2 + bin_min
|
|
+
|
|
+ # Use the result from this iteration as the maximum for the next.
|
|
+ bin_max = bin_mid
|
|
+ start = max(1, loc - bin_mid + 1)
|
|
+ finish = min(loc + bin_mid, len(text)) + len(pattern)
|
|
+
|
|
+ rd = [0] * (finish + 2)
|
|
+ rd[finish + 1] = (1 << d) - 1
|
|
+ for j in xrange(finish, start - 1, -1):
|
|
+ if len(text) <= j - 1:
|
|
+ # Out of range.
|
|
+ charMatch = 0
|
|
+ else:
|
|
+ charMatch = s.get(text[j - 1], 0)
|
|
+ if d == 0: # First pass: exact match.
|
|
+ rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
|
|
+ else: # Subsequent passes: fuzzy match.
|
|
+ rd[j] = (
|
|
+ (((rd[j + 1] << 1) | 1) & charMatch)
|
|
+ | (((last_rd[j + 1] | last_rd[j]) << 1) | 1)
|
|
+ | last_rd[j + 1]
|
|
+ )
|
|
+ if rd[j] & matchmask:
|
|
+ score = match_bitapScore(d, j - 1)
|
|
+ # This match will almost certainly be better than any existing match.
|
|
+ # But check anyway.
|
|
+ if score <= score_threshold:
|
|
+ # Told you so.
|
|
+ score_threshold = score
|
|
+ best_loc = j - 1
|
|
+ if best_loc > loc:
|
|
+ # When passing loc, don't exceed our current distance from loc.
|
|
+ start = max(1, 2 * loc - best_loc)
|
|
+ else:
|
|
+ # Already passed loc, downhill from here on in.
|
|
+ break
|
|
+ # No hope for a (better) match at greater error levels.
|
|
+ if match_bitapScore(d + 1, loc) > score_threshold:
|
|
+ break
|
|
+ last_rd = rd
|
|
+ return best_loc
|
|
|
|
- def match_alphabet(self, pattern):
|
|
- """Initialise the alphabet for the Bitap algorithm.
|
|
+ def match_alphabet(self, pattern):
|
|
+ """Initialise the alphabet for the Bitap algorithm.
|
|
|
|
Args:
|
|
pattern: The text to encode.
|
|
@@ -1360,58 +1437,61 @@ class diff_match_patch:
|
|
Returns:
|
|
Hash of character locations.
|
|
"""
|
|
- s = {}
|
|
- for char in pattern:
|
|
- s[char] = 0
|
|
- for i in xrange(len(pattern)):
|
|
- s[pattern[i]] |= 1 << (len(pattern) - i - 1)
|
|
- return s
|
|
+ s = {}
|
|
+ for char in pattern:
|
|
+ s[char] = 0
|
|
+ for i in xrange(len(pattern)):
|
|
+ s[pattern[i]] |= 1 << (len(pattern) - i - 1)
|
|
+ return s
|
|
|
|
- # PATCH FUNCTIONS
|
|
+ # PATCH FUNCTIONS
|
|
|
|
- def patch_addContext(self, patch, text):
|
|
- """Increase the context until it is unique,
|
|
+ def patch_addContext(self, patch, text):
|
|
+ """Increase the context until it is unique,
|
|
but don't let the pattern expand beyond Match_MaxBits.
|
|
|
|
Args:
|
|
patch: The patch to grow.
|
|
text: Source text.
|
|
"""
|
|
- if len(text) == 0:
|
|
- return
|
|
- pattern = text[patch.start2 : patch.start2 + patch.length1]
|
|
- padding = 0
|
|
-
|
|
- # Look for the first and last matches of pattern in text. If two different
|
|
- # matches are found, increase the pattern length.
|
|
- while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
|
|
- 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
|
|
- self.Patch_Margin)):
|
|
- padding += self.Patch_Margin
|
|
- pattern = text[max(0, patch.start2 - padding) :
|
|
- patch.start2 + patch.length1 + padding]
|
|
- # Add one chunk for good luck.
|
|
- padding += self.Patch_Margin
|
|
-
|
|
- # Add the prefix.
|
|
- prefix = text[max(0, patch.start2 - padding) : patch.start2]
|
|
- if prefix:
|
|
- patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
|
|
- # Add the suffix.
|
|
- suffix = text[patch.start2 + patch.length1 :
|
|
- patch.start2 + patch.length1 + padding]
|
|
- if suffix:
|
|
- patch.diffs.append((self.DIFF_EQUAL, suffix))
|
|
-
|
|
- # Roll back the start points.
|
|
- patch.start1 -= len(prefix)
|
|
- patch.start2 -= len(prefix)
|
|
- # Extend lengths.
|
|
- patch.length1 += len(prefix) + len(suffix)
|
|
- patch.length2 += len(prefix) + len(suffix)
|
|
+ if len(text) == 0:
|
|
+ return
|
|
+ pattern = text[patch.start2 : patch.start2 + patch.length1]
|
|
+ padding = 0
|
|
+
|
|
+ # Look for the first and last matches of pattern in text. If two different
|
|
+ # matches are found, increase the pattern length.
|
|
+ while text.find(pattern) != text.rfind(pattern) and (
|
|
+ self.Match_MaxBits == 0
|
|
+ or len(pattern) < self.Match_MaxBits - self.Patch_Margin - self.Patch_Margin
|
|
+ ):
|
|
+ padding += self.Patch_Margin
|
|
+ pattern = text[
|
|
+ max(0, patch.start2 - padding) : patch.start2 + patch.length1 + padding
|
|
+ ]
|
|
+ # Add one chunk for good luck.
|
|
+ padding += self.Patch_Margin
|
|
+
|
|
+ # Add the prefix.
|
|
+ prefix = text[max(0, patch.start2 - padding) : patch.start2]
|
|
+ if prefix:
|
|
+ patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
|
|
+ # Add the suffix.
|
|
+ suffix = text[
|
|
+ patch.start2 + patch.length1 : patch.start2 + patch.length1 + padding
|
|
+ ]
|
|
+ if suffix:
|
|
+ patch.diffs.append((self.DIFF_EQUAL, suffix))
|
|
+
|
|
+ # Roll back the start points.
|
|
+ patch.start1 -= len(prefix)
|
|
+ patch.start2 -= len(prefix)
|
|
+ # Extend lengths.
|
|
+ patch.length1 += len(prefix) + len(suffix)
|
|
+ patch.length2 += len(prefix) + len(suffix)
|
|
|
|
- def patch_make(self, a, b=None, c=None):
|
|
- """Compute a list of patches to turn text1 into text2.
|
|
+ def patch_make(self, a, b=None, c=None):
|
|
+ """Compute a list of patches to turn text1 into text2.
|
|
Use diffs if provided, otherwise compute it ourselves.
|
|
There are four ways to call this function, depending on what data is
|
|
available to the caller:
|
|
@@ -1435,97 +1515,107 @@ class diff_match_patch:
|
|
Returns:
|
|
Array of Patch objects.
|
|
"""
|
|
- text1 = None
|
|
- diffs = None
|
|
- # Note that texts may arrive as 'str' or 'unicode'.
|
|
- if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
|
|
- # Method 1: text1, text2
|
|
- # Compute diffs from text1 and text2.
|
|
- text1 = a
|
|
- diffs = self.diff_main(text1, b, True)
|
|
- if len(diffs) > 2:
|
|
- self.diff_cleanupSemantic(diffs)
|
|
- self.diff_cleanupEfficiency(diffs)
|
|
- elif isinstance(a, list) and b is None and c is None:
|
|
- # Method 2: diffs
|
|
- # Compute text1 from diffs.
|
|
- diffs = a
|
|
- text1 = self.diff_text1(diffs)
|
|
- elif isinstance(a, basestring) and isinstance(b, list) and c is None:
|
|
- # Method 3: text1, diffs
|
|
- text1 = a
|
|
- diffs = b
|
|
- elif (isinstance(a, basestring) and isinstance(b, basestring) and
|
|
- isinstance(c, list)):
|
|
- # Method 4: text1, text2, diffs
|
|
- # text2 is not used.
|
|
- text1 = a
|
|
- diffs = c
|
|
- else:
|
|
- raise ValueError("Unknown call format to patch_make.")
|
|
-
|
|
- if not diffs:
|
|
- return [] # Get rid of the None case.
|
|
- patches = []
|
|
- patch = patch_obj()
|
|
- char_count1 = 0 # Number of characters into the text1 string.
|
|
- char_count2 = 0 # Number of characters into the text2 string.
|
|
- prepatch_text = text1 # Recreate the patches to determine context info.
|
|
- postpatch_text = text1
|
|
- for x in xrange(len(diffs)):
|
|
- (diff_type, diff_text) = diffs[x]
|
|
- if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
|
|
- # A new patch starts here.
|
|
- patch.start1 = char_count1
|
|
- patch.start2 = char_count2
|
|
- if diff_type == self.DIFF_INSERT:
|
|
- # Insertion
|
|
- patch.diffs.append(diffs[x])
|
|
- patch.length2 += len(diff_text)
|
|
- postpatch_text = (postpatch_text[:char_count2] + diff_text +
|
|
- postpatch_text[char_count2:])
|
|
- elif diff_type == self.DIFF_DELETE:
|
|
- # Deletion.
|
|
- patch.length1 += len(diff_text)
|
|
- patch.diffs.append(diffs[x])
|
|
- postpatch_text = (postpatch_text[:char_count2] +
|
|
- postpatch_text[char_count2 + len(diff_text):])
|
|
- elif (diff_type == self.DIFF_EQUAL and
|
|
- len(diff_text) <= 2 * self.Patch_Margin and
|
|
- len(patch.diffs) != 0 and len(diffs) != x + 1):
|
|
- # Small equality inside a patch.
|
|
- patch.diffs.append(diffs[x])
|
|
- patch.length1 += len(diff_text)
|
|
- patch.length2 += len(diff_text)
|
|
-
|
|
- if (diff_type == self.DIFF_EQUAL and
|
|
- len(diff_text) >= 2 * self.Patch_Margin):
|
|
- # Time for a new patch.
|
|
+ text1 = None
|
|
+ diffs = None
|
|
+ # Note that texts may arrive as 'str' or 'unicode'.
|
|
+ if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
|
|
+ # Method 1: text1, text2
|
|
+ # Compute diffs from text1 and text2.
|
|
+ text1 = a
|
|
+ diffs = self.diff_main(text1, b, True)
|
|
+ if len(diffs) > 2:
|
|
+ self.diff_cleanupSemantic(diffs)
|
|
+ self.diff_cleanupEfficiency(diffs)
|
|
+ elif isinstance(a, list) and b is None and c is None:
|
|
+ # Method 2: diffs
|
|
+ # Compute text1 from diffs.
|
|
+ diffs = a
|
|
+ text1 = self.diff_text1(diffs)
|
|
+ elif isinstance(a, basestring) and isinstance(b, list) and c is None:
|
|
+ # Method 3: text1, diffs
|
|
+ text1 = a
|
|
+ diffs = b
|
|
+ elif (
|
|
+ isinstance(a, basestring)
|
|
+ and isinstance(b, basestring)
|
|
+ and isinstance(c, list)
|
|
+ ):
|
|
+ # Method 4: text1, text2, diffs
|
|
+ # text2 is not used.
|
|
+ text1 = a
|
|
+ diffs = c
|
|
+ else:
|
|
+ raise ValueError("Unknown call format to patch_make.")
|
|
+
|
|
+ if not diffs:
|
|
+ return [] # Get rid of the None case.
|
|
+ patches = []
|
|
+ patch = patch_obj()
|
|
+ char_count1 = 0 # Number of characters into the text1 string.
|
|
+ char_count2 = 0 # Number of characters into the text2 string.
|
|
+ prepatch_text = text1 # Recreate the patches to determine context info.
|
|
+ postpatch_text = text1
|
|
+ for x in xrange(len(diffs)):
|
|
+ (diff_type, diff_text) = diffs[x]
|
|
+ if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
|
|
+ # A new patch starts here.
|
|
+ patch.start1 = char_count1
|
|
+ patch.start2 = char_count2
|
|
+ if diff_type == self.DIFF_INSERT:
|
|
+ # Insertion
|
|
+ patch.diffs.append(diffs[x])
|
|
+ patch.length2 += len(diff_text)
|
|
+ postpatch_text = (
|
|
+ postpatch_text[:char_count2]
|
|
+ + diff_text
|
|
+ + postpatch_text[char_count2:]
|
|
+ )
|
|
+ elif diff_type == self.DIFF_DELETE:
|
|
+ # Deletion.
|
|
+ patch.length1 += len(diff_text)
|
|
+ patch.diffs.append(diffs[x])
|
|
+ postpatch_text = (
|
|
+ postpatch_text[:char_count2]
|
|
+ + postpatch_text[char_count2 + len(diff_text) :]
|
|
+ )
|
|
+ elif (
|
|
+ diff_type == self.DIFF_EQUAL
|
|
+ and len(diff_text) <= 2 * self.Patch_Margin
|
|
+ and len(patch.diffs) != 0
|
|
+ and len(diffs) != x + 1
|
|
+ ):
|
|
+ # Small equality inside a patch.
|
|
+ patch.diffs.append(diffs[x])
|
|
+ patch.length1 += len(diff_text)
|
|
+ patch.length2 += len(diff_text)
|
|
+
|
|
+ if diff_type == self.DIFF_EQUAL and len(diff_text) >= 2 * self.Patch_Margin:
|
|
+ # Time for a new patch.
|
|
+ if len(patch.diffs) != 0:
|
|
+ self.patch_addContext(patch, prepatch_text)
|
|
+ patches.append(patch)
|
|
+ patch = patch_obj()
|
|
+ # Unlike Unidiff, our patch lists have a rolling context.
|
|
+ # https://github.com/google/diff-match-patch/wiki/Unidiff
|
|
+ # Update prepatch text & pos to reflect the application of the
|
|
+ # just completed patch.
|
|
+ prepatch_text = postpatch_text
|
|
+ char_count1 = char_count2
|
|
+
|
|
+ # Update the current character count.
|
|
+ if diff_type != self.DIFF_INSERT:
|
|
+ char_count1 += len(diff_text)
|
|
+ if diff_type != self.DIFF_DELETE:
|
|
+ char_count2 += len(diff_text)
|
|
+
|
|
+ # Pick up the leftover patch if not empty.
|
|
if len(patch.diffs) != 0:
|
|
- self.patch_addContext(patch, prepatch_text)
|
|
- patches.append(patch)
|
|
- patch = patch_obj()
|
|
- # Unlike Unidiff, our patch lists have a rolling context.
|
|
- # https://github.com/google/diff-match-patch/wiki/Unidiff
|
|
- # Update prepatch text & pos to reflect the application of the
|
|
- # just completed patch.
|
|
- prepatch_text = postpatch_text
|
|
- char_count1 = char_count2
|
|
-
|
|
- # Update the current character count.
|
|
- if diff_type != self.DIFF_INSERT:
|
|
- char_count1 += len(diff_text)
|
|
- if diff_type != self.DIFF_DELETE:
|
|
- char_count2 += len(diff_text)
|
|
-
|
|
- # Pick up the leftover patch if not empty.
|
|
- if len(patch.diffs) != 0:
|
|
- self.patch_addContext(patch, prepatch_text)
|
|
- patches.append(patch)
|
|
- return patches
|
|
+ self.patch_addContext(patch, prepatch_text)
|
|
+ patches.append(patch)
|
|
+ return patches
|
|
|
|
- def patch_deepCopy(self, patches):
|
|
- """Given an array of patches, return another array that is identical.
|
|
+ def patch_deepCopy(self, patches):
|
|
+ """Given an array of patches, return another array that is identical.
|
|
|
|
Args:
|
|
patches: Array of Patch objects.
|
|
@@ -1533,20 +1623,20 @@ class diff_match_patch:
|
|
Returns:
|
|
Array of Patch objects.
|
|
"""
|
|
- patchesCopy = []
|
|
- for patch in patches:
|
|
- patchCopy = patch_obj()
|
|
- # No need to deep copy the tuples since they are immutable.
|
|
- patchCopy.diffs = patch.diffs[:]
|
|
- patchCopy.start1 = patch.start1
|
|
- patchCopy.start2 = patch.start2
|
|
- patchCopy.length1 = patch.length1
|
|
- patchCopy.length2 = patch.length2
|
|
- patchesCopy.append(patchCopy)
|
|
- return patchesCopy
|
|
+ patchesCopy = []
|
|
+ for patch in patches:
|
|
+ patchCopy = patch_obj()
|
|
+ # No need to deep copy the tuples since they are immutable.
|
|
+ patchCopy.diffs = patch.diffs[:]
|
|
+ patchCopy.start1 = patch.start1
|
|
+ patchCopy.start2 = patch.start2
|
|
+ patchCopy.length1 = patch.length1
|
|
+ patchCopy.length2 = patch.length2
|
|
+ patchesCopy.append(patchCopy)
|
|
+ return patchesCopy
|
|
|
|
- def patch_apply(self, patches, text):
|
|
- """Merge a set of patches onto the text. Return a patched text, as well
|
|
+ def patch_apply(self, patches, text):
|
|
+ """Merge a set of patches onto the text. Return a patched text, as well
|
|
as a list of true/false values indicating which patches were applied.
|
|
|
|
Args:
|
|
@@ -1556,85 +1646,102 @@ class diff_match_patch:
|
|
Returns:
|
|
Two element Array, containing the new text and an array of boolean values.
|
|
"""
|
|
- if not patches:
|
|
- return (text, [])
|
|
+ if not patches:
|
|
+ return (text, [])
|
|
|
|
- # Deep copy the patches so that no changes are made to originals.
|
|
- patches = self.patch_deepCopy(patches)
|
|
+ # Deep copy the patches so that no changes are made to originals.
|
|
+ patches = self.patch_deepCopy(patches)
|
|
|
|
- nullPadding = self.patch_addPadding(patches)
|
|
- text = nullPadding + text + nullPadding
|
|
- self.patch_splitMax(patches)
|
|
-
|
|
- # delta keeps track of the offset between the expected and actual location
|
|
- # of the previous patch. If there are patches expected at positions 10 and
|
|
- # 20, but the first patch was found at 12, delta is 2 and the second patch
|
|
- # has an effective expected position of 22.
|
|
- delta = 0
|
|
- results = []
|
|
- for patch in patches:
|
|
- expected_loc = patch.start2 + delta
|
|
- text1 = self.diff_text1(patch.diffs)
|
|
- end_loc = -1
|
|
- if len(text1) > self.Match_MaxBits:
|
|
- # patch_splitMax will only provide an oversized pattern in the case of
|
|
- # a monster delete.
|
|
- start_loc = self.match_main(text, text1[:self.Match_MaxBits],
|
|
- expected_loc)
|
|
- if start_loc != -1:
|
|
- end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
|
|
- expected_loc + len(text1) - self.Match_MaxBits)
|
|
- if end_loc == -1 or start_loc >= end_loc:
|
|
- # Can't find valid trailing context. Drop this patch.
|
|
- start_loc = -1
|
|
- else:
|
|
- start_loc = self.match_main(text, text1, expected_loc)
|
|
- if start_loc == -1:
|
|
- # No match found. :(
|
|
- results.append(False)
|
|
- # Subtract the delta for this failed patch from subsequent patches.
|
|
- delta -= patch.length2 - patch.length1
|
|
- else:
|
|
- # Found a match. :)
|
|
- results.append(True)
|
|
- delta = start_loc - expected_loc
|
|
- if end_loc == -1:
|
|
- text2 = text[start_loc : start_loc + len(text1)]
|
|
- else:
|
|
- text2 = text[start_loc : end_loc + self.Match_MaxBits]
|
|
- if text1 == text2:
|
|
- # Perfect match, just shove the replacement text in.
|
|
- text = (text[:start_loc] + self.diff_text2(patch.diffs) +
|
|
- text[start_loc + len(text1):])
|
|
- else:
|
|
- # Imperfect match.
|
|
- # Run a diff to get a framework of equivalent indices.
|
|
- diffs = self.diff_main(text1, text2, False)
|
|
- if (len(text1) > self.Match_MaxBits and
|
|
- self.diff_levenshtein(diffs) / float(len(text1)) >
|
|
- self.Patch_DeleteThreshold):
|
|
- # The end points match, but the content is unacceptably bad.
|
|
- results[-1] = False
|
|
- else:
|
|
- self.diff_cleanupSemanticLossless(diffs)
|
|
- index1 = 0
|
|
- for (op, data) in patch.diffs:
|
|
- if op != self.DIFF_EQUAL:
|
|
- index2 = self.diff_xIndex(diffs, index1)
|
|
- if op == self.DIFF_INSERT: # Insertion
|
|
- text = text[:start_loc + index2] + data + text[start_loc +
|
|
- index2:]
|
|
- elif op == self.DIFF_DELETE: # Deletion
|
|
- text = text[:start_loc + index2] + text[start_loc +
|
|
- self.diff_xIndex(diffs, index1 + len(data)):]
|
|
- if op != self.DIFF_DELETE:
|
|
- index1 += len(data)
|
|
- # Strip the padding off.
|
|
- text = text[len(nullPadding):-len(nullPadding)]
|
|
- return (text, results)
|
|
+ nullPadding = self.patch_addPadding(patches)
|
|
+ text = nullPadding + text + nullPadding
|
|
+ self.patch_splitMax(patches)
|
|
+
|
|
+ # delta keeps track of the offset between the expected and actual location
|
|
+ # of the previous patch. If there are patches expected at positions 10 and
|
|
+ # 20, but the first patch was found at 12, delta is 2 and the second patch
|
|
+ # has an effective expected position of 22.
|
|
+ delta = 0
|
|
+ results = []
|
|
+ for patch in patches:
|
|
+ expected_loc = patch.start2 + delta
|
|
+ text1 = self.diff_text1(patch.diffs)
|
|
+ end_loc = -1
|
|
+ if len(text1) > self.Match_MaxBits:
|
|
+ # patch_splitMax will only provide an oversized pattern in the case of
|
|
+ # a monster delete.
|
|
+ start_loc = self.match_main(
|
|
+ text, text1[: self.Match_MaxBits], expected_loc
|
|
+ )
|
|
+ if start_loc != -1:
|
|
+ end_loc = self.match_main(
|
|
+ text,
|
|
+ text1[-self.Match_MaxBits :],
|
|
+ expected_loc + len(text1) - self.Match_MaxBits,
|
|
+ )
|
|
+ if end_loc == -1 or start_loc >= end_loc:
|
|
+ # Can't find valid trailing context. Drop this patch.
|
|
+ start_loc = -1
|
|
+ else:
|
|
+ start_loc = self.match_main(text, text1, expected_loc)
|
|
+ if start_loc == -1:
|
|
+ # No match found. :(
|
|
+ results.append(False)
|
|
+ # Subtract the delta for this failed patch from subsequent patches.
|
|
+ delta -= patch.length2 - patch.length1
|
|
+ else:
|
|
+ # Found a match. :)
|
|
+ results.append(True)
|
|
+ delta = start_loc - expected_loc
|
|
+ if end_loc == -1:
|
|
+ text2 = text[start_loc : start_loc + len(text1)]
|
|
+ else:
|
|
+ text2 = text[start_loc : end_loc + self.Match_MaxBits]
|
|
+ if text1 == text2:
|
|
+ # Perfect match, just shove the replacement text in.
|
|
+ text = (
|
|
+ text[:start_loc]
|
|
+ + self.diff_text2(patch.diffs)
|
|
+ + text[start_loc + len(text1) :]
|
|
+ )
|
|
+ else:
|
|
+ # Imperfect match.
|
|
+ # Run a diff to get a framework of equivalent indices.
|
|
+ diffs = self.diff_main(text1, text2, False)
|
|
+ if (
|
|
+ len(text1) > self.Match_MaxBits
|
|
+ and self.diff_levenshtein(diffs) / float(len(text1))
|
|
+ > self.Patch_DeleteThreshold
|
|
+ ):
|
|
+ # The end points match, but the content is unacceptably bad.
|
|
+ results[-1] = False
|
|
+ else:
|
|
+ self.diff_cleanupSemanticLossless(diffs)
|
|
+ index1 = 0
|
|
+ for (op, data) in patch.diffs:
|
|
+ if op != self.DIFF_EQUAL:
|
|
+ index2 = self.diff_xIndex(diffs, index1)
|
|
+ if op == self.DIFF_INSERT: # Insertion
|
|
+ text = (
|
|
+ text[: start_loc + index2]
|
|
+ + data
|
|
+ + text[start_loc + index2 :]
|
|
+ )
|
|
+ elif op == self.DIFF_DELETE: # Deletion
|
|
+ text = (
|
|
+ text[: start_loc + index2]
|
|
+ + text[
|
|
+ start_loc
|
|
+ + self.diff_xIndex(diffs, index1 + len(data)) :
|
|
+ ]
|
|
+ )
|
|
+ if op != self.DIFF_DELETE:
|
|
+ index1 += len(data)
|
|
+ # Strip the padding off.
|
|
+ text = text[len(nullPadding) : -len(nullPadding)]
|
|
+ return (text, results)
|
|
|
|
- def patch_addPadding(self, patches):
|
|
- """Add some padding on text start and end so that edges can match
|
|
+ def patch_addPadding(self, patches):
|
|
+ """Add some padding on text start and end so that edges can match
|
|
something. Intended to be called only from within patch_apply.
|
|
|
|
Args:
|
|
@@ -1643,144 +1750,154 @@ class diff_match_patch:
|
|
Returns:
|
|
The padding string added to each side.
|
|
"""
|
|
- paddingLength = self.Patch_Margin
|
|
- nullPadding = ""
|
|
- for x in xrange(1, paddingLength + 1):
|
|
- nullPadding += chr(x)
|
|
-
|
|
- # Bump all the patches forward.
|
|
- for patch in patches:
|
|
- patch.start1 += paddingLength
|
|
- patch.start2 += paddingLength
|
|
-
|
|
- # Add some padding on start of first diff.
|
|
- patch = patches[0]
|
|
- diffs = patch.diffs
|
|
- if not diffs or diffs[0][0] != self.DIFF_EQUAL:
|
|
- # Add nullPadding equality.
|
|
- diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
|
|
- patch.start1 -= paddingLength # Should be 0.
|
|
- patch.start2 -= paddingLength # Should be 0.
|
|
- patch.length1 += paddingLength
|
|
- patch.length2 += paddingLength
|
|
- elif paddingLength > len(diffs[0][1]):
|
|
- # Grow first equality.
|
|
- extraLength = paddingLength - len(diffs[0][1])
|
|
- newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
|
|
- diffs[0] = (diffs[0][0], newText)
|
|
- patch.start1 -= extraLength
|
|
- patch.start2 -= extraLength
|
|
- patch.length1 += extraLength
|
|
- patch.length2 += extraLength
|
|
-
|
|
- # Add some padding on end of last diff.
|
|
- patch = patches[-1]
|
|
- diffs = patch.diffs
|
|
- if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
|
|
- # Add nullPadding equality.
|
|
- diffs.append((self.DIFF_EQUAL, nullPadding))
|
|
- patch.length1 += paddingLength
|
|
- patch.length2 += paddingLength
|
|
- elif paddingLength > len(diffs[-1][1]):
|
|
- # Grow last equality.
|
|
- extraLength = paddingLength - len(diffs[-1][1])
|
|
- newText = diffs[-1][1] + nullPadding[:extraLength]
|
|
- diffs[-1] = (diffs[-1][0], newText)
|
|
- patch.length1 += extraLength
|
|
- patch.length2 += extraLength
|
|
+ paddingLength = self.Patch_Margin
|
|
+ nullPadding = ""
|
|
+ for x in xrange(1, paddingLength + 1):
|
|
+ nullPadding += chr(x)
|
|
+
|
|
+ # Bump all the patches forward.
|
|
+ for patch in patches:
|
|
+ patch.start1 += paddingLength
|
|
+ patch.start2 += paddingLength
|
|
+
|
|
+ # Add some padding on start of first diff.
|
|
+ patch = patches[0]
|
|
+ diffs = patch.diffs
|
|
+ if not diffs or diffs[0][0] != self.DIFF_EQUAL:
|
|
+ # Add nullPadding equality.
|
|
+ diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
|
|
+ patch.start1 -= paddingLength # Should be 0.
|
|
+ patch.start2 -= paddingLength # Should be 0.
|
|
+ patch.length1 += paddingLength
|
|
+ patch.length2 += paddingLength
|
|
+ elif paddingLength > len(diffs[0][1]):
|
|
+ # Grow first equality.
|
|
+ extraLength = paddingLength - len(diffs[0][1])
|
|
+ newText = nullPadding[len(diffs[0][1]) :] + diffs[0][1]
|
|
+ diffs[0] = (diffs[0][0], newText)
|
|
+ patch.start1 -= extraLength
|
|
+ patch.start2 -= extraLength
|
|
+ patch.length1 += extraLength
|
|
+ patch.length2 += extraLength
|
|
+
|
|
+ # Add some padding on end of last diff.
|
|
+ patch = patches[-1]
|
|
+ diffs = patch.diffs
|
|
+ if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
|
|
+ # Add nullPadding equality.
|
|
+ diffs.append((self.DIFF_EQUAL, nullPadding))
|
|
+ patch.length1 += paddingLength
|
|
+ patch.length2 += paddingLength
|
|
+ elif paddingLength > len(diffs[-1][1]):
|
|
+ # Grow last equality.
|
|
+ extraLength = paddingLength - len(diffs[-1][1])
|
|
+ newText = diffs[-1][1] + nullPadding[:extraLength]
|
|
+ diffs[-1] = (diffs[-1][0], newText)
|
|
+ patch.length1 += extraLength
|
|
+ patch.length2 += extraLength
|
|
|
|
- return nullPadding
|
|
+ return nullPadding
|
|
|
|
- def patch_splitMax(self, patches):
|
|
- """Look through the patches and break up any which are longer than the
|
|
+ def patch_splitMax(self, patches):
|
|
+ """Look through the patches and break up any which are longer than the
|
|
maximum limit of the match algorithm.
|
|
Intended to be called only from within patch_apply.
|
|
|
|
Args:
|
|
patches: Array of Patch objects.
|
|
"""
|
|
- patch_size = self.Match_MaxBits
|
|
- if patch_size == 0:
|
|
- # Python has the option of not splitting strings due to its ability
|
|
- # to handle integers of arbitrary precision.
|
|
- return
|
|
- for x in xrange(len(patches)):
|
|
- if patches[x].length1 <= patch_size:
|
|
- continue
|
|
- bigpatch = patches[x]
|
|
- # Remove the big old patch.
|
|
- del patches[x]
|
|
- x -= 1
|
|
- start1 = bigpatch.start1
|
|
- start2 = bigpatch.start2
|
|
- precontext = ''
|
|
- while len(bigpatch.diffs) != 0:
|
|
- # Create one of several smaller patches.
|
|
- patch = patch_obj()
|
|
- empty = True
|
|
- patch.start1 = start1 - len(precontext)
|
|
- patch.start2 = start2 - len(precontext)
|
|
- if precontext:
|
|
- patch.length1 = patch.length2 = len(precontext)
|
|
- patch.diffs.append((self.DIFF_EQUAL, precontext))
|
|
-
|
|
- while (len(bigpatch.diffs) != 0 and
|
|
- patch.length1 < patch_size - self.Patch_Margin):
|
|
- (diff_type, diff_text) = bigpatch.diffs[0]
|
|
- if diff_type == self.DIFF_INSERT:
|
|
- # Insertions are harmless.
|
|
- patch.length2 += len(diff_text)
|
|
- start2 += len(diff_text)
|
|
- patch.diffs.append(bigpatch.diffs.pop(0))
|
|
- empty = False
|
|
- elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
|
|
- patch.diffs[0][0] == self.DIFF_EQUAL and
|
|
- len(diff_text) > 2 * patch_size):
|
|
- # This is a large deletion. Let it pass in one chunk.
|
|
- patch.length1 += len(diff_text)
|
|
- start1 += len(diff_text)
|
|
- empty = False
|
|
- patch.diffs.append((diff_type, diff_text))
|
|
- del bigpatch.diffs[0]
|
|
- else:
|
|
- # Deletion or equality. Only take as much as we can stomach.
|
|
- diff_text = diff_text[:patch_size - patch.length1 -
|
|
- self.Patch_Margin]
|
|
- patch.length1 += len(diff_text)
|
|
- start1 += len(diff_text)
|
|
- if diff_type == self.DIFF_EQUAL:
|
|
- patch.length2 += len(diff_text)
|
|
- start2 += len(diff_text)
|
|
- else:
|
|
- empty = False
|
|
-
|
|
- patch.diffs.append((diff_type, diff_text))
|
|
- if diff_text == bigpatch.diffs[0][1]:
|
|
- del bigpatch.diffs[0]
|
|
- else:
|
|
- bigpatch.diffs[0] = (bigpatch.diffs[0][0],
|
|
- bigpatch.diffs[0][1][len(diff_text):])
|
|
+ patch_size = self.Match_MaxBits
|
|
+ if patch_size == 0:
|
|
+ # Python has the option of not splitting strings due to its ability
|
|
+ # to handle integers of arbitrary precision.
|
|
+ return
|
|
+ for x in xrange(len(patches)):
|
|
+ if patches[x].length1 <= patch_size:
|
|
+ continue
|
|
+ bigpatch = patches[x]
|
|
+ # Remove the big old patch.
|
|
+ del patches[x]
|
|
+ x -= 1
|
|
+ start1 = bigpatch.start1
|
|
+ start2 = bigpatch.start2
|
|
+ precontext = ""
|
|
+ while len(bigpatch.diffs) != 0:
|
|
+ # Create one of several smaller patches.
|
|
+ patch = patch_obj()
|
|
+ empty = True
|
|
+ patch.start1 = start1 - len(precontext)
|
|
+ patch.start2 = start2 - len(precontext)
|
|
+ if precontext:
|
|
+ patch.length1 = patch.length2 = len(precontext)
|
|
+ patch.diffs.append((self.DIFF_EQUAL, precontext))
|
|
+
|
|
+ while (
|
|
+ len(bigpatch.diffs) != 0
|
|
+ and patch.length1 < patch_size - self.Patch_Margin
|
|
+ ):
|
|
+ (diff_type, diff_text) = bigpatch.diffs[0]
|
|
+ if diff_type == self.DIFF_INSERT:
|
|
+ # Insertions are harmless.
|
|
+ patch.length2 += len(diff_text)
|
|
+ start2 += len(diff_text)
|
|
+ patch.diffs.append(bigpatch.diffs.pop(0))
|
|
+ empty = False
|
|
+ elif (
|
|
+ diff_type == self.DIFF_DELETE
|
|
+ and len(patch.diffs) == 1
|
|
+ and patch.diffs[0][0] == self.DIFF_EQUAL
|
|
+ and len(diff_text) > 2 * patch_size
|
|
+ ):
|
|
+ # This is a large deletion. Let it pass in one chunk.
|
|
+ patch.length1 += len(diff_text)
|
|
+ start1 += len(diff_text)
|
|
+ empty = False
|
|
+ patch.diffs.append((diff_type, diff_text))
|
|
+ del bigpatch.diffs[0]
|
|
+ else:
|
|
+ # Deletion or equality. Only take as much as we can stomach.
|
|
+ diff_text = diff_text[
|
|
+ : patch_size - patch.length1 - self.Patch_Margin
|
|
+ ]
|
|
+ patch.length1 += len(diff_text)
|
|
+ start1 += len(diff_text)
|
|
+ if diff_type == self.DIFF_EQUAL:
|
|
+ patch.length2 += len(diff_text)
|
|
+ start2 += len(diff_text)
|
|
+ else:
|
|
+ empty = False
|
|
+
|
|
+ patch.diffs.append((diff_type, diff_text))
|
|
+ if diff_text == bigpatch.diffs[0][1]:
|
|
+ del bigpatch.diffs[0]
|
|
+ else:
|
|
+ bigpatch.diffs[0] = (
|
|
+ bigpatch.diffs[0][0],
|
|
+ bigpatch.diffs[0][1][len(diff_text) :],
|
|
+ )
|
|
+
|
|
+ # Compute the head context for the next patch.
|
|
+ precontext = self.diff_text2(patch.diffs)
|
|
+ precontext = precontext[-self.Patch_Margin :]
|
|
+ # Append the end context for this patch.
|
|
+ postcontext = self.diff_text1(bigpatch.diffs)[: self.Patch_Margin]
|
|
+ if postcontext:
|
|
+ patch.length1 += len(postcontext)
|
|
+ patch.length2 += len(postcontext)
|
|
+ if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
|
|
+ patch.diffs[-1] = (
|
|
+ self.DIFF_EQUAL,
|
|
+ patch.diffs[-1][1] + postcontext,
|
|
+ )
|
|
+ else:
|
|
+ patch.diffs.append((self.DIFF_EQUAL, postcontext))
|
|
+
|
|
+ if not empty:
|
|
+ x += 1
|
|
+ patches.insert(x, patch)
|
|
|
|
- # Compute the head context for the next patch.
|
|
- precontext = self.diff_text2(patch.diffs)
|
|
- precontext = precontext[-self.Patch_Margin:]
|
|
- # Append the end context for this patch.
|
|
- postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
|
|
- if postcontext:
|
|
- patch.length1 += len(postcontext)
|
|
- patch.length2 += len(postcontext)
|
|
- if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
|
|
- patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
|
|
- postcontext)
|
|
- else:
|
|
- patch.diffs.append((self.DIFF_EQUAL, postcontext))
|
|
-
|
|
- if not empty:
|
|
- x += 1
|
|
- patches.insert(x, patch)
|
|
-
|
|
- def patch_toText(self, patches):
|
|
- """Take a list of patches and return a textual representation.
|
|
+ def patch_toText(self, patches):
|
|
+ """Take a list of patches and return a textual representation.
|
|
|
|
Args:
|
|
patches: Array of Patch objects.
|
|
@@ -1788,13 +1905,13 @@ class diff_match_patch:
|
|
Returns:
|
|
Text representation of patches.
|
|
"""
|
|
- text = []
|
|
- for patch in patches:
|
|
- text.append(str(patch))
|
|
- return "".join(text)
|
|
+ text = []
|
|
+ for patch in patches:
|
|
+ text.append(str(patch))
|
|
+ return "".join(text)
|
|
|
|
- def patch_fromText(self, textline):
|
|
- """Parse a textual representation of patches and return a list of patch
|
|
+ def patch_fromText(self, textline):
|
|
+ """Parse a textual representation of patches and return a list of patch
|
|
objects.
|
|
|
|
Args:
|
|
@@ -1806,114 +1923,114 @@ class diff_match_patch:
|
|
Raises:
|
|
ValueError: If invalid input.
|
|
"""
|
|
- if type(textline) == unicode:
|
|
- # Patches should be composed of a subset of ascii chars, Unicode not
|
|
- # required. If this encode raises UnicodeEncodeError, patch is invalid.
|
|
- textline = textline.encode("ascii")
|
|
- patches = []
|
|
- if not textline:
|
|
- return patches
|
|
- text = textline.split('\n')
|
|
- while len(text) != 0:
|
|
- m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
|
|
- if not m:
|
|
- raise ValueError("Invalid patch string: " + text[0])
|
|
- patch = patch_obj()
|
|
- patches.append(patch)
|
|
- patch.start1 = int(m.group(1))
|
|
- if m.group(2) == '':
|
|
- patch.start1 -= 1
|
|
- patch.length1 = 1
|
|
- elif m.group(2) == '0':
|
|
- patch.length1 = 0
|
|
- else:
|
|
- patch.start1 -= 1
|
|
- patch.length1 = int(m.group(2))
|
|
-
|
|
- patch.start2 = int(m.group(3))
|
|
- if m.group(4) == '':
|
|
- patch.start2 -= 1
|
|
- patch.length2 = 1
|
|
- elif m.group(4) == '0':
|
|
- patch.length2 = 0
|
|
- else:
|
|
- patch.start2 -= 1
|
|
- patch.length2 = int(m.group(4))
|
|
-
|
|
- del text[0]
|
|
-
|
|
- while len(text) != 0:
|
|
- if text[0]:
|
|
- sign = text[0][0]
|
|
- else:
|
|
- sign = ''
|
|
- line = urllib.unquote(text[0][1:])
|
|
- line = line.decode("utf-8")
|
|
- if sign == '+':
|
|
- # Insertion.
|
|
- patch.diffs.append((self.DIFF_INSERT, line))
|
|
- elif sign == '-':
|
|
- # Deletion.
|
|
- patch.diffs.append((self.DIFF_DELETE, line))
|
|
- elif sign == ' ':
|
|
- # Minor equality.
|
|
- patch.diffs.append((self.DIFF_EQUAL, line))
|
|
- elif sign == '@':
|
|
- # Start of next patch.
|
|
- break
|
|
- elif sign == '':
|
|
- # Blank line? Whatever.
|
|
- pass
|
|
- else:
|
|
- # WTF?
|
|
- raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
|
|
- del text[0]
|
|
- return patches
|
|
+ if type(textline) == unicode:
|
|
+ # Patches should be composed of a subset of ascii chars, Unicode not
|
|
+ # required. If this encode raises UnicodeEncodeError, patch is invalid.
|
|
+ textline = textline.encode("ascii")
|
|
+ patches = []
|
|
+ if not textline:
|
|
+ return patches
|
|
+ text = textline.split("\n")
|
|
+ while len(text) != 0:
|
|
+ m = re.match(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
|
|
+ if not m:
|
|
+ raise ValueError("Invalid patch string: " + text[0])
|
|
+ patch = patch_obj()
|
|
+ patches.append(patch)
|
|
+ patch.start1 = int(m.group(1))
|
|
+ if m.group(2) == "":
|
|
+ patch.start1 -= 1
|
|
+ patch.length1 = 1
|
|
+ elif m.group(2) == "0":
|
|
+ patch.length1 = 0
|
|
+ else:
|
|
+ patch.start1 -= 1
|
|
+ patch.length1 = int(m.group(2))
|
|
+
|
|
+ patch.start2 = int(m.group(3))
|
|
+ if m.group(4) == "":
|
|
+ patch.start2 -= 1
|
|
+ patch.length2 = 1
|
|
+ elif m.group(4) == "0":
|
|
+ patch.length2 = 0
|
|
+ else:
|
|
+ patch.start2 -= 1
|
|
+ patch.length2 = int(m.group(4))
|
|
+
|
|
+ del text[0]
|
|
+
|
|
+ while len(text) != 0:
|
|
+ if text[0]:
|
|
+ sign = text[0][0]
|
|
+ else:
|
|
+ sign = ""
|
|
+ line = urllib.unquote(text[0][1:])
|
|
+ line = line.decode("utf-8")
|
|
+ if sign == "+":
|
|
+ # Insertion.
|
|
+ patch.diffs.append((self.DIFF_INSERT, line))
|
|
+ elif sign == "-":
|
|
+ # Deletion.
|
|
+ patch.diffs.append((self.DIFF_DELETE, line))
|
|
+ elif sign == " ":
|
|
+ # Minor equality.
|
|
+ patch.diffs.append((self.DIFF_EQUAL, line))
|
|
+ elif sign == "@":
|
|
+ # Start of next patch.
|
|
+ break
|
|
+ elif sign == "":
|
|
+ # Blank line? Whatever.
|
|
+ pass
|
|
+ else:
|
|
+ # WTF?
|
|
+ raise ValueError(f"Invalid patch mode: '{sign}'\n{line}")
|
|
+ del text[0]
|
|
+ return patches
|
|
|
|
|
|
class patch_obj:
|
|
- """Class representing one patch operation.
|
|
+ """Class representing one patch operation.
|
|
"""
|
|
|
|
- def __init__(self):
|
|
- """Initializes with an empty list of diffs.
|
|
+ def __init__(self):
|
|
+ """Initializes with an empty list of diffs.
|
|
"""
|
|
- self.diffs = []
|
|
- self.start1 = None
|
|
- self.start2 = None
|
|
- self.length1 = 0
|
|
- self.length2 = 0
|
|
+ self.diffs = []
|
|
+ self.start1 = None
|
|
+ self.start2 = None
|
|
+ self.length1 = 0
|
|
+ self.length2 = 0
|
|
|
|
- def __str__(self):
|
|
- """Emulate GNU diff's format.
|
|
+ def __str__(self):
|
|
+ """Emulate GNU diff's format.
|
|
Header: @@ -382,8 +481,9 @@
|
|
Indices are printed as 1-based, not 0-based.
|
|
|
|
Returns:
|
|
The GNU diff string.
|
|
"""
|
|
- if self.length1 == 0:
|
|
- coords1 = str(self.start1) + ",0"
|
|
- elif self.length1 == 1:
|
|
- coords1 = str(self.start1 + 1)
|
|
- else:
|
|
- coords1 = str(self.start1 + 1) + "," + str(self.length1)
|
|
- if self.length2 == 0:
|
|
- coords2 = str(self.start2) + ",0"
|
|
- elif self.length2 == 1:
|
|
- coords2 = str(self.start2 + 1)
|
|
- else:
|
|
- coords2 = str(self.start2 + 1) + "," + str(self.length2)
|
|
- text = ["@@ -", coords1, " +", coords2, " @@\n"]
|
|
- # Escape the body of the patch with %xx notation.
|
|
- for (op, data) in self.diffs:
|
|
- if op == diff_match_patch.DIFF_INSERT:
|
|
- text.append("+")
|
|
- elif op == diff_match_patch.DIFF_DELETE:
|
|
- text.append("-")
|
|
- elif op == diff_match_patch.DIFF_EQUAL:
|
|
- text.append(" ")
|
|
- # High ascii will raise UnicodeDecodeError. Use Unicode instead.
|
|
- data = data.encode("utf-8")
|
|
- text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
|
|
- return "".join(text)
|
|
+ if self.length1 == 0:
|
|
+ coords1 = str(self.start1) + ",0"
|
|
+ elif self.length1 == 1:
|
|
+ coords1 = str(self.start1 + 1)
|
|
+ else:
|
|
+ coords1 = str(self.start1 + 1) + "," + str(self.length1)
|
|
+ if self.length2 == 0:
|
|
+ coords2 = str(self.start2) + ",0"
|
|
+ elif self.length2 == 1:
|
|
+ coords2 = str(self.start2 + 1)
|
|
+ else:
|
|
+ coords2 = str(self.start2 + 1) + "," + str(self.length2)
|
|
+ text = ["@@ -", coords1, " +", coords2, " @@\n"]
|
|
+ # Escape the body of the patch with %xx notation.
|
|
+ for (op, data) in self.diffs:
|
|
+ if op == diff_match_patch.DIFF_INSERT:
|
|
+ text.append("+")
|
|
+ elif op == diff_match_patch.DIFF_DELETE:
|
|
+ text.append("-")
|
|
+ elif op == diff_match_patch.DIFF_EQUAL:
|
|
+ text.append(" ")
|
|
+ # High ascii will raise UnicodeDecodeError. Use Unicode instead.
|
|
+ data = data.encode("utf-8")
|
|
+ text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
|
|
+ return "".join(text)
|
|
Index: xmldiff-2.4/xmldiff/_diff_match_patch_py3.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/xmldiff/_diff_match_patch_py3.py
|
|
+++ xmldiff-2.4/xmldiff/_diff_match_patch_py3.py
|
|
@@ -23,7 +23,7 @@ Computes the difference between two text
|
|
Applies the patch onto another text, allowing for errors.
|
|
"""
|
|
|
|
-__author__ = 'fraser@google.com (Neil Fraser)'
|
|
+__author__ = "fraser@google.com (Neil Fraser)"
|
|
|
|
import re
|
|
import sys
|
|
@@ -32,51 +32,51 @@ import urllib.parse
|
|
|
|
|
|
class diff_match_patch:
|
|
- """Class containing the diff, match and patch methods.
|
|
+ """Class containing the diff, match and patch methods.
|
|
|
|
Also contains the behaviour settings.
|
|
"""
|
|
|
|
- def __init__(self):
|
|
- """Inits a diff_match_patch object with default settings.
|
|
+ def __init__(self):
|
|
+ """Inits a diff_match_patch object with default settings.
|
|
Redefine these in your program to override the defaults.
|
|
"""
|
|
|
|
- # Number of seconds to map a diff before giving up (0 for infinity).
|
|
- self.Diff_Timeout = 1.0
|
|
- # Cost of an empty edit operation in terms of edit characters.
|
|
- self.Diff_EditCost = 4
|
|
- # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
|
|
- self.Match_Threshold = 0.5
|
|
- # How far to search for a match (0 = exact location, 1000+ = broad match).
|
|
- # A match this many characters away from the expected location will add
|
|
- # 1.0 to the score (0.0 is a perfect match).
|
|
- self.Match_Distance = 1000
|
|
- # When deleting a large block of text (over ~64 characters), how close do
|
|
- # the contents have to be to match the expected contents. (0.0 = perfection,
|
|
- # 1.0 = very loose). Note that Match_Threshold controls how closely the
|
|
- # end points of a delete need to match.
|
|
- self.Patch_DeleteThreshold = 0.5
|
|
- # Chunk size for context length.
|
|
- self.Patch_Margin = 4
|
|
-
|
|
- # The number of bits in an int.
|
|
- # Python has no maximum, thus to disable patch splitting set to 0.
|
|
- # However to avoid long patches in certain pathological cases, use 32.
|
|
- # Multiple short patches (using native ints) are much faster than long ones.
|
|
- self.Match_MaxBits = 32
|
|
-
|
|
- # DIFF FUNCTIONS
|
|
-
|
|
- # The data structure representing a diff is an array of tuples:
|
|
- # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
|
|
- # which means: delete "Hello", add "Goodbye" and keep " world."
|
|
- DIFF_DELETE = -1
|
|
- DIFF_INSERT = 1
|
|
- DIFF_EQUAL = 0
|
|
+ # Number of seconds to map a diff before giving up (0 for infinity).
|
|
+ self.Diff_Timeout = 1.0
|
|
+ # Cost of an empty edit operation in terms of edit characters.
|
|
+ self.Diff_EditCost = 4
|
|
+ # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
|
|
+ self.Match_Threshold = 0.5
|
|
+ # How far to search for a match (0 = exact location, 1000+ = broad match).
|
|
+ # A match this many characters away from the expected location will add
|
|
+ # 1.0 to the score (0.0 is a perfect match).
|
|
+ self.Match_Distance = 1000
|
|
+ # When deleting a large block of text (over ~64 characters), how close do
|
|
+ # the contents have to be to match the expected contents. (0.0 = perfection,
|
|
+ # 1.0 = very loose). Note that Match_Threshold controls how closely the
|
|
+ # end points of a delete need to match.
|
|
+ self.Patch_DeleteThreshold = 0.5
|
|
+ # Chunk size for context length.
|
|
+ self.Patch_Margin = 4
|
|
+
|
|
+ # The number of bits in an int.
|
|
+ # Python has no maximum, thus to disable patch splitting set to 0.
|
|
+ # However to avoid long patches in certain pathological cases, use 32.
|
|
+ # Multiple short patches (using native ints) are much faster than long ones.
|
|
+ self.Match_MaxBits = 32
|
|
+
|
|
+ # DIFF FUNCTIONS
|
|
+
|
|
+ # The data structure representing a diff is an array of tuples:
|
|
+ # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
|
|
+ # which means: delete "Hello", add "Goodbye" and keep " world."
|
|
+ DIFF_DELETE = -1
|
|
+ DIFF_INSERT = 1
|
|
+ DIFF_EQUAL = 0
|
|
|
|
- def diff_main(self, text1, text2, checklines=True, deadline=None):
|
|
- """Find the differences between two texts. Simplifies the problem by
|
|
+ def diff_main(self, text1, text2, checklines=True, deadline=None):
|
|
+ """Find the differences between two texts. Simplifies the problem by
|
|
stripping any common prefix or suffix off the texts before diffing.
|
|
|
|
Args:
|
|
@@ -91,52 +91,52 @@ class diff_match_patch:
|
|
Returns:
|
|
Array of changes.
|
|
"""
|
|
- # Set a deadline by which time the diff must be complete.
|
|
- if deadline == None:
|
|
- # Unlike in most languages, Python counts time in seconds.
|
|
- if self.Diff_Timeout <= 0:
|
|
- deadline = sys.maxsize
|
|
- else:
|
|
- deadline = time.time() + self.Diff_Timeout
|
|
-
|
|
- # Check for null inputs.
|
|
- if text1 == None or text2 == None:
|
|
- raise ValueError("Null inputs. (diff_main)")
|
|
-
|
|
- # Check for equality (speedup).
|
|
- if text1 == text2:
|
|
- if text1:
|
|
- return [(self.DIFF_EQUAL, text1)]
|
|
- return []
|
|
-
|
|
- # Trim off common prefix (speedup).
|
|
- commonlength = self.diff_commonPrefix(text1, text2)
|
|
- commonprefix = text1[:commonlength]
|
|
- text1 = text1[commonlength:]
|
|
- text2 = text2[commonlength:]
|
|
-
|
|
- # Trim off common suffix (speedup).
|
|
- commonlength = self.diff_commonSuffix(text1, text2)
|
|
- if commonlength == 0:
|
|
- commonsuffix = ''
|
|
- else:
|
|
- commonsuffix = text1[-commonlength:]
|
|
- text1 = text1[:-commonlength]
|
|
- text2 = text2[:-commonlength]
|
|
-
|
|
- # Compute the diff on the middle block.
|
|
- diffs = self.diff_compute(text1, text2, checklines, deadline)
|
|
-
|
|
- # Restore the prefix and suffix.
|
|
- if commonprefix:
|
|
- diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
|
|
- if commonsuffix:
|
|
- diffs.append((self.DIFF_EQUAL, commonsuffix))
|
|
- self.diff_cleanupMerge(diffs)
|
|
- return diffs
|
|
+ # Set a deadline by which time the diff must be complete.
|
|
+ if deadline == None:
|
|
+ # Unlike in most languages, Python counts time in seconds.
|
|
+ if self.Diff_Timeout <= 0:
|
|
+ deadline = sys.maxsize
|
|
+ else:
|
|
+ deadline = time.time() + self.Diff_Timeout
|
|
+
|
|
+ # Check for null inputs.
|
|
+ if text1 == None or text2 == None:
|
|
+ raise ValueError("Null inputs. (diff_main)")
|
|
+
|
|
+ # Check for equality (speedup).
|
|
+ if text1 == text2:
|
|
+ if text1:
|
|
+ return [(self.DIFF_EQUAL, text1)]
|
|
+ return []
|
|
+
|
|
+ # Trim off common prefix (speedup).
|
|
+ commonlength = self.diff_commonPrefix(text1, text2)
|
|
+ commonprefix = text1[:commonlength]
|
|
+ text1 = text1[commonlength:]
|
|
+ text2 = text2[commonlength:]
|
|
+
|
|
+ # Trim off common suffix (speedup).
|
|
+ commonlength = self.diff_commonSuffix(text1, text2)
|
|
+ if commonlength == 0:
|
|
+ commonsuffix = ""
|
|
+ else:
|
|
+ commonsuffix = text1[-commonlength:]
|
|
+ text1 = text1[:-commonlength]
|
|
+ text2 = text2[:-commonlength]
|
|
+
|
|
+ # Compute the diff on the middle block.
|
|
+ diffs = self.diff_compute(text1, text2, checklines, deadline)
|
|
+
|
|
+ # Restore the prefix and suffix.
|
|
+ if commonprefix:
|
|
+ diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
|
|
+ if commonsuffix:
|
|
+ diffs.append((self.DIFF_EQUAL, commonsuffix))
|
|
+ self.diff_cleanupMerge(diffs)
|
|
+ return diffs
|
|
|
|
- def diff_compute(self, text1, text2, checklines, deadline):
|
|
- """Find the differences between two texts. Assumes that the texts do not
|
|
+ def diff_compute(self, text1, text2, checklines, deadline):
|
|
+ """Find the differences between two texts. Assumes that the texts do not
|
|
have any common prefix or suffix.
|
|
|
|
Args:
|
|
@@ -150,52 +150,55 @@ class diff_match_patch:
|
|
Returns:
|
|
Array of changes.
|
|
"""
|
|
- if not text1:
|
|
- # Just add some text (speedup).
|
|
- return [(self.DIFF_INSERT, text2)]
|
|
-
|
|
- if not text2:
|
|
- # Just delete some text (speedup).
|
|
- return [(self.DIFF_DELETE, text1)]
|
|
-
|
|
- if len(text1) > len(text2):
|
|
- (longtext, shorttext) = (text1, text2)
|
|
- else:
|
|
- (shorttext, longtext) = (text1, text2)
|
|
- i = longtext.find(shorttext)
|
|
- if i != -1:
|
|
- # Shorter text is inside the longer text (speedup).
|
|
- diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
|
|
- (self.DIFF_INSERT, longtext[i + len(shorttext):])]
|
|
- # Swap insertions for deletions if diff is reversed.
|
|
- if len(text1) > len(text2):
|
|
- diffs[0] = (self.DIFF_DELETE, diffs[0][1])
|
|
- diffs[2] = (self.DIFF_DELETE, diffs[2][1])
|
|
- return diffs
|
|
-
|
|
- if len(shorttext) == 1:
|
|
- # Single character string.
|
|
- # After the previous speedup, the character can't be an equality.
|
|
- return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
|
|
-
|
|
- # Check to see if the problem can be split in two.
|
|
- hm = self.diff_halfMatch(text1, text2)
|
|
- if hm:
|
|
- # A half-match was found, sort out the return data.
|
|
- (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
|
|
- # Send both pairs off for separate processing.
|
|
- diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)
|
|
- diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)
|
|
- # Merge the results.
|
|
- return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
|
|
+ if not text1:
|
|
+ # Just add some text (speedup).
|
|
+ return [(self.DIFF_INSERT, text2)]
|
|
+
|
|
+ if not text2:
|
|
+ # Just delete some text (speedup).
|
|
+ return [(self.DIFF_DELETE, text1)]
|
|
|
|
- if checklines and len(text1) > 100 and len(text2) > 100:
|
|
- return self.diff_lineMode(text1, text2, deadline)
|
|
+ if len(text1) > len(text2):
|
|
+ (longtext, shorttext) = (text1, text2)
|
|
+ else:
|
|
+ (shorttext, longtext) = (text1, text2)
|
|
+ i = longtext.find(shorttext)
|
|
+ if i != -1:
|
|
+ # Shorter text is inside the longer text (speedup).
|
|
+ diffs = [
|
|
+ (self.DIFF_INSERT, longtext[:i]),
|
|
+ (self.DIFF_EQUAL, shorttext),
|
|
+ (self.DIFF_INSERT, longtext[i + len(shorttext) :]),
|
|
+ ]
|
|
+ # Swap insertions for deletions if diff is reversed.
|
|
+ if len(text1) > len(text2):
|
|
+ diffs[0] = (self.DIFF_DELETE, diffs[0][1])
|
|
+ diffs[2] = (self.DIFF_DELETE, diffs[2][1])
|
|
+ return diffs
|
|
+
|
|
+ if len(shorttext) == 1:
|
|
+ # Single character string.
|
|
+ # After the previous speedup, the character can't be an equality.
|
|
+ return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
|
|
+
|
|
+ # Check to see if the problem can be split in two.
|
|
+ hm = self.diff_halfMatch(text1, text2)
|
|
+ if hm:
|
|
+ # A half-match was found, sort out the return data.
|
|
+ (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
|
|
+ # Send both pairs off for separate processing.
|
|
+ diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)
|
|
+ diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)
|
|
+ # Merge the results.
|
|
+ return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
|
|
+
|
|
+ if checklines and len(text1) > 100 and len(text2) > 100:
|
|
+ return self.diff_lineMode(text1, text2, deadline)
|
|
|
|
- return self.diff_bisect(text1, text2, deadline)
|
|
+ return self.diff_bisect(text1, text2, deadline)
|
|
|
|
- def diff_lineMode(self, text1, text2, deadline):
|
|
- """Do a quick line-level diff on both strings, then rediff the parts for
|
|
+ def diff_lineMode(self, text1, text2, deadline):
|
|
+ """Do a quick line-level diff on both strings, then rediff the parts for
|
|
greater accuracy.
|
|
This speedup can produce non-minimal diffs.
|
|
|
|
@@ -208,51 +211,51 @@ class diff_match_patch:
|
|
Array of changes.
|
|
"""
|
|
|
|
- # Scan the text on a line-by-line basis first.
|
|
- (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
|
|
+ # Scan the text on a line-by-line basis first.
|
|
+ (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
|
|
|
|
- diffs = self.diff_main(text1, text2, False, deadline)
|
|
+ diffs = self.diff_main(text1, text2, False, deadline)
|
|
|
|
- # Convert the diff back to original text.
|
|
- self.diff_charsToLines(diffs, linearray)
|
|
- # Eliminate freak matches (e.g. blank lines)
|
|
- self.diff_cleanupSemantic(diffs)
|
|
-
|
|
- # Rediff any replacement blocks, this time character-by-character.
|
|
- # Add a dummy entry at the end.
|
|
- diffs.append((self.DIFF_EQUAL, ''))
|
|
- pointer = 0
|
|
- count_delete = 0
|
|
- count_insert = 0
|
|
- text_delete = ''
|
|
- text_insert = ''
|
|
- while pointer < len(diffs):
|
|
- if diffs[pointer][0] == self.DIFF_INSERT:
|
|
- count_insert += 1
|
|
- text_insert += diffs[pointer][1]
|
|
- elif diffs[pointer][0] == self.DIFF_DELETE:
|
|
- count_delete += 1
|
|
- text_delete += diffs[pointer][1]
|
|
- elif diffs[pointer][0] == self.DIFF_EQUAL:
|
|
- # Upon reaching an equality, check for prior redundancies.
|
|
- if count_delete >= 1 and count_insert >= 1:
|
|
- # Delete the offending records and add the merged ones.
|
|
- subDiff = self.diff_main(text_delete, text_insert, False, deadline)
|
|
- diffs[pointer - count_delete - count_insert : pointer] = subDiff
|
|
- pointer = pointer - count_delete - count_insert + len(subDiff)
|
|
- count_insert = 0
|
|
+ # Convert the diff back to original text.
|
|
+ self.diff_charsToLines(diffs, linearray)
|
|
+ # Eliminate freak matches (e.g. blank lines)
|
|
+ self.diff_cleanupSemantic(diffs)
|
|
+
|
|
+ # Rediff any replacement blocks, this time character-by-character.
|
|
+ # Add a dummy entry at the end.
|
|
+ diffs.append((self.DIFF_EQUAL, ""))
|
|
+ pointer = 0
|
|
count_delete = 0
|
|
- text_delete = ''
|
|
- text_insert = ''
|
|
+ count_insert = 0
|
|
+ text_delete = ""
|
|
+ text_insert = ""
|
|
+ while pointer < len(diffs):
|
|
+ if diffs[pointer][0] == self.DIFF_INSERT:
|
|
+ count_insert += 1
|
|
+ text_insert += diffs[pointer][1]
|
|
+ elif diffs[pointer][0] == self.DIFF_DELETE:
|
|
+ count_delete += 1
|
|
+ text_delete += diffs[pointer][1]
|
|
+ elif diffs[pointer][0] == self.DIFF_EQUAL:
|
|
+ # Upon reaching an equality, check for prior redundancies.
|
|
+ if count_delete >= 1 and count_insert >= 1:
|
|
+ # Delete the offending records and add the merged ones.
|
|
+ subDiff = self.diff_main(text_delete, text_insert, False, deadline)
|
|
+ diffs[pointer - count_delete - count_insert : pointer] = subDiff
|
|
+ pointer = pointer - count_delete - count_insert + len(subDiff)
|
|
+ count_insert = 0
|
|
+ count_delete = 0
|
|
+ text_delete = ""
|
|
+ text_insert = ""
|
|
|
|
- pointer += 1
|
|
+ pointer += 1
|
|
|
|
- diffs.pop() # Remove the dummy entry at the end.
|
|
+ diffs.pop() # Remove the dummy entry at the end.
|
|
|
|
- return diffs
|
|
+ return diffs
|
|
|
|
- def diff_bisect(self, text1, text2, deadline):
|
|
- """Find the 'middle snake' of a diff, split the problem in two
|
|
+ def diff_bisect(self, text1, text2, deadline):
|
|
+ """Find the 'middle snake' of a diff, split the problem in two
|
|
and return the recursively constructed diff.
|
|
See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
|
|
|
|
@@ -265,96 +268,98 @@ class diff_match_patch:
|
|
Array of diff tuples.
|
|
"""
|
|
|
|
- # Cache the text lengths to prevent multiple calls.
|
|
- text1_length = len(text1)
|
|
- text2_length = len(text2)
|
|
- max_d = (text1_length + text2_length + 1) // 2
|
|
- v_offset = max_d
|
|
- v_length = 2 * max_d
|
|
- v1 = [-1] * v_length
|
|
- v1[v_offset + 1] = 0
|
|
- v2 = v1[:]
|
|
- delta = text1_length - text2_length
|
|
- # If the total number of characters is odd, then the front path will
|
|
- # collide with the reverse path.
|
|
- front = (delta % 2 != 0)
|
|
- # Offsets for start and end of k loop.
|
|
- # Prevents mapping of space beyond the grid.
|
|
- k1start = 0
|
|
- k1end = 0
|
|
- k2start = 0
|
|
- k2end = 0
|
|
- for d in range(max_d):
|
|
- # Bail out if deadline is reached.
|
|
- if time.time() > deadline:
|
|
- break
|
|
-
|
|
- # Walk the front path one step.
|
|
- for k1 in range(-d + k1start, d + 1 - k1end, 2):
|
|
- k1_offset = v_offset + k1
|
|
- if k1 == -d or (k1 != d and
|
|
- v1[k1_offset - 1] < v1[k1_offset + 1]):
|
|
- x1 = v1[k1_offset + 1]
|
|
- else:
|
|
- x1 = v1[k1_offset - 1] + 1
|
|
- y1 = x1 - k1
|
|
- while (x1 < text1_length and y1 < text2_length and
|
|
- text1[x1] == text2[y1]):
|
|
- x1 += 1
|
|
- y1 += 1
|
|
- v1[k1_offset] = x1
|
|
- if x1 > text1_length:
|
|
- # Ran off the right of the graph.
|
|
- k1end += 2
|
|
- elif y1 > text2_length:
|
|
- # Ran off the bottom of the graph.
|
|
- k1start += 2
|
|
- elif front:
|
|
- k2_offset = v_offset + delta - k1
|
|
- if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:
|
|
- # Mirror x2 onto top-left coordinate system.
|
|
- x2 = text1_length - v2[k2_offset]
|
|
- if x1 >= x2:
|
|
- # Overlap detected.
|
|
- return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
|
|
-
|
|
- # Walk the reverse path one step.
|
|
- for k2 in range(-d + k2start, d + 1 - k2end, 2):
|
|
- k2_offset = v_offset + k2
|
|
- if k2 == -d or (k2 != d and
|
|
- v2[k2_offset - 1] < v2[k2_offset + 1]):
|
|
- x2 = v2[k2_offset + 1]
|
|
- else:
|
|
- x2 = v2[k2_offset - 1] + 1
|
|
- y2 = x2 - k2
|
|
- while (x2 < text1_length and y2 < text2_length and
|
|
- text1[-x2 - 1] == text2[-y2 - 1]):
|
|
- x2 += 1
|
|
- y2 += 1
|
|
- v2[k2_offset] = x2
|
|
- if x2 > text1_length:
|
|
- # Ran off the left of the graph.
|
|
- k2end += 2
|
|
- elif y2 > text2_length:
|
|
- # Ran off the top of the graph.
|
|
- k2start += 2
|
|
- elif not front:
|
|
- k1_offset = v_offset + delta - k2
|
|
- if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:
|
|
- x1 = v1[k1_offset]
|
|
- y1 = v_offset + x1 - k1_offset
|
|
- # Mirror x2 onto top-left coordinate system.
|
|
- x2 = text1_length - x2
|
|
- if x1 >= x2:
|
|
- # Overlap detected.
|
|
- return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
|
|
-
|
|
- # Diff took too long and hit the deadline or
|
|
- # number of diffs equals number of characters, no commonality at all.
|
|
- return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
|
|
+ # Cache the text lengths to prevent multiple calls.
|
|
+ text1_length = len(text1)
|
|
+ text2_length = len(text2)
|
|
+ max_d = (text1_length + text2_length + 1) // 2
|
|
+ v_offset = max_d
|
|
+ v_length = 2 * max_d
|
|
+ v1 = [-1] * v_length
|
|
+ v1[v_offset + 1] = 0
|
|
+ v2 = v1[:]
|
|
+ delta = text1_length - text2_length
|
|
+ # If the total number of characters is odd, then the front path will
|
|
+ # collide with the reverse path.
|
|
+ front = delta % 2 != 0
|
|
+ # Offsets for start and end of k loop.
|
|
+ # Prevents mapping of space beyond the grid.
|
|
+ k1start = 0
|
|
+ k1end = 0
|
|
+ k2start = 0
|
|
+ k2end = 0
|
|
+ for d in range(max_d):
|
|
+ # Bail out if deadline is reached.
|
|
+ if time.time() > deadline:
|
|
+ break
|
|
+
|
|
+ # Walk the front path one step.
|
|
+ for k1 in range(-d + k1start, d + 1 - k1end, 2):
|
|
+ k1_offset = v_offset + k1
|
|
+ if k1 == -d or (k1 != d and v1[k1_offset - 1] < v1[k1_offset + 1]):
|
|
+ x1 = v1[k1_offset + 1]
|
|
+ else:
|
|
+ x1 = v1[k1_offset - 1] + 1
|
|
+ y1 = x1 - k1
|
|
+ while (
|
|
+ x1 < text1_length and y1 < text2_length and text1[x1] == text2[y1]
|
|
+ ):
|
|
+ x1 += 1
|
|
+ y1 += 1
|
|
+ v1[k1_offset] = x1
|
|
+ if x1 > text1_length:
|
|
+ # Ran off the right of the graph.
|
|
+ k1end += 2
|
|
+ elif y1 > text2_length:
|
|
+ # Ran off the bottom of the graph.
|
|
+ k1start += 2
|
|
+ elif front:
|
|
+ k2_offset = v_offset + delta - k1
|
|
+ if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:
|
|
+ # Mirror x2 onto top-left coordinate system.
|
|
+ x2 = text1_length - v2[k2_offset]
|
|
+ if x1 >= x2:
|
|
+ # Overlap detected.
|
|
+ return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
|
|
+
|
|
+ # Walk the reverse path one step.
|
|
+ for k2 in range(-d + k2start, d + 1 - k2end, 2):
|
|
+ k2_offset = v_offset + k2
|
|
+ if k2 == -d or (k2 != d and v2[k2_offset - 1] < v2[k2_offset + 1]):
|
|
+ x2 = v2[k2_offset + 1]
|
|
+ else:
|
|
+ x2 = v2[k2_offset - 1] + 1
|
|
+ y2 = x2 - k2
|
|
+ while (
|
|
+ x2 < text1_length
|
|
+ and y2 < text2_length
|
|
+ and text1[-x2 - 1] == text2[-y2 - 1]
|
|
+ ):
|
|
+ x2 += 1
|
|
+ y2 += 1
|
|
+ v2[k2_offset] = x2
|
|
+ if x2 > text1_length:
|
|
+ # Ran off the left of the graph.
|
|
+ k2end += 2
|
|
+ elif y2 > text2_length:
|
|
+ # Ran off the top of the graph.
|
|
+ k2start += 2
|
|
+ elif not front:
|
|
+ k1_offset = v_offset + delta - k2
|
|
+ if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:
|
|
+ x1 = v1[k1_offset]
|
|
+ y1 = v_offset + x1 - k1_offset
|
|
+ # Mirror x2 onto top-left coordinate system.
|
|
+ x2 = text1_length - x2
|
|
+ if x1 >= x2:
|
|
+ # Overlap detected.
|
|
+ return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
|
|
+
|
|
+ # Diff took too long and hit the deadline or
|
|
+ # number of diffs equals number of characters, no commonality at all.
|
|
+ return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
|
|
|
|
- def diff_bisectSplit(self, text1, text2, x, y, deadline):
|
|
- """Given the location of the 'middle snake', split the diff in two parts
|
|
+ def diff_bisectSplit(self, text1, text2, x, y, deadline):
|
|
+ """Given the location of the 'middle snake', split the diff in two parts
|
|
and recurse.
|
|
|
|
Args:
|
|
@@ -367,19 +372,19 @@ class diff_match_patch:
|
|
Returns:
|
|
Array of diff tuples.
|
|
"""
|
|
- text1a = text1[:x]
|
|
- text2a = text2[:y]
|
|
- text1b = text1[x:]
|
|
- text2b = text2[y:]
|
|
-
|
|
- # Compute both diffs serially.
|
|
- diffs = self.diff_main(text1a, text2a, False, deadline)
|
|
- diffsb = self.diff_main(text1b, text2b, False, deadline)
|
|
+ text1a = text1[:x]
|
|
+ text2a = text2[:y]
|
|
+ text1b = text1[x:]
|
|
+ text2b = text2[y:]
|
|
+
|
|
+ # Compute both diffs serially.
|
|
+ diffs = self.diff_main(text1a, text2a, False, deadline)
|
|
+ diffsb = self.diff_main(text1b, text2b, False, deadline)
|
|
|
|
- return diffs + diffsb
|
|
+ return diffs + diffsb
|
|
|
|
- def diff_linesToChars(self, text1, text2):
|
|
- """Split two texts into an array of strings. Reduce the texts to a string
|
|
+ def diff_linesToChars(self, text1, text2):
|
|
+ """Split two texts into an array of strings. Reduce the texts to a string
|
|
of hashes where each Unicode character represents one line.
|
|
|
|
Args:
|
|
@@ -391,15 +396,15 @@ class diff_match_patch:
|
|
the array of unique strings. The zeroth element of the array of unique
|
|
strings is intentionally blank.
|
|
"""
|
|
- lineArray = [] # e.g. lineArray[4] == "Hello\n"
|
|
- lineHash = {} # e.g. lineHash["Hello\n"] == 4
|
|
+ lineArray = [] # e.g. lineArray[4] == "Hello\n"
|
|
+ lineHash = {} # e.g. lineHash["Hello\n"] == 4
|
|
|
|
- # "\x00" is a valid character, but various debuggers don't like it.
|
|
- # So we'll insert a junk entry to avoid generating a null character.
|
|
- lineArray.append('')
|
|
+ # "\x00" is a valid character, but various debuggers don't like it.
|
|
+ # So we'll insert a junk entry to avoid generating a null character.
|
|
+ lineArray.append("")
|
|
|
|
- def diff_linesToCharsMunge(text):
|
|
- """Split a text into an array of strings. Reduce the texts to a string
|
|
+ def diff_linesToCharsMunge(text):
|
|
+ """Split a text into an array of strings. Reduce the texts to a string
|
|
of hashes where each Unicode character represents one line.
|
|
Modifies linearray and linehash through being a closure.
|
|
|
|
@@ -409,54 +414,54 @@ class diff_match_patch:
|
|
Returns:
|
|
Encoded string.
|
|
"""
|
|
- chars = []
|
|
- # Walk the text, pulling out a substring for each line.
|
|
- # text.split('\n') would would temporarily double our memory footprint.
|
|
- # Modifying text would create many large strings to garbage collect.
|
|
- lineStart = 0
|
|
- lineEnd = -1
|
|
- while lineEnd < len(text) - 1:
|
|
- lineEnd = text.find('\n', lineStart)
|
|
- if lineEnd == -1:
|
|
- lineEnd = len(text) - 1
|
|
- line = text[lineStart:lineEnd + 1]
|
|
-
|
|
- if line in lineHash:
|
|
- chars.append(chr(lineHash[line]))
|
|
- else:
|
|
- if len(lineArray) == maxLines:
|
|
- # Bail out at 1114111 because chr(1114112) throws.
|
|
- line = text[lineStart:]
|
|
- lineEnd = len(text)
|
|
- lineArray.append(line)
|
|
- lineHash[line] = len(lineArray) - 1
|
|
- chars.append(chr(len(lineArray) - 1))
|
|
- lineStart = lineEnd + 1
|
|
- return "".join(chars)
|
|
-
|
|
- # Allocate 2/3rds of the space for text1, the rest for text2.
|
|
- maxLines = 666666
|
|
- chars1 = diff_linesToCharsMunge(text1)
|
|
- maxLines = 1114111
|
|
- chars2 = diff_linesToCharsMunge(text2)
|
|
- return (chars1, chars2, lineArray)
|
|
+ chars = []
|
|
+ # Walk the text, pulling out a substring for each line.
|
|
+ # text.split('\n') would would temporarily double our memory footprint.
|
|
+ # Modifying text would create many large strings to garbage collect.
|
|
+ lineStart = 0
|
|
+ lineEnd = -1
|
|
+ while lineEnd < len(text) - 1:
|
|
+ lineEnd = text.find("\n", lineStart)
|
|
+ if lineEnd == -1:
|
|
+ lineEnd = len(text) - 1
|
|
+ line = text[lineStart : lineEnd + 1]
|
|
+
|
|
+ if line in lineHash:
|
|
+ chars.append(chr(lineHash[line]))
|
|
+ else:
|
|
+ if len(lineArray) == maxLines:
|
|
+ # Bail out at 1114111 because chr(1114112) throws.
|
|
+ line = text[lineStart:]
|
|
+ lineEnd = len(text)
|
|
+ lineArray.append(line)
|
|
+ lineHash[line] = len(lineArray) - 1
|
|
+ chars.append(chr(len(lineArray) - 1))
|
|
+ lineStart = lineEnd + 1
|
|
+ return "".join(chars)
|
|
+
|
|
+ # Allocate 2/3rds of the space for text1, the rest for text2.
|
|
+ maxLines = 666666
|
|
+ chars1 = diff_linesToCharsMunge(text1)
|
|
+ maxLines = 1114111
|
|
+ chars2 = diff_linesToCharsMunge(text2)
|
|
+ return (chars1, chars2, lineArray)
|
|
|
|
- def diff_charsToLines(self, diffs, lineArray):
|
|
- """Rehydrate the text in a diff from a string of line hashes to real lines
|
|
+ def diff_charsToLines(self, diffs, lineArray):
|
|
+ """Rehydrate the text in a diff from a string of line hashes to real lines
|
|
of text.
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
lineArray: Array of unique strings.
|
|
"""
|
|
- for i in range(len(diffs)):
|
|
- text = []
|
|
- for char in diffs[i][1]:
|
|
- text.append(lineArray[ord(char)])
|
|
- diffs[i] = (diffs[i][0], "".join(text))
|
|
+ for i in range(len(diffs)):
|
|
+ text = []
|
|
+ for char in diffs[i][1]:
|
|
+ text.append(lineArray[ord(char)])
|
|
+ diffs[i] = (diffs[i][0], "".join(text))
|
|
|
|
- def diff_commonPrefix(self, text1, text2):
|
|
- """Determine the common prefix of two strings.
|
|
+ def diff_commonPrefix(self, text1, text2):
|
|
+ """Determine the common prefix of two strings.
|
|
|
|
Args:
|
|
text1: First string.
|
|
@@ -465,26 +470,26 @@ class diff_match_patch:
|
|
Returns:
|
|
The number of characters common to the start of each string.
|
|
"""
|
|
- # Quick check for common null cases.
|
|
- if not text1 or not text2 or text1[0] != text2[0]:
|
|
- return 0
|
|
- # Binary search.
|
|
- # Performance analysis: https://neil.fraser.name/news/2007/10/09/
|
|
- pointermin = 0
|
|
- pointermax = min(len(text1), len(text2))
|
|
- pointermid = pointermax
|
|
- pointerstart = 0
|
|
- while pointermin < pointermid:
|
|
- if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
|
|
- pointermin = pointermid
|
|
- pointerstart = pointermin
|
|
- else:
|
|
- pointermax = pointermid
|
|
- pointermid = (pointermax - pointermin) // 2 + pointermin
|
|
- return pointermid
|
|
+ # Quick check for common null cases.
|
|
+ if not text1 or not text2 or text1[0] != text2[0]:
|
|
+ return 0
|
|
+ # Binary search.
|
|
+ # Performance analysis: https://neil.fraser.name/news/2007/10/09/
|
|
+ pointermin = 0
|
|
+ pointermax = min(len(text1), len(text2))
|
|
+ pointermid = pointermax
|
|
+ pointerstart = 0
|
|
+ while pointermin < pointermid:
|
|
+ if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
|
|
+ pointermin = pointermid
|
|
+ pointerstart = pointermin
|
|
+ else:
|
|
+ pointermax = pointermid
|
|
+ pointermid = (pointermax - pointermin) // 2 + pointermin
|
|
+ return pointermid
|
|
|
|
- def diff_commonSuffix(self, text1, text2):
|
|
- """Determine the common suffix of two strings.
|
|
+ def diff_commonSuffix(self, text1, text2):
|
|
+ """Determine the common suffix of two strings.
|
|
|
|
Args:
|
|
text1: First string.
|
|
@@ -493,27 +498,29 @@ class diff_match_patch:
|
|
Returns:
|
|
The number of characters common to the end of each string.
|
|
"""
|
|
- # Quick check for common null cases.
|
|
- if not text1 or not text2 or text1[-1] != text2[-1]:
|
|
- return 0
|
|
- # Binary search.
|
|
- # Performance analysis: https://neil.fraser.name/news/2007/10/09/
|
|
- pointermin = 0
|
|
- pointermax = min(len(text1), len(text2))
|
|
- pointermid = pointermax
|
|
- pointerend = 0
|
|
- while pointermin < pointermid:
|
|
- if (text1[-pointermid:len(text1) - pointerend] ==
|
|
- text2[-pointermid:len(text2) - pointerend]):
|
|
- pointermin = pointermid
|
|
- pointerend = pointermin
|
|
- else:
|
|
- pointermax = pointermid
|
|
- pointermid = (pointermax - pointermin) // 2 + pointermin
|
|
- return pointermid
|
|
+ # Quick check for common null cases.
|
|
+ if not text1 or not text2 or text1[-1] != text2[-1]:
|
|
+ return 0
|
|
+ # Binary search.
|
|
+ # Performance analysis: https://neil.fraser.name/news/2007/10/09/
|
|
+ pointermin = 0
|
|
+ pointermax = min(len(text1), len(text2))
|
|
+ pointermid = pointermax
|
|
+ pointerend = 0
|
|
+ while pointermin < pointermid:
|
|
+ if (
|
|
+ text1[-pointermid : len(text1) - pointerend]
|
|
+ == text2[-pointermid : len(text2) - pointerend]
|
|
+ ):
|
|
+ pointermin = pointermid
|
|
+ pointerend = pointermin
|
|
+ else:
|
|
+ pointermax = pointermid
|
|
+ pointermid = (pointermax - pointermin) // 2 + pointermin
|
|
+ return pointermid
|
|
|
|
- def diff_commonOverlap(self, text1, text2):
|
|
- """Determine if the suffix of one string is the prefix of another.
|
|
+ def diff_commonOverlap(self, text1, text2):
|
|
+ """Determine if the suffix of one string is the prefix of another.
|
|
|
|
Args:
|
|
text1 First string.
|
|
@@ -523,39 +530,39 @@ class diff_match_patch:
|
|
The number of characters common to the end of the first
|
|
string and the start of the second string.
|
|
"""
|
|
- # Cache the text lengths to prevent multiple calls.
|
|
- text1_length = len(text1)
|
|
- text2_length = len(text2)
|
|
- # Eliminate the null case.
|
|
- if text1_length == 0 or text2_length == 0:
|
|
- return 0
|
|
- # Truncate the longer string.
|
|
- if text1_length > text2_length:
|
|
- text1 = text1[-text2_length:]
|
|
- elif text1_length < text2_length:
|
|
- text2 = text2[:text1_length]
|
|
- text_length = min(text1_length, text2_length)
|
|
- # Quick check for the worst case.
|
|
- if text1 == text2:
|
|
- return text_length
|
|
-
|
|
- # Start by looking for a single character match
|
|
- # and increase length until no match is found.
|
|
- # Performance analysis: https://neil.fraser.name/news/2010/11/04/
|
|
- best = 0
|
|
- length = 1
|
|
- while True:
|
|
- pattern = text1[-length:]
|
|
- found = text2.find(pattern)
|
|
- if found == -1:
|
|
- return best
|
|
- length += found
|
|
- if found == 0 or text1[-length:] == text2[:length]:
|
|
- best = length
|
|
- length += 1
|
|
+ # Cache the text lengths to prevent multiple calls.
|
|
+ text1_length = len(text1)
|
|
+ text2_length = len(text2)
|
|
+ # Eliminate the null case.
|
|
+ if text1_length == 0 or text2_length == 0:
|
|
+ return 0
|
|
+ # Truncate the longer string.
|
|
+ if text1_length > text2_length:
|
|
+ text1 = text1[-text2_length:]
|
|
+ elif text1_length < text2_length:
|
|
+ text2 = text2[:text1_length]
|
|
+ text_length = min(text1_length, text2_length)
|
|
+ # Quick check for the worst case.
|
|
+ if text1 == text2:
|
|
+ return text_length
|
|
+
|
|
+ # Start by looking for a single character match
|
|
+ # and increase length until no match is found.
|
|
+ # Performance analysis: https://neil.fraser.name/news/2010/11/04/
|
|
+ best = 0
|
|
+ length = 1
|
|
+ while True:
|
|
+ pattern = text1[-length:]
|
|
+ found = text2.find(pattern)
|
|
+ if found == -1:
|
|
+ return best
|
|
+ length += found
|
|
+ if found == 0 or text1[-length:] == text2[:length]:
|
|
+ best = length
|
|
+ length += 1
|
|
|
|
- def diff_halfMatch(self, text1, text2):
|
|
- """Do the two texts share a substring which is at least half the length of
|
|
+ def diff_halfMatch(self, text1, text2):
|
|
+ """Do the two texts share a substring which is at least half the length of
|
|
the longer text?
|
|
This speedup can produce non-minimal diffs.
|
|
|
|
@@ -568,18 +575,18 @@ class diff_match_patch:
|
|
the prefix of text2, the suffix of text2 and the common middle. Or None
|
|
if there was no match.
|
|
"""
|
|
- if self.Diff_Timeout <= 0:
|
|
- # Don't risk returning a non-optimal diff if we have unlimited time.
|
|
- return None
|
|
- if len(text1) > len(text2):
|
|
- (longtext, shorttext) = (text1, text2)
|
|
- else:
|
|
- (shorttext, longtext) = (text1, text2)
|
|
- if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):
|
|
- return None # Pointless.
|
|
+ if self.Diff_Timeout <= 0:
|
|
+ # Don't risk returning a non-optimal diff if we have unlimited time.
|
|
+ return None
|
|
+ if len(text1) > len(text2):
|
|
+ (longtext, shorttext) = (text1, text2)
|
|
+ else:
|
|
+ (shorttext, longtext) = (text1, text2)
|
|
+ if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):
|
|
+ return None # Pointless.
|
|
|
|
- def diff_halfMatchI(longtext, shorttext, i):
|
|
- """Does a substring of shorttext exist within longtext such that the
|
|
+ def diff_halfMatchI(longtext, shorttext, i):
|
|
+ """Does a substring of shorttext exist within longtext such that the
|
|
substring is at least half the length of longtext?
|
|
Closure, but does not reference any external variables.
|
|
|
|
@@ -593,148 +600,181 @@ class diff_match_patch:
|
|
longtext, the prefix of shorttext, the suffix of shorttext and the
|
|
common middle. Or None if there was no match.
|
|
"""
|
|
- seed = longtext[i:i + len(longtext) // 4]
|
|
- best_common = ''
|
|
- j = shorttext.find(seed)
|
|
- while j != -1:
|
|
- prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
|
|
- suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
|
|
- if len(best_common) < suffixLength + prefixLength:
|
|
- best_common = (shorttext[j - suffixLength:j] +
|
|
- shorttext[j:j + prefixLength])
|
|
- best_longtext_a = longtext[:i - suffixLength]
|
|
- best_longtext_b = longtext[i + prefixLength:]
|
|
- best_shorttext_a = shorttext[:j - suffixLength]
|
|
- best_shorttext_b = shorttext[j + prefixLength:]
|
|
- j = shorttext.find(seed, j + 1)
|
|
-
|
|
- if len(best_common) * 2 >= len(longtext):
|
|
- return (best_longtext_a, best_longtext_b,
|
|
- best_shorttext_a, best_shorttext_b, best_common)
|
|
- else:
|
|
- return None
|
|
-
|
|
- # First check if the second quarter is the seed for a half-match.
|
|
- hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)
|
|
- # Check again based on the third quarter.
|
|
- hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)
|
|
- if not hm1 and not hm2:
|
|
- return None
|
|
- elif not hm2:
|
|
- hm = hm1
|
|
- elif not hm1:
|
|
- hm = hm2
|
|
- else:
|
|
- # Both matched. Select the longest.
|
|
- if len(hm1[4]) > len(hm2[4]):
|
|
- hm = hm1
|
|
- else:
|
|
- hm = hm2
|
|
-
|
|
- # A half-match was found, sort out the return data.
|
|
- if len(text1) > len(text2):
|
|
- (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
|
|
- else:
|
|
- (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
|
|
- return (text1_a, text1_b, text2_a, text2_b, mid_common)
|
|
+ seed = longtext[i : i + len(longtext) // 4]
|
|
+ best_common = ""
|
|
+ j = shorttext.find(seed)
|
|
+ while j != -1:
|
|
+ prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
|
|
+ suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
|
|
+ if len(best_common) < suffixLength + prefixLength:
|
|
+ best_common = (
|
|
+ shorttext[j - suffixLength : j]
|
|
+ + shorttext[j : j + prefixLength]
|
|
+ )
|
|
+ best_longtext_a = longtext[: i - suffixLength]
|
|
+ best_longtext_b = longtext[i + prefixLength :]
|
|
+ best_shorttext_a = shorttext[: j - suffixLength]
|
|
+ best_shorttext_b = shorttext[j + prefixLength :]
|
|
+ j = shorttext.find(seed, j + 1)
|
|
+
|
|
+ if len(best_common) * 2 >= len(longtext):
|
|
+ return (
|
|
+ best_longtext_a,
|
|
+ best_longtext_b,
|
|
+ best_shorttext_a,
|
|
+ best_shorttext_b,
|
|
+ best_common,
|
|
+ )
|
|
+ else:
|
|
+ return None
|
|
+
|
|
+ # First check if the second quarter is the seed for a half-match.
|
|
+ hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)
|
|
+ # Check again based on the third quarter.
|
|
+ hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)
|
|
+ if not hm1 and not hm2:
|
|
+ return None
|
|
+ elif not hm2:
|
|
+ hm = hm1
|
|
+ elif not hm1:
|
|
+ hm = hm2
|
|
+ else:
|
|
+ # Both matched. Select the longest.
|
|
+ if len(hm1[4]) > len(hm2[4]):
|
|
+ hm = hm1
|
|
+ else:
|
|
+ hm = hm2
|
|
|
|
- def diff_cleanupSemantic(self, diffs):
|
|
- """Reduce the number of edits by eliminating semantically trivial
|
|
+ # A half-match was found, sort out the return data.
|
|
+ if len(text1) > len(text2):
|
|
+ (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
|
|
+ else:
|
|
+ (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
|
|
+ return (text1_a, text1_b, text2_a, text2_b, mid_common)
|
|
+
|
|
+ def diff_cleanupSemantic(self, diffs):
|
|
+ """Reduce the number of edits by eliminating semantically trivial
|
|
equalities.
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
"""
|
|
- changes = False
|
|
- equalities = [] # Stack of indices where equalities are found.
|
|
- lastEquality = None # Always equal to diffs[equalities[-1]][1]
|
|
- pointer = 0 # Index of current position.
|
|
- # Number of chars that changed prior to the equality.
|
|
- length_insertions1, length_deletions1 = 0, 0
|
|
- # Number of chars that changed after the equality.
|
|
- length_insertions2, length_deletions2 = 0, 0
|
|
- while pointer < len(diffs):
|
|
- if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
|
|
- equalities.append(pointer)
|
|
- length_insertions1, length_insertions2 = length_insertions2, 0
|
|
- length_deletions1, length_deletions2 = length_deletions2, 0
|
|
- lastEquality = diffs[pointer][1]
|
|
- else: # An insertion or deletion.
|
|
- if diffs[pointer][0] == self.DIFF_INSERT:
|
|
- length_insertions2 += len(diffs[pointer][1])
|
|
- else:
|
|
- length_deletions2 += len(diffs[pointer][1])
|
|
- # Eliminate an equality that is smaller or equal to the edits on both
|
|
- # sides of it.
|
|
- if (lastEquality and (len(lastEquality) <=
|
|
- max(length_insertions1, length_deletions1)) and
|
|
- (len(lastEquality) <= max(length_insertions2, length_deletions2))):
|
|
- # Duplicate record.
|
|
- diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
|
|
- # Change second copy to insert.
|
|
- diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
|
|
- diffs[equalities[-1] + 1][1])
|
|
- # Throw away the equality we just deleted.
|
|
- equalities.pop()
|
|
- # Throw away the previous equality (it needs to be reevaluated).
|
|
- if len(equalities):
|
|
- equalities.pop()
|
|
- if len(equalities):
|
|
- pointer = equalities[-1]
|
|
- else:
|
|
- pointer = -1
|
|
- # Reset the counters.
|
|
- length_insertions1, length_deletions1 = 0, 0
|
|
- length_insertions2, length_deletions2 = 0, 0
|
|
- lastEquality = None
|
|
- changes = True
|
|
- pointer += 1
|
|
-
|
|
- # Normalize the diff.
|
|
- if changes:
|
|
- self.diff_cleanupMerge(diffs)
|
|
- self.diff_cleanupSemanticLossless(diffs)
|
|
-
|
|
- # Find any overlaps between deletions and insertions.
|
|
- # e.g: <del>abcxxx</del><ins>xxxdef</ins>
|
|
- # -> <del>abc</del>xxx<ins>def</ins>
|
|
- # e.g: <del>xxxabc</del><ins>defxxx</ins>
|
|
- # -> <ins>def</ins>xxx<del>abc</del>
|
|
- # Only extract an overlap if it is as big as the edit ahead or behind it.
|
|
- pointer = 1
|
|
- while pointer < len(diffs):
|
|
- if (diffs[pointer - 1][0] == self.DIFF_DELETE and
|
|
- diffs[pointer][0] == self.DIFF_INSERT):
|
|
- deletion = diffs[pointer - 1][1]
|
|
- insertion = diffs[pointer][1]
|
|
- overlap_length1 = self.diff_commonOverlap(deletion, insertion)
|
|
- overlap_length2 = self.diff_commonOverlap(insertion, deletion)
|
|
- if overlap_length1 >= overlap_length2:
|
|
- if (overlap_length1 >= len(deletion) / 2.0 or
|
|
- overlap_length1 >= len(insertion) / 2.0):
|
|
- # Overlap found. Insert an equality and trim the surrounding edits.
|
|
- diffs.insert(pointer, (self.DIFF_EQUAL,
|
|
- insertion[:overlap_length1]))
|
|
- diffs[pointer - 1] = (self.DIFF_DELETE,
|
|
- deletion[:len(deletion) - overlap_length1])
|
|
- diffs[pointer + 1] = (self.DIFF_INSERT,
|
|
- insertion[overlap_length1:])
|
|
+ changes = False
|
|
+ equalities = [] # Stack of indices where equalities are found.
|
|
+ lastEquality = None # Always equal to diffs[equalities[-1]][1]
|
|
+ pointer = 0 # Index of current position.
|
|
+ # Number of chars that changed prior to the equality.
|
|
+ length_insertions1, length_deletions1 = 0, 0
|
|
+ # Number of chars that changed after the equality.
|
|
+ length_insertions2, length_deletions2 = 0, 0
|
|
+ while pointer < len(diffs):
|
|
+ if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
|
|
+ equalities.append(pointer)
|
|
+ length_insertions1, length_insertions2 = length_insertions2, 0
|
|
+ length_deletions1, length_deletions2 = length_deletions2, 0
|
|
+ lastEquality = diffs[pointer][1]
|
|
+ else: # An insertion or deletion.
|
|
+ if diffs[pointer][0] == self.DIFF_INSERT:
|
|
+ length_insertions2 += len(diffs[pointer][1])
|
|
+ else:
|
|
+ length_deletions2 += len(diffs[pointer][1])
|
|
+ # Eliminate an equality that is smaller or equal to the edits on both
|
|
+ # sides of it.
|
|
+ if (
|
|
+ lastEquality
|
|
+ and (
|
|
+ len(lastEquality) <= max(length_insertions1, length_deletions1)
|
|
+ )
|
|
+ and (
|
|
+ len(lastEquality) <= max(length_insertions2, length_deletions2)
|
|
+ )
|
|
+ ):
|
|
+ # Duplicate record.
|
|
+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
|
|
+ # Change second copy to insert.
|
|
+ diffs[equalities[-1] + 1] = (
|
|
+ self.DIFF_INSERT,
|
|
+ diffs[equalities[-1] + 1][1],
|
|
+ )
|
|
+ # Throw away the equality we just deleted.
|
|
+ equalities.pop()
|
|
+ # Throw away the previous equality (it needs to be reevaluated).
|
|
+ if len(equalities):
|
|
+ equalities.pop()
|
|
+ if len(equalities):
|
|
+ pointer = equalities[-1]
|
|
+ else:
|
|
+ pointer = -1
|
|
+ # Reset the counters.
|
|
+ length_insertions1, length_deletions1 = 0, 0
|
|
+ length_insertions2, length_deletions2 = 0, 0
|
|
+ lastEquality = None
|
|
+ changes = True
|
|
pointer += 1
|
|
- else:
|
|
- if (overlap_length2 >= len(deletion) / 2.0 or
|
|
- overlap_length2 >= len(insertion) / 2.0):
|
|
- # Reverse overlap found.
|
|
- # Insert an equality and swap and trim the surrounding edits.
|
|
- diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]))
|
|
- diffs[pointer - 1] = (self.DIFF_INSERT,
|
|
- insertion[:len(insertion) - overlap_length2])
|
|
- diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:])
|
|
+
|
|
+ # Normalize the diff.
|
|
+ if changes:
|
|
+ self.diff_cleanupMerge(diffs)
|
|
+ self.diff_cleanupSemanticLossless(diffs)
|
|
+
|
|
+ # Find any overlaps between deletions and insertions.
|
|
+ # e.g: <del>abcxxx</del><ins>xxxdef</ins>
|
|
+ # -> <del>abc</del>xxx<ins>def</ins>
|
|
+ # e.g: <del>xxxabc</del><ins>defxxx</ins>
|
|
+ # -> <ins>def</ins>xxx<del>abc</del>
|
|
+ # Only extract an overlap if it is as big as the edit ahead or behind it.
|
|
+ pointer = 1
|
|
+ while pointer < len(diffs):
|
|
+ if (
|
|
+ diffs[pointer - 1][0] == self.DIFF_DELETE
|
|
+ and diffs[pointer][0] == self.DIFF_INSERT
|
|
+ ):
|
|
+ deletion = diffs[pointer - 1][1]
|
|
+ insertion = diffs[pointer][1]
|
|
+ overlap_length1 = self.diff_commonOverlap(deletion, insertion)
|
|
+ overlap_length2 = self.diff_commonOverlap(insertion, deletion)
|
|
+ if overlap_length1 >= overlap_length2:
|
|
+ if (
|
|
+ overlap_length1 >= len(deletion) / 2.0
|
|
+ or overlap_length1 >= len(insertion) / 2.0
|
|
+ ):
|
|
+ # Overlap found. Insert an equality and trim the surrounding edits.
|
|
+ diffs.insert(
|
|
+ pointer, (self.DIFF_EQUAL, insertion[:overlap_length1])
|
|
+ )
|
|
+ diffs[pointer - 1] = (
|
|
+ self.DIFF_DELETE,
|
|
+ deletion[: len(deletion) - overlap_length1],
|
|
+ )
|
|
+ diffs[pointer + 1] = (
|
|
+ self.DIFF_INSERT,
|
|
+ insertion[overlap_length1:],
|
|
+ )
|
|
+ pointer += 1
|
|
+ else:
|
|
+ if (
|
|
+ overlap_length2 >= len(deletion) / 2.0
|
|
+ or overlap_length2 >= len(insertion) / 2.0
|
|
+ ):
|
|
+ # Reverse overlap found.
|
|
+ # Insert an equality and swap and trim the surrounding edits.
|
|
+ diffs.insert(
|
|
+ pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])
|
|
+ )
|
|
+ diffs[pointer - 1] = (
|
|
+ self.DIFF_INSERT,
|
|
+ insertion[: len(insertion) - overlap_length2],
|
|
+ )
|
|
+ diffs[pointer + 1] = (
|
|
+ self.DIFF_DELETE,
|
|
+ deletion[overlap_length2:],
|
|
+ )
|
|
+ pointer += 1
|
|
+ pointer += 1
|
|
pointer += 1
|
|
- pointer += 1
|
|
- pointer += 1
|
|
|
|
- def diff_cleanupSemanticLossless(self, diffs):
|
|
- """Look for single edits surrounded on both sides by equalities
|
|
+ def diff_cleanupSemanticLossless(self, diffs):
|
|
+ """Look for single edits surrounded on both sides by equalities
|
|
which can be shifted sideways to align the edit to a word boundary.
|
|
e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
|
|
|
|
@@ -742,8 +782,8 @@ class diff_match_patch:
|
|
diffs: Array of diff tuples.
|
|
"""
|
|
|
|
- def diff_cleanupSemanticScore(one, two):
|
|
- """Given two strings, compute a score representing whether the
|
|
+ def diff_cleanupSemanticScore(one, two):
|
|
+ """Given two strings, compute a score representing whether the
|
|
internal boundary falls on logical boundaries.
|
|
Scores range from 6 (best) to 0 (worst).
|
|
Closure, but does not reference any external variables.
|
|
@@ -755,277 +795,306 @@ class diff_match_patch:
|
|
Returns:
|
|
The score.
|
|
"""
|
|
- if not one or not two:
|
|
- # Edges are the best.
|
|
- return 6
|
|
-
|
|
- # Each port of this function behaves slightly differently due to
|
|
- # subtle differences in each language's definition of things like
|
|
- # 'whitespace'. Since this function's purpose is largely cosmetic,
|
|
- # the choice has been made to use each language's native features
|
|
- # rather than force total conformity.
|
|
- char1 = one[-1]
|
|
- char2 = two[0]
|
|
- nonAlphaNumeric1 = not char1.isalnum()
|
|
- nonAlphaNumeric2 = not char2.isalnum()
|
|
- whitespace1 = nonAlphaNumeric1 and char1.isspace()
|
|
- whitespace2 = nonAlphaNumeric2 and char2.isspace()
|
|
- lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")
|
|
- lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")
|
|
- blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)
|
|
- blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)
|
|
-
|
|
- if blankLine1 or blankLine2:
|
|
- # Five points for blank lines.
|
|
- return 5
|
|
- elif lineBreak1 or lineBreak2:
|
|
- # Four points for line breaks.
|
|
- return 4
|
|
- elif nonAlphaNumeric1 and not whitespace1 and whitespace2:
|
|
- # Three points for end of sentences.
|
|
- return 3
|
|
- elif whitespace1 or whitespace2:
|
|
- # Two points for whitespace.
|
|
- return 2
|
|
- elif nonAlphaNumeric1 or nonAlphaNumeric2:
|
|
- # One point for non-alphanumeric.
|
|
- return 1
|
|
- return 0
|
|
-
|
|
- pointer = 1
|
|
- # Intentionally ignore the first and last element (don't need checking).
|
|
- while pointer < len(diffs) - 1:
|
|
- if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
|
|
- diffs[pointer + 1][0] == self.DIFF_EQUAL):
|
|
- # This is a single edit surrounded by equalities.
|
|
- equality1 = diffs[pointer - 1][1]
|
|
- edit = diffs[pointer][1]
|
|
- equality2 = diffs[pointer + 1][1]
|
|
-
|
|
- # First, shift the edit as far left as possible.
|
|
- commonOffset = self.diff_commonSuffix(equality1, edit)
|
|
- if commonOffset:
|
|
- commonString = edit[-commonOffset:]
|
|
- equality1 = equality1[:-commonOffset]
|
|
- edit = commonString + edit[:-commonOffset]
|
|
- equality2 = commonString + equality2
|
|
-
|
|
- # Second, step character by character right, looking for the best fit.
|
|
- bestEquality1 = equality1
|
|
- bestEdit = edit
|
|
- bestEquality2 = equality2
|
|
- bestScore = (diff_cleanupSemanticScore(equality1, edit) +
|
|
- diff_cleanupSemanticScore(edit, equality2))
|
|
- while edit and equality2 and edit[0] == equality2[0]:
|
|
- equality1 += edit[0]
|
|
- edit = edit[1:] + equality2[0]
|
|
- equality2 = equality2[1:]
|
|
- score = (diff_cleanupSemanticScore(equality1, edit) +
|
|
- diff_cleanupSemanticScore(edit, equality2))
|
|
- # The >= encourages trailing rather than leading whitespace on edits.
|
|
- if score >= bestScore:
|
|
- bestScore = score
|
|
- bestEquality1 = equality1
|
|
- bestEdit = edit
|
|
- bestEquality2 = equality2
|
|
-
|
|
- if diffs[pointer - 1][1] != bestEquality1:
|
|
- # We have an improvement, save it back to the diff.
|
|
- if bestEquality1:
|
|
- diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
|
|
- else:
|
|
- del diffs[pointer - 1]
|
|
- pointer -= 1
|
|
- diffs[pointer] = (diffs[pointer][0], bestEdit)
|
|
- if bestEquality2:
|
|
- diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
|
|
- else:
|
|
- del diffs[pointer + 1]
|
|
- pointer -= 1
|
|
- pointer += 1
|
|
-
|
|
- # Define some regex patterns for matching boundaries.
|
|
- BLANKLINEEND = re.compile(r"\n\r?\n$")
|
|
- BLANKLINESTART = re.compile(r"^\r?\n\r?\n")
|
|
+ if not one or not two:
|
|
+ # Edges are the best.
|
|
+ return 6
|
|
+
|
|
+ # Each port of this function behaves slightly differently due to
|
|
+ # subtle differences in each language's definition of things like
|
|
+ # 'whitespace'. Since this function's purpose is largely cosmetic,
|
|
+ # the choice has been made to use each language's native features
|
|
+ # rather than force total conformity.
|
|
+ char1 = one[-1]
|
|
+ char2 = two[0]
|
|
+ nonAlphaNumeric1 = not char1.isalnum()
|
|
+ nonAlphaNumeric2 = not char2.isalnum()
|
|
+ whitespace1 = nonAlphaNumeric1 and char1.isspace()
|
|
+ whitespace2 = nonAlphaNumeric2 and char2.isspace()
|
|
+ lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")
|
|
+ lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")
|
|
+ blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)
|
|
+ blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)
|
|
+
|
|
+ if blankLine1 or blankLine2:
|
|
+ # Five points for blank lines.
|
|
+ return 5
|
|
+ elif lineBreak1 or lineBreak2:
|
|
+ # Four points for line breaks.
|
|
+ return 4
|
|
+ elif nonAlphaNumeric1 and not whitespace1 and whitespace2:
|
|
+ # Three points for end of sentences.
|
|
+ return 3
|
|
+ elif whitespace1 or whitespace2:
|
|
+ # Two points for whitespace.
|
|
+ return 2
|
|
+ elif nonAlphaNumeric1 or nonAlphaNumeric2:
|
|
+ # One point for non-alphanumeric.
|
|
+ return 1
|
|
+ return 0
|
|
+
|
|
+ pointer = 1
|
|
+ # Intentionally ignore the first and last element (don't need checking).
|
|
+ while pointer < len(diffs) - 1:
|
|
+ if (
|
|
+ diffs[pointer - 1][0] == self.DIFF_EQUAL
|
|
+ and diffs[pointer + 1][0] == self.DIFF_EQUAL
|
|
+ ):
|
|
+ # This is a single edit surrounded by equalities.
|
|
+ equality1 = diffs[pointer - 1][1]
|
|
+ edit = diffs[pointer][1]
|
|
+ equality2 = diffs[pointer + 1][1]
|
|
+
|
|
+ # First, shift the edit as far left as possible.
|
|
+ commonOffset = self.diff_commonSuffix(equality1, edit)
|
|
+ if commonOffset:
|
|
+ commonString = edit[-commonOffset:]
|
|
+ equality1 = equality1[:-commonOffset]
|
|
+ edit = commonString + edit[:-commonOffset]
|
|
+ equality2 = commonString + equality2
|
|
+
|
|
+ # Second, step character by character right, looking for the best fit.
|
|
+ bestEquality1 = equality1
|
|
+ bestEdit = edit
|
|
+ bestEquality2 = equality2
|
|
+ bestScore = diff_cleanupSemanticScore(
|
|
+ equality1, edit
|
|
+ ) + diff_cleanupSemanticScore(edit, equality2)
|
|
+ while edit and equality2 and edit[0] == equality2[0]:
|
|
+ equality1 += edit[0]
|
|
+ edit = edit[1:] + equality2[0]
|
|
+ equality2 = equality2[1:]
|
|
+ score = diff_cleanupSemanticScore(
|
|
+ equality1, edit
|
|
+ ) + diff_cleanupSemanticScore(edit, equality2)
|
|
+ # The >= encourages trailing rather than leading whitespace on edits.
|
|
+ if score >= bestScore:
|
|
+ bestScore = score
|
|
+ bestEquality1 = equality1
|
|
+ bestEdit = edit
|
|
+ bestEquality2 = equality2
|
|
+
|
|
+ if diffs[pointer - 1][1] != bestEquality1:
|
|
+ # We have an improvement, save it back to the diff.
|
|
+ if bestEquality1:
|
|
+ diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
|
|
+ else:
|
|
+ del diffs[pointer - 1]
|
|
+ pointer -= 1
|
|
+ diffs[pointer] = (diffs[pointer][0], bestEdit)
|
|
+ if bestEquality2:
|
|
+ diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
|
|
+ else:
|
|
+ del diffs[pointer + 1]
|
|
+ pointer -= 1
|
|
+ pointer += 1
|
|
+
|
|
+ # Define some regex patterns for matching boundaries.
|
|
+ BLANKLINEEND = re.compile(r"\n\r?\n$")
|
|
+ BLANKLINESTART = re.compile(r"^\r?\n\r?\n")
|
|
|
|
- def diff_cleanupEfficiency(self, diffs):
|
|
- """Reduce the number of edits by eliminating operationally trivial
|
|
+ def diff_cleanupEfficiency(self, diffs):
|
|
+ """Reduce the number of edits by eliminating operationally trivial
|
|
equalities.
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
"""
|
|
- changes = False
|
|
- equalities = [] # Stack of indices where equalities are found.
|
|
- lastEquality = None # Always equal to diffs[equalities[-1]][1]
|
|
- pointer = 0 # Index of current position.
|
|
- pre_ins = False # Is there an insertion operation before the last equality.
|
|
- pre_del = False # Is there a deletion operation before the last equality.
|
|
- post_ins = False # Is there an insertion operation after the last equality.
|
|
- post_del = False # Is there a deletion operation after the last equality.
|
|
- while pointer < len(diffs):
|
|
- if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
|
|
- if (len(diffs[pointer][1]) < self.Diff_EditCost and
|
|
- (post_ins or post_del)):
|
|
- # Candidate found.
|
|
- equalities.append(pointer)
|
|
- pre_ins = post_ins
|
|
- pre_del = post_del
|
|
- lastEquality = diffs[pointer][1]
|
|
- else:
|
|
- # Not a candidate, and can never become one.
|
|
- equalities = []
|
|
- lastEquality = None
|
|
-
|
|
- post_ins = post_del = False
|
|
- else: # An insertion or deletion.
|
|
- if diffs[pointer][0] == self.DIFF_DELETE:
|
|
- post_del = True
|
|
- else:
|
|
- post_ins = True
|
|
-
|
|
- # Five types to be split:
|
|
- # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
|
|
- # <ins>A</ins>X<ins>C</ins><del>D</del>
|
|
- # <ins>A</ins><del>B</del>X<ins>C</ins>
|
|
- # <ins>A</del>X<ins>C</ins><del>D</del>
|
|
- # <ins>A</ins><del>B</del>X<del>C</del>
|
|
-
|
|
- if lastEquality and ((pre_ins and pre_del and post_ins and post_del) or
|
|
- ((len(lastEquality) < self.Diff_EditCost / 2) and
|
|
- (pre_ins + pre_del + post_ins + post_del) == 3)):
|
|
- # Duplicate record.
|
|
- diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
|
|
- # Change second copy to insert.
|
|
- diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
|
|
- diffs[equalities[-1] + 1][1])
|
|
- equalities.pop() # Throw away the equality we just deleted.
|
|
- lastEquality = None
|
|
- if pre_ins and pre_del:
|
|
- # No changes made which could affect previous entry, keep going.
|
|
- post_ins = post_del = True
|
|
- equalities = []
|
|
- else:
|
|
- if len(equalities):
|
|
- equalities.pop() # Throw away the previous equality.
|
|
- if len(equalities):
|
|
- pointer = equalities[-1]
|
|
- else:
|
|
- pointer = -1
|
|
- post_ins = post_del = False
|
|
- changes = True
|
|
- pointer += 1
|
|
+ changes = False
|
|
+ equalities = [] # Stack of indices where equalities are found.
|
|
+ lastEquality = None # Always equal to diffs[equalities[-1]][1]
|
|
+ pointer = 0 # Index of current position.
|
|
+ pre_ins = False # Is there an insertion operation before the last equality.
|
|
+ pre_del = False # Is there a deletion operation before the last equality.
|
|
+ post_ins = False # Is there an insertion operation after the last equality.
|
|
+ post_del = False # Is there a deletion operation after the last equality.
|
|
+ while pointer < len(diffs):
|
|
+ if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
|
|
+ if len(diffs[pointer][1]) < self.Diff_EditCost and (
|
|
+ post_ins or post_del
|
|
+ ):
|
|
+ # Candidate found.
|
|
+ equalities.append(pointer)
|
|
+ pre_ins = post_ins
|
|
+ pre_del = post_del
|
|
+ lastEquality = diffs[pointer][1]
|
|
+ else:
|
|
+ # Not a candidate, and can never become one.
|
|
+ equalities = []
|
|
+ lastEquality = None
|
|
+
|
|
+ post_ins = post_del = False
|
|
+ else: # An insertion or deletion.
|
|
+ if diffs[pointer][0] == self.DIFF_DELETE:
|
|
+ post_del = True
|
|
+ else:
|
|
+ post_ins = True
|
|
+
|
|
+ # Five types to be split:
|
|
+ # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
|
|
+ # <ins>A</ins>X<ins>C</ins><del>D</del>
|
|
+ # <ins>A</ins><del>B</del>X<ins>C</ins>
|
|
+ # <del>A</del>X<ins>C</ins><del>D</del>
|
|
+ # <ins>A</ins><del>B</del>X<del>C</del>
|
|
+
|
|
+ if lastEquality and (
|
|
+ (pre_ins and pre_del and post_ins and post_del)
|
|
+ or (
|
|
+ (len(lastEquality) < self.Diff_EditCost / 2)
|
|
+ and (pre_ins + pre_del + post_ins + post_del) == 3
|
|
+ )
|
|
+ ):
|
|
+ # Duplicate record.
|
|
+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
|
|
+ # Change second copy to insert.
|
|
+ diffs[equalities[-1] + 1] = (
|
|
+ self.DIFF_INSERT,
|
|
+ diffs[equalities[-1] + 1][1],
|
|
+ )
|
|
+ equalities.pop() # Throw away the equality we just deleted.
|
|
+ lastEquality = None
|
|
+ if pre_ins and pre_del:
|
|
+ # No changes made which could affect previous entry, keep going.
|
|
+ post_ins = post_del = True
|
|
+ equalities = []
|
|
+ else:
|
|
+ if len(equalities):
|
|
+ equalities.pop() # Throw away the previous equality.
|
|
+ if len(equalities):
|
|
+ pointer = equalities[-1]
|
|
+ else:
|
|
+ pointer = -1
|
|
+ post_ins = post_del = False
|
|
+ changes = True
|
|
+ pointer += 1
|
|
|
|
- if changes:
|
|
- self.diff_cleanupMerge(diffs)
|
|
+ if changes:
|
|
+ self.diff_cleanupMerge(diffs)
|
|
|
|
- def diff_cleanupMerge(self, diffs):
|
|
- """Reorder and merge like edit sections. Merge equalities.
|
|
+ def diff_cleanupMerge(self, diffs):
|
|
+ """Reorder and merge like edit sections. Merge equalities.
|
|
Any edit section can move as long as it doesn't cross an equality.
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
"""
|
|
- diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end.
|
|
- pointer = 0
|
|
- count_delete = 0
|
|
- count_insert = 0
|
|
- text_delete = ''
|
|
- text_insert = ''
|
|
- while pointer < len(diffs):
|
|
- if diffs[pointer][0] == self.DIFF_INSERT:
|
|
- count_insert += 1
|
|
- text_insert += diffs[pointer][1]
|
|
- pointer += 1
|
|
- elif diffs[pointer][0] == self.DIFF_DELETE:
|
|
- count_delete += 1
|
|
- text_delete += diffs[pointer][1]
|
|
- pointer += 1
|
|
- elif diffs[pointer][0] == self.DIFF_EQUAL:
|
|
- # Upon reaching an equality, check for prior redundancies.
|
|
- if count_delete + count_insert > 1:
|
|
- if count_delete != 0 and count_insert != 0:
|
|
- # Factor out any common prefixies.
|
|
- commonlength = self.diff_commonPrefix(text_insert, text_delete)
|
|
- if commonlength != 0:
|
|
- x = pointer - count_delete - count_insert - 1
|
|
- if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
|
|
- diffs[x] = (diffs[x][0], diffs[x][1] +
|
|
- text_insert[:commonlength])
|
|
- else:
|
|
- diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
|
|
- pointer += 1
|
|
- text_insert = text_insert[commonlength:]
|
|
- text_delete = text_delete[commonlength:]
|
|
- # Factor out any common suffixes.
|
|
- commonlength = self.diff_commonSuffix(text_insert, text_delete)
|
|
- if commonlength != 0:
|
|
- diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
|
|
- diffs[pointer][1])
|
|
- text_insert = text_insert[:-commonlength]
|
|
- text_delete = text_delete[:-commonlength]
|
|
- # Delete the offending records and add the merged ones.
|
|
- new_ops = []
|
|
- if len(text_delete) != 0:
|
|
- new_ops.append((self.DIFF_DELETE, text_delete))
|
|
- if len(text_insert) != 0:
|
|
- new_ops.append((self.DIFF_INSERT, text_insert))
|
|
- pointer -= count_delete + count_insert
|
|
- diffs[pointer : pointer + count_delete + count_insert] = new_ops
|
|
- pointer += len(new_ops) + 1
|
|
- elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
|
|
- # Merge this equality with the previous one.
|
|
- diffs[pointer - 1] = (diffs[pointer - 1][0],
|
|
- diffs[pointer - 1][1] + diffs[pointer][1])
|
|
- del diffs[pointer]
|
|
- else:
|
|
- pointer += 1
|
|
-
|
|
- count_insert = 0
|
|
+ diffs.append((self.DIFF_EQUAL, "")) # Add a dummy entry at the end.
|
|
+ pointer = 0
|
|
count_delete = 0
|
|
- text_delete = ''
|
|
- text_insert = ''
|
|
-
|
|
- if diffs[-1][1] == '':
|
|
- diffs.pop() # Remove the dummy entry at the end.
|
|
+ count_insert = 0
|
|
+ text_delete = ""
|
|
+ text_insert = ""
|
|
+ while pointer < len(diffs):
|
|
+ if diffs[pointer][0] == self.DIFF_INSERT:
|
|
+ count_insert += 1
|
|
+ text_insert += diffs[pointer][1]
|
|
+ pointer += 1
|
|
+ elif diffs[pointer][0] == self.DIFF_DELETE:
|
|
+ count_delete += 1
|
|
+ text_delete += diffs[pointer][1]
|
|
+ pointer += 1
|
|
+ elif diffs[pointer][0] == self.DIFF_EQUAL:
|
|
+ # Upon reaching an equality, check for prior redundancies.
|
|
+ if count_delete + count_insert > 1:
|
|
+ if count_delete != 0 and count_insert != 0:
|
|
+ # Factor out any common prefixes.
|
|
+ commonlength = self.diff_commonPrefix(text_insert, text_delete)
|
|
+ if commonlength != 0:
|
|
+ x = pointer - count_delete - count_insert - 1
|
|
+ if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
|
|
+ diffs[x] = (
|
|
+ diffs[x][0],
|
|
+ diffs[x][1] + text_insert[:commonlength],
|
|
+ )
|
|
+ else:
|
|
+ diffs.insert(
|
|
+ 0, (self.DIFF_EQUAL, text_insert[:commonlength])
|
|
+ )
|
|
+ pointer += 1
|
|
+ text_insert = text_insert[commonlength:]
|
|
+ text_delete = text_delete[commonlength:]
|
|
+ # Factor out any common suffixes.
|
|
+ commonlength = self.diff_commonSuffix(text_insert, text_delete)
|
|
+ if commonlength != 0:
|
|
+ diffs[pointer] = (
|
|
+ diffs[pointer][0],
|
|
+ text_insert[-commonlength:] + diffs[pointer][1],
|
|
+ )
|
|
+ text_insert = text_insert[:-commonlength]
|
|
+ text_delete = text_delete[:-commonlength]
|
|
+ # Delete the offending records and add the merged ones.
|
|
+ new_ops = []
|
|
+ if len(text_delete) != 0:
|
|
+ new_ops.append((self.DIFF_DELETE, text_delete))
|
|
+ if len(text_insert) != 0:
|
|
+ new_ops.append((self.DIFF_INSERT, text_insert))
|
|
+ pointer -= count_delete + count_insert
|
|
+ diffs[pointer : pointer + count_delete + count_insert] = new_ops
|
|
+ pointer += len(new_ops) + 1
|
|
+ elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
|
|
+ # Merge this equality with the previous one.
|
|
+ diffs[pointer - 1] = (
|
|
+ diffs[pointer - 1][0],
|
|
+ diffs[pointer - 1][1] + diffs[pointer][1],
|
|
+ )
|
|
+ del diffs[pointer]
|
|
+ else:
|
|
+ pointer += 1
|
|
+
|
|
+ count_insert = 0
|
|
+ count_delete = 0
|
|
+ text_delete = ""
|
|
+ text_insert = ""
|
|
+
|
|
+ if diffs[-1][1] == "":
|
|
+ diffs.pop() # Remove the dummy entry at the end.
|
|
+
|
|
+ # Second pass: look for single edits surrounded on both sides by equalities
|
|
+ # which can be shifted sideways to eliminate an equality.
|
|
+ # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
|
|
+ changes = False
|
|
+ pointer = 1
|
|
+ # Intentionally ignore the first and last element (don't need checking).
|
|
+ while pointer < len(diffs) - 1:
|
|
+ if (
|
|
+ diffs[pointer - 1][0] == self.DIFF_EQUAL
|
|
+ and diffs[pointer + 1][0] == self.DIFF_EQUAL
|
|
+ ):
|
|
+ # This is a single edit surrounded by equalities.
|
|
+ if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
|
|
+ # Shift the edit over the previous equality.
|
|
+ if diffs[pointer - 1][1] != "":
|
|
+ diffs[pointer] = (
|
|
+ diffs[pointer][0],
|
|
+ diffs[pointer - 1][1]
|
|
+ + diffs[pointer][1][: -len(diffs[pointer - 1][1])],
|
|
+ )
|
|
+ diffs[pointer + 1] = (
|
|
+ diffs[pointer + 1][0],
|
|
+ diffs[pointer - 1][1] + diffs[pointer + 1][1],
|
|
+ )
|
|
+ del diffs[pointer - 1]
|
|
+ changes = True
|
|
+ elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
|
|
+ # Shift the edit over the next equality.
|
|
+ diffs[pointer - 1] = (
|
|
+ diffs[pointer - 1][0],
|
|
+ diffs[pointer - 1][1] + diffs[pointer + 1][1],
|
|
+ )
|
|
+ diffs[pointer] = (
|
|
+ diffs[pointer][0],
|
|
+ diffs[pointer][1][len(diffs[pointer + 1][1]) :]
|
|
+ + diffs[pointer + 1][1],
|
|
+ )
|
|
+ del diffs[pointer + 1]
|
|
+ changes = True
|
|
+ pointer += 1
|
|
|
|
- # Second pass: look for single edits surrounded on both sides by equalities
|
|
- # which can be shifted sideways to eliminate an equality.
|
|
- # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
|
|
- changes = False
|
|
- pointer = 1
|
|
- # Intentionally ignore the first and last element (don't need checking).
|
|
- while pointer < len(diffs) - 1:
|
|
- if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
|
|
- diffs[pointer + 1][0] == self.DIFF_EQUAL):
|
|
- # This is a single edit surrounded by equalities.
|
|
- if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
|
|
- # Shift the edit over the previous equality.
|
|
- if diffs[pointer - 1][1] != "":
|
|
- diffs[pointer] = (diffs[pointer][0],
|
|
- diffs[pointer - 1][1] +
|
|
- diffs[pointer][1][:-len(diffs[pointer - 1][1])])
|
|
- diffs[pointer + 1] = (diffs[pointer + 1][0],
|
|
- diffs[pointer - 1][1] + diffs[pointer + 1][1])
|
|
- del diffs[pointer - 1]
|
|
- changes = True
|
|
- elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
|
|
- # Shift the edit over the next equality.
|
|
- diffs[pointer - 1] = (diffs[pointer - 1][0],
|
|
- diffs[pointer - 1][1] + diffs[pointer + 1][1])
|
|
- diffs[pointer] = (diffs[pointer][0],
|
|
- diffs[pointer][1][len(diffs[pointer + 1][1]):] +
|
|
- diffs[pointer + 1][1])
|
|
- del diffs[pointer + 1]
|
|
- changes = True
|
|
- pointer += 1
|
|
-
|
|
- # If shifts were made, the diff needs reordering and another shift sweep.
|
|
- if changes:
|
|
- self.diff_cleanupMerge(diffs)
|
|
+ # If shifts were made, the diff needs reordering and another shift sweep.
|
|
+ if changes:
|
|
+ self.diff_cleanupMerge(diffs)
|
|
|
|
- def diff_xIndex(self, diffs, loc):
|
|
- """loc is a location in text1, compute and return the equivalent location
|
|
+ def diff_xIndex(self, diffs, loc):
|
|
+ """loc is a location in text1, compute and return the equivalent location
|
|
in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8
|
|
|
|
Args:
|
|
@@ -1035,29 +1104,29 @@ class diff_match_patch:
|
|
Returns:
|
|
Location within text2.
|
|
"""
|
|
- chars1 = 0
|
|
- chars2 = 0
|
|
- last_chars1 = 0
|
|
- last_chars2 = 0
|
|
- for x in range(len(diffs)):
|
|
- (op, text) = diffs[x]
|
|
- if op != self.DIFF_INSERT: # Equality or deletion.
|
|
- chars1 += len(text)
|
|
- if op != self.DIFF_DELETE: # Equality or insertion.
|
|
- chars2 += len(text)
|
|
- if chars1 > loc: # Overshot the location.
|
|
- break
|
|
- last_chars1 = chars1
|
|
- last_chars2 = chars2
|
|
-
|
|
- if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
|
|
- # The location was deleted.
|
|
- return last_chars2
|
|
- # Add the remaining len(character).
|
|
- return last_chars2 + (loc - last_chars1)
|
|
+ chars1 = 0
|
|
+ chars2 = 0
|
|
+ last_chars1 = 0
|
|
+ last_chars2 = 0
|
|
+ for x in range(len(diffs)):
|
|
+ (op, text) = diffs[x]
|
|
+ if op != self.DIFF_INSERT: # Equality or deletion.
|
|
+ chars1 += len(text)
|
|
+ if op != self.DIFF_DELETE: # Equality or insertion.
|
|
+ chars2 += len(text)
|
|
+ if chars1 > loc: # Overshot the location.
|
|
+ break
|
|
+ last_chars1 = chars1
|
|
+ last_chars2 = chars2
|
|
+
|
|
+ if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
|
|
+ # The location was deleted.
|
|
+ return last_chars2
|
|
+ # Add the remaining len(character).
|
|
+ return last_chars2 + (loc - last_chars1)
|
|
|
|
- def diff_prettyHtml(self, diffs):
|
|
- """Convert a diff array into a pretty HTML report.
|
|
+ def diff_prettyHtml(self, diffs):
|
|
+ """Convert a diff array into a pretty HTML report.
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
@@ -1065,20 +1134,24 @@ class diff_match_patch:
|
|
Returns:
|
|
HTML representation.
|
|
"""
|
|
- html = []
|
|
- for (op, data) in diffs:
|
|
- text = (data.replace("&", "&").replace("<", "<")
|
|
- .replace(">", ">").replace("\n", "¶<br>"))
|
|
- if op == self.DIFF_INSERT:
|
|
- html.append("<ins style=\"background:#e6ffe6;\">%s</ins>" % text)
|
|
- elif op == self.DIFF_DELETE:
|
|
- html.append("<del style=\"background:#ffe6e6;\">%s</del>" % text)
|
|
- elif op == self.DIFF_EQUAL:
|
|
- html.append("<span>%s</span>" % text)
|
|
- return "".join(html)
|
|
+ html = []
|
|
+ for (op, data) in diffs:
|
|
+ text = (
|
|
+ data.replace("&", "&")
|
|
+ .replace("<", "<")
|
|
+ .replace(">", ">")
|
|
+ .replace("\n", "¶<br>")
|
|
+ )
|
|
+ if op == self.DIFF_INSERT:
|
|
+ html.append('<ins style="background:#e6ffe6;">%s</ins>' % text)
|
|
+ elif op == self.DIFF_DELETE:
|
|
+ html.append('<del style="background:#ffe6e6;">%s</del>' % text)
|
|
+ elif op == self.DIFF_EQUAL:
|
|
+ html.append("<span>%s</span>" % text)
|
|
+ return "".join(html)
|
|
|
|
- def diff_text1(self, diffs):
|
|
- """Compute and return the source text (all equalities and deletions).
|
|
+ def diff_text1(self, diffs):
|
|
+ """Compute and return the source text (all equalities and deletions).
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
@@ -1086,14 +1159,14 @@ class diff_match_patch:
|
|
Returns:
|
|
Source text.
|
|
"""
|
|
- text = []
|
|
- for (op, data) in diffs:
|
|
- if op != self.DIFF_INSERT:
|
|
- text.append(data)
|
|
- return "".join(text)
|
|
+ text = []
|
|
+ for (op, data) in diffs:
|
|
+ if op != self.DIFF_INSERT:
|
|
+ text.append(data)
|
|
+ return "".join(text)
|
|
|
|
- def diff_text2(self, diffs):
|
|
- """Compute and return the destination text (all equalities and insertions).
|
|
+ def diff_text2(self, diffs):
|
|
+ """Compute and return the destination text (all equalities and insertions).
|
|
|
|
Args:
|
|
diffs: Array of diff tuples.
|
|
@@ -1101,14 +1174,14 @@ class diff_match_patch:
|
|
Returns:
|
|
Destination text.
|
|
"""
|
|
- text = []
|
|
- for (op, data) in diffs:
|
|
- if op != self.DIFF_DELETE:
|
|
- text.append(data)
|
|
- return "".join(text)
|
|
+ text = []
|
|
+ for (op, data) in diffs:
|
|
+ if op != self.DIFF_DELETE:
|
|
+ text.append(data)
|
|
+ return "".join(text)
|
|
|
|
- def diff_levenshtein(self, diffs):
|
|
- """Compute the Levenshtein distance; the number of inserted, deleted or
|
|
+ def diff_levenshtein(self, diffs):
|
|
+ """Compute the Levenshtein distance; the number of inserted, deleted or
|
|
substituted characters.
|
|
|
|
Args:
|
|
@@ -1117,24 +1190,24 @@ class diff_match_patch:
|
|
Returns:
|
|
Number of changes.
|
|
"""
|
|
- levenshtein = 0
|
|
- insertions = 0
|
|
- deletions = 0
|
|
- for (op, data) in diffs:
|
|
- if op == self.DIFF_INSERT:
|
|
- insertions += len(data)
|
|
- elif op == self.DIFF_DELETE:
|
|
- deletions += len(data)
|
|
- elif op == self.DIFF_EQUAL:
|
|
- # A deletion and an insertion is one substitution.
|
|
- levenshtein += max(insertions, deletions)
|
|
+ levenshtein = 0
|
|
insertions = 0
|
|
deletions = 0
|
|
- levenshtein += max(insertions, deletions)
|
|
- return levenshtein
|
|
+ for (op, data) in diffs:
|
|
+ if op == self.DIFF_INSERT:
|
|
+ insertions += len(data)
|
|
+ elif op == self.DIFF_DELETE:
|
|
+ deletions += len(data)
|
|
+ elif op == self.DIFF_EQUAL:
|
|
+ # A deletion and an insertion is one substitution.
|
|
+ levenshtein += max(insertions, deletions)
|
|
+ insertions = 0
|
|
+ deletions = 0
|
|
+ levenshtein += max(insertions, deletions)
|
|
+ return levenshtein
|
|
|
|
- def diff_toDelta(self, diffs):
|
|
- """Crush the diff into an encoded string which describes the operations
|
|
+ def diff_toDelta(self, diffs):
|
|
+ """Crush the diff into an encoded string which describes the operations
|
|
required to transform text1 into text2.
|
|
E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
|
|
Operations are tab-separated. Inserted text is escaped using %xx notation.
|
|
@@ -1145,20 +1218,20 @@ class diff_match_patch:
|
|
Returns:
|
|
Delta text.
|
|
"""
|
|
- text = []
|
|
- for (op, data) in diffs:
|
|
- if op == self.DIFF_INSERT:
|
|
- # High ascii will raise UnicodeDecodeError. Use Unicode instead.
|
|
- data = data.encode("utf-8")
|
|
- text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# "))
|
|
- elif op == self.DIFF_DELETE:
|
|
- text.append("-%d" % len(data))
|
|
- elif op == self.DIFF_EQUAL:
|
|
- text.append("=%d" % len(data))
|
|
- return "\t".join(text)
|
|
+ text = []
|
|
+ for (op, data) in diffs:
|
|
+ if op == self.DIFF_INSERT:
|
|
+ # High ascii will raise UnicodeDecodeError. Use Unicode instead.
|
|
+ data = data.encode("utf-8")
|
|
+ text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# "))
|
|
+ elif op == self.DIFF_DELETE:
|
|
+ text.append("-%d" % len(data))
|
|
+ elif op == self.DIFF_EQUAL:
|
|
+ text.append("=%d" % len(data))
|
|
+ return "\t".join(text)
|
|
|
|
- def diff_fromDelta(self, text1, delta):
|
|
- """Given the original text1, and an encoded string which describes the
|
|
+ def diff_fromDelta(self, text1, delta):
|
|
+ """Given the original text1, and an encoded string which describes the
|
|
operations required to transform text1 into text2, compute the full diff.
|
|
|
|
Args:
|
|
@@ -1171,46 +1244,48 @@ class diff_match_patch:
|
|
Raises:
|
|
ValueError: If invalid input.
|
|
"""
|
|
- diffs = []
|
|
- pointer = 0 # Cursor in text1
|
|
- tokens = delta.split("\t")
|
|
- for token in tokens:
|
|
- if token == "":
|
|
- # Blank tokens are ok (from a trailing \t).
|
|
- continue
|
|
- # Each token begins with a one character parameter which specifies the
|
|
- # operation of this token (delete, insert, equality).
|
|
- param = token[1:]
|
|
- if token[0] == "+":
|
|
- param = urllib.parse.unquote(param)
|
|
- diffs.append((self.DIFF_INSERT, param))
|
|
- elif token[0] == "-" or token[0] == "=":
|
|
- try:
|
|
- n = int(param)
|
|
- except ValueError:
|
|
- raise ValueError("Invalid number in diff_fromDelta: " + param)
|
|
- if n < 0:
|
|
- raise ValueError("Negative number in diff_fromDelta: " + param)
|
|
- text = text1[pointer : pointer + n]
|
|
- pointer += n
|
|
- if token[0] == "=":
|
|
- diffs.append((self.DIFF_EQUAL, text))
|
|
- else:
|
|
- diffs.append((self.DIFF_DELETE, text))
|
|
- else:
|
|
- # Anything else is an error.
|
|
- raise ValueError("Invalid diff operation in diff_fromDelta: " +
|
|
- token[0])
|
|
- if pointer != len(text1):
|
|
- raise ValueError(
|
|
- "Delta length (%d) does not equal source text length (%d)." %
|
|
- (pointer, len(text1)))
|
|
- return diffs
|
|
+ diffs = []
|
|
+ pointer = 0 # Cursor in text1
|
|
+ tokens = delta.split("\t")
|
|
+ for token in tokens:
|
|
+ if token == "":
|
|
+ # Blank tokens are ok (from a trailing \t).
|
|
+ continue
|
|
+ # Each token begins with a one character parameter which specifies the
|
|
+ # operation of this token (delete, insert, equality).
|
|
+ param = token[1:]
|
|
+ if token[0] == "+":
|
|
+ param = urllib.parse.unquote(param)
|
|
+ diffs.append((self.DIFF_INSERT, param))
|
|
+ elif token[0] == "-" or token[0] == "=":
|
|
+ try:
|
|
+ n = int(param)
|
|
+ except ValueError:
|
|
+ raise ValueError("Invalid number in diff_fromDelta: " + param)
|
|
+ if n < 0:
|
|
+ raise ValueError("Negative number in diff_fromDelta: " + param)
|
|
+ text = text1[pointer : pointer + n]
|
|
+ pointer += n
|
|
+ if token[0] == "=":
|
|
+ diffs.append((self.DIFF_EQUAL, text))
|
|
+ else:
|
|
+ diffs.append((self.DIFF_DELETE, text))
|
|
+ else:
|
|
+ # Anything else is an error.
|
|
+ raise ValueError(
|
|
+ "Invalid diff operation in diff_fromDelta: " + token[0]
|
|
+ )
|
|
+ if pointer != len(text1):
|
|
+ raise ValueError(
|
|
+ "Delta length (%d) does not equal source text length (%d)."
|
|
+ % (pointer, len(text1))
|
|
+ )
|
|
+ return diffs
|
|
|
|
- # MATCH FUNCTIONS
|
|
+ # MATCH FUNCTIONS
|
|
|
|
- def match_main(self, text, pattern, loc):
|
|
- """Locate the best instance of 'pattern' in 'text' near 'loc'.
|
|
+ def match_main(self, text, pattern, loc):
|
|
+ """Locate the best instance of 'pattern' in 'text' near 'loc'.
|
|
|
|
Args:
|
|
text: The text to search.
|
|
@@ -1220,27 +1295,27 @@ class diff_match_patch:
|
|
Returns:
|
|
Best match index or -1.
|
|
"""
|
|
- # Check for null inputs.
|
|
- if text == None or pattern == None:
|
|
- raise ValueError("Null inputs. (match_main)")
|
|
-
|
|
- loc = max(0, min(loc, len(text)))
|
|
- if text == pattern:
|
|
- # Shortcut (potentially not guaranteed by the algorithm)
|
|
- return 0
|
|
- elif not text:
|
|
- # Nothing to match.
|
|
- return -1
|
|
- elif text[loc:loc + len(pattern)] == pattern:
|
|
- # Perfect match at the perfect spot! (Includes case of null pattern)
|
|
- return loc
|
|
- else:
|
|
- # Do a fuzzy compare.
|
|
- match = self.match_bitap(text, pattern, loc)
|
|
- return match
|
|
+ # Check for null inputs.
|
|
+ if text == None or pattern == None:
|
|
+ raise ValueError("Null inputs. (match_main)")
|
|
+
|
|
+ loc = max(0, min(loc, len(text)))
|
|
+ if text == pattern:
|
|
+ # Shortcut (potentially not guaranteed by the algorithm)
|
|
+ return 0
|
|
+ elif not text:
|
|
+ # Nothing to match.
|
|
+ return -1
|
|
+ elif text[loc : loc + len(pattern)] == pattern:
|
|
+ # Perfect match at the perfect spot! (Includes case of null pattern)
|
|
+ return loc
|
|
+ else:
|
|
+ # Do a fuzzy compare.
|
|
+ match = self.match_bitap(text, pattern, loc)
|
|
+ return match
|
|
|
|
- def match_bitap(self, text, pattern, loc):
|
|
- """Locate the best instance of 'pattern' in 'text' near 'loc' using the
|
|
+ def match_bitap(self, text, pattern, loc):
|
|
+ """Locate the best instance of 'pattern' in 'text' near 'loc' using the
|
|
Bitap algorithm.
|
|
|
|
Args:
|
|
@@ -1251,15 +1326,15 @@ class diff_match_patch:
|
|
Returns:
|
|
Best match index or -1.
|
|
"""
|
|
- # Python doesn't have a maxint limit, so ignore this check.
|
|
- #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
|
|
- # raise ValueError("Pattern too long for this application.")
|
|
+ # Python doesn't have a maxint limit, so ignore this check.
|
|
+ # if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
|
|
+ # raise ValueError("Pattern too long for this application.")
|
|
|
|
- # Initialise the alphabet.
|
|
- s = self.match_alphabet(pattern)
|
|
+ # Initialise the alphabet.
|
|
+ s = self.match_alphabet(pattern)
|
|
|
|
- def match_bitapScore(e, x):
|
|
- """Compute and return the score for a match with e errors and x location.
|
|
+ def match_bitapScore(e, x):
|
|
+ """Compute and return the score for a match with e errors and x location.
|
|
Accesses loc and pattern through being a closure.
|
|
|
|
Args:
|
|
@@ -1269,84 +1344,87 @@ class diff_match_patch:
|
|
Returns:
|
|
Overall score for match (0.0 = good, 1.0 = bad).
|
|
"""
|
|
- accuracy = float(e) / len(pattern)
|
|
- proximity = abs(loc - x)
|
|
- if not self.Match_Distance:
|
|
- # Dodge divide by zero error.
|
|
- return proximity and 1.0 or accuracy
|
|
- return accuracy + (proximity / float(self.Match_Distance))
|
|
-
|
|
- # Highest score beyond which we give up.
|
|
- score_threshold = self.Match_Threshold
|
|
- # Is there a nearby exact match? (speedup)
|
|
- best_loc = text.find(pattern, loc)
|
|
- if best_loc != -1:
|
|
- score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
|
|
- # What about in the other direction? (speedup)
|
|
- best_loc = text.rfind(pattern, loc + len(pattern))
|
|
- if best_loc != -1:
|
|
- score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
|
|
-
|
|
- # Initialise the bit arrays.
|
|
- matchmask = 1 << (len(pattern) - 1)
|
|
- best_loc = -1
|
|
-
|
|
- bin_max = len(pattern) + len(text)
|
|
- # Empty initialization added to appease pychecker.
|
|
- last_rd = None
|
|
- for d in range(len(pattern)):
|
|
- # Scan for the best match each iteration allows for one more error.
|
|
- # Run a binary search to determine how far from 'loc' we can stray at
|
|
- # this error level.
|
|
- bin_min = 0
|
|
- bin_mid = bin_max
|
|
- while bin_min < bin_mid:
|
|
- if match_bitapScore(d, loc + bin_mid) <= score_threshold:
|
|
- bin_min = bin_mid
|
|
- else:
|
|
- bin_max = bin_mid
|
|
- bin_mid = (bin_max - bin_min) // 2 + bin_min
|
|
-
|
|
- # Use the result from this iteration as the maximum for the next.
|
|
- bin_max = bin_mid
|
|
- start = max(1, loc - bin_mid + 1)
|
|
- finish = min(loc + bin_mid, len(text)) + len(pattern)
|
|
-
|
|
- rd = [0] * (finish + 2)
|
|
- rd[finish + 1] = (1 << d) - 1
|
|
- for j in range(finish, start - 1, -1):
|
|
- if len(text) <= j - 1:
|
|
- # Out of range.
|
|
- charMatch = 0
|
|
- else:
|
|
- charMatch = s.get(text[j - 1], 0)
|
|
- if d == 0: # First pass: exact match.
|
|
- rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
|
|
- else: # Subsequent passes: fuzzy match.
|
|
- rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | (
|
|
- ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
|
|
- if rd[j] & matchmask:
|
|
- score = match_bitapScore(d, j - 1)
|
|
- # This match will almost certainly be better than any existing match.
|
|
- # But check anyway.
|
|
- if score <= score_threshold:
|
|
- # Told you so.
|
|
- score_threshold = score
|
|
- best_loc = j - 1
|
|
- if best_loc > loc:
|
|
- # When passing loc, don't exceed our current distance from loc.
|
|
- start = max(1, 2 * loc - best_loc)
|
|
- else:
|
|
- # Already passed loc, downhill from here on in.
|
|
- break
|
|
- # No hope for a (better) match at greater error levels.
|
|
- if match_bitapScore(d + 1, loc) > score_threshold:
|
|
- break
|
|
- last_rd = rd
|
|
- return best_loc
|
|
+ accuracy = float(e) / len(pattern)
|
|
+ proximity = abs(loc - x)
|
|
+ if not self.Match_Distance:
|
|
+ # Dodge divide by zero error.
|
|
+ return proximity and 1.0 or accuracy
|
|
+ return accuracy + (proximity / float(self.Match_Distance))
|
|
+
|
|
+ # Highest score beyond which we give up.
|
|
+ score_threshold = self.Match_Threshold
|
|
+ # Is there a nearby exact match? (speedup)
|
|
+ best_loc = text.find(pattern, loc)
|
|
+ if best_loc != -1:
|
|
+ score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
|
|
+ # What about in the other direction? (speedup)
|
|
+ best_loc = text.rfind(pattern, loc + len(pattern))
|
|
+ if best_loc != -1:
|
|
+ score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
|
|
+
|
|
+ # Initialise the bit arrays.
|
|
+ matchmask = 1 << (len(pattern) - 1)
|
|
+ best_loc = -1
|
|
+
|
|
+ bin_max = len(pattern) + len(text)
|
|
+ # Empty initialization added to appease pychecker.
|
|
+ last_rd = None
|
|
+ for d in range(len(pattern)):
|
|
+ # Scan for the best match each iteration allows for one more error.
|
|
+ # Run a binary search to determine how far from 'loc' we can stray at
|
|
+ # this error level.
|
|
+ bin_min = 0
|
|
+ bin_mid = bin_max
|
|
+ while bin_min < bin_mid:
|
|
+ if match_bitapScore(d, loc + bin_mid) <= score_threshold:
|
|
+ bin_min = bin_mid
|
|
+ else:
|
|
+ bin_max = bin_mid
|
|
+ bin_mid = (bin_max - bin_min) // 2 + bin_min
|
|
+
|
|
+ # Use the result from this iteration as the maximum for the next.
|
|
+ bin_max = bin_mid
|
|
+ start = max(1, loc - bin_mid + 1)
|
|
+ finish = min(loc + bin_mid, len(text)) + len(pattern)
|
|
+
|
|
+ rd = [0] * (finish + 2)
|
|
+ rd[finish + 1] = (1 << d) - 1
|
|
+ for j in range(finish, start - 1, -1):
|
|
+ if len(text) <= j - 1:
|
|
+ # Out of range.
|
|
+ charMatch = 0
|
|
+ else:
|
|
+ charMatch = s.get(text[j - 1], 0)
|
|
+ if d == 0: # First pass: exact match.
|
|
+ rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
|
|
+ else: # Subsequent passes: fuzzy match.
|
|
+ rd[j] = (
|
|
+ (((rd[j + 1] << 1) | 1) & charMatch)
|
|
+ | (((last_rd[j + 1] | last_rd[j]) << 1) | 1)
|
|
+ | last_rd[j + 1]
|
|
+ )
|
|
+ if rd[j] & matchmask:
|
|
+ score = match_bitapScore(d, j - 1)
|
|
+ # This match will almost certainly be better than any existing match.
|
|
+ # But check anyway.
|
|
+ if score <= score_threshold:
|
|
+ # Told you so.
|
|
+ score_threshold = score
|
|
+ best_loc = j - 1
|
|
+ if best_loc > loc:
|
|
+ # When passing loc, don't exceed our current distance from loc.
|
|
+ start = max(1, 2 * loc - best_loc)
|
|
+ else:
|
|
+ # Already passed loc, downhill from here on in.
|
|
+ break
|
|
+ # No hope for a (better) match at greater error levels.
|
|
+ if match_bitapScore(d + 1, loc) > score_threshold:
|
|
+ break
|
|
+ last_rd = rd
|
|
+ return best_loc
|
|
|
|
- def match_alphabet(self, pattern):
|
|
- """Initialise the alphabet for the Bitap algorithm.
|
|
+ def match_alphabet(self, pattern):
|
|
+ """Initialise the alphabet for the Bitap algorithm.
|
|
|
|
Args:
|
|
pattern: The text to encode.
|
|
@@ -1354,58 +1432,61 @@ class diff_match_patch:
|
|
Returns:
|
|
Hash of character locations.
|
|
"""
|
|
- s = {}
|
|
- for char in pattern:
|
|
- s[char] = 0
|
|
- for i in range(len(pattern)):
|
|
- s[pattern[i]] |= 1 << (len(pattern) - i - 1)
|
|
- return s
|
|
+ s = {}
|
|
+ for char in pattern:
|
|
+ s[char] = 0
|
|
+ for i in range(len(pattern)):
|
|
+ s[pattern[i]] |= 1 << (len(pattern) - i - 1)
|
|
+ return s
|
|
|
|
- # PATCH FUNCTIONS
|
|
+ # PATCH FUNCTIONS
|
|
|
|
- def patch_addContext(self, patch, text):
|
|
- """Increase the context until it is unique,
|
|
+ def patch_addContext(self, patch, text):
|
|
+ """Increase the context until it is unique,
|
|
but don't let the pattern expand beyond Match_MaxBits.
|
|
|
|
Args:
|
|
patch: The patch to grow.
|
|
text: Source text.
|
|
"""
|
|
- if len(text) == 0:
|
|
- return
|
|
- pattern = text[patch.start2 : patch.start2 + patch.length1]
|
|
- padding = 0
|
|
-
|
|
- # Look for the first and last matches of pattern in text. If two different
|
|
- # matches are found, increase the pattern length.
|
|
- while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
|
|
- 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
|
|
- self.Patch_Margin)):
|
|
- padding += self.Patch_Margin
|
|
- pattern = text[max(0, patch.start2 - padding) :
|
|
- patch.start2 + patch.length1 + padding]
|
|
- # Add one chunk for good luck.
|
|
- padding += self.Patch_Margin
|
|
-
|
|
- # Add the prefix.
|
|
- prefix = text[max(0, patch.start2 - padding) : patch.start2]
|
|
- if prefix:
|
|
- patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
|
|
- # Add the suffix.
|
|
- suffix = text[patch.start2 + patch.length1 :
|
|
- patch.start2 + patch.length1 + padding]
|
|
- if suffix:
|
|
- patch.diffs.append((self.DIFF_EQUAL, suffix))
|
|
-
|
|
- # Roll back the start points.
|
|
- patch.start1 -= len(prefix)
|
|
- patch.start2 -= len(prefix)
|
|
- # Extend lengths.
|
|
- patch.length1 += len(prefix) + len(suffix)
|
|
- patch.length2 += len(prefix) + len(suffix)
|
|
+ if len(text) == 0:
|
|
+ return
|
|
+ pattern = text[patch.start2 : patch.start2 + patch.length1]
|
|
+ padding = 0
|
|
+
|
|
+ # Look for the first and last matches of pattern in text. If two different
|
|
+ # matches are found, increase the pattern length.
|
|
+ while text.find(pattern) != text.rfind(pattern) and (
|
|
+ self.Match_MaxBits == 0
|
|
+ or len(pattern) < self.Match_MaxBits - self.Patch_Margin - self.Patch_Margin
|
|
+ ):
|
|
+ padding += self.Patch_Margin
|
|
+ pattern = text[
|
|
+ max(0, patch.start2 - padding) : patch.start2 + patch.length1 + padding
|
|
+ ]
|
|
+ # Add one chunk for good luck.
|
|
+ padding += self.Patch_Margin
|
|
+
|
|
+ # Add the prefix.
|
|
+ prefix = text[max(0, patch.start2 - padding) : patch.start2]
|
|
+ if prefix:
|
|
+ patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
|
|
+ # Add the suffix.
|
|
+ suffix = text[
|
|
+ patch.start2 + patch.length1 : patch.start2 + patch.length1 + padding
|
|
+ ]
|
|
+ if suffix:
|
|
+ patch.diffs.append((self.DIFF_EQUAL, suffix))
|
|
+
|
|
+ # Roll back the start points.
|
|
+ patch.start1 -= len(prefix)
|
|
+ patch.start2 -= len(prefix)
|
|
+ # Extend lengths.
|
|
+ patch.length1 += len(prefix) + len(suffix)
|
|
+ patch.length2 += len(prefix) + len(suffix)
|
|
|
|
- def patch_make(self, a, b=None, c=None):
|
|
- """Compute a list of patches to turn text1 into text2.
|
|
+ def patch_make(self, a, b=None, c=None):
|
|
+ """Compute a list of patches to turn text1 into text2.
|
|
Use diffs if provided, otherwise compute it ourselves.
|
|
There are four ways to call this function, depending on what data is
|
|
available to the caller:
|
|
@@ -1429,96 +1510,102 @@ class diff_match_patch:
|
|
Returns:
|
|
Array of Patch objects.
|
|
"""
|
|
- text1 = None
|
|
- diffs = None
|
|
- if isinstance(a, str) and isinstance(b, str) and c is None:
|
|
- # Method 1: text1, text2
|
|
- # Compute diffs from text1 and text2.
|
|
- text1 = a
|
|
- diffs = self.diff_main(text1, b, True)
|
|
- if len(diffs) > 2:
|
|
- self.diff_cleanupSemantic(diffs)
|
|
- self.diff_cleanupEfficiency(diffs)
|
|
- elif isinstance(a, list) and b is None and c is None:
|
|
- # Method 2: diffs
|
|
- # Compute text1 from diffs.
|
|
- diffs = a
|
|
- text1 = self.diff_text1(diffs)
|
|
- elif isinstance(a, str) and isinstance(b, list) and c is None:
|
|
- # Method 3: text1, diffs
|
|
- text1 = a
|
|
- diffs = b
|
|
- elif (isinstance(a, str) and isinstance(b, str) and
|
|
- isinstance(c, list)):
|
|
- # Method 4: text1, text2, diffs
|
|
- # text2 is not used.
|
|
- text1 = a
|
|
- diffs = c
|
|
- else:
|
|
- raise ValueError("Unknown call format to patch_make.")
|
|
-
|
|
- if not diffs:
|
|
- return [] # Get rid of the None case.
|
|
- patches = []
|
|
- patch = patch_obj()
|
|
- char_count1 = 0 # Number of characters into the text1 string.
|
|
- char_count2 = 0 # Number of characters into the text2 string.
|
|
- prepatch_text = text1 # Recreate the patches to determine context info.
|
|
- postpatch_text = text1
|
|
- for x in range(len(diffs)):
|
|
- (diff_type, diff_text) = diffs[x]
|
|
- if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
|
|
- # A new patch starts here.
|
|
- patch.start1 = char_count1
|
|
- patch.start2 = char_count2
|
|
- if diff_type == self.DIFF_INSERT:
|
|
- # Insertion
|
|
- patch.diffs.append(diffs[x])
|
|
- patch.length2 += len(diff_text)
|
|
- postpatch_text = (postpatch_text[:char_count2] + diff_text +
|
|
- postpatch_text[char_count2:])
|
|
- elif diff_type == self.DIFF_DELETE:
|
|
- # Deletion.
|
|
- patch.length1 += len(diff_text)
|
|
- patch.diffs.append(diffs[x])
|
|
- postpatch_text = (postpatch_text[:char_count2] +
|
|
- postpatch_text[char_count2 + len(diff_text):])
|
|
- elif (diff_type == self.DIFF_EQUAL and
|
|
- len(diff_text) <= 2 * self.Patch_Margin and
|
|
- len(patch.diffs) != 0 and len(diffs) != x + 1):
|
|
- # Small equality inside a patch.
|
|
- patch.diffs.append(diffs[x])
|
|
- patch.length1 += len(diff_text)
|
|
- patch.length2 += len(diff_text)
|
|
-
|
|
- if (diff_type == self.DIFF_EQUAL and
|
|
- len(diff_text) >= 2 * self.Patch_Margin):
|
|
- # Time for a new patch.
|
|
+ text1 = None
|
|
+ diffs = None
|
|
+ if isinstance(a, str) and isinstance(b, str) and c is None:
|
|
+ # Method 1: text1, text2
|
|
+ # Compute diffs from text1 and text2.
|
|
+ text1 = a
|
|
+ diffs = self.diff_main(text1, b, True)
|
|
+ if len(diffs) > 2:
|
|
+ self.diff_cleanupSemantic(diffs)
|
|
+ self.diff_cleanupEfficiency(diffs)
|
|
+ elif isinstance(a, list) and b is None and c is None:
|
|
+ # Method 2: diffs
|
|
+ # Compute text1 from diffs.
|
|
+ diffs = a
|
|
+ text1 = self.diff_text1(diffs)
|
|
+ elif isinstance(a, str) and isinstance(b, list) and c is None:
|
|
+ # Method 3: text1, diffs
|
|
+ text1 = a
|
|
+ diffs = b
|
|
+ elif isinstance(a, str) and isinstance(b, str) and isinstance(c, list):
|
|
+ # Method 4: text1, text2, diffs
|
|
+ # text2 is not used.
|
|
+ text1 = a
|
|
+ diffs = c
|
|
+ else:
|
|
+ raise ValueError("Unknown call format to patch_make.")
|
|
+
|
|
+ if not diffs:
|
|
+ return [] # Get rid of the None case.
|
|
+ patches = []
|
|
+ patch = patch_obj()
|
|
+ char_count1 = 0 # Number of characters into the text1 string.
|
|
+ char_count2 = 0 # Number of characters into the text2 string.
|
|
+ prepatch_text = text1 # Recreate the patches to determine context info.
|
|
+ postpatch_text = text1
|
|
+ for x in range(len(diffs)):
|
|
+ (diff_type, diff_text) = diffs[x]
|
|
+ if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
|
|
+ # A new patch starts here.
|
|
+ patch.start1 = char_count1
|
|
+ patch.start2 = char_count2
|
|
+ if diff_type == self.DIFF_INSERT:
|
|
+ # Insertion
|
|
+ patch.diffs.append(diffs[x])
|
|
+ patch.length2 += len(diff_text)
|
|
+ postpatch_text = (
|
|
+ postpatch_text[:char_count2]
|
|
+ + diff_text
|
|
+ + postpatch_text[char_count2:]
|
|
+ )
|
|
+ elif diff_type == self.DIFF_DELETE:
|
|
+ # Deletion.
|
|
+ patch.length1 += len(diff_text)
|
|
+ patch.diffs.append(diffs[x])
|
|
+ postpatch_text = (
|
|
+ postpatch_text[:char_count2]
|
|
+ + postpatch_text[char_count2 + len(diff_text) :]
|
|
+ )
|
|
+ elif (
|
|
+ diff_type == self.DIFF_EQUAL
|
|
+ and len(diff_text) <= 2 * self.Patch_Margin
|
|
+ and len(patch.diffs) != 0
|
|
+ and len(diffs) != x + 1
|
|
+ ):
|
|
+ # Small equality inside a patch.
|
|
+ patch.diffs.append(diffs[x])
|
|
+ patch.length1 += len(diff_text)
|
|
+ patch.length2 += len(diff_text)
|
|
+
|
|
+ if diff_type == self.DIFF_EQUAL and len(diff_text) >= 2 * self.Patch_Margin:
|
|
+ # Time for a new patch.
|
|
+ if len(patch.diffs) != 0:
|
|
+ self.patch_addContext(patch, prepatch_text)
|
|
+ patches.append(patch)
|
|
+ patch = patch_obj()
|
|
+ # Unlike Unidiff, our patch lists have a rolling context.
|
|
+ # https://github.com/google/diff-match-patch/wiki/Unidiff
|
|
+ # Update prepatch text & pos to reflect the application of the
|
|
+ # just completed patch.
|
|
+ prepatch_text = postpatch_text
|
|
+ char_count1 = char_count2
|
|
+
|
|
+ # Update the current character count.
|
|
+ if diff_type != self.DIFF_INSERT:
|
|
+ char_count1 += len(diff_text)
|
|
+ if diff_type != self.DIFF_DELETE:
|
|
+ char_count2 += len(diff_text)
|
|
+
|
|
+ # Pick up the leftover patch if not empty.
|
|
if len(patch.diffs) != 0:
|
|
- self.patch_addContext(patch, prepatch_text)
|
|
- patches.append(patch)
|
|
- patch = patch_obj()
|
|
- # Unlike Unidiff, our patch lists have a rolling context.
|
|
- # https://github.com/google/diff-match-patch/wiki/Unidiff
|
|
- # Update prepatch text & pos to reflect the application of the
|
|
- # just completed patch.
|
|
- prepatch_text = postpatch_text
|
|
- char_count1 = char_count2
|
|
-
|
|
- # Update the current character count.
|
|
- if diff_type != self.DIFF_INSERT:
|
|
- char_count1 += len(diff_text)
|
|
- if diff_type != self.DIFF_DELETE:
|
|
- char_count2 += len(diff_text)
|
|
-
|
|
- # Pick up the leftover patch if not empty.
|
|
- if len(patch.diffs) != 0:
|
|
- self.patch_addContext(patch, prepatch_text)
|
|
- patches.append(patch)
|
|
- return patches
|
|
+ self.patch_addContext(patch, prepatch_text)
|
|
+ patches.append(patch)
|
|
+ return patches
|
|
|
|
- def patch_deepCopy(self, patches):
|
|
- """Given an array of patches, return another array that is identical.
|
|
+ def patch_deepCopy(self, patches):
|
|
+ """Given an array of patches, return another array that is identical.
|
|
|
|
Args:
|
|
patches: Array of Patch objects.
|
|
@@ -1526,20 +1613,20 @@ class diff_match_patch:
|
|
Returns:
|
|
Array of Patch objects.
|
|
"""
|
|
- patchesCopy = []
|
|
- for patch in patches:
|
|
- patchCopy = patch_obj()
|
|
- # No need to deep copy the tuples since they are immutable.
|
|
- patchCopy.diffs = patch.diffs[:]
|
|
- patchCopy.start1 = patch.start1
|
|
- patchCopy.start2 = patch.start2
|
|
- patchCopy.length1 = patch.length1
|
|
- patchCopy.length2 = patch.length2
|
|
- patchesCopy.append(patchCopy)
|
|
- return patchesCopy
|
|
+ patchesCopy = []
|
|
+ for patch in patches:
|
|
+ patchCopy = patch_obj()
|
|
+ # No need to deep copy the tuples since they are immutable.
|
|
+ patchCopy.diffs = patch.diffs[:]
|
|
+ patchCopy.start1 = patch.start1
|
|
+ patchCopy.start2 = patch.start2
|
|
+ patchCopy.length1 = patch.length1
|
|
+ patchCopy.length2 = patch.length2
|
|
+ patchesCopy.append(patchCopy)
|
|
+ return patchesCopy
|
|
|
|
- def patch_apply(self, patches, text):
|
|
- """Merge a set of patches onto the text. Return a patched text, as well
|
|
+ def patch_apply(self, patches, text):
|
|
+ """Merge a set of patches onto the text. Return a patched text, as well
|
|
as a list of true/false values indicating which patches were applied.
|
|
|
|
Args:
|
|
@@ -1549,85 +1636,102 @@ class diff_match_patch:
|
|
Returns:
|
|
Two element Array, containing the new text and an array of boolean values.
|
|
"""
|
|
- if not patches:
|
|
- return (text, [])
|
|
+ if not patches:
|
|
+ return (text, [])
|
|
|
|
- # Deep copy the patches so that no changes are made to originals.
|
|
- patches = self.patch_deepCopy(patches)
|
|
+ # Deep copy the patches so that no changes are made to originals.
|
|
+ patches = self.patch_deepCopy(patches)
|
|
|
|
- nullPadding = self.patch_addPadding(patches)
|
|
- text = nullPadding + text + nullPadding
|
|
- self.patch_splitMax(patches)
|
|
-
|
|
- # delta keeps track of the offset between the expected and actual location
|
|
- # of the previous patch. If there are patches expected at positions 10 and
|
|
- # 20, but the first patch was found at 12, delta is 2 and the second patch
|
|
- # has an effective expected position of 22.
|
|
- delta = 0
|
|
- results = []
|
|
- for patch in patches:
|
|
- expected_loc = patch.start2 + delta
|
|
- text1 = self.diff_text1(patch.diffs)
|
|
- end_loc = -1
|
|
- if len(text1) > self.Match_MaxBits:
|
|
- # patch_splitMax will only provide an oversized pattern in the case of
|
|
- # a monster delete.
|
|
- start_loc = self.match_main(text, text1[:self.Match_MaxBits],
|
|
- expected_loc)
|
|
- if start_loc != -1:
|
|
- end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
|
|
- expected_loc + len(text1) - self.Match_MaxBits)
|
|
- if end_loc == -1 or start_loc >= end_loc:
|
|
- # Can't find valid trailing context. Drop this patch.
|
|
- start_loc = -1
|
|
- else:
|
|
- start_loc = self.match_main(text, text1, expected_loc)
|
|
- if start_loc == -1:
|
|
- # No match found. :(
|
|
- results.append(False)
|
|
- # Subtract the delta for this failed patch from subsequent patches.
|
|
- delta -= patch.length2 - patch.length1
|
|
- else:
|
|
- # Found a match. :)
|
|
- results.append(True)
|
|
- delta = start_loc - expected_loc
|
|
- if end_loc == -1:
|
|
- text2 = text[start_loc : start_loc + len(text1)]
|
|
- else:
|
|
- text2 = text[start_loc : end_loc + self.Match_MaxBits]
|
|
- if text1 == text2:
|
|
- # Perfect match, just shove the replacement text in.
|
|
- text = (text[:start_loc] + self.diff_text2(patch.diffs) +
|
|
- text[start_loc + len(text1):])
|
|
- else:
|
|
- # Imperfect match.
|
|
- # Run a diff to get a framework of equivalent indices.
|
|
- diffs = self.diff_main(text1, text2, False)
|
|
- if (len(text1) > self.Match_MaxBits and
|
|
- self.diff_levenshtein(diffs) / float(len(text1)) >
|
|
- self.Patch_DeleteThreshold):
|
|
- # The end points match, but the content is unacceptably bad.
|
|
- results[-1] = False
|
|
- else:
|
|
- self.diff_cleanupSemanticLossless(diffs)
|
|
- index1 = 0
|
|
- for (op, data) in patch.diffs:
|
|
- if op != self.DIFF_EQUAL:
|
|
- index2 = self.diff_xIndex(diffs, index1)
|
|
- if op == self.DIFF_INSERT: # Insertion
|
|
- text = text[:start_loc + index2] + data + text[start_loc +
|
|
- index2:]
|
|
- elif op == self.DIFF_DELETE: # Deletion
|
|
- text = text[:start_loc + index2] + text[start_loc +
|
|
- self.diff_xIndex(diffs, index1 + len(data)):]
|
|
- if op != self.DIFF_DELETE:
|
|
- index1 += len(data)
|
|
- # Strip the padding off.
|
|
- text = text[len(nullPadding):-len(nullPadding)]
|
|
- return (text, results)
|
|
+ nullPadding = self.patch_addPadding(patches)
|
|
+ text = nullPadding + text + nullPadding
|
|
+ self.patch_splitMax(patches)
|
|
+
|
|
+ # delta keeps track of the offset between the expected and actual location
|
|
+ # of the previous patch. If there are patches expected at positions 10 and
|
|
+ # 20, but the first patch was found at 12, delta is 2 and the second patch
|
|
+ # has an effective expected position of 22.
|
|
+ delta = 0
|
|
+ results = []
|
|
+ for patch in patches:
|
|
+ expected_loc = patch.start2 + delta
|
|
+ text1 = self.diff_text1(patch.diffs)
|
|
+ end_loc = -1
|
|
+ if len(text1) > self.Match_MaxBits:
|
|
+ # patch_splitMax will only provide an oversized pattern in the case of
|
|
+ # a monster delete.
|
|
+ start_loc = self.match_main(
|
|
+ text, text1[: self.Match_MaxBits], expected_loc
|
|
+ )
|
|
+ if start_loc != -1:
|
|
+ end_loc = self.match_main(
|
|
+ text,
|
|
+ text1[-self.Match_MaxBits :],
|
|
+ expected_loc + len(text1) - self.Match_MaxBits,
|
|
+ )
|
|
+ if end_loc == -1 or start_loc >= end_loc:
|
|
+ # Can't find valid trailing context. Drop this patch.
|
|
+ start_loc = -1
|
|
+ else:
|
|
+ start_loc = self.match_main(text, text1, expected_loc)
|
|
+ if start_loc == -1:
|
|
+ # No match found. :(
|
|
+ results.append(False)
|
|
+ # Subtract the delta for this failed patch from subsequent patches.
|
|
+ delta -= patch.length2 - patch.length1
|
|
+ else:
|
|
+ # Found a match. :)
|
|
+ results.append(True)
|
|
+ delta = start_loc - expected_loc
|
|
+ if end_loc == -1:
|
|
+ text2 = text[start_loc : start_loc + len(text1)]
|
|
+ else:
|
|
+ text2 = text[start_loc : end_loc + self.Match_MaxBits]
|
|
+ if text1 == text2:
|
|
+ # Perfect match, just shove the replacement text in.
|
|
+ text = (
|
|
+ text[:start_loc]
|
|
+ + self.diff_text2(patch.diffs)
|
|
+ + text[start_loc + len(text1) :]
|
|
+ )
|
|
+ else:
|
|
+ # Imperfect match.
|
|
+ # Run a diff to get a framework of equivalent indices.
|
|
+ diffs = self.diff_main(text1, text2, False)
|
|
+ if (
|
|
+ len(text1) > self.Match_MaxBits
|
|
+ and self.diff_levenshtein(diffs) / float(len(text1))
|
|
+ > self.Patch_DeleteThreshold
|
|
+ ):
|
|
+ # The end points match, but the content is unacceptably bad.
|
|
+ results[-1] = False
|
|
+ else:
|
|
+ self.diff_cleanupSemanticLossless(diffs)
|
|
+ index1 = 0
|
|
+ for (op, data) in patch.diffs:
|
|
+ if op != self.DIFF_EQUAL:
|
|
+ index2 = self.diff_xIndex(diffs, index1)
|
|
+ if op == self.DIFF_INSERT: # Insertion
|
|
+ text = (
|
|
+ text[: start_loc + index2]
|
|
+ + data
|
|
+ + text[start_loc + index2 :]
|
|
+ )
|
|
+ elif op == self.DIFF_DELETE: # Deletion
|
|
+ text = (
|
|
+ text[: start_loc + index2]
|
|
+ + text[
|
|
+ start_loc
|
|
+ + self.diff_xIndex(diffs, index1 + len(data)) :
|
|
+ ]
|
|
+ )
|
|
+ if op != self.DIFF_DELETE:
|
|
+ index1 += len(data)
|
|
+ # Strip the padding off.
|
|
+ text = text[len(nullPadding) : -len(nullPadding)]
|
|
+ return (text, results)
|
|
|
|
- def patch_addPadding(self, patches):
|
|
- """Add some padding on text start and end so that edges can match
|
|
+ def patch_addPadding(self, patches):
|
|
+ """Add some padding on text start and end so that edges can match
|
|
something. Intended to be called only from within patch_apply.
|
|
|
|
Args:
|
|
@@ -1636,144 +1740,154 @@ class diff_match_patch:
|
|
Returns:
|
|
The padding string added to each side.
|
|
"""
|
|
- paddingLength = self.Patch_Margin
|
|
- nullPadding = ""
|
|
- for x in range(1, paddingLength + 1):
|
|
- nullPadding += chr(x)
|
|
-
|
|
- # Bump all the patches forward.
|
|
- for patch in patches:
|
|
- patch.start1 += paddingLength
|
|
- patch.start2 += paddingLength
|
|
-
|
|
- # Add some padding on start of first diff.
|
|
- patch = patches[0]
|
|
- diffs = patch.diffs
|
|
- if not diffs or diffs[0][0] != self.DIFF_EQUAL:
|
|
- # Add nullPadding equality.
|
|
- diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
|
|
- patch.start1 -= paddingLength # Should be 0.
|
|
- patch.start2 -= paddingLength # Should be 0.
|
|
- patch.length1 += paddingLength
|
|
- patch.length2 += paddingLength
|
|
- elif paddingLength > len(diffs[0][1]):
|
|
- # Grow first equality.
|
|
- extraLength = paddingLength - len(diffs[0][1])
|
|
- newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
|
|
- diffs[0] = (diffs[0][0], newText)
|
|
- patch.start1 -= extraLength
|
|
- patch.start2 -= extraLength
|
|
- patch.length1 += extraLength
|
|
- patch.length2 += extraLength
|
|
-
|
|
- # Add some padding on end of last diff.
|
|
- patch = patches[-1]
|
|
- diffs = patch.diffs
|
|
- if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
|
|
- # Add nullPadding equality.
|
|
- diffs.append((self.DIFF_EQUAL, nullPadding))
|
|
- patch.length1 += paddingLength
|
|
- patch.length2 += paddingLength
|
|
- elif paddingLength > len(diffs[-1][1]):
|
|
- # Grow last equality.
|
|
- extraLength = paddingLength - len(diffs[-1][1])
|
|
- newText = diffs[-1][1] + nullPadding[:extraLength]
|
|
- diffs[-1] = (diffs[-1][0], newText)
|
|
- patch.length1 += extraLength
|
|
- patch.length2 += extraLength
|
|
+ paddingLength = self.Patch_Margin
|
|
+ nullPadding = ""
|
|
+ for x in range(1, paddingLength + 1):
|
|
+ nullPadding += chr(x)
|
|
+
|
|
+ # Bump all the patches forward.
|
|
+ for patch in patches:
|
|
+ patch.start1 += paddingLength
|
|
+ patch.start2 += paddingLength
|
|
+
|
|
+ # Add some padding on start of first diff.
|
|
+ patch = patches[0]
|
|
+ diffs = patch.diffs
|
|
+ if not diffs or diffs[0][0] != self.DIFF_EQUAL:
|
|
+ # Add nullPadding equality.
|
|
+ diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
|
|
+ patch.start1 -= paddingLength # Should be 0.
|
|
+ patch.start2 -= paddingLength # Should be 0.
|
|
+ patch.length1 += paddingLength
|
|
+ patch.length2 += paddingLength
|
|
+ elif paddingLength > len(diffs[0][1]):
|
|
+ # Grow first equality.
|
|
+ extraLength = paddingLength - len(diffs[0][1])
|
|
+ newText = nullPadding[len(diffs[0][1]) :] + diffs[0][1]
|
|
+ diffs[0] = (diffs[0][0], newText)
|
|
+ patch.start1 -= extraLength
|
|
+ patch.start2 -= extraLength
|
|
+ patch.length1 += extraLength
|
|
+ patch.length2 += extraLength
|
|
+
|
|
+ # Add some padding on end of last diff.
|
|
+ patch = patches[-1]
|
|
+ diffs = patch.diffs
|
|
+ if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
|
|
+ # Add nullPadding equality.
|
|
+ diffs.append((self.DIFF_EQUAL, nullPadding))
|
|
+ patch.length1 += paddingLength
|
|
+ patch.length2 += paddingLength
|
|
+ elif paddingLength > len(diffs[-1][1]):
|
|
+ # Grow last equality.
|
|
+ extraLength = paddingLength - len(diffs[-1][1])
|
|
+ newText = diffs[-1][1] + nullPadding[:extraLength]
|
|
+ diffs[-1] = (diffs[-1][0], newText)
|
|
+ patch.length1 += extraLength
|
|
+ patch.length2 += extraLength
|
|
|
|
- return nullPadding
|
|
+ return nullPadding
|
|
|
|
- def patch_splitMax(self, patches):
|
|
- """Look through the patches and break up any which are longer than the
|
|
+ def patch_splitMax(self, patches):
|
|
+ """Look through the patches and break up any which are longer than the
|
|
maximum limit of the match algorithm.
|
|
Intended to be called only from within patch_apply.
|
|
|
|
Args:
|
|
patches: Array of Patch objects.
|
|
"""
|
|
- patch_size = self.Match_MaxBits
|
|
- if patch_size == 0:
|
|
- # Python has the option of not splitting strings due to its ability
|
|
- # to handle integers of arbitrary precision.
|
|
- return
|
|
- for x in range(len(patches)):
|
|
- if patches[x].length1 <= patch_size:
|
|
- continue
|
|
- bigpatch = patches[x]
|
|
- # Remove the big old patch.
|
|
- del patches[x]
|
|
- x -= 1
|
|
- start1 = bigpatch.start1
|
|
- start2 = bigpatch.start2
|
|
- precontext = ''
|
|
- while len(bigpatch.diffs) != 0:
|
|
- # Create one of several smaller patches.
|
|
- patch = patch_obj()
|
|
- empty = True
|
|
- patch.start1 = start1 - len(precontext)
|
|
- patch.start2 = start2 - len(precontext)
|
|
- if precontext:
|
|
- patch.length1 = patch.length2 = len(precontext)
|
|
- patch.diffs.append((self.DIFF_EQUAL, precontext))
|
|
-
|
|
- while (len(bigpatch.diffs) != 0 and
|
|
- patch.length1 < patch_size - self.Patch_Margin):
|
|
- (diff_type, diff_text) = bigpatch.diffs[0]
|
|
- if diff_type == self.DIFF_INSERT:
|
|
- # Insertions are harmless.
|
|
- patch.length2 += len(diff_text)
|
|
- start2 += len(diff_text)
|
|
- patch.diffs.append(bigpatch.diffs.pop(0))
|
|
- empty = False
|
|
- elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
|
|
- patch.diffs[0][0] == self.DIFF_EQUAL and
|
|
- len(diff_text) > 2 * patch_size):
|
|
- # This is a large deletion. Let it pass in one chunk.
|
|
- patch.length1 += len(diff_text)
|
|
- start1 += len(diff_text)
|
|
- empty = False
|
|
- patch.diffs.append((diff_type, diff_text))
|
|
- del bigpatch.diffs[0]
|
|
- else:
|
|
- # Deletion or equality. Only take as much as we can stomach.
|
|
- diff_text = diff_text[:patch_size - patch.length1 -
|
|
- self.Patch_Margin]
|
|
- patch.length1 += len(diff_text)
|
|
- start1 += len(diff_text)
|
|
- if diff_type == self.DIFF_EQUAL:
|
|
- patch.length2 += len(diff_text)
|
|
- start2 += len(diff_text)
|
|
- else:
|
|
- empty = False
|
|
-
|
|
- patch.diffs.append((diff_type, diff_text))
|
|
- if diff_text == bigpatch.diffs[0][1]:
|
|
- del bigpatch.diffs[0]
|
|
- else:
|
|
- bigpatch.diffs[0] = (bigpatch.diffs[0][0],
|
|
- bigpatch.diffs[0][1][len(diff_text):])
|
|
+ patch_size = self.Match_MaxBits
|
|
+ if patch_size == 0:
|
|
+ # Python has the option of not splitting strings due to its ability
|
|
+ # to handle integers of arbitrary precision.
|
|
+ return
|
|
+ for x in range(len(patches)):
|
|
+ if patches[x].length1 <= patch_size:
|
|
+ continue
|
|
+ bigpatch = patches[x]
|
|
+ # Remove the big old patch.
|
|
+ del patches[x]
|
|
+ x -= 1
|
|
+ start1 = bigpatch.start1
|
|
+ start2 = bigpatch.start2
|
|
+ precontext = ""
|
|
+ while len(bigpatch.diffs) != 0:
|
|
+ # Create one of several smaller patches.
|
|
+ patch = patch_obj()
|
|
+ empty = True
|
|
+ patch.start1 = start1 - len(precontext)
|
|
+ patch.start2 = start2 - len(precontext)
|
|
+ if precontext:
|
|
+ patch.length1 = patch.length2 = len(precontext)
|
|
+ patch.diffs.append((self.DIFF_EQUAL, precontext))
|
|
+
|
|
+ while (
|
|
+ len(bigpatch.diffs) != 0
|
|
+ and patch.length1 < patch_size - self.Patch_Margin
|
|
+ ):
|
|
+ (diff_type, diff_text) = bigpatch.diffs[0]
|
|
+ if diff_type == self.DIFF_INSERT:
|
|
+ # Insertions are harmless.
|
|
+ patch.length2 += len(diff_text)
|
|
+ start2 += len(diff_text)
|
|
+ patch.diffs.append(bigpatch.diffs.pop(0))
|
|
+ empty = False
|
|
+ elif (
|
|
+ diff_type == self.DIFF_DELETE
|
|
+ and len(patch.diffs) == 1
|
|
+ and patch.diffs[0][0] == self.DIFF_EQUAL
|
|
+ and len(diff_text) > 2 * patch_size
|
|
+ ):
|
|
+ # This is a large deletion. Let it pass in one chunk.
|
|
+ patch.length1 += len(diff_text)
|
|
+ start1 += len(diff_text)
|
|
+ empty = False
|
|
+ patch.diffs.append((diff_type, diff_text))
|
|
+ del bigpatch.diffs[0]
|
|
+ else:
|
|
+ # Deletion or equality. Only take as much as we can stomach.
|
|
+ diff_text = diff_text[
|
|
+ : patch_size - patch.length1 - self.Patch_Margin
|
|
+ ]
|
|
+ patch.length1 += len(diff_text)
|
|
+ start1 += len(diff_text)
|
|
+ if diff_type == self.DIFF_EQUAL:
|
|
+ patch.length2 += len(diff_text)
|
|
+ start2 += len(diff_text)
|
|
+ else:
|
|
+ empty = False
|
|
+
|
|
+ patch.diffs.append((diff_type, diff_text))
|
|
+ if diff_text == bigpatch.diffs[0][1]:
|
|
+ del bigpatch.diffs[0]
|
|
+ else:
|
|
+ bigpatch.diffs[0] = (
|
|
+ bigpatch.diffs[0][0],
|
|
+ bigpatch.diffs[0][1][len(diff_text) :],
|
|
+ )
|
|
+
|
|
+ # Compute the head context for the next patch.
|
|
+ precontext = self.diff_text2(patch.diffs)
|
|
+ precontext = precontext[-self.Patch_Margin :]
|
|
+ # Append the end context for this patch.
|
|
+ postcontext = self.diff_text1(bigpatch.diffs)[: self.Patch_Margin]
|
|
+ if postcontext:
|
|
+ patch.length1 += len(postcontext)
|
|
+ patch.length2 += len(postcontext)
|
|
+ if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
|
|
+ patch.diffs[-1] = (
|
|
+ self.DIFF_EQUAL,
|
|
+ patch.diffs[-1][1] + postcontext,
|
|
+ )
|
|
+ else:
|
|
+ patch.diffs.append((self.DIFF_EQUAL, postcontext))
|
|
+
|
|
+ if not empty:
|
|
+ x += 1
|
|
+ patches.insert(x, patch)
|
|
|
|
- # Compute the head context for the next patch.
|
|
- precontext = self.diff_text2(patch.diffs)
|
|
- precontext = precontext[-self.Patch_Margin:]
|
|
- # Append the end context for this patch.
|
|
- postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
|
|
- if postcontext:
|
|
- patch.length1 += len(postcontext)
|
|
- patch.length2 += len(postcontext)
|
|
- if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
|
|
- patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
|
|
- postcontext)
|
|
- else:
|
|
- patch.diffs.append((self.DIFF_EQUAL, postcontext))
|
|
-
|
|
- if not empty:
|
|
- x += 1
|
|
- patches.insert(x, patch)
|
|
-
|
|
- def patch_toText(self, patches):
|
|
- """Take a list of patches and return a textual representation.
|
|
+ def patch_toText(self, patches):
|
|
+ """Take a list of patches and return a textual representation.
|
|
|
|
Args:
|
|
patches: Array of Patch objects.
|
|
@@ -1781,13 +1895,13 @@ class diff_match_patch:
|
|
Returns:
|
|
Text representation of patches.
|
|
"""
|
|
- text = []
|
|
- for patch in patches:
|
|
- text.append(str(patch))
|
|
- return "".join(text)
|
|
+ text = []
|
|
+ for patch in patches:
|
|
+ text.append(str(patch))
|
|
+ return "".join(text)
|
|
|
|
- def patch_fromText(self, textline):
|
|
- """Parse a textual representation of patches and return a list of patch
|
|
+ def patch_fromText(self, textline):
|
|
+ """Parse a textual representation of patches and return a list of patch
|
|
objects.
|
|
|
|
Args:
|
|
@@ -1799,109 +1913,109 @@ class diff_match_patch:
|
|
Raises:
|
|
ValueError: If invalid input.
|
|
"""
|
|
- patches = []
|
|
- if not textline:
|
|
- return patches
|
|
- text = textline.split('\n')
|
|
- while len(text) != 0:
|
|
- m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
|
|
- if not m:
|
|
- raise ValueError("Invalid patch string: " + text[0])
|
|
- patch = patch_obj()
|
|
- patches.append(patch)
|
|
- patch.start1 = int(m.group(1))
|
|
- if m.group(2) == '':
|
|
- patch.start1 -= 1
|
|
- patch.length1 = 1
|
|
- elif m.group(2) == '0':
|
|
- patch.length1 = 0
|
|
- else:
|
|
- patch.start1 -= 1
|
|
- patch.length1 = int(m.group(2))
|
|
-
|
|
- patch.start2 = int(m.group(3))
|
|
- if m.group(4) == '':
|
|
- patch.start2 -= 1
|
|
- patch.length2 = 1
|
|
- elif m.group(4) == '0':
|
|
- patch.length2 = 0
|
|
- else:
|
|
- patch.start2 -= 1
|
|
- patch.length2 = int(m.group(4))
|
|
-
|
|
- del text[0]
|
|
-
|
|
- while len(text) != 0:
|
|
- if text[0]:
|
|
- sign = text[0][0]
|
|
- else:
|
|
- sign = ''
|
|
- line = urllib.parse.unquote(text[0][1:])
|
|
- if sign == '+':
|
|
- # Insertion.
|
|
- patch.diffs.append((self.DIFF_INSERT, line))
|
|
- elif sign == '-':
|
|
- # Deletion.
|
|
- patch.diffs.append((self.DIFF_DELETE, line))
|
|
- elif sign == ' ':
|
|
- # Minor equality.
|
|
- patch.diffs.append((self.DIFF_EQUAL, line))
|
|
- elif sign == '@':
|
|
- # Start of next patch.
|
|
- break
|
|
- elif sign == '':
|
|
- # Blank line? Whatever.
|
|
- pass
|
|
- else:
|
|
- # WTF?
|
|
- raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
|
|
- del text[0]
|
|
- return patches
|
|
+ patches = []
|
|
+ if not textline:
|
|
+ return patches
|
|
+ text = textline.split("\n")
|
|
+ while len(text) != 0:
|
|
+ m = re.match(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
|
|
+ if not m:
|
|
+ raise ValueError("Invalid patch string: " + text[0])
|
|
+ patch = patch_obj()
|
|
+ patches.append(patch)
|
|
+ patch.start1 = int(m.group(1))
|
|
+ if m.group(2) == "":
|
|
+ patch.start1 -= 1
|
|
+ patch.length1 = 1
|
|
+ elif m.group(2) == "0":
|
|
+ patch.length1 = 0
|
|
+ else:
|
|
+ patch.start1 -= 1
|
|
+ patch.length1 = int(m.group(2))
|
|
+
|
|
+ patch.start2 = int(m.group(3))
|
|
+ if m.group(4) == "":
|
|
+ patch.start2 -= 1
|
|
+ patch.length2 = 1
|
|
+ elif m.group(4) == "0":
|
|
+ patch.length2 = 0
|
|
+ else:
|
|
+ patch.start2 -= 1
|
|
+ patch.length2 = int(m.group(4))
|
|
+
|
|
+ del text[0]
|
|
+
|
|
+ while len(text) != 0:
|
|
+ if text[0]:
|
|
+ sign = text[0][0]
|
|
+ else:
|
|
+ sign = ""
|
|
+ line = urllib.parse.unquote(text[0][1:])
|
|
+ if sign == "+":
|
|
+ # Insertion.
|
|
+ patch.diffs.append((self.DIFF_INSERT, line))
|
|
+ elif sign == "-":
|
|
+ # Deletion.
|
|
+ patch.diffs.append((self.DIFF_DELETE, line))
|
|
+ elif sign == " ":
|
|
+ # Minor equality.
|
|
+ patch.diffs.append((self.DIFF_EQUAL, line))
|
|
+ elif sign == "@":
|
|
+ # Start of next patch.
|
|
+ break
|
|
+ elif sign == "":
|
|
+ # Blank line? Whatever.
|
|
+ pass
|
|
+ else:
|
|
+ # WTF?
|
|
+ raise ValueError(f"Invalid patch mode: '{sign}'\n{line}")
|
|
+ del text[0]
|
|
+ return patches
|
|
|
|
|
|
class patch_obj:
|
|
- """Class representing one patch operation.
|
|
+ """Class representing one patch operation.
|
|
"""
|
|
|
|
- def __init__(self):
|
|
- """Initializes with an empty list of diffs.
|
|
+ def __init__(self):
|
|
+ """Initializes with an empty list of diffs.
|
|
"""
|
|
- self.diffs = []
|
|
- self.start1 = None
|
|
- self.start2 = None
|
|
- self.length1 = 0
|
|
- self.length2 = 0
|
|
+ self.diffs = []
|
|
+ self.start1 = None
|
|
+ self.start2 = None
|
|
+ self.length1 = 0
|
|
+ self.length2 = 0
|
|
|
|
- def __str__(self):
|
|
- """Emulate GNU diff's format.
|
|
+ def __str__(self):
|
|
+ """Emulate GNU diff's format.
|
|
Header: @@ -382,8 +481,9 @@
|
|
Indices are printed as 1-based, not 0-based.
|
|
|
|
Returns:
|
|
The GNU diff string.
|
|
"""
|
|
- if self.length1 == 0:
|
|
- coords1 = str(self.start1) + ",0"
|
|
- elif self.length1 == 1:
|
|
- coords1 = str(self.start1 + 1)
|
|
- else:
|
|
- coords1 = str(self.start1 + 1) + "," + str(self.length1)
|
|
- if self.length2 == 0:
|
|
- coords2 = str(self.start2) + ",0"
|
|
- elif self.length2 == 1:
|
|
- coords2 = str(self.start2 + 1)
|
|
- else:
|
|
- coords2 = str(self.start2 + 1) + "," + str(self.length2)
|
|
- text = ["@@ -", coords1, " +", coords2, " @@\n"]
|
|
- # Escape the body of the patch with %xx notation.
|
|
- for (op, data) in self.diffs:
|
|
- if op == diff_match_patch.DIFF_INSERT:
|
|
- text.append("+")
|
|
- elif op == diff_match_patch.DIFF_DELETE:
|
|
- text.append("-")
|
|
- elif op == diff_match_patch.DIFF_EQUAL:
|
|
- text.append(" ")
|
|
- # High ascii will raise UnicodeDecodeError. Use Unicode instead.
|
|
- data = data.encode("utf-8")
|
|
- text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
|
|
- return "".join(text)
|
|
+ if self.length1 == 0:
|
|
+ coords1 = str(self.start1) + ",0"
|
|
+ elif self.length1 == 1:
|
|
+ coords1 = str(self.start1 + 1)
|
|
+ else:
|
|
+ coords1 = str(self.start1 + 1) + "," + str(self.length1)
|
|
+ if self.length2 == 0:
|
|
+ coords2 = str(self.start2) + ",0"
|
|
+ elif self.length2 == 1:
|
|
+ coords2 = str(self.start2 + 1)
|
|
+ else:
|
|
+ coords2 = str(self.start2 + 1) + "," + str(self.length2)
|
|
+ text = ["@@ -", coords1, " +", coords2, " @@\n"]
|
|
+ # Escape the body of the patch with %xx notation.
|
|
+ for (op, data) in self.diffs:
|
|
+ if op == diff_match_patch.DIFF_INSERT:
|
|
+ text.append("+")
|
|
+ elif op == diff_match_patch.DIFF_DELETE:
|
|
+ text.append("-")
|
|
+ elif op == diff_match_patch.DIFF_EQUAL:
|
|
+ text.append(" ")
|
|
+ # High ascii will raise UnicodeDecodeError. Use Unicode instead.
|
|
+ data = data.encode("utf-8")
|
|
+ text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
|
|
+ return "".join(text)
|
|
Index: xmldiff-2.4/xmldiff/actions.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/xmldiff/actions.py
|
|
+++ xmldiff-2.4/xmldiff/actions.py
|
|
@@ -1,17 +1,17 @@
|
|
from collections import namedtuple
|
|
|
|
# The edit script actions used in xmldiff
|
|
-DeleteNode = namedtuple('DeleteNode', 'node')
|
|
-InsertNode = namedtuple('InsertNode', 'target tag position')
|
|
-RenameNode = namedtuple('RenameNode', 'node tag')
|
|
-MoveNode = namedtuple('MoveNode', 'node target position')
|
|
+DeleteNode = namedtuple("DeleteNode", "node")
|
|
+InsertNode = namedtuple("InsertNode", "target tag position")
|
|
+RenameNode = namedtuple("RenameNode", "node tag")
|
|
+MoveNode = namedtuple("MoveNode", "node target position")
|
|
|
|
-UpdateTextIn = namedtuple('UpdateTextIn', 'node text')
|
|
-UpdateTextAfter = namedtuple('UpdateTextAfter', 'node text')
|
|
+UpdateTextIn = namedtuple("UpdateTextIn", "node text")
|
|
+UpdateTextAfter = namedtuple("UpdateTextAfter", "node text")
|
|
|
|
-UpdateAttrib = namedtuple('UpdateAttrib', 'node name value')
|
|
-DeleteAttrib = namedtuple('DeleteAttrib', 'node name')
|
|
-InsertAttrib = namedtuple('InsertAttrib', 'node name value')
|
|
-RenameAttrib = namedtuple('RenameAttrib', 'node oldname newname')
|
|
+UpdateAttrib = namedtuple("UpdateAttrib", "node name value")
|
|
+DeleteAttrib = namedtuple("DeleteAttrib", "node name")
|
|
+InsertAttrib = namedtuple("InsertAttrib", "node name value")
|
|
+RenameAttrib = namedtuple("RenameAttrib", "node oldname newname")
|
|
|
|
-InsertComment = namedtuple('InsertComment', 'target position text')
|
|
+InsertComment = namedtuple("InsertComment", "target position text")
|
|
Index: xmldiff-2.4/xmldiff/diff.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/xmldiff/diff.py
|
|
+++ xmldiff-2.4/xmldiff/diff.py
|
|
@@ -1,15 +1,11 @@
|
|
-from __future__ import division
|
|
-
|
|
from copy import deepcopy
|
|
from difflib import SequenceMatcher
|
|
from lxml import etree
|
|
from xmldiff import utils, actions
|
|
|
|
|
|
-class Differ(object):
|
|
-
|
|
- def __init__(self, F=None, uniqueattrs=None, ratio_mode='fast',
|
|
- fast_match=False):
|
|
+class Differ:
|
|
+ def __init__(self, F=None, uniqueattrs=None, ratio_mode="fast", fast_match=False):
|
|
# The minimum similarity between two nodes to consider them equal
|
|
if F is None:
|
|
F = 0.5
|
|
@@ -18,17 +14,17 @@ class Differ(object):
|
|
# that uniquely identifies a node inside a document. Defaults
|
|
# to 'xml:id'.
|
|
if uniqueattrs is None:
|
|
- uniqueattrs = ['{http://www.w3.org/XML/1998/namespace}id']
|
|
+ uniqueattrs = ["{http://www.w3.org/XML/1998/namespace}id"]
|
|
self.uniqueattrs = uniqueattrs
|
|
self.fast_match = fast_match
|
|
|
|
# Avoid recreating this for every node
|
|
self._sequencematcher = SequenceMatcher()
|
|
- if ratio_mode == 'fast':
|
|
+ if ratio_mode == "fast":
|
|
self._sequence_ratio = self._sequencematcher.quick_ratio
|
|
- elif ratio_mode == 'accurate':
|
|
+ elif ratio_mode == "accurate":
|
|
self._sequence_ratio = self._sequencematcher.ratio
|
|
- elif ratio_mode == 'faster':
|
|
+ elif ratio_mode == "faster":
|
|
self._sequence_ratio = self._sequencematcher.real_quick_ratio
|
|
else:
|
|
raise ValueError("Unknown ratio_mode '%s'" % ratio_mode)
|
|
@@ -57,8 +53,9 @@ class Differ(object):
|
|
right = right.getroot()
|
|
|
|
if not (etree.iselement(left) and etree.iselement(right)):
|
|
- raise TypeError("The 'left' and 'right' parameters must be "
|
|
- "lxml Elements.")
|
|
+ raise TypeError(
|
|
+ "The 'left' and 'right' parameters must be " "lxml Elements."
|
|
+ )
|
|
|
|
# Left gets modified as a part of the diff, deepcopy it first.
|
|
self.left = deepcopy(left)
|
|
@@ -111,14 +108,15 @@ class Differ(object):
|
|
|
|
if self.fast_match:
|
|
# First find matches with longest_common_subsequence:
|
|
- matches = list(utils.longest_common_subsequence(
|
|
- lnodes, rnodes, lambda x, y: self.node_ratio(x, y) >= 0.5))
|
|
+ matches = list(
|
|
+ utils.longest_common_subsequence(
|
|
+ lnodes, rnodes, lambda x, y: self.node_ratio(x, y) >= 0.5
|
|
+ )
|
|
+ )
|
|
|
|
# Add the matches (I prefer this from start to finish):
|
|
for left_match, right_match in matches:
|
|
- self.append_match(lnodes[left_match],
|
|
- rnodes[right_match],
|
|
- None)
|
|
+ self.append_match(lnodes[left_match], rnodes[right_match], None)
|
|
|
|
# Then remove the nodes (needs to be done backwards):
|
|
for left_match, right_match in reversed(matches):
|
|
@@ -185,16 +183,16 @@ class Differ(object):
|
|
if node in self._text_cache:
|
|
return self._text_cache[node]
|
|
# Get the texts and the tag as a start
|
|
- texts = node.xpath('text()')
|
|
+ texts = node.xpath("text()")
|
|
|
|
# Then add attributes and values
|
|
for tag, value in sorted(node.attrib.items()):
|
|
- if tag[0] == '{':
|
|
- tag = tag.split('}',)[-1]
|
|
- texts.append('%s:%s' % (tag, value))
|
|
+ if tag[0] == "{":
|
|
+ tag = tag.split("}",)[-1]
|
|
+ texts.append(f"{tag}:{value}")
|
|
|
|
# Finally make one string, useful to see how similar two nodes are
|
|
- text = u' '.join(texts).strip()
|
|
+ text = " ".join(texts).strip()
|
|
result = utils.cleanup_whitespace(text)
|
|
self._text_cache[node] = result
|
|
return result
|
|
@@ -256,8 +254,7 @@ class Differ(object):
|
|
# Move: Check if any of the new attributes have the same value
|
|
# as the removed attributes. If they do, it's actually
|
|
# a renaming, and a move is one action instead of remove + insert
|
|
- newattrmap = {v: k for (k, v) in right.attrib.items()
|
|
- if k in new_keys}
|
|
+ newattrmap = {v: k for (k, v) in right.attrib.items() if k in new_keys}
|
|
for lk in sorted(removed_keys):
|
|
value = left.attrib[lk]
|
|
if value in newattrmap:
|
|
@@ -332,19 +329,23 @@ class Differ(object):
|
|
return i
|
|
|
|
def align_children(self, left, right):
|
|
- lchildren = [c for c in left.getchildren()
|
|
- if (id(c) in self._l2rmap and
|
|
- self._l2rmap[id(c)].getparent() is right)]
|
|
- rchildren = [c for c in right.getchildren()
|
|
- if (id(c) in self._r2lmap and
|
|
- self._r2lmap[id(c)].getparent() is left)]
|
|
+ lchildren = [
|
|
+ c
|
|
+ for c in left.getchildren()
|
|
+ if (id(c) in self._l2rmap and self._l2rmap[id(c)].getparent() is right)
|
|
+ ]
|
|
+ rchildren = [
|
|
+ c
|
|
+ for c in right.getchildren()
|
|
+ if (id(c) in self._r2lmap and self._r2lmap[id(c)].getparent() is left)
|
|
+ ]
|
|
if not lchildren or not rchildren:
|
|
# Nothing to align
|
|
return
|
|
|
|
lcs = utils.longest_common_subsequence(
|
|
- lchildren, rchildren,
|
|
- lambda x, y: self._l2rmap[id(x)] is y)
|
|
+ lchildren, rchildren, lambda x, y: self._l2rmap[id(x)] is y
|
|
+ )
|
|
|
|
for x, y in lcs:
|
|
# Mark these as in order
|
|
@@ -362,9 +363,8 @@ class Differ(object):
|
|
rtarget = rchild.getparent()
|
|
ltarget = self._r2lmap[id(rtarget)]
|
|
yield actions.MoveNode(
|
|
- utils.getpath(lchild),
|
|
- utils.getpath(ltarget),
|
|
- right_pos)
|
|
+ utils.getpath(lchild), utils.getpath(ltarget), right_pos
|
|
+ )
|
|
# Do the actual move:
|
|
left.remove(lchild)
|
|
ltarget.insert(right_pos, lchild)
|
|
@@ -394,11 +394,13 @@ class Differ(object):
|
|
# (ii)
|
|
if rnode.tag is etree.Comment:
|
|
yield actions.InsertComment(
|
|
- utils.getpath(ltarget, ltree), pos, rnode.text)
|
|
+ utils.getpath(ltarget, ltree), pos, rnode.text
|
|
+ )
|
|
lnode = etree.Comment(rnode.text)
|
|
else:
|
|
- yield actions.InsertNode(utils.getpath(ltarget, ltree),
|
|
- rnode.tag, pos)
|
|
+ yield actions.InsertNode(
|
|
+ utils.getpath(ltarget, ltree), rnode.tag, pos
|
|
+ )
|
|
lnode = ltarget.makeelement(rnode.tag)
|
|
|
|
# (iii)
|
|
@@ -409,8 +411,7 @@ class Differ(object):
|
|
# And then we update attributes. This is different from the
|
|
# paper, because the paper assumes nodes only has labels and
|
|
# values. Nodes also has texts, we do them later.
|
|
- for action in self.update_node_attr(lnode, rnode):
|
|
- yield action
|
|
+ yield from self.update_node_attr(lnode, rnode)
|
|
|
|
# (c)
|
|
else:
|
|
@@ -426,9 +427,8 @@ class Differ(object):
|
|
if ltarget is not lparent:
|
|
pos = self.find_pos(rnode)
|
|
yield actions.MoveNode(
|
|
- utils.getpath(lnode, ltree),
|
|
- utils.getpath(ltarget, ltree),
|
|
- pos)
|
|
+ utils.getpath(lnode, ltree), utils.getpath(ltarget, ltree), pos
|
|
+ )
|
|
# Move the node from current parent to target
|
|
lparent.remove(lnode)
|
|
ltarget.insert(pos, lnode)
|
|
@@ -436,26 +436,22 @@ class Differ(object):
|
|
self._inorder.add(rnode)
|
|
|
|
# Rename
|
|
- for action in self.update_node_tag(lnode, rnode):
|
|
- yield action
|
|
+ yield from self.update_node_tag(lnode, rnode)
|
|
|
|
# (ii) Update
|
|
# XXX If they are exactly equal, we can skip this,
|
|
# maybe store match results in a cache?
|
|
- for action in self.update_node_attr(lnode, rnode):
|
|
- yield action
|
|
+ yield from self.update_node_attr(lnode, rnode)
|
|
|
|
# (d) Align
|
|
- for action in self.align_children(lnode, rnode):
|
|
- yield action
|
|
+ yield from self.align_children(lnode, rnode)
|
|
|
|
# And lastly, we update all node texts. We do this after
|
|
# aligning children, because when you generate an XML diff
|
|
# from this, that XML diff update generates more children,
|
|
# confusing later inserts or deletes.
|
|
lnode = self._r2lmap[id(rnode)]
|
|
- for action in self.update_node_text(lnode, rnode):
|
|
- yield action
|
|
+ yield from self.update_node_text(lnode, rnode)
|
|
|
|
for lnode in utils.reverse_post_order_traverse(self.left):
|
|
if id(lnode) not in self._l2rmap:
|
|
Index: xmldiff-2.4/xmldiff/diff_match_patch.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/xmldiff/diff_match_patch.py
|
|
+++ xmldiff-2.4/xmldiff/diff_match_patch.py
|
|
@@ -1,4 +1,5 @@
|
|
import sys
|
|
+
|
|
if sys.version_info[0] == 3:
|
|
from xmldiff._diff_match_patch_py3 import *
|
|
else:
|
|
Index: xmldiff-2.4/xmldiff/formatting.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/xmldiff/formatting.py
|
|
+++ xmldiff-2.4/xmldiff/formatting.py
|
|
@@ -1,6 +1,5 @@
|
|
import json
|
|
import re
|
|
-import six
|
|
|
|
from collections import namedtuple
|
|
from copy import deepcopy
|
|
@@ -9,12 +8,12 @@ from xmldiff.diff_match_patch import dif
|
|
from xmldiff import utils
|
|
|
|
|
|
-DIFF_NS = 'http://namespaces.shoobx.com/diff'
|
|
-DIFF_PREFIX = 'diff'
|
|
+DIFF_NS = "http://namespaces.shoobx.com/diff"
|
|
+DIFF_PREFIX = "diff"
|
|
|
|
-INSERT_NAME = '{%s}insert' % DIFF_NS
|
|
-DELETE_NAME = '{%s}delete' % DIFF_NS
|
|
-RENAME_NAME = '{%s}rename' % DIFF_NS
|
|
+INSERT_NAME = "{%s}insert" % DIFF_NS
|
|
+DELETE_NAME = "{%s}delete" % DIFF_NS
|
|
+RENAME_NAME = "{%s}rename" % DIFF_NS
|
|
|
|
# Flags for whitespace handling in the text aware formatters:
|
|
WS_BOTH = 3 # Normalize ignorable whitespace and text whitespace
|
|
@@ -34,15 +33,15 @@ T_SINGLE = 2
|
|
# that have narrow builds, we can change this to 0xf00000, which is
|
|
# the start of two 64,000 private use blocks.
|
|
# PY3: Once Python 2.7 support is dropped we should change this to 0xf00000
|
|
-PLACEHOLDER_START = 0xe000
|
|
+PLACEHOLDER_START = 0xE000
|
|
|
|
|
|
# These Bases can be abstract baseclasses, but it's a pain to support
|
|
# Python 2.7 in that case, because there is no abc.ABC. Right now this
|
|
# is just a description of the API.
|
|
|
|
-class BaseFormatter(object):
|
|
|
|
+class BaseFormatter:
|
|
def __init__(self, normalize=WS_TAGS, pretty_print=False):
|
|
"""Formatters must as a minimum have a normalize parameter
|
|
|
|
@@ -75,10 +74,10 @@ class BaseFormatter(object):
|
|
"""
|
|
|
|
|
|
-PlaceholderEntry = namedtuple('PlaceholderEntry', 'element ttype close_ph')
|
|
+PlaceholderEntry = namedtuple("PlaceholderEntry", "element ttype close_ph")
|
|
|
|
|
|
-class PlaceholderMaker(object):
|
|
+class PlaceholderMaker:
|
|
"""Replace tags with unicode placeholders
|
|
|
|
This class searches for certain tags in an XML tree and replaces them
|
|
@@ -98,20 +97,17 @@ class PlaceholderMaker(object):
|
|
self.placeholder = PLACEHOLDER_START
|
|
|
|
insert_elem = etree.Element(INSERT_NAME)
|
|
- insert_close = self.get_placeholder(
|
|
- insert_elem, T_CLOSE, None)
|
|
- insert_open = self.get_placeholder(
|
|
- insert_elem, T_OPEN, insert_close)
|
|
+ insert_close = self.get_placeholder(insert_elem, T_CLOSE, None)
|
|
+ insert_open = self.get_placeholder(insert_elem, T_OPEN, insert_close)
|
|
|
|
delete_elem = etree.Element(DELETE_NAME)
|
|
- delete_close = self.get_placeholder(
|
|
- delete_elem, T_CLOSE, None)
|
|
- delete_open = self.get_placeholder(
|
|
- delete_elem, T_OPEN, delete_close)
|
|
+ delete_close = self.get_placeholder(delete_elem, T_CLOSE, None)
|
|
+ delete_open = self.get_placeholder(delete_elem, T_OPEN, delete_close)
|
|
|
|
self.diff_tags = {
|
|
- 'insert': (insert_open, insert_close),
|
|
- 'delete': (delete_open, delete_close)}
|
|
+ "insert": (insert_open, insert_close),
|
|
+ "delete": (delete_open, delete_close),
|
|
+ }
|
|
|
|
def get_placeholder(self, element, ttype, close_ph):
|
|
tag = etree.tounicode(element)
|
|
@@ -120,7 +116,7 @@ class PlaceholderMaker(object):
|
|
return ph
|
|
|
|
self.placeholder += 1
|
|
- ph = six.unichr(self.placeholder)
|
|
+ ph = chr(self.placeholder)
|
|
self.placeholder2tag[ph] = PlaceholderEntry(element, ttype, close_ph)
|
|
self.tag2placeholder[tag, ttype, close_ph] = ph
|
|
return ph
|
|
@@ -135,17 +131,17 @@ class PlaceholderMaker(object):
|
|
for child in element:
|
|
# Resolve all formatting text by allowing the inside text to
|
|
# participate in the text diffing.
|
|
- tail = child.tail or u''
|
|
- child.tail = u''
|
|
- new_text = element.text or u''
|
|
+ tail = child.tail or ""
|
|
+ child.tail = ""
|
|
+ new_text = element.text or ""
|
|
|
|
if self.is_formatting(child):
|
|
ph_close = self.get_placeholder(child, T_CLOSE, None)
|
|
ph_open = self.get_placeholder(child, T_OPEN, ph_close)
|
|
# If it's known text formatting tags, do this hierarchically
|
|
self.do_element(child)
|
|
- text = child.text or u''
|
|
- child.text = u''
|
|
+ text = child.text or ""
|
|
+ child.text = ""
|
|
# Stick the placeholder in instead of the start and end tags:
|
|
element.text = new_text + ph_open + text + ph_close + tail
|
|
else:
|
|
@@ -159,15 +155,15 @@ class PlaceholderMaker(object):
|
|
|
|
def do_tree(self, tree):
|
|
if self.text_tags:
|
|
- for elem in tree.xpath('//'+'|//'.join(self.text_tags)):
|
|
+ for elem in tree.xpath("//" + "|//".join(self.text_tags)):
|
|
self.do_element(elem)
|
|
|
|
def split_string(self, text):
|
|
- regexp = u'([%s])' % u''.join(self.placeholder2tag)
|
|
+ regexp = "([%s])" % "".join(self.placeholder2tag)
|
|
return re.split(regexp, text, flags=re.MULTILINE)
|
|
|
|
def undo_string(self, text):
|
|
- result = etree.Element('wrap')
|
|
+ result = etree.Element("wrap")
|
|
element = None
|
|
|
|
segments = self.split_string(text)
|
|
@@ -184,7 +180,7 @@ class PlaceholderMaker(object):
|
|
if entry.ttype == T_OPEN:
|
|
# Yup
|
|
next_seg = segments.pop(0)
|
|
- new_text = u''
|
|
+ new_text = ""
|
|
while next_seg != entry.close_ph:
|
|
new_text += next_seg
|
|
next_seg = segments.pop(0)
|
|
@@ -195,9 +191,9 @@ class PlaceholderMaker(object):
|
|
result.append(element)
|
|
else:
|
|
if element is not None:
|
|
- element.tail = element.tail or u'' + seg
|
|
+ element.tail = element.tail or "" + seg
|
|
else:
|
|
- result.text = result.text or u'' + seg
|
|
+ result.text = result.text or "" + seg
|
|
|
|
return result
|
|
|
|
@@ -244,8 +240,8 @@ class PlaceholderMaker(object):
|
|
elem = deepcopy(elem)
|
|
if self.is_formatting(elem):
|
|
# Formatting element, add a diff attribute
|
|
- action += '-formatting'
|
|
- elem.attrib['{%s}%s' % (DIFF_NS, action)] = ''
|
|
+ action += "-formatting"
|
|
+ elem.attrib[f"{{{DIFF_NS}}}{action}"] = ""
|
|
|
|
# And make a new placeholder for this new entry:
|
|
return self.get_placeholder(elem, entry.ttype, entry.close_ph)
|
|
@@ -301,15 +297,17 @@ class XMLFormatter(BaseFormatter):
|
|
all whitespace.
|
|
"""
|
|
|
|
- def __init__(self, normalize=WS_NONE, pretty_print=True,
|
|
- text_tags=(), formatting_tags=()):
|
|
+ def __init__(
|
|
+ self, normalize=WS_NONE, pretty_print=True, text_tags=(), formatting_tags=()
|
|
+ ):
|
|
# Mapping from placeholders -> structural content and vice versa.
|
|
self.normalize = normalize
|
|
self.pretty_print = pretty_print
|
|
self.text_tags = text_tags
|
|
self.formatting_tags = formatting_tags
|
|
self.placeholderer = PlaceholderMaker(
|
|
- text_tags=text_tags, formatting_tags=formatting_tags)
|
|
+ text_tags=text_tags, formatting_tags=formatting_tags
|
|
+ )
|
|
|
|
def prepare(self, left_tree, right_tree):
|
|
"""prepare() is run on the trees before diffing
|
|
@@ -352,11 +350,11 @@ class XMLFormatter(BaseFormatter):
|
|
|
|
def handle_action(self, action, result):
|
|
action_type = type(action)
|
|
- method = getattr(self, '_handle_' + action_type.__name__)
|
|
+ method = getattr(self, "_handle_" + action_type.__name__)
|
|
method(action, result)
|
|
|
|
def _remove_comments(self, tree):
|
|
- comments = tree.xpath('//comment()')
|
|
+ comments = tree.xpath("//comment()")
|
|
|
|
for element in comments:
|
|
parent = element.getparent()
|
|
@@ -371,20 +369,20 @@ class XMLFormatter(BaseFormatter):
|
|
# one and exactly one element is found. This is to protect against
|
|
# formatting a diff on the wrong tree, or against using ambiguous
|
|
# edit script xpaths.
|
|
- if xpath[0] == '/':
|
|
+ if xpath[0] == "/":
|
|
root = True
|
|
xpath = xpath[1:]
|
|
else:
|
|
root = False
|
|
|
|
- if '/' in xpath:
|
|
- path, rest = xpath.split('/', 1)
|
|
+ if "/" in xpath:
|
|
+ path, rest = xpath.split("/", 1)
|
|
else:
|
|
path = xpath
|
|
- rest = ''
|
|
+ rest = ""
|
|
|
|
- if '[' in path:
|
|
- path, index = path[:-1].split('[')
|
|
+ if "[" in path:
|
|
+ path, index = path[:-1].split("[")
|
|
index = int(index) - 1
|
|
multiple = False
|
|
else:
|
|
@@ -392,7 +390,7 @@ class XMLFormatter(BaseFormatter):
|
|
multiple = True
|
|
|
|
if root:
|
|
- path = '/' + path
|
|
+ path = "/" + path
|
|
|
|
matches = []
|
|
for match in node.xpath(path, namespaces=node.nsmap):
|
|
@@ -401,33 +399,39 @@ class XMLFormatter(BaseFormatter):
|
|
matches.append(match)
|
|
|
|
if index >= len(matches):
|
|
- raise ValueError('xpath %s[%s] not found at %s.' % (
|
|
- path, index + 1, utils.getpath(node)))
|
|
+ raise ValueError(
|
|
+ "xpath {}[{}] not found at {}.".format(
|
|
+ path, index + 1, utils.getpath(node)
|
|
+ )
|
|
+ )
|
|
if len(matches) > 1 and multiple:
|
|
- raise ValueError('Multiple nodes found for xpath %s at %s.' % (
|
|
- path, utils.getpath(node)))
|
|
+ raise ValueError(
|
|
+ "Multiple nodes found for xpath {} at {}.".format(
|
|
+ path, utils.getpath(node)
|
|
+ )
|
|
+ )
|
|
match = matches[index]
|
|
if rest:
|
|
return self._xpath(match, rest)
|
|
return match
|
|
|
|
def _extend_diff_attr(self, node, action, value):
|
|
- diffattr = '{%s}%s-attr' % (DIFF_NS, action)
|
|
- oldvalue = node.attrib.get(diffattr, '')
|
|
+ diffattr = f"{{{DIFF_NS}}}{action}-attr"
|
|
+ oldvalue = node.attrib.get(diffattr, "")
|
|
if oldvalue:
|
|
- value = oldvalue + ';' + value
|
|
+ value = oldvalue + ";" + value
|
|
node.attrib[diffattr] = value
|
|
|
|
def _delete_attrib(self, node, name):
|
|
del node.attrib[name]
|
|
- self._extend_diff_attr(node, 'delete', name)
|
|
+ self._extend_diff_attr(node, "delete", name)
|
|
|
|
def _handle_DeleteAttrib(self, action, tree):
|
|
node = self._xpath(tree, action.node)
|
|
self._delete_attrib(node, action.name)
|
|
|
|
def _delete_node(self, node):
|
|
- node.attrib[DELETE_NAME] = ''
|
|
+ node.attrib[DELETE_NAME] = ""
|
|
|
|
def _handle_DeleteNode(self, action, tree):
|
|
node = self._xpath(tree, action.node)
|
|
@@ -435,14 +439,14 @@ class XMLFormatter(BaseFormatter):
|
|
|
|
def _insert_attrib(self, node, name, value):
|
|
node.attrib[name] = value
|
|
- self._extend_diff_attr(node, 'add', name)
|
|
+ self._extend_diff_attr(node, "add", name)
|
|
|
|
def _handle_InsertAttrib(self, action, tree):
|
|
node = self._xpath(tree, action.node)
|
|
self._insert_attrib(node, action.name, action.value)
|
|
|
|
def _insert_node(self, target, node, position):
|
|
- node.attrib[INSERT_NAME] = ''
|
|
+ node.attrib[INSERT_NAME] = ""
|
|
target.insert(position, node)
|
|
|
|
def _get_real_insert_position(self, target, position):
|
|
@@ -472,7 +476,7 @@ class XMLFormatter(BaseFormatter):
|
|
def _rename_attrib(self, node, oldname, newname):
|
|
node.attrib[newname] = node.attrib[oldname]
|
|
del node.attrib[oldname]
|
|
- self._extend_diff_attr(node, 'rename', '%s:%s' % (oldname, newname))
|
|
+ self._extend_diff_attr(node, "rename", f"{oldname}:{newname}")
|
|
|
|
def _handle_RenameAttrib(self, action, tree):
|
|
node = self._xpath(tree, action.node)
|
|
@@ -494,7 +498,7 @@ class XMLFormatter(BaseFormatter):
|
|
def _update_attrib(self, node, name, value):
|
|
oldval = node.attrib[name]
|
|
node.attrib[name] = value
|
|
- self._extend_diff_attr(node, 'update', '%s:%s' % (name, oldval))
|
|
+ self._extend_diff_attr(node, "update", f"{name}:{oldval}")
|
|
|
|
def _handle_UpdateAttrib(self, action, tree):
|
|
node = self._xpath(tree, action.node)
|
|
@@ -540,10 +544,7 @@ class XMLFormatter(BaseFormatter):
|
|
# needs to be closed before the requested node closure can
|
|
# happen.
|
|
stack_op, stack_entry = _stack_pop()
|
|
- while (
|
|
- stack_entry is not None and
|
|
- stack_entry.close_ph != seg
|
|
- ):
|
|
+ while stack_entry is not None and stack_entry.close_ph != seg:
|
|
new_diff.append((stack_op, stack_entry.close_ph))
|
|
stack_op, stack_entry = _stack_pop()
|
|
|
|
@@ -564,11 +565,11 @@ class XMLFormatter(BaseFormatter):
|
|
|
|
def _make_diff_tags(self, left_value, right_value, node, target=None):
|
|
if bool(self.normalize & WS_TEXT):
|
|
- left_value = utils.cleanup_whitespace(left_value or u'').strip()
|
|
- right_value = utils.cleanup_whitespace(right_value or u'').strip()
|
|
+ left_value = utils.cleanup_whitespace(left_value or "").strip()
|
|
+ right_value = utils.cleanup_whitespace(right_value or "").strip()
|
|
|
|
text_diff = diff_match_patch()
|
|
- diff = text_diff.diff_main(left_value or '', right_value or '')
|
|
+ diff = text_diff.diff_main(left_value or "", right_value or "")
|
|
text_diff.diff_cleanupSemantic(diff)
|
|
|
|
diff = self._realign_placeholders(diff)
|
|
@@ -582,29 +583,29 @@ class XMLFormatter(BaseFormatter):
|
|
for op, text in diff:
|
|
if op == 0:
|
|
if cur_child is None:
|
|
- node.text = (node.text or u'') + text
|
|
+ node.text = (node.text or "") + text
|
|
else:
|
|
- cur_child.tail = (cur_child.tail or u'') + text
|
|
+ cur_child.tail = (cur_child.tail or "") + text
|
|
continue
|
|
|
|
if op == -1:
|
|
- action = 'delete'
|
|
+ action = "delete"
|
|
elif op == 1:
|
|
- action = 'insert'
|
|
+ action = "insert"
|
|
|
|
if self.placeholderer.is_placeholder(text):
|
|
ph = self.placeholderer.mark_diff(text, action)
|
|
|
|
if cur_child is None:
|
|
- node.text = (node.text or u'') + ph
|
|
+ node.text = (node.text or "") + ph
|
|
|
|
else:
|
|
new_text = self.placeholderer.wrap_diff(text, action)
|
|
|
|
if cur_child is None:
|
|
- node.text = (node.text or u'') + new_text
|
|
+ node.text = (node.text or "") + new_text
|
|
else:
|
|
- cur_child.tail = (cur_child.tail or u'') + new_text
|
|
+ cur_child.tail = (cur_child.tail or "") + new_text
|
|
|
|
def _handle_UpdateTextIn(self, action, tree):
|
|
node = self._xpath(tree, action.node)
|
|
@@ -635,66 +636,70 @@ class XMLFormatter(BaseFormatter):
|
|
|
|
|
|
class DiffFormatter(BaseFormatter):
|
|
-
|
|
def __init__(self, normalize=WS_TAGS, pretty_print=False):
|
|
self.normalize = normalize
|
|
# No pretty print support, nothing to be pretty about
|
|
|
|
# Nothing to prepare or finalize (one-liners for code coverage)
|
|
- def prepare(self, left, right): return
|
|
+ def prepare(self, left, right):
|
|
+ return
|
|
|
|
- def finalize(self, left, right): return
|
|
+ def finalize(self, left, right):
|
|
+ return
|
|
|
|
def format(self, diff, orig_tree):
|
|
# This Formatter don't need the left tree, but the XMLFormatter
|
|
# does, so the parameter is required.
|
|
- res = u'\n'.join(self._format_action(action) for action in diff)
|
|
+ res = "\n".join(self._format_action(action) for action in diff)
|
|
return res
|
|
|
|
- def _format_action(self, action, ):
|
|
- return u'[%s]' % self.handle_action(action)
|
|
+ def _format_action(
|
|
+ self, action,
|
|
+ ):
|
|
+ return "[%s]" % self.handle_action(action)
|
|
|
|
def handle_action(self, action):
|
|
action_type = type(action)
|
|
- method = getattr(self, '_handle_' + action_type.__name__)
|
|
- return u', '.join(method(action))
|
|
+ method = getattr(self, "_handle_" + action_type.__name__)
|
|
+ return ", ".join(method(action))
|
|
|
|
def _handle_DeleteAttrib(self, action):
|
|
- return u"delete-attribute", action.node, action.name
|
|
+ return "delete-attribute", action.node, action.name
|
|
|
|
def _handle_DeleteNode(self, action):
|
|
- return u"delete", action.node
|
|
+ return "delete", action.node
|
|
|
|
def _handle_InsertAttrib(self, action):
|
|
- return (u"insert-attribute", action.node, action.name,
|
|
- json.dumps(action.value))
|
|
+ return ("insert-attribute", action.node, action.name, json.dumps(action.value))
|
|
|
|
def _handle_InsertNode(self, action):
|
|
- return u"insert", action.target, action.tag, str(action.position)
|
|
+ return "insert", action.target, action.tag, str(action.position)
|
|
|
|
def _handle_RenameAttrib(self, action):
|
|
- return (u"rename-attribute", action.node, action.oldname,
|
|
- action.newname)
|
|
+ return ("rename-attribute", action.node, action.oldname, action.newname)
|
|
|
|
def _handle_MoveNode(self, action):
|
|
- return u"move", action.node, action.target, str(action.position)
|
|
+ return "move", action.node, action.target, str(action.position)
|
|
|
|
def _handle_UpdateAttrib(self, action):
|
|
- return (u"update-attribute", action.node, action.name,
|
|
- json.dumps(action.value))
|
|
+ return ("update-attribute", action.node, action.name, json.dumps(action.value))
|
|
|
|
def _handle_UpdateTextIn(self, action):
|
|
- return u"update-text", action.node, json.dumps(action.text)
|
|
+ return "update-text", action.node, json.dumps(action.text)
|
|
|
|
def _handle_UpdateTextAfter(self, action):
|
|
- return u"update-text-after", action.node, json.dumps(action.text)
|
|
+ return "update-text-after", action.node, json.dumps(action.text)
|
|
|
|
def _handle_RenameNode(self, action):
|
|
- return u"rename", action.node, action.tag
|
|
+ return "rename", action.node, action.tag
|
|
|
|
def _handle_InsertComment(self, action):
|
|
- return (u"insert-comment", action.target, str(action.position),
|
|
- json.dumps(action.text))
|
|
+ return (
|
|
+ "insert-comment",
|
|
+ action.target,
|
|
+ str(action.position),
|
|
+ json.dumps(action.text),
|
|
+ )
|
|
|
|
|
|
class XmlDiffFormatter(BaseFormatter):
|
|
@@ -705,9 +710,11 @@ class XmlDiffFormatter(BaseFormatter):
|
|
# No pretty print support, nothing to be pretty about
|
|
|
|
# Nothing to prepare or finalize (one-liners for code coverage)
|
|
- def prepare(self, left, right): return
|
|
+ def prepare(self, left, right):
|
|
+ return
|
|
|
|
- def finalize(self, left, right): return
|
|
+ def finalize(self, left, right):
|
|
+ return
|
|
|
|
def format(self, diff, orig_tree):
|
|
# This Formatter don't need the left tree, but the XMLFormatter
|
|
@@ -715,45 +722,44 @@ class XmlDiffFormatter(BaseFormatter):
|
|
actions = []
|
|
for action in diff:
|
|
actions.extend(self.handle_action(action, orig_tree))
|
|
- res = u'\n'.join(self._format_action(action) for action in actions)
|
|
+ res = "\n".join(self._format_action(action) for action in actions)
|
|
return res
|
|
|
|
def _format_action(self, action):
|
|
- return u'[%s]' % ', '.join(action)
|
|
+ return "[%s]" % ", ".join(action)
|
|
|
|
def handle_action(self, action, orig_tree):
|
|
action_type = type(action)
|
|
- method = getattr(self, '_handle_' + action_type.__name__)
|
|
- for item in method(action, orig_tree):
|
|
- yield item
|
|
+ method = getattr(self, "_handle_" + action_type.__name__)
|
|
+ yield from method(action, orig_tree)
|
|
|
|
def _handle_DeleteAttrib(self, action, orig_tree):
|
|
- yield u"remove", '%s/@%s' % (action.node, action.name)
|
|
+ yield "remove", f"{action.node}/@{action.name}"
|
|
|
|
def _handle_DeleteNode(self, action, orig_tree):
|
|
- yield u"remove", action.node
|
|
+ yield "remove", action.node
|
|
|
|
def _handle_InsertAttrib(self, action, orig_tree):
|
|
value_text = "\n<@{0}>\n{1}\n</@{0}>".format(action.name, action.value)
|
|
- yield u"insert", action.node, value_text
|
|
+ yield "insert", action.node, value_text
|
|
|
|
def _handle_InsertNode(self, action, orig_tree):
|
|
if action.position == 0:
|
|
- yield u"insert-first", action.target, '\n<%s/>' % action.tag
|
|
+ yield "insert-first", action.target, "\n<%s/>" % action.tag
|
|
return
|
|
sibling = orig_tree.xpath(action.target)[0][action.position - 1]
|
|
- yield u"insert-after", utils.getpath(sibling), '\n<%s/>' % action.tag
|
|
+ yield "insert-after", utils.getpath(sibling), "\n<%s/>" % action.tag
|
|
|
|
def _handle_RenameAttrib(self, action, orig_tree):
|
|
node = orig_tree.xpath(action.node)[0]
|
|
value = node.attrib[action.oldname]
|
|
value_text = "\n<@{0}>\n{1}\n</@{0}>".format(action.newname, value)
|
|
- yield u"remove", '%s/@%s' % (action.node, action.oldname)
|
|
- yield u"insert", action.node, value_text
|
|
+ yield "remove", f"{action.node}/@{action.oldname}"
|
|
+ yield "insert", action.node, value_text
|
|
|
|
def _handle_MoveNode(self, action, orig_tree):
|
|
if action.position == 0:
|
|
- yield u"move-first", action.node, action.target
|
|
+ yield "move-first", action.node, action.target
|
|
return
|
|
node = orig_tree.xpath(action.node)[0]
|
|
target = orig_tree.xpath(action.target)[0]
|
|
@@ -766,21 +772,23 @@ class XmlDiffFormatter(BaseFormatter):
|
|
position += 1
|
|
|
|
sibling = target[position]
|
|
- yield u"move-after", action.node, utils.getpath(sibling)
|
|
+ yield "move-after", action.node, utils.getpath(sibling)
|
|
|
|
def _handle_UpdateAttrib(self, action, orig_tree):
|
|
- yield (u"update", '%s/@%s' % (action.node, action.name),
|
|
- json.dumps(action.value))
|
|
+ yield (
|
|
+ "update",
|
|
+ f"{action.node}/@{action.name}",
|
|
+ json.dumps(action.value),
|
|
+ )
|
|
|
|
def _handle_UpdateTextIn(self, action, orig_tree):
|
|
- yield u"update", action.node + '/text()[1]', json.dumps(action.text)
|
|
+ yield "update", action.node + "/text()[1]", json.dumps(action.text)
|
|
|
|
def _handle_UpdateTextAfter(self, action, orig_tree):
|
|
- yield u"update", action.node + '/text()[2]', json.dumps(action.text)
|
|
+ yield "update", action.node + "/text()[2]", json.dumps(action.text)
|
|
|
|
def _handle_RenameNode(self, action, orig_tree):
|
|
- yield u"rename", action.node, action.tag
|
|
+ yield "rename", action.node, action.tag
|
|
|
|
def _handle_InsertComment(self, action, orig_tree):
|
|
- yield (u"insert-comment", action.target, str(action.position),
|
|
- action.text)
|
|
+ yield ("insert-comment", action.target, str(action.position), action.text)
|
|
Index: xmldiff-2.4/xmldiff/main.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/xmldiff/main.py
|
|
+++ xmldiff-2.4/xmldiff/main.py
|
|
@@ -1,6 +1,5 @@
|
|
"""All major API points and command-line tools"""
|
|
import pkg_resources
|
|
-import six
|
|
|
|
from argparse import ArgumentParser, FileType
|
|
from lxml import etree
|
|
@@ -9,9 +8,9 @@ from xmldiff import diff, formatting, pa
|
|
__version__ = pkg_resources.require("xmldiff")[0].version
|
|
|
|
FORMATTERS = {
|
|
- 'diff': formatting.DiffFormatter,
|
|
- 'xml': formatting.XMLFormatter,
|
|
- 'old': formatting.XmlDiffFormatter,
|
|
+ "diff": formatting.DiffFormatter,
|
|
+ "xml": formatting.XMLFormatter,
|
|
+ "old": formatting.XmlDiffFormatter,
|
|
}
|
|
|
|
|
|
@@ -31,59 +30,89 @@ def diff_trees(left, right, diff_options
|
|
|
|
|
|
def _diff(parse_method, left, right, diff_options=None, formatter=None):
|
|
- normalize = bool(getattr(formatter, 'normalize', 1) & formatting.WS_TAGS)
|
|
+ normalize = bool(getattr(formatter, "normalize", 1) & formatting.WS_TAGS)
|
|
parser = etree.XMLParser(remove_blank_text=normalize)
|
|
left_tree = parse_method(left, parser)
|
|
right_tree = parse_method(right, parser)
|
|
- return diff_trees(left_tree, right_tree, diff_options=diff_options,
|
|
- formatter=formatter)
|
|
+ return diff_trees(
|
|
+ left_tree, right_tree, diff_options=diff_options, formatter=formatter
|
|
+ )
|
|
|
|
|
|
def diff_texts(left, right, diff_options=None, formatter=None):
|
|
"""Takes two Unicode strings containing XML"""
|
|
- return _diff(etree.fromstring, left, right,
|
|
- diff_options=diff_options, formatter=formatter)
|
|
+ return _diff(
|
|
+ etree.fromstring, left, right, diff_options=diff_options, formatter=formatter
|
|
+ )
|
|
|
|
|
|
def diff_files(left, right, diff_options=None, formatter=None):
|
|
"""Takes two filenames or streams, and diffs the XML in those files"""
|
|
- return _diff(etree.parse, left, right,
|
|
- diff_options=diff_options, formatter=formatter)
|
|
+ return _diff(
|
|
+ etree.parse, left, right, diff_options=diff_options, formatter=formatter
|
|
+ )
|
|
|
|
|
|
def make_diff_parser():
|
|
- parser = ArgumentParser(description='Create a diff for two XML files.',
|
|
- add_help=False)
|
|
- parser.add_argument('file1', type=FileType('r'),
|
|
- help='The first input file.')
|
|
- parser.add_argument('file2', type=FileType('r'),
|
|
- help='The second input file.')
|
|
- parser.add_argument('-h', '--help', action='help',
|
|
- help='Show this help message and exit.')
|
|
- parser.add_argument('-v', '--version', action='version',
|
|
- help='Display version and exit.',
|
|
- version='xmldiff %s' % __version__)
|
|
- parser.add_argument('-f', '--formatter', default='diff',
|
|
- choices=list(FORMATTERS.keys()),
|
|
- help='Formatter selection.')
|
|
- parser.add_argument('-w', '--keep-whitespace', action='store_true',
|
|
- help='Do not strip ignorable whitespace.')
|
|
- parser.add_argument('-p', '--pretty-print', action='store_true',
|
|
- help='Try to make XML output more readable.')
|
|
- parser.add_argument('-F', type=float,
|
|
- help='A value between 0 and 1 that determines how '
|
|
- 'similar nodes must be to match.')
|
|
- parser.add_argument('--unique-attributes', type=str, nargs='?',
|
|
- default='{http://www.w3.org/XML/1998/namespace}id',
|
|
- help='A comma separated list of attributes '
|
|
- 'that uniquely identify a node. Can be empty. '
|
|
- 'Unique attributes for certain elements can '
|
|
- 'be specified in the format {NS}element@attr.')
|
|
- parser.add_argument('--ratio-mode', default='fast',
|
|
- choices={'accurate', 'fast', 'faster'},
|
|
- help='Choose the node comparison optimization.')
|
|
- parser.add_argument('--fast-match', action='store_true',
|
|
- help='A faster, less optimal match run.')
|
|
+ parser = ArgumentParser(
|
|
+ description="Create a diff for two XML files.", add_help=False
|
|
+ )
|
|
+ parser.add_argument("file1", type=FileType("r"), help="The first input file.")
|
|
+ parser.add_argument("file2", type=FileType("r"), help="The second input file.")
|
|
+ parser.add_argument(
|
|
+ "-h", "--help", action="help", help="Show this help message and exit."
|
|
+ )
|
|
+ parser.add_argument(
|
|
+ "-v",
|
|
+ "--version",
|
|
+ action="version",
|
|
+ help="Display version and exit.",
|
|
+ version="xmldiff %s" % __version__,
|
|
+ )
|
|
+ parser.add_argument(
|
|
+ "-f",
|
|
+ "--formatter",
|
|
+ default="diff",
|
|
+ choices=list(FORMATTERS.keys()),
|
|
+ help="Formatter selection.",
|
|
+ )
|
|
+ parser.add_argument(
|
|
+ "-w",
|
|
+ "--keep-whitespace",
|
|
+ action="store_true",
|
|
+ help="Do not strip ignorable whitespace.",
|
|
+ )
|
|
+ parser.add_argument(
|
|
+ "-p",
|
|
+ "--pretty-print",
|
|
+ action="store_true",
|
|
+ help="Try to make XML output more readable.",
|
|
+ )
|
|
+ parser.add_argument(
|
|
+ "-F",
|
|
+ type=float,
|
|
+ help="A value between 0 and 1 that determines how "
|
|
+ "similar nodes must be to match.",
|
|
+ )
|
|
+ parser.add_argument(
|
|
+ "--unique-attributes",
|
|
+ type=str,
|
|
+ nargs="?",
|
|
+ default="{http://www.w3.org/XML/1998/namespace}id",
|
|
+ help="A comma separated list of attributes "
|
|
+ "that uniquely identify a node. Can be empty. "
|
|
+ "Unique attributes for certain elements can "
|
|
+ "be specified in the format {NS}element@attr.",
|
|
+ )
|
|
+ parser.add_argument(
|
|
+ "--ratio-mode",
|
|
+ default="fast",
|
|
+ choices={"accurate", "fast", "faster"},
|
|
+ help="Choose the node comparison optimization.",
|
|
+ )
|
|
+ parser.add_argument(
|
|
+ "--fast-match", action="store_true", help="A faster, less optimal match run."
|
|
+ )
|
|
return parser
|
|
|
|
|
|
@@ -91,8 +120,8 @@ def _parse_uniqueattrs(uniqueattrs):
|
|
if uniqueattrs is None:
|
|
return []
|
|
return [
|
|
- attr if '@' not in attr else attr.split('@', 1)
|
|
- for attr in uniqueattrs.split(',')
|
|
+ attr if "@" not in attr else attr.split("@", 1)
|
|
+ for attr in uniqueattrs.split(",")
|
|
]
|
|
|
|
|
|
@@ -105,16 +134,19 @@ def diff_command(args=None):
|
|
else:
|
|
normalize = formatting.WS_BOTH
|
|
|
|
- formatter = FORMATTERS[args.formatter](normalize=normalize,
|
|
- pretty_print=args.pretty_print)
|
|
-
|
|
- diff_options = {'ratio_mode': args.ratio_mode,
|
|
- 'F': args.F,
|
|
- 'fast_match': args.fast_match,
|
|
- 'uniqueattrs': _parse_uniqueattrs(args.unique_attributes),
|
|
- }
|
|
- result = diff_files(args.file1, args.file2, diff_options=diff_options,
|
|
- formatter=formatter)
|
|
+ formatter = FORMATTERS[args.formatter](
|
|
+ normalize=normalize, pretty_print=args.pretty_print
|
|
+ )
|
|
+
|
|
+ diff_options = {
|
|
+ "ratio_mode": args.ratio_mode,
|
|
+ "F": args.F,
|
|
+ "fast_match": args.fast_match,
|
|
+ "uniqueattrs": _parse_uniqueattrs(args.unique_attributes),
|
|
+ }
|
|
+ result = diff_files(
|
|
+ args.file1, args.file2, diff_options=diff_options, formatter=formatter
|
|
+ )
|
|
print(result)
|
|
|
|
|
|
@@ -136,7 +168,7 @@ def patch_file(actions, tree):
|
|
"""Takes two filenames or streams, one with XML the other a diff"""
|
|
tree = etree.parse(tree)
|
|
|
|
- if isinstance(actions, six.string_types):
|
|
+ if isinstance(actions, str):
|
|
# It's a string, so it's a filename
|
|
with open(actions) as f:
|
|
actions = f.read()
|
|
@@ -150,17 +182,21 @@ def patch_file(actions, tree):
|
|
|
|
|
|
def make_patch_parser():
|
|
- parser = ArgumentParser(description='Patch an XML file with an xmldiff',
|
|
- add_help=False)
|
|
- parser.add_argument('patchfile', type=FileType('r'),
|
|
- help='An xmldiff diff file.')
|
|
- parser.add_argument('xmlfile', type=FileType('r'),
|
|
- help='An unpatched XML file.')
|
|
- parser.add_argument('-h', '--help', action='help',
|
|
- help='Show this help message and exit.')
|
|
- parser.add_argument('-v', '--version', action='version',
|
|
- help='Display version and exit.',
|
|
- version='xmldiff %s' % __version__)
|
|
+ parser = ArgumentParser(
|
|
+ description="Patch an XML file with an xmldiff", add_help=False
|
|
+ )
|
|
+ parser.add_argument("patchfile", type=FileType("r"), help="An xmldiff diff file.")
|
|
+ parser.add_argument("xmlfile", type=FileType("r"), help="An unpatched XML file.")
|
|
+ parser.add_argument(
|
|
+ "-h", "--help", action="help", help="Show this help message and exit."
|
|
+ )
|
|
+ parser.add_argument(
|
|
+ "-v",
|
|
+ "--version",
|
|
+ action="version",
|
|
+ help="Display version and exit.",
|
|
+ version="xmldiff %s" % __version__,
|
|
+ )
|
|
return parser
|
|
|
|
|
|
Index: xmldiff-2.4/xmldiff/patch.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/xmldiff/patch.py
|
|
+++ xmldiff-2.4/xmldiff/patch.py
|
|
@@ -5,8 +5,7 @@ from lxml import etree
|
|
from xmldiff import actions
|
|
|
|
|
|
-class Patcher(object):
|
|
-
|
|
+class Patcher:
|
|
def patch(self, actions, tree):
|
|
# Copy the tree so we don't modify the original
|
|
result = deepcopy(tree)
|
|
@@ -18,7 +17,7 @@ class Patcher(object):
|
|
|
|
def handle_action(self, action, tree):
|
|
action_type = type(action)
|
|
- method = getattr(self, '_handle_' + action_type.__name__)
|
|
+ method = getattr(self, "_handle_" + action_type.__name__)
|
|
method(action, tree)
|
|
|
|
def _handle_DeleteNode(self, action, tree):
|
|
@@ -72,25 +71,25 @@ class Patcher(object):
|
|
target.insert(action.position, etree.Comment(action.text))
|
|
|
|
|
|
-class DiffParser(object):
|
|
+class DiffParser:
|
|
"""Makes a text diff into a list of actions"""
|
|
|
|
def parse(self, diff):
|
|
- incomplete = ''
|
|
+ incomplete = ""
|
|
|
|
for line in diff.splitlines():
|
|
line = incomplete + line
|
|
|
|
- if line[0] != '[':
|
|
+ if line[0] != "[":
|
|
# All actions should start with "["
|
|
raise ValueError("Unknown diff format")
|
|
- if line[-1] != ']':
|
|
+ if line[-1] != "]":
|
|
# This line has been broken into several lines
|
|
incomplete = line
|
|
continue
|
|
|
|
# OK, we found an action
|
|
- incomplete = ''
|
|
+ incomplete = ""
|
|
yield self.make_action(line)
|
|
|
|
if incomplete:
|
|
@@ -105,7 +104,7 @@ class DiffParser(object):
|
|
action = parts[0]
|
|
params = parts[1:]
|
|
# Get the method, and return the result of calling it
|
|
- method = getattr(self, '_handle_' + action.replace('-', '_'))
|
|
+ method = getattr(self, "_handle_" + action.replace("-", "_"))
|
|
return method(*params)
|
|
|
|
def _handle_delete(self, node):
|
|
Index: xmldiff-2.4/xmldiff/utils.py
|
|
===================================================================
|
|
--- xmldiff-2.4.orig/xmldiff/utils.py
|
|
+++ xmldiff-2.4/xmldiff/utils.py
|
|
@@ -1,5 +1,3 @@
|
|
-from __future__ import division
|
|
-
|
|
import re
|
|
|
|
from operator import eq
|
|
@@ -8,16 +6,14 @@ from operator import eq
|
|
def post_order_traverse(node):
|
|
for child in node.getchildren():
|
|
# PY3: Man, I want yield from!
|
|
- for item in post_order_traverse(child):
|
|
- yield item
|
|
+ yield from post_order_traverse(child)
|
|
yield node
|
|
|
|
|
|
def reverse_post_order_traverse(node):
|
|
for child in reversed(node.getchildren()):
|
|
# PY3: Man, I want yield from!
|
|
- for item in reverse_post_order_traverse(child):
|
|
- yield item
|
|
+ yield from reverse_post_order_traverse(child)
|
|
yield node
|
|
|
|
|
|
@@ -43,13 +39,19 @@ def longest_common_subsequence(left_sequ
|
|
rend = rslen = len(right_sequence)
|
|
|
|
# Trim off the matching items at the beginning
|
|
- while (start < lend and start < rend and
|
|
- eqfn(left_sequence[start], right_sequence[start])):
|
|
+ while (
|
|
+ start < lend
|
|
+ and start < rend
|
|
+ and eqfn(left_sequence[start], right_sequence[start])
|
|
+ ):
|
|
start += 1
|
|
|
|
# trim off the matching items at the end
|
|
- while (start < lend and start < rend and
|
|
- eqfn(left_sequence[lend - 1], right_sequence[rend - 1])):
|
|
+ while (
|
|
+ start < lend
|
|
+ and start < rend
|
|
+ and eqfn(left_sequence[lend - 1], right_sequence[rend - 1])
|
|
+ ):
|
|
lend -= 1
|
|
rend -= 1
|
|
|
|
@@ -67,8 +69,7 @@ def longest_common_subsequence(left_sequ
|
|
|
|
for d in range(0, lmax + rmax + 1):
|
|
for k in range(-d, d + 1, 2):
|
|
- if (k == -d or
|
|
- (k != d and furthest[k - 1][0] < furthest[k + 1][0])):
|
|
+ if k == -d or (k != d and furthest[k - 1][0] < furthest[k + 1][0]):
|
|
# Go down
|
|
old_x, history = furthest[k + 1]
|
|
x = old_x
|
|
@@ -89,47 +90,51 @@ def longest_common_subsequence(left_sequ
|
|
|
|
if x >= lmax and y >= rmax:
|
|
# This is the best match
|
|
- return [(e, e) for e in range(start)] + history + \
|
|
- list(zip(range(lend, lslen), range(rend, rslen)))
|
|
+ return (
|
|
+ [(e, e) for e in range(start)]
|
|
+ + history
|
|
+ + list(zip(range(lend, lslen), range(rend, rslen)))
|
|
+ )
|
|
else:
|
|
furthest[k] = (x, history)
|
|
|
|
|
|
-WHITESPACE = re.compile(u'\\s+', flags=re.MULTILINE)
|
|
+WHITESPACE = re.compile("\\s+", flags=re.MULTILINE)
|
|
|
|
|
|
def cleanup_whitespace(text):
|
|
- return WHITESPACE.sub(' ', text)
|
|
+ return WHITESPACE.sub(" ", text)
|
|
|
|
|
|
def getpath(element, tree=None):
|
|
if tree is None:
|
|
tree = element.getroottree()
|
|
xpath = tree.getpath(element)
|
|
- if xpath[-1] != ']':
|
|
+ if xpath[-1] != "]":
|
|
# The path is unique without specifying a count. However, we always
|
|
# want that count, so we add [1].
|
|
- xpath = xpath + '[1]'
|
|
+ xpath = xpath + "[1]"
|
|
return xpath
|
|
|
|
|
|
# The remainder of the functions here are helpful when debugging.
|
|
# They aren't documented, nor very well tested.
|
|
-def _make_ascii_tree(element, indent=''):
|
|
+def _make_ascii_tree(element, indent=""):
|
|
from xmldiff.formatting import DIFF_NS # Avoid circular imports
|
|
- diffns = '{%s}' % DIFF_NS
|
|
+
|
|
+ diffns = "{%s}" % DIFF_NS
|
|
if element.prefix:
|
|
- name = '%s:%s' % (element.prefix, element.tag.split('}')[1])
|
|
+ name = "{}:{}".format(element.prefix, element.tag.split("}")[1])
|
|
else:
|
|
name = element.tag
|
|
diff_attrs = [attr for attr in element.attrib if attr.startswith(diffns)]
|
|
if diff_attrs:
|
|
- diff = '(%s)' % ', '.join(attr.split('}')[1] for attr in diff_attrs)
|
|
+ diff = "(%s)" % ", ".join(attr.split("}")[1] for attr in diff_attrs)
|
|
else:
|
|
- diff = ''
|
|
+ diff = ""
|
|
|
|
- result = [' '.join((indent, name, diff))]
|
|
- indent = ' ' + indent
|
|
+ result = [" ".join((indent, name, diff))]
|
|
+ indent = " " + indent
|
|
|
|
for child in element.getchildren():
|
|
result.extend(_make_ascii_tree(child, indent))
|
|
@@ -138,4 +143,4 @@ def _make_ascii_tree(element, indent='')
|
|
|
|
def make_ascii_tree(element):
|
|
result = _make_ascii_tree(element)
|
|
- return '\n'.join(result)
|
|
+ return "\n".join(result)
|