From 4474f3da5f6e25e16a5bdfe6c9a7bd511ee2a4887b4067ff4f69d571812d2e8c Mon Sep 17 00:00:00 2001 From: Steve Kowalik Date: Tue, 5 Mar 2024 04:35:32 +0000 Subject: [PATCH] - Update to 2.6.3: * Solved an error in the xmlformatter when using default namespaces. #89 * #108: Fixed an error that happens if using namespaces like ns0 or ns1. * Added `InsertNamespace` and `DeleteNamespace` actions for better handling of changing namespaces. Should improve any "Unknown namespace prefix" errors. Changing the URI of a namespace prefix is not supported, and will raise an error. * Used geometric mean for the node_ratio, for better handling of simple nodes. * Added an experimental --best-match method that is slower, but generates smaller diffs when you have many nodes that are similar. * The -F argument now also affects the --fast-match stage. * Make it possible to adjust the attributes considered when comparing nodes. * Python versions 3.7 to 3.11 are now supported. * Improved node matching method, that puts more emphasis on similarities than differences when weighing attributes vs children. * Added a parameter to return error code 1 when there are differences between the files * Added a parameter for ignoring attributes in comparison. * Solved a bug in xmlpatch in certain namespace situations. * Added a --diff-encoding parameter to xmlpatch, to support diff-files that are not in your system default encoding. - Switch to autosetup and pyproject macros. - No more greedy globs in %files. - Drop python-xmldiff-src-upgrades.patch, now included upstream. - Drop python-xmldiff-no-six-remains.patch, no longer required. 
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-xmldiff?expand=0&rev=19 --- python-xmldiff-no-six-remains.patch | 43 - python-xmldiff-src-upgrades.patch | 11543 -------------------------- python-xmldiff.changes | 29 + python-xmldiff.spec | 25 +- xmldiff-2.4.tar.gz | 3 - xmldiff-2.6.3.tar.gz | 3 + 6 files changed, 42 insertions(+), 11604 deletions(-) delete mode 100644 python-xmldiff-no-six-remains.patch delete mode 100644 python-xmldiff-src-upgrades.patch delete mode 100644 xmldiff-2.4.tar.gz create mode 100644 xmldiff-2.6.3.tar.gz diff --git a/python-xmldiff-no-six-remains.patch b/python-xmldiff-no-six-remains.patch deleted file mode 100644 index a63092f..0000000 --- a/python-xmldiff-no-six-remains.patch +++ /dev/null @@ -1,43 +0,0 @@ -Index: xmldiff-2.4/tests/test_main.py -=================================================================== ---- xmldiff-2.4.orig/tests/test_main.py -+++ xmldiff-2.4/tests/test_main.py -@@ -1,5 +1,5 @@ -+import io - import os --import six - import sys - import unittest - -@@ -71,8 +71,8 @@ class MainAPITests(unittest.TestCase): - - class MainCLITests(unittest.TestCase): - def call_run(self, args, command=main.diff_command): -- output = six.StringIO() -- errors = six.StringIO() -+ output = io.StringIO() -+ errors = io.StringIO() - - stdout = sys.stdout - stderr = sys.stderr -Index: xmldiff-2.4/xmldiff.egg-info/requires.txt -=================================================================== ---- xmldiff-2.4.orig/xmldiff.egg-info/requires.txt -+++ xmldiff-2.4/xmldiff.egg-info/requires.txt -@@ -1,3 +1,2 @@ - setuptools - lxml>=3.1.0 --six -Index: xmldiff-2.4/setup.py -=================================================================== ---- xmldiff-2.4.orig/setup.py -+++ xmldiff-2.4/setup.py -@@ -33,7 +33,7 @@ setup( - packages=find_packages(exclude=["doc", "tests"]), - include_package_data=True, - zip_safe=False, -- install_requires=["setuptools", "lxml>=3.1.0", "six",], -+ install_requires=["setuptools", 
"lxml>=3.1.0",], - test_suite="tests", - entry_points={ - "console_scripts": [ diff --git a/python-xmldiff-src-upgrades.patch b/python-xmldiff-src-upgrades.patch deleted file mode 100644 index 40dba47..0000000 --- a/python-xmldiff-src-upgrades.patch +++ /dev/null @@ -1,11543 +0,0 @@ -From 34b810f4394965aadeca31204e6b76eb023fd11a Mon Sep 17 00:00:00 2001 -From: Jeremy Lavergne -Date: Tue, 4 Aug 2020 08:32:14 -0400 -Subject: [PATCH] src upgrades (#64) - -* black - -* pyupgrade - -* py3+ - -* py3.6+ - -* black - -* update python version deps - -* travis: drop py < 3.6, use bionic - -* flake fixes - -* more flake8 fixups - -* fixup black command - -* try skipping black on pypy3 - -* travis: try figuring out build type by env ---- - .travis.yml | 14 +- - Makefile | 5 +- - docs/source/conf.py | 158 +- - docs/source/contributing.rst | 6 +- - setup.py | 75 +- - tests/test_diff.py | 913 +++++---- - tests/test_formatting.py | 394 ++-- - tests/test_main.py | 83 +- - tests/test_patch.py | 169 +- - tests/test_utils.py | 116 +- - tests/testing.py | 20 +- - xmldiff/_diff_match_patch_py2.py | 3105 ++++++++++++++++-------------- - xmldiff/_diff_match_patch_py3.py | 3080 +++++++++++++++-------------- - xmldiff/actions.py | 22 +- - xmldiff/diff.py | 98 +- - xmldiff/diff_match_patch.py | 1 + - xmldiff/formatting.py | 246 +-- - xmldiff/main.py | 170 +- - xmldiff/patch.py | 17 +- - xmldiff/utils.py | 57 +- - 20 files changed, 4506 insertions(+), 4243 deletions(-) - -Index: xmldiff-2.4/.travis.yml -=================================================================== ---- xmldiff-2.4.orig/.travis.yml -+++ xmldiff-2.4/.travis.yml -@@ -4,17 +4,21 @@ language: python - matrix: - fast_finish: true - include: -- - python: 2.7 -- - python: 3.5 - - python: 3.6 -+ env: MATRIX=py36 - - python: 3.7 - sudo: required -- dist: xenial -- - python: pypy -+ dist: bionic -+ env: MATRIX=py37 - - python: pypy3 -+ env: MATRIX=pypy3 -+ -+before_install: -+ - if [ $MATRIX != pypy3 ]; then pip install black; fi 
-+ - pip install coverage coveralls flake8 sphinx sphinx-argparse - - install: -- - pip install . coverage coveralls flake8 sphinx sphinx-argparse -+ - pip install . - - script: - - make flake -Index: xmldiff-2.4/Makefile -=================================================================== ---- xmldiff-2.4.orig/Makefile -+++ xmldiff-2.4/Makefile -@@ -5,7 +5,10 @@ dfm_source_3 := "https://raw.githubuserc - all: coverage flake - - flake: -- flake8 tests xmldiff --exclude *diff_match_patch*.py -+ifneq (, $(shell which black)) -+ black --check . -+endif -+ flake8 tests xmldiff --exclude *diff_match_patch*.py --ignore=E231,E501,W503 - - coverage: - coverage run setup.py test -Index: xmldiff-2.4/docs/source/conf.py -=================================================================== ---- xmldiff-2.4.orig/docs/source/conf.py -+++ xmldiff-2.4/docs/source/conf.py -@@ -1,4 +1,3 @@ --# -*- coding: utf-8 -*- - # - # xmldiff documentation build configuration file, created by - # sphinx-quickstart on Tue Sep 4 12:07:12 2018. -@@ -18,49 +17,49 @@ import os - # If extensions (or modules to document with autodoc) are in another directory, - # add these directories to sys.path here. If the directory is relative to the - # documentation root, use os.path.abspath to make it absolute, like shown here. --#sys.path.insert(0, os.path.abspath('.')) -+# sys.path.insert(0, os.path.abspath('.')) - - # -- General configuration ------------------------------------------------ - - # If your documentation needs a minimal Sphinx version, state it here. --#needs_sphinx = '1.0' -+# needs_sphinx = '1.0' - - # Add any Sphinx extension module names here, as strings. They can be - # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom - # ones. - extensions = [ -- 'sphinx.ext.doctest', -- 'sphinx.ext.coverage', -- 'sphinxarg.ext', -+ "sphinx.ext.doctest", -+ "sphinx.ext.coverage", -+ "sphinxarg.ext", - ] - - # Add any paths that contain templates here, relative to this directory. 
--templates_path = ['_templates'] -+templates_path = ["_templates"] - - # The suffix(es) of source filenames. - # You can specify multiple suffix as a list of string: - # source_suffix = ['.rst', '.md'] --source_suffix = '.rst' -+source_suffix = ".rst" - - # The encoding of source files. --#source_encoding = 'utf-8-sig' -+# source_encoding = 'utf-8-sig' - - # The master toctree document. --master_doc = 'index' -+master_doc = "index" - - # General information about the project. --project = u'xmldiff' --copyright = u'2018, Lennart Regebro' --author = u'Lennart Regebro' -+project = "xmldiff" -+copyright = "2018, Lennart Regebro" -+author = "Lennart Regebro" - - # The version info for the project you're documenting, acts as replacement for - # |version| and |release|, also used in various other places throughout the - # built documents. - # - # The short X.Y version. --#version = u'2.0' -+# version = u'2.0' - # The full version, including alpha/beta/rc tags. --#release = u'2.0' -+# release = u'2.0' - - # The language for content autogenerated by Sphinx. Refer to documentation - # for a list of supported languages. -@@ -71,9 +70,9 @@ language = None - - # There are two options for replacing |today|: either, you set today to some - # non-false value, then it is used: --#today = '' -+# today = '' - # Else, today_fmt is used as the format for a strftime call. --#today_fmt = '%B %d, %Y' -+# today_fmt = '%B %d, %Y' - - # List of patterns, relative to source directory, that match files and - # directories to ignore when looking for source files. -@@ -81,27 +80,27 @@ exclude_patterns = [] - - # The reST default role (used for this markup: `text`) to use for all - # documents. --#default_role = None -+# default_role = None - - # If true, '()' will be appended to :func: etc. cross-reference text. --#add_function_parentheses = True -+# add_function_parentheses = True - - # If true, the current module name will be prepended to all description - # unit titles (such as .. 
function::). --#add_module_names = True -+# add_module_names = True - - # If true, sectionauthor and moduleauthor directives will be shown in the - # output. They are ignored by default. --#show_authors = False -+# show_authors = False - - # The name of the Pygments (syntax highlighting) style to use. --pygments_style = 'sphinx' -+pygments_style = "sphinx" - - # A list of ignored prefixes for module index sorting. --#modindex_common_prefix = [] -+# modindex_common_prefix = [] - - # If true, keep warnings as "system message" paragraphs in the built documents. --#keep_warnings = False -+# keep_warnings = False - - # If true, `todo` and `todoList` produce output, else they produce nothing. - todo_include_todos = False -@@ -111,156 +110,149 @@ todo_include_todos = False - - # The theme to use for HTML and HTML Help pages. See the documentation for - # a list of builtin themes. --html_theme = 'alabaster' -+html_theme = "alabaster" - - # Theme options are theme-specific and customize the look and feel of a theme - # further. For a list of options available for each theme, see the - # documentation. --#html_theme_options = {} -+# html_theme_options = {} - - # Add any paths that contain custom themes here, relative to this directory. --#html_theme_path = [] -+# html_theme_path = [] - - # The name for this set of Sphinx documents. If None, it defaults to - # " v documentation". --#html_title = None -+# html_title = None - - # A shorter title for the navigation bar. Default is the same as html_title. --#html_short_title = None -+# html_short_title = None - - # The name of an image file (relative to this directory) to place at the top - # of the sidebar. --#html_logo = None -+# html_logo = None - - # The name of an image file (relative to this directory) to use as a favicon of - # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 - # pixels large. 
--#html_favicon = None -+# html_favicon = None - - # Add any paths that contain custom static files (such as style sheets) here, - # relative to this directory. They are copied after the builtin static files, - # so a file named "default.css" will overwrite the builtin "default.css". --html_static_path = ['static'] -+html_static_path = ["static"] - - # Add any extra paths that contain custom files (such as robots.txt or - # .htaccess) here, relative to this directory. These files are copied - # directly to the root of the documentation. --#html_extra_path = [] -+# html_extra_path = [] - - # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, - # using the given strftime format. --#html_last_updated_fmt = '%b %d, %Y' -+# html_last_updated_fmt = '%b %d, %Y' - - # If true, SmartyPants will be used to convert quotes and dashes to - # typographically correct entities. --#html_use_smartypants = True -+# html_use_smartypants = True - - # Custom sidebar templates, maps document names to template names. --#html_sidebars = {} -+# html_sidebars = {} - - # Additional templates that should be rendered to pages, maps page names to - # template names. --#html_additional_pages = {} -+# html_additional_pages = {} - - # If false, no module index is generated. --#html_domain_indices = True -+# html_domain_indices = True - - # If false, no index is generated. --#html_use_index = True -+# html_use_index = True - - # If true, the index is split into individual pages for each letter. --#html_split_index = False -+# html_split_index = False - - # If true, links to the reST sources are added to the pages. --#html_show_sourcelink = True -+# html_show_sourcelink = True - - # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. --#html_show_sphinx = True -+# html_show_sphinx = True - - # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
--#html_show_copyright = True -+# html_show_copyright = True - - # If true, an OpenSearch description file will be output, and all pages will - # contain a tag referring to it. The value of this option must be the - # base URL from which the finished HTML is served. --#html_use_opensearch = '' -+# html_use_opensearch = '' - - # This is the file name suffix for HTML files (e.g. ".xhtml"). --#html_file_suffix = None -+# html_file_suffix = None - - # Language to be used for generating the HTML full-text search index. - # Sphinx supports the following languages: - # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' - # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' --#html_search_language = 'en' -+# html_search_language = 'en' - - # A dictionary with options for the search language support, empty by default. - # Now only 'ja' uses this config value --#html_search_options = {'type': 'default'} -+# html_search_options = {'type': 'default'} - - # The name of a javascript file (relative to the configuration directory) that - # implements a search results scorer. If empty, the default will be used. --#html_search_scorer = 'scorer.js' -+# html_search_scorer = 'scorer.js' - - # Output file base name for HTML help builder. --htmlhelp_basename = 'xmldiffdoc' -+htmlhelp_basename = "xmldiffdoc" - - # -- Options for LaTeX output --------------------------------------------- - - latex_elements = { --# The paper size ('letterpaper' or 'a4paper'). --#'papersize': 'letterpaper', -- --# The font size ('10pt', '11pt' or '12pt'). --#'pointsize': '10pt', -- --# Additional stuff for the LaTeX preamble. --#'preamble': '', -- --# Latex figure (float) alignment --#'figure_align': 'htbp', -+ # The paper size ('letterpaper' or 'a4paper'). -+ #'papersize': 'letterpaper', -+ # The font size ('10pt', '11pt' or '12pt'). -+ #'pointsize': '10pt', -+ # Additional stuff for the LaTeX preamble. 
-+ #'preamble': '', -+ # Latex figure (float) alignment -+ #'figure_align': 'htbp', - } - - # Grouping the document tree into LaTeX files. List of tuples - # (source start file, target name, title, - # author, documentclass [howto, manual, or own class]). - latex_documents = [ -- (master_doc, 'xmldiff.tex', u'xmldiff Documentation', -- u'Lennart Regebro', 'manual'), -+ (master_doc, "xmldiff.tex", "xmldiff Documentation", "Lennart Regebro", "manual"), - ] - - # The name of an image file (relative to this directory) to place at the top of - # the title page. --#latex_logo = None -+# latex_logo = None - - # For "manual" documents, if this is true, then toplevel headings are parts, - # not chapters. --#latex_use_parts = False -+# latex_use_parts = False - - # If true, show page references after internal links. --#latex_show_pagerefs = False -+# latex_show_pagerefs = False - - # If true, show URL addresses after external links. --#latex_show_urls = False -+# latex_show_urls = False - - # Documents to append as an appendix to all manuals. --#latex_appendices = [] -+# latex_appendices = [] - - # If false, no module index is generated. --#latex_domain_indices = True -+# latex_domain_indices = True - - - # -- Options for manual page output --------------------------------------- - - # One entry per manual page. List of tuples - # (source start file, name, description, authors, manual section). --man_pages = [ -- (master_doc, 'xmldiff', u'xmldiff Documentation', -- [author], 1) --] -+man_pages = [(master_doc, "xmldiff", "xmldiff Documentation", [author], 1)] - - # If true, show URL addresses after external links. 
--#man_show_urls = False -+# man_show_urls = False - - - # -- Options for Texinfo output ------------------------------------------- -@@ -269,19 +261,25 @@ man_pages = [ - # (source start file, target name, title, author, - # dir menu entry, description, category) - texinfo_documents = [ -- (master_doc, 'xmldiff', u'xmldiff Documentation', -- author, 'xmldiff', 'One line description of project.', -- 'Miscellaneous'), -+ ( -+ master_doc, -+ "xmldiff", -+ "xmldiff Documentation", -+ author, -+ "xmldiff", -+ "One line description of project.", -+ "Miscellaneous", -+ ), - ] - - # Documents to append as an appendix to all manuals. --#texinfo_appendices = [] -+# texinfo_appendices = [] - - # If false, no module index is generated. --#texinfo_domain_indices = True -+# texinfo_domain_indices = True - - # How to display URL addresses: 'footnote', 'no', or 'inline'. --#texinfo_show_urls = 'footnote' -+# texinfo_show_urls = 'footnote' - - # If true, do not generate a @detailmenu in the "Top" node's menu. --#texinfo_no_detailmenu = False -+# texinfo_no_detailmenu = False -Index: xmldiff-2.4/docs/source/contributing.rst -=================================================================== ---- xmldiff-2.4.orig/docs/source/contributing.rst -+++ xmldiff-2.4/docs/source/contributing.rst -@@ -15,9 +15,9 @@ Setting Up a Development Environment - - To set up a development environment you need a github account, git, and - of course Python with pip installed. You also should have the Python tools --``coverage`` and ``flake8`` installed:: -+``black``, ``coverage``, and ``flake8`` installed:: - -- pip install coverage flake8 -+ pip install black coverage flake8 - - Then you need to clone the repository, and install it's dependencies:: - -@@ -44,8 +44,6 @@ The following test runners/commands are - - * ``python setup.py test`` - -- * ``nosetests`` -- - * ``pytest`` - - There is no support for ``tox`` to run test under different Python versions. 
-Index: xmldiff-2.4/setup.py -=================================================================== ---- xmldiff-2.4.orig/setup.py -+++ xmldiff-2.4/setup.py -@@ -1,49 +1,45 @@ --from io import open - from setuptools import setup, find_packages - --version = '2.4' -+version = "2.5.dev0" - --with open('README.rst', 'rt', encoding='utf8') as readme: -+with open("README.rst", encoding="utf8") as readme: - description = readme.read() - --with open('CHANGES.rst', 'rt', encoding='utf8') as changes: -+with open("CHANGES.rst", encoding="utf8") as changes: - history = changes.read() - - --setup(name='xmldiff', -- version=version, -- description="Creates diffs of XML files", -- long_description=description + '\n' + history, -- # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers -- classifiers=['Development Status :: 5 - Production/Stable', -- 'Topic :: Text Processing :: Markup :: XML', -- 'Operating System :: OS Independent', -- 'Programming Language :: Python :: 2', -- 'Programming Language :: Python :: 2.7', -- 'Programming Language :: Python :: 3', -- 'Programming Language :: Python :: 3.5', -- 'Programming Language :: Python :: 3.6', -- 'Programming Language :: Python :: 3.7', -- 'License :: OSI Approved :: MIT License', -- ], -- keywords='xml html diff', -- author='Lennart Regebro', -- author_email='lregebro@shoobx.com', -- url='https://github.com/Shoobx/xmldiff', -- license='MIT', -- packages=find_packages(exclude=['doc', 'tests']), -- include_package_data=True, -- zip_safe=False, -- install_requires=[ -- 'setuptools', -- 'lxml>=3.1.0', -- 'six', -- ], -- test_suite='tests', -- entry_points={ -- 'console_scripts': [ -- 'xmldiff = xmldiff.main:diff_command', -- 'xmlpatch = xmldiff.main:patch_command', -- ], -- }, -+setup( -+ name="xmldiff", -+ version=version, -+ description="Creates diffs of XML files", -+ long_description=description + "\n" + history, -+ # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers -+ classifiers=[ -+ 
"Development Status :: 5 - Production/Stable", -+ "Topic :: Text Processing :: Markup :: XML", -+ "Operating System :: OS Independent", -+ "Programming Language :: Python :: 3", -+ "Programming Language :: Python :: 3.6", -+ "Programming Language :: Python :: 3.7", -+ "Programming Language :: Python :: 3.8", -+ "License :: OSI Approved :: MIT License", -+ ], -+ keywords="xml html diff", -+ author="Lennart Regebro", -+ author_email="lregebro@shoobx.com", -+ url="https://github.com/Shoobx/xmldiff", -+ license="MIT", -+ packages=find_packages(exclude=["doc", "tests"]), -+ include_package_data=True, -+ zip_safe=False, -+ install_requires=["setuptools", "lxml>=3.1.0", "six",], -+ test_suite="tests", -+ entry_points={ -+ "console_scripts": [ -+ "xmldiff = xmldiff.main:diff_command", -+ "xmlpatch = xmldiff.main:patch_command", -+ ], -+ }, - ) -+ -Index: xmldiff-2.4/tests/test_diff.py -=================================================================== ---- xmldiff-2.4.orig/tests/test_diff.py -+++ xmldiff-2.4/tests/test_diff.py -@@ -1,14 +1,22 @@ - import os - import unittest - --from io import open - from lxml import etree - from xmldiff import utils - from xmldiff.diff import Differ --from xmldiff.actions import (UpdateTextIn, InsertNode, MoveNode, -- DeleteNode, UpdateAttrib, InsertAttrib, -- RenameAttrib, DeleteAttrib, UpdateTextAfter, -- RenameNode, InsertComment) -+from xmldiff.actions import ( -+ UpdateTextIn, -+ InsertNode, -+ MoveNode, -+ DeleteNode, -+ UpdateAttrib, -+ InsertAttrib, -+ RenameAttrib, -+ DeleteAttrib, -+ UpdateTextAfter, -+ RenameNode, -+ InsertComment, -+) - - from .testing import compare_elements - -@@ -16,20 +24,13 @@ from .testing import compare_elements - def dedent(string): - """Remove the maximum common indent of the lines making up the string.""" - lines = string.splitlines() -- indent = min( -- len(line) - len(line.lstrip()) -- for line in lines -- if line -- ) -- return "\n".join( -- line[indent:] if line else line -- for line in lines -- 
) -+ indent = min(len(line) - len(line.lstrip()) for line in lines if line) -+ return "\n".join(line[indent:] if line else line for line in lines) - - - class APITests(unittest.TestCase): -- left = u"

Text

More

" -- right = u"

Tokst

More

" -+ left = "

Text

More

" -+ right = "

Tokst

More

" - lefttree = etree.fromstring(left) - righttree = etree.fromstring(right) - differ = Differ() -@@ -107,9 +108,8 @@ class APITests(unittest.TestCase): - - - class NodeRatioTests(unittest.TestCase): -- - def test_compare_equal(self): -- xml = u""" -+ xml = """ - -
- First paragraph -@@ -127,8 +127,10 @@ class NodeRatioTests(unittest.TestCase): - - # Every node in these trees should get a 1.0 leaf_ratio, - # and if it has children, 1.0 child_ration, else None -- for left, right in zip(utils.post_order_traverse(differ.left), -- utils.post_order_traverse(differ.right)): -+ for left, right in zip( -+ utils.post_order_traverse(differ.left), -+ utils.post_order_traverse(differ.right), -+ ): - self.assertEqual(differ.leaf_ratio(left, right), 1.0) - if left.getchildren(): - self.assertEqual(differ.child_ratio(left, right), 1.0) -@@ -136,7 +138,7 @@ class NodeRatioTests(unittest.TestCase): - self.assertIsNone(differ.child_ratio(left, right)) - - def test_compare_different_leafs(self): -- left = u""" -+ left = """ - -
- This doesn't match at all -@@ -151,7 +153,7 @@ class NodeRatioTests(unittest.TestCase): - - """ - -- right = u""" -+ right = """ - -
- Completely different from before -@@ -172,26 +174,24 @@ class NodeRatioTests(unittest.TestCase): - - # Make some choice comparisons here - # These node are exactly the same -- left = lefttree.xpath('/document/story/section[3]/para')[0] -- right = righttree.xpath('/document/story/section[3]/para')[0] -+ left = lefttree.xpath("/document/story/section[3]/para")[0] -+ right = righttree.xpath("/document/story/section[3]/para")[0] - - self.assertEqual(differ.leaf_ratio(left, right), 1.0) - - # These nodes have slightly different text, but no children -- left = lefttree.xpath('/document/story/section[2]/para')[0] -- right = righttree.xpath('/document/story/section[2]/para')[0] -+ left = lefttree.xpath("/document/story/section[2]/para")[0] -+ right = righttree.xpath("/document/story/section[2]/para")[0] - -- self.assertAlmostEqual(differ.leaf_ratio(left, right), -- 0.75) -+ self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.75) - - # These nodes should not be very similar -- left = lefttree.xpath('/document/story/section[1]/para')[0] -- right = righttree.xpath('/document/story/section[1]/para')[0] -- self.assertAlmostEqual(differ.leaf_ratio(left, right), -- 0.45614035087719) -+ left = lefttree.xpath("/document/story/section[1]/para")[0] -+ right = righttree.xpath("/document/story/section[1]/para")[0] -+ self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.45614035087719) - - def test_compare_different_nodes(self): -- left = u""" -+ left = """ - -
- First paragraph -@@ -207,7 +207,7 @@ class NodeRatioTests(unittest.TestCase): - - """ - -- right = u""" -+ right = """ - -
- First paragraph -@@ -230,28 +230,28 @@ class NodeRatioTests(unittest.TestCase): - # Make some choice comparisons here. leaf_ratio will always be 1.0, - # as these leafs have the same attributes and no text, even though - # attributes may be in different order. -- left = differ.left.xpath('/document/story/section[1]')[0] -- right = differ.right.xpath('/document/story/section[1]')[0] -+ left = differ.left.xpath("/document/story/section[1]")[0] -+ right = differ.right.xpath("/document/story/section[1]")[0] - - self.assertEqual(differ.leaf_ratio(left, right), 1.0) - # Only one of two matches: - self.assertEqual(differ.child_ratio(left, right), 0.5) - -- left = differ.left.xpath('/document/story/section[2]')[0] -- right = differ.right.xpath('/document/story/section[2]')[0] -+ left = differ.left.xpath("/document/story/section[2]")[0] -+ right = differ.right.xpath("/document/story/section[2]")[0] - - self.assertEqual(differ.leaf_ratio(left, right), 1.0) - # Only one of two matches: - self.assertEqual(differ.child_ratio(left, right), 0.5) - - # These nodes should not be very similar -- left = differ.left.xpath('/document/story/section[3]')[0] -- right = differ.right.xpath('/document/story/section[3]')[0] -+ left = differ.left.xpath("/document/story/section[3]")[0] -+ right = differ.right.xpath("/document/story/section[3]")[0] - self.assertEqual(differ.leaf_ratio(left, right), 1.0) - self.assertEqual(differ.child_ratio(left, right), 1.0) - - def test_compare_with_xmlid(self): -- left = u""" -+ left = """ - -
- First paragraph -@@ -267,7 +267,7 @@ class NodeRatioTests(unittest.TestCase): - - """ - -- right = u""" -+ right = """ - -
- First paragraph -@@ -289,8 +289,8 @@ class NodeRatioTests(unittest.TestCase): - - # Make some choice comparisons here. - -- left = differ.left.xpath('/document/story/section[1]')[0] -- right = differ.right.xpath('/document/story/section[1]')[0] -+ left = differ.left.xpath("/document/story/section[1]")[0] -+ right = differ.right.xpath("/document/story/section[1]")[0] - - # These are very similar - self.assertEqual(differ.leaf_ratio(left, right), 0.9) -@@ -300,8 +300,8 @@ class NodeRatioTests(unittest.TestCase): - self.assertEqual(differ.node_ratio(left, right), 0) - - # Here's the ones with the same id: -- left = differ.left.xpath('/document/story/section[1]')[0] -- right = differ.right.xpath('/document/story/section[2]')[0] -+ left = differ.left.xpath("/document/story/section[1]")[0] -+ right = differ.right.xpath("/document/story/section[2]")[0] - - # Only one out of two children in common - self.assertEqual(differ.child_ratio(left, right), 0.5) -@@ -310,8 +310,8 @@ class NodeRatioTests(unittest.TestCase): - - # The last ones are completely similar, but only one - # has an xml:id, so they do not match. -- left = differ.left.xpath('/document/story/section[3]')[0] -- right = differ.right.xpath('/document/story/section[3]')[0] -+ left = differ.left.xpath("/document/story/section[3]")[0] -+ right = differ.right.xpath("/document/story/section[3]")[0] - self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.81818181818) - self.assertEqual(differ.child_ratio(left, right), 1.0) - self.assertEqual(differ.node_ratio(left, right), 0) -@@ -319,7 +319,8 @@ class NodeRatioTests(unittest.TestCase): - def test_compare_with_uniqueattrs(self): - # `uniqueattrs` can be pairs of (tag, attribute) as well as just string - # attributes. -- left = dedent(u"""\ -+ left = dedent( -+ """\ - - -
-@@ -334,9 +335,11 @@ class NodeRatioTests(unittest.TestCase): -
-
-
-- """) -+ """ -+ ) - -- right = dedent(u"""\ -+ right = dedent( -+ """\ - - -
-@@ -355,19 +358,22 @@ class NodeRatioTests(unittest.TestCase): - - - -- """) -+ """ -+ ) - -- differ = Differ(uniqueattrs=[ -- ('section', 'name'), -- '{http://www.w3.org/XML/1998/namespace}id' -- ]) -+ differ = Differ( -+ uniqueattrs=[ -+ ("section", "name"), -+ "{http://www.w3.org/XML/1998/namespace}id", -+ ] -+ ) - differ.set_trees(etree.fromstring(left), etree.fromstring(right)) - differ.match() - - # Make some choice comparisons here. - -- left = differ.left.xpath('/document/story/section[1]')[0] -- right = differ.right.xpath('/document/story/section[1]')[0] -+ left = differ.left.xpath("/document/story/section[1]")[0] -+ right = differ.right.xpath("/document/story/section[1]")[0] - - # These are very similar - self.assertEqual(differ.leaf_ratio(left, right), 0.90625) -@@ -377,8 +383,8 @@ class NodeRatioTests(unittest.TestCase): - self.assertEqual(differ.node_ratio(left, right), 0) - - # Here's the ones with the same tag and name attribute: -- left = differ.left.xpath('/document/story/section[1]')[0] -- right = differ.right.xpath('/document/story/section[2]')[0] -+ left = differ.left.xpath("/document/story/section[1]")[0] -+ right = differ.right.xpath("/document/story/section[2]")[0] - - # Only one out of two children in common - self.assertEqual(differ.child_ratio(left, right), 0) -@@ -387,29 +393,29 @@ class NodeRatioTests(unittest.TestCase): - - # The last ones are completely similar, but only one - # has an name, so they do not match. 
-- left = differ.left.xpath('/document/story/section[3]')[0] -- right = differ.right.xpath('/document/story/section[3]')[0] -+ left = differ.left.xpath("/document/story/section[3]")[0] -+ right = differ.right.xpath("/document/story/section[3]")[0] - self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.78260869565) - self.assertEqual(differ.child_ratio(left, right), 1.0) - self.assertEqual(differ.node_ratio(left, right), 0) - - # Now these are structurally similar, have the same name, but - # one of them is not a section, so the uniqueattr does not match -- left = differ.left.xpath('/document/story/section[1]')[0] -- right = differ.right.xpath('/document/story/subsection[1]')[0] -+ left = differ.left.xpath("/document/story/section[1]")[0] -+ right = differ.right.xpath("/document/story/subsection[1]")[0] - self.assertAlmostEqual(differ.leaf_ratio(left, right), 1.0) - self.assertEqual(differ.child_ratio(left, right), 0.5) - self.assertAlmostEqual(differ.node_ratio(left, right), 0.75) - - def test_compare_node_rename(self): -- left = u""" -+ left = """ - First paragraph - Second paragraph - Third paragraph - - """ - -- right = u""" -+ right = """ -
First paragraph
-
Second paragraph
-
A different text
-@@ -421,38 +427,35 @@ class NodeRatioTests(unittest.TestCase): - differ.match() - - # Make some choice comparisons here. -- left = differ.left.xpath('/document/para[1]')[0] -- right = differ.right.xpath('/document/section[1]')[0] -+ left = differ.left.xpath("/document/para[1]")[0] -+ right = differ.right.xpath("/document/section[1]")[0] - - # These have different tags, but should still match -- self.assertEqual(differ.leaf_ratio(left, right), -- 1.0) -+ self.assertEqual(differ.leaf_ratio(left, right), 1.0) - - # These have different tags, and different attribute value, - # but still similar enough -- left = differ.left.xpath('/document/para[2]')[0] -- right = differ.right.xpath('/document/section[2]')[0] -+ left = differ.left.xpath("/document/para[2]")[0] -+ right = differ.right.xpath("/document/section[2]")[0] - - # These have different tags, but should still match -- self.assertAlmostEqual(differ.leaf_ratio(left, right), -- 0.76190476190476) -+ self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.76190476190476) - - # These have different tags, and different attribute value, - # but still similar enough -- left = differ.left.xpath('/document/para[3]')[0] -- right = differ.right.xpath('/document/section[3]')[0] -+ left = differ.left.xpath("/document/para[3]")[0] -+ right = differ.right.xpath("/document/section[3]")[0] - - # These are too different -- self.assertAlmostEqual(differ.leaf_ratio(left, right), -- 0.45161290322580) -+ self.assertAlmostEqual(differ.leaf_ratio(left, right), 0.45161290322580) - - def test_compare_namespaces(self): -- left = u""" -+ left = """ - First paragraph - - """ - -- right = u""" -+ right = """ - First paragraph - - """ -@@ -462,46 +465,46 @@ class NodeRatioTests(unittest.TestCase): - differ.match() - - # Make some choice comparisons here. 
-- left = differ.left.xpath('/document/foo:para[1]', -- namespaces={'foo': 'someuri'})[0] -- right = differ.right.xpath('/document/foo:para[1]', -- namespaces={'foo': 'otheruri'})[0] -+ left = differ.left.xpath( -+ "/document/foo:para[1]", namespaces={"foo": "someuri"} -+ )[0] -+ right = differ.right.xpath( -+ "/document/foo:para[1]", namespaces={"foo": "otheruri"} -+ )[0] - - # These have different namespaces, but should still match -- self.assertEqual(differ.leaf_ratio(left, right), -- 1.0) -+ self.assertEqual(differ.leaf_ratio(left, right), 1.0) - - def test_different_ratio_modes(self): -- node1 = etree.Element('para') -+ node1 = etree.Element("para") - node1.text = "This doesn't match at all" -- node2 = etree.Element('para') -+ node2 = etree.Element("para") - node2.text = "It's completely different" -- node3 = etree.Element('para') -+ node3 = etree.Element("para") - node3.text = "Completely different from before" - - # These texts are very different -- differ = Differ(ratio_mode='accurate') -+ differ = Differ(ratio_mode="accurate") - self.assertAlmostEqual(differ.leaf_ratio(node1, node2), 0.24) - # However, the quick_ratio doesn't catch that, and think they match -- differ = Differ(ratio_mode='fast') -+ differ = Differ(ratio_mode="fast") - self.assertAlmostEqual(differ.leaf_ratio(node1, node2), 0.64) - # It still realizes these sentences are different, though. -- differ = Differ(ratio_mode='fast') -+ differ = Differ(ratio_mode="fast") - self.assertAlmostEqual(differ.leaf_ratio(node1, node3), 0.4561403508) - # Faster thinks the first two are the same! 
-- differ = Differ(ratio_mode='faster') -+ differ = Differ(ratio_mode="faster") - self.assertAlmostEqual(differ.leaf_ratio(node1, node2), 1.0) - # And that the third is almost the same -- differ = Differ(ratio_mode='faster') -+ differ = Differ(ratio_mode="faster") - self.assertAlmostEqual(differ.leaf_ratio(node1, node3), 0.8771929824) - - # Invalid modes raise error: - with self.assertRaises(ValueError): -- differ = Differ(ratio_mode='allezlebleus') -+ differ = Differ(ratio_mode="allezlebleus") - - - class MatchTests(unittest.TestCase): -- - def _match(self, left, right): - left_tree = etree.fromstring(left) - right_tree = etree.fromstring(right) -@@ -513,7 +516,7 @@ class MatchTests(unittest.TestCase): - return [(lpath(item[0]), rpath(item[1])) for item in matches] - - def test_same_tree(self): -- xml = u""" -+ xml = """ - -
- First paragraph -@@ -537,7 +540,7 @@ class MatchTests(unittest.TestCase): - # If the number of similar attributes are few it works fine, the - # differing content of the ref="3" section means it's detected to - # be an insert. -- left = u""" -+ left = """ - -
- First paragraph -@@ -551,7 +554,7 @@ class MatchTests(unittest.TestCase): - - # We even detect that the first section is an insert without - # xmlid, but that's less reliable. -- right = u""" -+ right = """ - -
- New paragraph -@@ -567,27 +570,24 @@ class MatchTests(unittest.TestCase): - """ - - result = self._match(left, right) -- self.assertEqual(result, [ -- ('/document/story/section[1]/para', -- '/document/story/section[2]/para'), -- ('/document/story/section[1]', -- '/document/story/section[2]'), -- ('/document/story/section[2]/para', -- '/document/story/section[3]/para'), -- ('/document/story/section[2]', -- '/document/story/section[3]'), -- ('/document/story', -- '/document/story'), -- ('/document', -- '/document') -- ]) -+ self.assertEqual( -+ result, -+ [ -+ ("/document/story/section[1]/para", "/document/story/section[2]/para"), -+ ("/document/story/section[1]", "/document/story/section[2]"), -+ ("/document/story/section[2]/para", "/document/story/section[3]/para"), -+ ("/document/story/section[2]", "/document/story/section[3]"), -+ ("/document/story", "/document/story"), -+ ("/document", "/document"), -+ ], -+ ) - - def test_with_xmlid(self): - # This first section contains attributes that are similar (and longer - # than the content text. That would trick the matcher into matching - # the oldfirst and the newfirst section to match, except that we - # this time also have xml:id's, and they trump everything else! -- left = u""" -+ left = """ - -
-@@ -605,7 +605,7 @@ class MatchTests(unittest.TestCase): - - # We even detect that the first section is an insert without - # xmlid, but that's less reliable. -- right = u""" -+ right = """ - -
-@@ -625,28 +625,23 @@ class MatchTests(unittest.TestCase): - """ - - result = self._match(left, right) -- self.assertEqual(result, [ -- ('/document/story/section[1]/para', -- '/document/story/section[2]/para'), -- ('/document/story/section[1]', -- '/document/story/section[2]'), -- ('/document/story/section[2]/para', -- '/document/story/section[3]/para'), -- ('/document/story/section[2]', -- '/document/story/section[3]'), -- ('/document/story/section[3]/para', -- '/document/story/section[4]/para'), -- ('/document/story/section[3]', -- '/document/story/section[4]'), -- ('/document/story', -- '/document/story'), -- ('/document', -- '/document') -- ]) -+ self.assertEqual( -+ result, -+ [ -+ ("/document/story/section[1]/para", "/document/story/section[2]/para"), -+ ("/document/story/section[1]", "/document/story/section[2]"), -+ ("/document/story/section[2]/para", "/document/story/section[3]/para"), -+ ("/document/story/section[2]", "/document/story/section[3]"), -+ ("/document/story/section[3]/para", "/document/story/section[4]/para"), -+ ("/document/story/section[3]", "/document/story/section[4]"), -+ ("/document/story", "/document/story"), -+ ("/document", "/document"), -+ ], -+ ) - - def test_change_attribs(self): - -- left = u""" -+ left = """ - -
- First -@@ -658,7 +653,7 @@ class MatchTests(unittest.TestCase): - - """ - -- right = u""" -+ right = """ - -
- First -@@ -672,23 +667,20 @@ class MatchTests(unittest.TestCase): - # It matches everything straight, which means the attrib changes - # should become updates, which makes sense. - result = self._match(left, right) -- self.assertEqual(result, [ -- ('/document/story/section[1]/para', -- '/document/story/section[1]/para'), -- ('/document/story/section[1]', -- '/document/story/section[1]'), -- ('/document/story/section[2]/para', -- '/document/story/section[2]/para'), -- ('/document/story/section[2]', -- '/document/story/section[2]'), -- ('/document/story', -- '/document/story'), -- ('/document', -- '/document') -- ]) -+ self.assertEqual( -+ result, -+ [ -+ ("/document/story/section[1]/para", "/document/story/section[1]/para"), -+ ("/document/story/section[1]", "/document/story/section[1]"), -+ ("/document/story/section[2]/para", "/document/story/section[2]/para"), -+ ("/document/story/section[2]", "/document/story/section[2]"), -+ ("/document/story", "/document/story"), -+ ("/document", "/document"), -+ ], -+ ) - - def test_move_paragraph(self): -- left = u""" -+ left = """ - -
- First paragraph -@@ -701,7 +693,7 @@ class MatchTests(unittest.TestCase): - - """ - -- right = u""" -+ right = """ - -
- First paragraph -@@ -714,18 +706,27 @@ class MatchTests(unittest.TestCase): - - """ - result = self._match(left, right) -- self.assertEqual(result, [ -- ('/document/story/section[1]/para[1]', -- '/document/story/section[1]/para'), -- ('/document/story/section[1]/para[2]', -- '/document/story/section[2]/para[1]'), -- ('/document/story/section[1]', '/document/story/section[1]'), -- ('/document/story/section[2]/para', -- '/document/story/section[2]/para[2]'), -- ('/document/story/section[2]', '/document/story/section[2]'), -- ('/document/story', '/document/story'), -- ('/document', '/document') -- ]) -+ self.assertEqual( -+ result, -+ [ -+ ( -+ "/document/story/section[1]/para[1]", -+ "/document/story/section[1]/para", -+ ), -+ ( -+ "/document/story/section[1]/para[2]", -+ "/document/story/section[2]/para[1]", -+ ), -+ ("/document/story/section[1]", "/document/story/section[1]"), -+ ( -+ "/document/story/section[2]/para", -+ "/document/story/section[2]/para[2]", -+ ), -+ ("/document/story/section[2]", "/document/story/section[2]"), -+ ("/document/story", "/document/story"), -+ ("/document", "/document"), -+ ], -+ ) - - def test_match_complex_text(self): - left = """ -@@ -769,50 +770,50 @@ class MatchTests(unittest.TestCase): - """ - - result = self._match(left, right) -- self.assertEqual(result, [ -- ('/wrap/para/b', '/wrap/para/b'), -- ('/wrap/para', '/wrap/para'), -- ('/wrap', '/wrap') -- ]) -+ self.assertEqual( -+ result, -+ [ -+ ("/wrap/para/b", "/wrap/para/b"), -+ ("/wrap/para", "/wrap/para"), -+ ("/wrap", "/wrap"), -+ ], -+ ) - - def test_match_insert_node(self): -- left = u''' -+ left = """ - - - - --''' -- right = u''' -+""" -+ right = """ - - -

Inserted Node

- -
--
''' -+
""" - result = self._match(left, right) -- self.assertEqual(result, [ -- ('/document/story', '/document/story'), -- ('/document', '/document'), -- ]) -+ self.assertEqual( -+ result, -+ [("/document/story", "/document/story"), ("/document", "/document"),], -+ ) - - def test_entirely_different(self): -- left = u''' -+ left = """ - - - - --''' -- right = u''' -+""" -+ right = """ -

Inserted Node

--
''' -+
""" - result = self._match(left, right) -- self.assertEqual(result, [ -- ('/document', '/document'), -- ]) -+ self.assertEqual(result, [("/document", "/document"),]) - - - class FastMatchTests(unittest.TestCase): -- - def _match(self, left, right, fast_match): - left_tree = etree.fromstring(left) - right_tree = etree.fromstring(right) -@@ -824,7 +825,7 @@ class FastMatchTests(unittest.TestCase): - return [(lpath(item[0]), rpath(item[1])) for item in matches] - - def test_move_paragraph(self): -- left = u""" -+ left = """ - -
- First paragraph -@@ -837,7 +838,7 @@ class FastMatchTests(unittest.TestCase): - - """ - -- right = u""" -+ right = """ - -
- First paragraph -@@ -858,7 +859,7 @@ class FastMatchTests(unittest.TestCase): - def test_move_children(self): - # Here the paragraphs are all so similar that that each paragraph - # will match any other. -- left = u""" -+ left = """ - -
- First paragraph -@@ -869,7 +870,7 @@ class FastMatchTests(unittest.TestCase): - - """ - -- right = u""" -+ right = """ - -
- Second paragraph -@@ -882,31 +883,31 @@ class FastMatchTests(unittest.TestCase): - # The slow match will match the nodes that match *best*, so it will - # find that paragraphs have moved around. - slow_result = sorted(self._match(left, right, False)) -- self.assertEqual(slow_result, [ -- ('/document', '/document'), -- ('/document/story', '/document/story'), -- ('/document/story/section', '/document/story/section'), -- ('/document/story/section/para[1]', -- '/document/story/section/para[3]'), -- ('/document/story/section/para[2]', -- '/document/story/section/para[1]'), -- ('/document/story/section/para[3]', -- '/document/story/section/para[2]') -- ]) -+ self.assertEqual( -+ slow_result, -+ [ -+ ("/document", "/document"), -+ ("/document/story", "/document/story"), -+ ("/document/story/section", "/document/story/section"), -+ ("/document/story/section/para[1]", "/document/story/section/para[3]"), -+ ("/document/story/section/para[2]", "/document/story/section/para[1]"), -+ ("/document/story/section/para[3]", "/document/story/section/para[2]"), -+ ], -+ ) - - # But the fast match will just pick any that matches. 
- fast_result = sorted(self._match(left, right, True)) -- self.assertEqual(fast_result, [ -- ('/document', '/document'), -- ('/document/story', '/document/story'), -- ('/document/story/section', '/document/story/section'), -- ('/document/story/section/para[1]', -- '/document/story/section/para[1]'), -- ('/document/story/section/para[2]', -- '/document/story/section/para[2]'), -- ('/document/story/section/para[3]', -- '/document/story/section/para[3]') -- ]) -+ self.assertEqual( -+ fast_result, -+ [ -+ ("/document", "/document"), -+ ("/document/story", "/document/story"), -+ ("/document/story/section", "/document/story/section"), -+ ("/document/story/section/para[1]", "/document/story/section/para[1]"), -+ ("/document/story/section/para[2]", "/document/story/section/para[2]"), -+ ("/document/story/section/para[3]", "/document/story/section/para[3]"), -+ ], -+ ) - - - class UpdateNodeTests(unittest.TestCase): -@@ -926,7 +927,7 @@ class UpdateNodeTests(unittest.TestCase) - return steps - - def test_same_tree(self): -- xml = u""" -+ xml = """ - -
- First paragraph -@@ -942,24 +943,28 @@ class UpdateNodeTests(unittest.TestCase) - self.assertEqual(result, []) - - def test_attribute_changes(self): -- left = u"""The contained textAnd a tail!""" -+ left = ( -+ """The contained textAnd a tail!""" -+ ) - -- right = u"""The new textAlso a tail!""" -+ right = ( -+ """The new textAlso a tail!""" -+ ) - - result = self._match(left, right) - - self.assertEqual( - result, - [ -- UpdateAttrib('/root/node[1]', 'attr2', 'uhhuh'), -- RenameAttrib('/root/node[1]', 'attr1', 'attr4'), -- InsertAttrib('/root/node[1]', 'attr5', 'new'), -- DeleteAttrib('/root/node[1]', 'attr0'), -- UpdateTextIn('/root/node[1]', 'The new text'), -- UpdateTextAfter('/root/node[1]', 'Also a tail!'), -- ] -+ UpdateAttrib("/root/node[1]", "attr2", "uhhuh"), -+ RenameAttrib("/root/node[1]", "attr1", "attr4"), -+ InsertAttrib("/root/node[1]", "attr5", "new"), -+ DeleteAttrib("/root/node[1]", "attr0"), -+ UpdateTextIn("/root/node[1]", "The new text"), -+ UpdateTextAfter("/root/node[1]", "Also a tail!"), -+ ], - ) - - -@@ -978,7 +983,7 @@ class AlignChildrenTests(unittest.TestCa - return steps - - def test_same_tree(self): -- xml = u""" -+ xml = """ - -
- First paragraph -@@ -994,7 +999,7 @@ class AlignChildrenTests(unittest.TestCa - self.assertEqual(result, []) - - def test_move_paragraph(self): -- left = u""" -+ left = """ - -
- First paragraph -@@ -1007,7 +1012,7 @@ class AlignChildrenTests(unittest.TestCa - - """ - -- right = u""" -+ right = """ - -
- First paragraph -@@ -1024,7 +1029,7 @@ class AlignChildrenTests(unittest.TestCa - self.assertEqual(result, []) - - def test_move_children(self): -- left = u""" -+ left = """ - -
- First paragraph -@@ -1035,7 +1040,7 @@ class AlignChildrenTests(unittest.TestCa - - """ - -- right = u""" -+ right = """ - -
- Second paragraph -@@ -1046,9 +1051,14 @@ class AlignChildrenTests(unittest.TestCa - - """ - result = self._align(left, right) -- self.assertEqual(result, -- [MoveNode('/document/story/section/para[1]', -- '/document/story/section[1]', 2)]) -+ self.assertEqual( -+ result, -+ [ -+ MoveNode( -+ "/document/story/section/para[1]", "/document/story/section[1]", 2 -+ ) -+ ], -+ ) - - - class DiffTests(unittest.TestCase): -@@ -1065,7 +1075,7 @@ class DiffTests(unittest.TestCase): - return editscript - - def test_process(self): -- left = u""" -+ left = """ - -
- First paragraph -@@ -1079,7 +1089,7 @@ class DiffTests(unittest.TestCase): - - """ - -- right = u""" -+ right = """ - -
- First paragraph -@@ -1096,17 +1106,19 @@ class DiffTests(unittest.TestCase): - self.assertEqual( - result, - [ -- InsertNode('/document/story[1]', 'section', 1), -- InsertAttrib('/document/story/section[2]', 'ref', '4'), -- InsertAttrib('/document/story/section[2]', 'single-ref', '4'), -- MoveNode('/document/story/section[1]/para[3]', -- '/document/story/section[2]', 0), -- InsertNode('/document/story/section[2]', 'para', 1), -- UpdateTextIn('/document/story/section[2]/para[2]', -- 'Fourth paragraph'), -- DeleteNode('/document/story/deleteme/para[1]'), -- DeleteNode('/document/story/deleteme[1]'), -- ] -+ InsertNode("/document/story[1]", "section", 1), -+ InsertAttrib("/document/story/section[2]", "ref", "4"), -+ InsertAttrib("/document/story/section[2]", "single-ref", "4"), -+ MoveNode( -+ "/document/story/section[1]/para[3]", -+ "/document/story/section[2]", -+ 0, -+ ), -+ InsertNode("/document/story/section[2]", "para", 1), -+ UpdateTextIn("/document/story/section[2]/para[2]", "Fourth paragraph"), -+ DeleteNode("/document/story/deleteme/para[1]"), -+ DeleteNode("/document/story/deleteme[1]"), -+ ], - ) - - def test_needs_align(self): -@@ -1116,34 +1128,36 @@ class DiffTests(unittest.TestCase): - self.assertEqual( - result, - [ -- MoveNode('/root/n[1]', '/root[1]', 1), -- MoveNode('/root/n[2]/p[2]', '/root/n[1]', 0), -- ] -+ MoveNode("/root/n[1]", "/root[1]", 1), -+ MoveNode("/root/n[2]/p[2]", "/root/n[1]", 0), -+ ], - ) - - def test_no_root_match(self): -- left = '

1

2

3

'\ -- '

4

' -- right = '

2

4

1

3

' -+ left = ( -+ '

1

2

3

' -+ "

4

" -+ ) -+ right = "

2

4

1

3

" - result = self._diff(left, right) - self.assertEqual( - result, - [ -- DeleteAttrib('/root[1]', 'attr'), -- MoveNode('/root/root/n[2]', '/root[1]', 0), -- MoveNode('/root/root/n[1]', '/root[1]', 1), -- MoveNode('/root/n[2]/p[2]', '/root/n[1]', 0), -- DeleteNode('/root/root[1]') -- ] -+ DeleteAttrib("/root[1]", "attr"), -+ MoveNode("/root/root/n[2]", "/root[1]", 0), -+ MoveNode("/root/root/n[1]", "/root[1]", 1), -+ MoveNode("/root/n[2]/p[2]", "/root/n[1]", 0), -+ DeleteNode("/root/root[1]"), -+ ], - ) - - def test_rmldoc(self): - here = os.path.split(__file__)[0] -- lfile = os.path.join(here, 'test_data', 'rmldoc.left.xml') -- rfile = os.path.join(here, 'test_data', 'rmldoc.right.xml') -- with open(lfile, 'rt', encoding='utf8') as infile: -+ lfile = os.path.join(here, "test_data", "rmldoc.left.xml") -+ rfile = os.path.join(here, "test_data", "rmldoc.right.xml") -+ with open(lfile, encoding="utf8") as infile: - left = infile.read() -- with open(rfile, 'rt', encoding='utf8') as infile: -+ with open(rfile, encoding="utf8") as infile: - right = infile.read() - - result = self._diff(left, right) -@@ -1151,240 +1165,197 @@ class DiffTests(unittest.TestCase): - result, - [ - InsertNode( -- '/document/story[1]', -- '{http://namespaces.shoobx.com/application}section', -- 4), -- InsertAttrib( -- '/document/story/app:section[4]', 'hidden', 'false'), -- InsertAttrib( -- '/document/story/app:section[4]', 'name', 'sign'), -- InsertAttrib( -- '/document/story/app:section[4]', 'ref', '3'), -- InsertAttrib( -- '/document/story/app:section[4]', 'removed', 'false'), -- InsertAttrib( -- '/document/story/app:section[4]', 'single-ref', '3'), -+ "/document/story[1]", -+ "{http://namespaces.shoobx.com/application}section", -+ 4, -+ ), -+ InsertAttrib("/document/story/app:section[4]", "hidden", "false"), -+ InsertAttrib("/document/story/app:section[4]", "name", "sign"), -+ InsertAttrib("/document/story/app:section[4]", "ref", "3"), -+ InsertAttrib("/document/story/app:section[4]", 
"removed", "false"), -+ InsertAttrib("/document/story/app:section[4]", "single-ref", "3"), - InsertAttrib( -- '/document/story/app:section[4]', 'title', 'Signing Bonus'), -- UpdateAttrib('/document/story/app:section[5]', 'ref', '4'), -- UpdateAttrib( -- '/document/story/app:section[5]', 'single-ref', '4'), -- UpdateAttrib('/document/story/app:section[6]', 'ref', '5'), -- UpdateAttrib( -- '/document/story/app:section[6]', 'single-ref', '5'), -- UpdateAttrib('/document/story/app:section[7]', 'ref', '6'), -- UpdateAttrib( -- '/document/story/app:section[7]', 'single-ref', '6'), -- UpdateAttrib('/document/story/app:section[8]', 'ref', '7'), -- UpdateAttrib( -- '/document/story/app:section[8]', 'single-ref', '7'), -- UpdateAttrib('/document/story/app:section[9]', 'ref', '8'), -- UpdateAttrib( -- '/document/story/app:section[9]', 'single-ref', '8'), -- UpdateAttrib('/document/story/app:section[10]', 'ref', '9'), -- UpdateAttrib( -- '/document/story/app:section[10]', 'single-ref', '9'), -- UpdateAttrib('/document/story/app:section[11]', 'ref', '10'), -- UpdateAttrib( -- '/document/story/app:section[11]', 'single-ref', '10'), -- UpdateAttrib('/document/story/app:section[12]', 'ref', '11'), -- UpdateAttrib( -- '/document/story/app:section[12]', 'single-ref', '11'), -- UpdateAttrib('/document/story/app:section[14]', 'ref', '12'), -- UpdateAttrib( -- '/document/story/app:section[14]', 'single-ref', '12'), -+ "/document/story/app:section[4]", "title", "Signing Bonus" -+ ), -+ UpdateAttrib("/document/story/app:section[5]", "ref", "4"), -+ UpdateAttrib("/document/story/app:section[5]", "single-ref", "4"), -+ UpdateAttrib("/document/story/app:section[6]", "ref", "5"), -+ UpdateAttrib("/document/story/app:section[6]", "single-ref", "5"), -+ UpdateAttrib("/document/story/app:section[7]", "ref", "6"), -+ UpdateAttrib("/document/story/app:section[7]", "single-ref", "6"), -+ UpdateAttrib("/document/story/app:section[8]", "ref", "7"), -+ UpdateAttrib("/document/story/app:section[8]", 
"single-ref", "7"), -+ UpdateAttrib("/document/story/app:section[9]", "ref", "8"), -+ UpdateAttrib("/document/story/app:section[9]", "single-ref", "8"), -+ UpdateAttrib("/document/story/app:section[10]", "ref", "9"), -+ UpdateAttrib("/document/story/app:section[10]", "single-ref", "9"), -+ UpdateAttrib("/document/story/app:section[11]", "ref", "10"), -+ UpdateAttrib("/document/story/app:section[11]", "single-ref", "10"), -+ UpdateAttrib("/document/story/app:section[12]", "ref", "11"), -+ UpdateAttrib("/document/story/app:section[12]", "single-ref", "11"), -+ UpdateAttrib("/document/story/app:section[14]", "ref", "12"), -+ UpdateAttrib("/document/story/app:section[14]", "single-ref", "12"), - InsertNode( -- '/document/story/app:section[4]', -- '{http://namespaces.shoobx.com/application}term', -- 0), -- InsertAttrib( -- '/document/story/app:section[4]/app:term[1]', 'name', -- 'sign_bonus'), -+ "/document/story/app:section[4]", -+ "{http://namespaces.shoobx.com/application}term", -+ 0, -+ ), - InsertAttrib( -- '/document/story/app:section[4]/app:term[1]', 'set', 'ol'), -- InsertNode('/document/story/app:section[4]', 'para', 1), -+ "/document/story/app:section[4]/app:term[1]", "name", "sign_bonus" -+ ), -+ InsertAttrib("/document/story/app:section[4]/app:term[1]", "set", "ol"), -+ InsertNode("/document/story/app:section[4]", "para", 1), - UpdateTextIn( -- '/document/story/app:section[1]/para[2]/' -- 'app:placeholder[1]', -- 'consectetur'), -+ "/document/story/app:section[1]/para[2]/" "app:placeholder[1]", -+ "consectetur", -+ ), - InsertNode( -- '/document/story/app:section[4]/para[1]', -- '{http://namespaces.shoobx.com/application}ref', -- 0), -+ "/document/story/app:section[4]/para[1]", -+ "{http://namespaces.shoobx.com/application}ref", -+ 0, -+ ), - InsertAttrib( -- '/document/story/app:section[4]/para/app:ref[1]', 'name', -- 'sign'), -+ "/document/story/app:section[4]/para/app:ref[1]", "name", "sign" -+ ), - InsertAttrib( -- 
'/document/story/app:section[4]/para/app:ref[1]', -- '{http://namespaces.shoobx.com/preview}body', -- ''), -- UpdateTextIn( -- '/document/story/app:section[4]/para/app:ref[1]', '3'), -- UpdateTextAfter( -- '/document/story/app:section[4]/para/app:ref[1]', 'eu'), -- InsertNode('/document/story/app:section[4]/para[1]', 'u', 1), -+ "/document/story/app:section[4]/para/app:ref[1]", -+ "{http://namespaces.shoobx.com/preview}body", -+ "", -+ ), -+ UpdateTextIn("/document/story/app:section[4]/para/app:ref[1]", "3"), -+ UpdateTextAfter("/document/story/app:section[4]/para/app:ref[1]", "eu"), -+ InsertNode("/document/story/app:section[4]/para[1]", "u", 1), - UpdateTextAfter( -- '/document/story/app:section[4]/para/u[1]', -- 'ntum augue.\n\nAliquam nec tortor diam. Ph'), -+ "/document/story/app:section[4]/para/u[1]", -+ "ntum augue.\n\nAliquam nec tortor diam. Ph", -+ ), - InsertNode( -- '/document/story/app:section[4]/para[1]', -- '{http://namespaces.shoobx.com/application}placeholder', -- 2), -+ "/document/story/app:section[4]/para[1]", -+ "{http://namespaces.shoobx.com/application}placeholder", -+ 2, -+ ), - InsertAttrib( -- '/document/story/app:section[4]/para/app:placeholder[1]', -- 'field', -- 'ol.sign_bonus_include_amt'), -+ "/document/story/app:section[4]/para/app:placeholder[1]", -+ "field", -+ "ol.sign_bonus_include_amt", -+ ), - InsertAttrib( -- '/document/story/app:section[4]/para/app:placeholder[1]', -- 'missing', -- 'Signing Bonus Amount'), -+ "/document/story/app:section[4]/para/app:placeholder[1]", -+ "missing", -+ "Signing Bonus Amount", -+ ), - UpdateTextAfter( -- '/document/story/app:section[4]/para/app:placeholder[1]', -- 'asellus congue accumsan tempor. Donec vel risus se' -+ "/document/story/app:section[4]/para/app:placeholder[1]", -+ "asellus congue accumsan tempor. 
Donec vel risus se", - ), -+ UpdateTextIn("/document/story/app:section[5]/para/app:ref[1]", "4"), -+ UpdateTextIn("/document/story/app:section[6]/para/app:ref[1]", "5"), -+ UpdateTextIn("/document/story/app:section[7]/para/app:ref[1]", "6"), -+ UpdateTextIn("/document/story/app:section[8]/para/app:ref[1]", "7"), -+ UpdateTextIn("/document/story/app:section[9]/para/app:ref[1]", "8"), -+ UpdateTextIn("/document/story/app:section[10]/para/app:ref[1]", "9"), -+ UpdateTextIn("/document/story/app:section[11]/para/app:ref[1]", "10"), -+ UpdateTextIn("/document/story/app:section[12]/para/app:ref[1]", "11"), -+ InsertNode("/document/story/app:section[4]/para/u[1]", "b", 0), - UpdateTextIn( -- '/document/story/app:section[5]/para/app:ref[1]', -- '4'), -- UpdateTextIn( -- '/document/story/app:section[6]/para/app:ref[1]', -- '5'), -- UpdateTextIn( -- '/document/story/app:section[7]/para/app:ref[1]', -- '6'), -- UpdateTextIn( -- '/document/story/app:section[8]/para/app:ref[1]', -- '7'), -- UpdateTextIn( -- '/document/story/app:section[9]/para/app:ref[1]', -- '8'), -- UpdateTextIn( -- '/document/story/app:section[10]/para/app:ref[1]', -- '9'), -- UpdateTextIn( -- '/document/story/app:section[11]/para/app:ref[1]', -- '10'), -- UpdateTextIn( -- '/document/story/app:section[12]/para/app:ref[1]', -- '11'), -- InsertNode('/document/story/app:section[4]/para/u[1]', 'b', 0), -- UpdateTextIn( -- '/document/story/app:section[4]/para/u/b[1]', -- 'ger nec ferme'), -- ] -+ "/document/story/app:section[4]/para/u/b[1]", "ger nec ferme" -+ ), -+ ], - ) - - def test_sbt_template(self): - here = os.path.split(__file__)[0] -- lfile = os.path.join(here, 'test_data', 'sbt_template.left.xml') -- rfile = os.path.join(here, 'test_data', 'sbt_template.right.xml') -- with open(lfile, 'rt', encoding='utf8') as infile: -+ lfile = os.path.join(here, "test_data", "sbt_template.left.xml") -+ rfile = os.path.join(here, "test_data", "sbt_template.right.xml") -+ with open(lfile, encoding="utf8") as infile: - 
left = infile.read() -- with open(rfile, 'rt', encoding='utf8') as infile: -+ with open(rfile, encoding="utf8") as infile: - right = infile.read() - - result = self._diff(left, right) - -- # Most lines get too long and flake8 complains because of this part: -- bm_bm_bm = '/metal:block/metal:block/metal:block' -+ bm_bm_bm = "/metal:block/metal:block/metal:block" - self.assertEqual( - result, - [ - InsertNode( -- bm_bm_bm + '[1]', -- '{http://namespaces.shoobx.com/application}section', -- 0), -- InsertAttrib( -- bm_bm_bm + '/app:section[1]', -- 'allowCustom', -- 'False'), -- InsertAttrib( -- bm_bm_bm + '/app:section[1]', -- 'hidden', -- "advisor.payment_type == 'none'"), -- InsertAttrib( -- bm_bm_bm + '/app:section[1]', -- 'name', -- 'payment'), -+ bm_bm_bm + "[1]", -+ "{http://namespaces.shoobx.com/application}section", -+ 0, -+ ), -+ InsertAttrib(bm_bm_bm + "/app:section[1]", "allowCustom", "False"), - InsertAttrib( -- bm_bm_bm + '/app:section[1]', -- 'title', -- 'Payment'), -+ bm_bm_bm + "/app:section[1]", -+ "hidden", -+ "advisor.payment_type == 'none'", -+ ), -+ InsertAttrib(bm_bm_bm + "/app:section[1]", "name", "payment"), -+ InsertAttrib(bm_bm_bm + "/app:section[1]", "title", "Payment"), - InsertNode( -- bm_bm_bm + '/app:section[1]', -- '{http://xml.zope.org/namespaces/tal}if', -- 0), -+ bm_bm_bm + "/app:section[1]", -+ "{http://xml.zope.org/namespaces/tal}if", -+ 0, -+ ), - InsertAttrib( -- bm_bm_bm + '/app:section[1]/tal:if[1]', -- 'condition', -- "python: advisor.payment_type == 'stock_award'"), -+ bm_bm_bm + "/app:section[1]/tal:if[1]", -+ "condition", -+ "python: advisor.payment_type == 'stock_award'", -+ ), - InsertNode( -- bm_bm_bm + '/app:section[1]', -- '{http://xml.zope.org/namespaces/tal}if', -- 1), -+ bm_bm_bm + "/app:section[1]", -+ "{http://xml.zope.org/namespaces/tal}if", -+ 1, -+ ), - InsertAttrib( -- bm_bm_bm + '/app:section[1]/tal:if[2]', -- 'condition', -- "python: advisor.payment_type == 'cash'"), -+ bm_bm_bm + "/app:section[1]/tal:if[2]", 
-+ "condition", -+ "python: advisor.payment_type == 'cash'", -+ ), - InsertNode( -- bm_bm_bm + '/app:section[1]', -- '{http://xml.zope.org/namespaces/tal}if', -- 2), -+ bm_bm_bm + "/app:section[1]", -+ "{http://xml.zope.org/namespaces/tal}if", -+ 2, -+ ), - InsertAttrib( -- bm_bm_bm + '/app:section[1]/tal:if[3]', -- 'condition', -- "python: advisor.payment_type == 'stock_award_and_cash'"), -- InsertNode( -- bm_bm_bm + '/app:section[1]/tal:if[1]', -- 'para', -- 0), -- UpdateTextIn( -- bm_bm_bm + '/app:section[1]/tal:if[1]/para[1]', -- '\n A '), -- InsertNode( -- bm_bm_bm + '/app:section[1]/tal:if[2]', -- 'para', -- 0), -+ bm_bm_bm + "/app:section[1]/tal:if[3]", -+ "condition", -+ "python: advisor.payment_type == 'stock_award_and_cash'", -+ ), -+ InsertNode(bm_bm_bm + "/app:section[1]/tal:if[1]", "para", 0), - UpdateTextIn( -- bm_bm_bm + '/app:section[1]/tal:if[2]/para[1]', -- '\n More text for diffing purposes\n '), -- InsertNode( -- bm_bm_bm + '/app:section[1]/tal:if[3]', -- 'para', -- 0), -+ bm_bm_bm + "/app:section[1]/tal:if[1]/para[1]", "\n A " -+ ), -+ InsertNode(bm_bm_bm + "/app:section[1]/tal:if[2]", "para", 0), - UpdateTextIn( -- bm_bm_bm + '/app:section[1]/tal:if[3]/para[1]', -- '\n Lorem hipster ipso facto\n '), -- InsertNode( -- bm_bm_bm + '/app:section[1]/tal:if[1]/para[1]', -- 'i', -- 0), -+ bm_bm_bm + "/app:section[1]/tal:if[2]/para[1]", -+ "\n More text for diffing purposes\n ", -+ ), -+ InsertNode(bm_bm_bm + "/app:section[1]/tal:if[3]", "para", 0), - UpdateTextIn( -- bm_bm_bm + '/app:section[1]/tal:if[1]/para/i[1]', -- 'whole'), -+ bm_bm_bm + "/app:section[1]/tal:if[3]/para[1]", -+ "\n Lorem hipster ipso facto\n ", -+ ), -+ InsertNode(bm_bm_bm + "/app:section[1]/tal:if[1]/para[1]", "i", 0), -+ UpdateTextIn(bm_bm_bm + "/app:section[1]/tal:if[1]/para/i[1]", "whole"), - UpdateTextAfter( -- bm_bm_bm + '/app:section[1]/tal:if[1]/para/i[1]', -- ' load of formatted text and '), -- InsertNode( -- bm_bm_bm + '/app:section[1]/tal:if[1]/para[1]', -- 'br', -- 
1), -+ bm_bm_bm + "/app:section[1]/tal:if[1]/para/i[1]", -+ " load of formatted text and ", -+ ), -+ InsertNode(bm_bm_bm + "/app:section[1]/tal:if[1]/para[1]", "br", 1), - UpdateTextAfter( -- bm_bm_bm + '/app:section[1]/tal:if[1]/para/br[1]', -- ' other stuff.\n '), -- DeleteNode( -- bm_bm_bm + '/app:section[2]/tal:if/para/b[1]'), -- DeleteNode( -- bm_bm_bm + '/app:section[2]/tal:if/para[1]'), -- DeleteNode( -- bm_bm_bm + '/app:section[2]/tal:if[1]'), -- DeleteNode( -- bm_bm_bm + '/app:section[2]') -- ] -+ bm_bm_bm + "/app:section[1]/tal:if[1]/para/br[1]", -+ " other stuff.\n ", -+ ), -+ DeleteNode(bm_bm_bm + "/app:section[2]/tal:if/para/b[1]"), -+ DeleteNode(bm_bm_bm + "/app:section[2]/tal:if/para[1]"), -+ DeleteNode(bm_bm_bm + "/app:section[2]/tal:if[1]"), -+ DeleteNode(bm_bm_bm + "/app:section[2]"), -+ ], - ) - - def test_namespace(self): - # Test changing nodes and attributes with namespaces -- left = u""" -+ left = """ - - - Lorem ipsum dolor sit amet, -@@ -1407,7 +1378,7 @@ class DiffTests(unittest.TestCase): - - """ - -- right = u""" -+ right = """ - - - Lorem ipsum dolor sit amet, -@@ -1433,17 +1404,17 @@ class DiffTests(unittest.TestCase): - self.assertEqual( - result, - [ -- RenameNode( -- '/document/story/app:section/foo:para[1]', -- '{someuri}para'), -+ RenameNode("/document/story/app:section/foo:para[1]", "{someuri}para"), - InsertAttrib( -- '/document/story/app:section/app:para[3]', -- '{someuri}attrib', 'value'), -- ] -+ "/document/story/app:section/app:para[3]", -+ "{someuri}attrib", -+ "value", -+ ), -+ ], - ) - - def test_multiple_tag_deletes(self): -- left = u""" -+ left = """ - - -
    -@@ -1455,7 +1426,7 @@ class DiffTests(unittest.TestCase): - - """ - -- right = u""" -+ right = """ - - - """ -@@ -1463,23 +1434,21 @@ class DiffTests(unittest.TestCase): - result = self._diff(left, right) - self.assertEqual( - result, -- [UpdateTextIn('/document/story[1]', '\n '), -- DeleteNode('/document/story/ul/li[3]'), -- DeleteNode('/document/story/ul/li[2]'), -- DeleteNode('/document/story/ul/li[1]'), -- DeleteNode('/document/story/ul[1]'), -- ] -+ [ -+ UpdateTextIn("/document/story[1]", "\n "), -+ DeleteNode("/document/story/ul/li[3]"), -+ DeleteNode("/document/story/ul/li[2]"), -+ DeleteNode("/document/story/ul/li[1]"), -+ DeleteNode("/document/story/ul[1]"), -+ ], - ) - - def test_insert_comment(self): -- left = u"Something" -- right = u"Something" -+ left = "Something" -+ right = "Something" - - result = self._diff(left, right) -- self.assertEqual( -- result, -- [InsertComment('/doc[1]', 0, ' New comment! ')] -- ) -+ self.assertEqual(result, [InsertComment("/doc[1]", 0, " New comment! ")]) - - def test_issue_21_default_namespaces(self): - # When you have a default namespace you get "*" instead of the -@@ -1488,4 +1457,4 @@ class DiffTests(unittest.TestCase): - left = 'old' - right = 'new' - result = self._diff(left, right) -- self.assertEqual(result[0].node, '/*[1]') -+ self.assertEqual(result[0].node, "/*[1]") -Index: xmldiff-2.4/tests/test_formatting.py -=================================================================== ---- xmldiff-2.4.orig/tests/test_formatting.py -+++ xmldiff-2.4/tests/test_formatting.py -@@ -1,4 +1,3 @@ --# -*- coding: UTF-8 -*- - import os - import sys - import unittest -@@ -8,89 +7,80 @@ from xmldiff import formatting, main, ac - - from .testing import generate_filebased_cases - --START = u'' -+START = 'This is a tag with formatted text.

    ' -+ text = "

    This is a tag with formatted text.

    " - element = etree.fromstring(text) - replacer.do_element(element) - - self.assertEqual( - etree.tounicode(element), -- u'

    This is a tag with \ue006formatted\ue005 text.

    ') -+ "

    This is a tag with \ue006formatted\ue005 text.

    ", -+ ) - - replacer.undo_element(element) - self.assertEqual(etree.tounicode(element), text) - - # Non formatting tags get replaced with content -- text = u'

    This is a tag with formatted text.

    ' -+ text = "

    This is a tag with formatted text.

    " - element = etree.fromstring(text) - replacer.do_element(element) - result = etree.tounicode(element) -- self.assertEqual( -- result, -- u'

    This is a tag with \ue007 text.

    ') -+ self.assertEqual(result, "

    This is a tag with \ue007 text.

    ") - - # Single formatting tags still get two placeholders. -- text = u'

    This is a with text.

    ' -+ text = "

    This is a with text.

    " - element = etree.fromstring(text) - replacer.do_element(element) - result = etree.tounicode(element) -- self.assertEqual( -- result, -- u'

    This is a \ue009\ue008 with \ue00a text.

    ') -+ self.assertEqual(result, "

    This is a \ue009\ue008 with \ue00a text.

    ") - - def test_do_undo_element(self): -- replacer = formatting.PlaceholderMaker(['p'], ['b']) -+ replacer = formatting.PlaceholderMaker(["p"], ["b"]) - - # Formatting tags get replaced, and the content remains -- text = u'

    This a tag with formatted text.

    ' -+ text = "

    This a tag with formatted text.

    " - element = etree.fromstring(text) - replacer.do_element(element) - - self.assertEqual( -- element.text, -- u'This \ue005 a \ue006 with \ue008formatted' -- u'\ue007 text.') -+ element.text, "This \ue005 a \ue006 with \ue008formatted" "\ue007 text." -+ ) - - replacer.undo_element(element) - result = etree.tounicode(element) - self.assertEqual(result, text) - - def test_do_undo_element_double_format(self): -- replacer = formatting.PlaceholderMaker(['p'], ['b', 'u']) -+ replacer = formatting.PlaceholderMaker(["p"], ["b", "u"]) - - # Formatting tags get replaced, and the content remains -- text = u'

    This is doubly formatted text.

    ' -+ text = "

    This is doubly formatted text.

    " - element = etree.fromstring(text) - replacer.do_element(element) - - self.assertEqual( -- element.text, -- u'This is \ue006doubly \ue008formatted\ue007' -- u'\ue005 text.') -+ element.text, "This is \ue006doubly \ue008formatted\ue007" "\ue005 text." -+ ) - - replacer.undo_element(element) - result = etree.tounicode(element) -@@ -98,7 +88,7 @@ class PlaceholderMakerTests(unittest.Tes - - def test_rml_bug(self): - etree.register_namespace(formatting.DIFF_PREFIX, formatting.DIFF_NS) -- before_diff = u""" -+ before_diff = """ -
    - - 4. -@@ -109,9 +99,10 @@ class PlaceholderMakerTests(unittest.Tes - """ - tree = etree.fromstring(before_diff) - replacer = formatting.PlaceholderMaker( -- text_tags=('para',), formatting_tags=('b', 'u', 'i',)) -+ text_tags=("para",), formatting_tags=("b", "u", "i",) -+ ) - replacer.do_tree(tree) -- after_diff = u""" -+ after_diff = """ -
    - - \ue005. -@@ -122,15 +113,13 @@ class PlaceholderMakerTests(unittest.Tes - """ - - # The diff formatting will find some text to insert. -- delete_attrib = u'{%s}delete-format' % formatting.DIFF_NS -- replacer.placeholder2tag[u'\ue006' -- ].element.attrib[delete_attrib] = '' -- replacer.placeholder2tag[u'\ue007' -- ].element.attrib[delete_attrib] = '' -+ delete_attrib = "{%s}delete-format" % formatting.DIFF_NS -+ replacer.placeholder2tag["\ue006"].element.attrib[delete_attrib] = "" -+ replacer.placeholder2tag["\ue007"].element.attrib[delete_attrib] = "" - tree = etree.fromstring(after_diff) - replacer.undo_tree(tree) - result = etree.tounicode(tree) -- expected = u""" -+ expected = """ -
    - - 4. -@@ -150,18 +139,17 @@ class PlaceholderMakerTests(unittest.Tes - # This is the last character of the Private use area - formatting.PLACEHOLDER_START = 0xF8FF - -- replacer = formatting.PlaceholderMaker(['p'], ['b']) -+ replacer = formatting.PlaceholderMaker(["p"], ["b"]) - - # Formatting tags get replaced, and the content remains -- text = u'

    This a tag with some text.

    ' -+ text = "

    This a tag with some text.

    " - element = etree.fromstring(text) - replacer.do_element(element) - - # - self.assertEqual( -- element.text, -- u'This \uf904 a \uf905 with \uf907some' -- u'\uf906 text.') -+ element.text, "This \uf904 a \uf905 with \uf907some" "\uf906 text." -+ ) - - try: - # If this is a wide build, also test what happens if we -@@ -169,18 +157,19 @@ class PlaceholderMakerTests(unittest.Tes - # (On narrow builds this will give an error) - formatting.PLACEHOLDER_START = 0xFFFF - -- replacer = formatting.PlaceholderMaker(['p'], ['b']) -+ replacer = formatting.PlaceholderMaker(["p"], ["b"]) - - # Formatting tags get replaced, and the content remains -- text = u'

    This a tag with some text.

    ' -+ text = "

    This a tag with some text.

    " - element = etree.fromstring(text) - replacer.do_element(element) - - # This should raise an error on a narrow build - self.assertEqual( - element.text, -- u'This \U00010004 a \U00010005 with \U00010007some' -- u'\U00010006 text.') -+ "This \U00010004 a \U00010005 with \U00010007some" -+ "\U00010006 text.", -+ ) - except ValueError: - if sys.maxunicode > 0x10000: - # This is a wide build, we should NOT get an error -@@ -192,229 +181,224 @@ class PlaceholderMakerTests(unittest.Tes - - - class XMLFormatTests(unittest.TestCase): -- - def _format_test(self, left, action, expected): - formatter = formatting.XMLFormatter(pretty_print=False) - result = formatter.format([action], etree.fromstring(left)) - self.assertEqual(result, expected) - - def test_incorrect_xpaths(self): -- left = u'Text' -- expected = START + u' diff:delete-attr="a">Text' + END -+ left = 'Text' -+ expected = START + ' diff:delete-attr="a">Text' + END - - with self.assertRaises(ValueError): -- action = actions.DeleteAttrib('/document/node', 'a') -+ action = actions.DeleteAttrib("/document/node", "a") - self._format_test(left, action, expected) - - with self.assertRaises(ValueError): -- action = actions.DeleteAttrib('/document/ummagumma', 'a') -+ action = actions.DeleteAttrib("/document/ummagumma", "a") - self._format_test(left, action, expected) - - def test_del_attr(self): -- left = u'Text' -- action = actions.DeleteAttrib('/document/node', 'a') -- expected = START + u' diff:delete-attr="a">Text' + END -+ left = 'Text' -+ action = actions.DeleteAttrib("/document/node", "a") -+ expected = START + ' diff:delete-attr="a">Text' + END - - self._format_test(left, action, expected) - - def test_del_node(self): -- left = u'Text' -- action = actions.DeleteNode('/document/node') -- expected = START + u' attr="val" diff:delete="">Text' + END -+ left = 'Text' -+ action = actions.DeleteNode("/document/node") -+ expected = START + ' attr="val" diff:delete="">Text' + END - - self._format_test(left, action, 
expected) - - def test_del_text(self): -- left = u'Text' -- action = actions.UpdateTextIn('/document/node', None) -- expected = START + u' attr="val">Text' + END -+ left = 'Text' -+ action = actions.UpdateTextIn("/document/node", None) -+ expected = START + ' attr="val">Text' + END - - self._format_test(left, action, expected) - - def test_insert_attr(self): -- left = u'We need more text' -- action = actions.InsertAttrib('/document/node', 'attr', 'val') -- expected = START + u' attr="val" diff:add-attr="attr">'\ -- u'We need more text' + END -+ left = "We need more text" -+ action = actions.InsertAttrib("/document/node", "attr", "val") -+ expected = START + ' attr="val" diff:add-attr="attr">' "We need more text" + END - - self._format_test(left, action, expected) - - def test_insert_node(self): -- left = u'' -- action = actions.InsertNode('/document', 'node', 0) -- expected = START + u' diff:insert=""/>' -+ left = "" -+ action = actions.InsertNode("/document", "node", 0) -+ expected = START + ' diff:insert=""/>' - - self._format_test(left, action, expected) - - def test_move_attr(self): - # The library currently only uses move attr for when attributes are - # renamed: -- left = u'Text' -- action = actions.RenameAttrib('/document/node', 'attr', 'bottr') -- expected = START + u' bottr="val" diff:rename-attr="attr:bottr"'\ -- u'>Text' + END -+ left = 'Text' -+ action = actions.RenameAttrib("/document/node", "attr", "bottr") -+ expected = START + ' bottr="val" diff:rename-attr="attr:bottr"' ">Text" + END - - self._format_test(left, action, expected) - - def test_move_node(self): - # Move 1 down -- left = u'' -- action = actions.MoveNode('/document/node[1]', '/document', 1) -- expected = START + u' id="1" diff:delete=""/>' -+ left = '' -+ action = actions.MoveNode("/document/node[1]", "/document", 1) -+ expected = ( -+ START + ' id="1" diff:delete=""/>' -+ ) - - self._format_test(left, action, expected) - - # Move 2 up (same result, different diff) -- left = u'' -- 
action = actions.MoveNode('/document/node[2]', '/document', 0) -- expected = START + u' id="2" diff:insert=""/>' -+ left = '' -+ action = actions.MoveNode("/document/node[2]", "/document", 0) -+ expected = ( -+ START + ' id="2" diff:insert=""/>' -+ ) - - self._format_test(left, action, expected) - - def test_rename_node(self): -- left = u'ContentTail' -- action = actions.RenameNode('/document/node[1]/para[1]', 'newtag') -- expected = START + u'>Content'\ -- 'Tail' + END -+ left = "ContentTail" -+ action = actions.RenameNode("/document/node[1]/para[1]", "newtag") -+ expected = START + '>Content' "Tail" + END - - self._format_test(left, action, expected) - - def test_update_attr(self): -- left = u'' -- action = actions.UpdateAttrib('/document/node', 'attr', 'newval') -- expected = START + u' attr="newval" diff:update-attr="attr:val"/>'\ -- u'' -+ left = '' -+ action = actions.UpdateAttrib("/document/node", "attr", "newval") -+ expected = START + ' attr="newval" diff:update-attr="attr:val"/>' "" - - self._format_test(left, action, expected) - - def test_update_text_in(self): -- left = u'' -- action = actions.UpdateTextIn('/document/node', 'Text') -- expected = START + u' attr="val">Text' + END -+ left = '' -+ action = actions.UpdateTextIn("/document/node", "Text") -+ expected = START + ' attr="val">Text' + END - - self._format_test(left, action, expected) - -- left = u'This is a bit of text, right' + END -- action = actions.UpdateTextIn('/document/node', -- 'Also a bit of text, rick') -- expected = START + u'>This is'\ -- u'Also a bit of text, right'\ -- u'ck' + END -+ left = "This is a bit of text, right" + END -+ action = actions.UpdateTextIn("/document/node", "Also a bit of text, rick") -+ expected = ( -+ START + ">This is" -+ "Also a bit of text, right" -+ "ck" + END -+ ) - - self._format_test(left, action, expected) - - def test_update_text_after_1(self): -- left = u'' -- action = actions.UpdateTextAfter('/document/node[1]', 'Text') -- expected = START + 
u'/>Text'\ -- u'' -+ left = "" -+ action = actions.UpdateTextAfter("/document/node[1]", "Text") -+ expected = START + "/>Text" "" - - self._format_test(left, action, expected) - - def test_update_text_after_2(self): -- left = u'This is a bit of text, right' -- action = actions.UpdateTextAfter('/document/node', -- 'Also a bit of text, rick') -- expected = START + u'/>This is'\ -- u'Also a bit of text, ri'\ -- u'ghtck' -+ left = "This is a bit of text, right" -+ action = actions.UpdateTextAfter("/document/node", "Also a bit of text, rick") -+ expected = ( -+ START + "/>This is" -+ "Also a bit of text, ri" -+ "ghtck" -+ ) - - self._format_test(left, action, expected) - - - class DiffFormatTests(unittest.TestCase): -- - def _format_test(self, action, expected): - formatter = formatting.DiffFormatter() - result = formatter.format([action], None) - self.assertEqual(result, expected) - - def test_del_attr(self): -- action = actions.DeleteAttrib('/document/node', 'a') -- expected = '[delete-attribute, /document/node, a]' -+ action = actions.DeleteAttrib("/document/node", "a") -+ expected = "[delete-attribute, /document/node, a]" - self._format_test(action, expected) - - def test_del_node(self): -- action = actions.DeleteNode('/document/node') -- expected = '[delete, /document/node]' -+ action = actions.DeleteNode("/document/node") -+ expected = "[delete, /document/node]" - self._format_test(action, expected) - - def test_del_text(self): -- action = actions.UpdateTextIn('/document/node', None) -- expected = '[update-text, /document/node, null]' -+ action = actions.UpdateTextIn("/document/node", None) -+ expected = "[update-text, /document/node, null]" - self._format_test(action, expected) - - def test_insert_attr(self): -- action = actions.InsertAttrib('/document/node', 'attr', 'val') -+ action = actions.InsertAttrib("/document/node", "attr", "val") - expected = '[insert-attribute, /document/node, attr, "val"]' - self._format_test(action, expected) - - def 
test_insert_node(self): -- action = actions.InsertNode('/document', 'node', 0) -- expected = '[insert, /document, node, 0]' -+ action = actions.InsertNode("/document", "node", 0) -+ expected = "[insert, /document, node, 0]" - self._format_test(action, expected) - - def test_rename_attr(self): -- action = actions.RenameAttrib('/document/node', 'attr', 'bottr') -- expected = '[rename-attribute, /document/node, attr, bottr]' -+ action = actions.RenameAttrib("/document/node", "attr", "bottr") -+ expected = "[rename-attribute, /document/node, attr, bottr]" - self._format_test(action, expected) - - def test_move_node(self): - # Move 1 down -- action = actions.MoveNode('/document/node[1]', '/document', 1) -- expected = '[move, /document/node[1], /document, 1]' -+ action = actions.MoveNode("/document/node[1]", "/document", 1) -+ expected = "[move, /document/node[1], /document, 1]" - self._format_test(action, expected) - - # Move 2 up (same result, different diff) -- action = actions.MoveNode('/document/node[2]', '/document', 0) -- expected = '[move, /document/node[2], /document, 0]' -+ action = actions.MoveNode("/document/node[2]", "/document", 0) -+ expected = "[move, /document/node[2], /document, 0]" - - self._format_test(action, expected) - - def test_rename_node(self): - # Move 1 down -- action = actions.RenameNode('/document/node[1]', 'newtag') -- expected = '[rename, /document/node[1], newtag]' -+ action = actions.RenameNode("/document/node[1]", "newtag") -+ expected = "[rename, /document/node[1], newtag]" - self._format_test(action, expected) - - # Move 2 up (same result, different diff) -- action = actions.MoveNode('/document/node[2]', '/document', 0) -- expected = '[move, /document/node[2], /document, 0]' -+ action = actions.MoveNode("/document/node[2]", "/document", 0) -+ expected = "[move, /document/node[2], /document, 0]" - - self._format_test(action, expected) - - def test_update_attr(self): -- action = actions.UpdateAttrib('/document/node', 'attr', 'newval') 
-+ action = actions.UpdateAttrib("/document/node", "attr", "newval") - expected = '[update-attribute, /document/node, attr, "newval"]' - self._format_test(action, expected) - - def test_update_text_in(self): -- action = actions.UpdateTextIn('/document/node', 'Text') -+ action = actions.UpdateTextIn("/document/node", "Text") - expected = '[update-text, /document/node, "Text"]' - self._format_test(action, expected) - -- action = actions.UpdateTextIn('/document/node', -- 'Also a bit of text, "rick"') -- expected = '[update-text, /document/node, '\ -- u'"Also a bit of text, \\"rick\\""]' -+ action = actions.UpdateTextIn("/document/node", 'Also a bit of text, "rick"') -+ expected = "[update-text, /document/node, " '"Also a bit of text, \\"rick\\""]' - self._format_test(action, expected) - - def test_update_text_after_1(self): -- action = actions.UpdateTextAfter('/document/node[1]', 'Text') -+ action = actions.UpdateTextAfter("/document/node[1]", "Text") - expected = '[update-text-after, /document/node[1], "Text"]' - self._format_test(action, expected) - - def test_update_text_after_2(self): -- action = actions.UpdateTextAfter('/document/node', -- 'Also a bit of text, rick') -- expected = '[update-text-after, /document/node, '\ -- u'"Also a bit of text, rick"]' -+ action = actions.UpdateTextAfter("/document/node", "Also a bit of text, rick") -+ expected = "[update-text-after, /document/node, " '"Also a bit of text, rick"]' - self._format_test(action, expected) - - def test_insert_comment(self): -- action = actions.InsertComment('/document/node', 2, 'Commentary') -+ action = actions.InsertComment("/document/node", 2, "Commentary") - expected = '[insert-comment, /document/node, 2, "Commentary"]' - self._format_test(action, expected) - -@@ -430,97 +414,95 @@ class XmlDiffFormatTests(unittest.TestCa - self.assertEqual(result, expected) - - def test_del_attr(self): -- action = actions.DeleteAttrib('/document/node', 'a') -- expected = '[remove, /document/node/@a]' -+ action = 
actions.DeleteAttrib("/document/node", "a") -+ expected = "[remove, /document/node/@a]" - self._format_test(action, expected) - - def test_del_node(self): -- action = actions.DeleteNode('/document/node') -- expected = '[remove, /document/node]' -+ action = actions.DeleteNode("/document/node") -+ expected = "[remove, /document/node]" - self._format_test(action, expected) - - def test_del_text(self): -- action = actions.UpdateTextIn('/document/node', None) -- expected = '[update, /document/node/text()[1], null]' -+ action = actions.UpdateTextIn("/document/node", None) -+ expected = "[update, /document/node/text()[1], null]" - self._format_test(action, expected) - - def test_insert_attr(self): -- action = actions.InsertAttrib('/document/node', 'attr', 'val') -- expected = '[insert, /document/node, \n<@attr>\nval\n]' -+ action = actions.InsertAttrib("/document/node", "attr", "val") -+ expected = "[insert, /document/node, \n<@attr>\nval\n]" - self._format_test(action, expected) - - def test_insert_node(self): -- action = actions.InsertNode('/document', 'node', 0) -- expected = '[insert-first, /document, \n]' -+ action = actions.InsertNode("/document", "node", 0) -+ expected = "[insert-first, /document, \n]" - self._format_test(action, expected) - - def test_rename_node(self): - # Move 1 down -- action = actions.RenameNode('/document/node[1]', 'newtag') -- expected = '[rename, /document/node[1], newtag]' -+ action = actions.RenameNode("/document/node[1]", "newtag") -+ expected = "[rename, /document/node[1], newtag]" - self._format_test(action, expected) - - # Move 2 up (same result, different diff) -- action = actions.MoveNode('/document/node[2]', '/document', 0) -- expected = '[move-first, /document/node[2], /document]' -+ action = actions.MoveNode("/document/node[2]", "/document", 0) -+ expected = "[move-first, /document/node[2], /document]" - self._format_test(action, expected) - - def test_update_attr(self): -- action = actions.UpdateAttrib('/document/node', 'attr', 
'newval') -+ action = actions.UpdateAttrib("/document/node", "attr", "newval") - expected = '[update, /document/node/@attr, "newval"]' - self._format_test(action, expected) - - def test_update_text_in(self): -- action = actions.UpdateTextIn('/document/node', 'Text') -+ action = actions.UpdateTextIn("/document/node", "Text") - expected = '[update, /document/node/text()[1], "Text"]' - self._format_test(action, expected) - -- action = actions.UpdateTextIn('/document/node', -- 'Also a bit of text, "rick"') -- expected = '[update, /document/node/text()[1], '\ -- u'"Also a bit of text, \\"rick\\""]' -+ action = actions.UpdateTextIn("/document/node", 'Also a bit of text, "rick"') -+ expected = ( -+ "[update, /document/node/text()[1], " '"Also a bit of text, \\"rick\\""]' -+ ) - self._format_test(action, expected) - - def test_update_text_after_1(self): -- action = actions.UpdateTextAfter('/document/node[1]', 'Text') -+ action = actions.UpdateTextAfter("/document/node[1]", "Text") - expected = '[update, /document/node[1]/text()[2], "Text"]' - self._format_test(action, expected) - - def test_update_text_after_2(self): -- action = actions.UpdateTextAfter('/document/node', -- 'Also a bit of text, rick') -- expected = '[update, /document/node/text()[2], '\ -- u'"Also a bit of text, rick"]' -+ action = actions.UpdateTextAfter("/document/node", "Also a bit of text, rick") -+ expected = "[update, /document/node/text()[2], " '"Also a bit of text, rick"]' - self._format_test(action, expected) - - def test_all_actions(self): - here = os.path.split(__file__)[0] -- lfile = os.path.join(here, 'test_data', 'all_actions.left.xml') -- rfile = os.path.join(here, 'test_data', 'all_actions.right.xml') -+ lfile = os.path.join(here, "test_data", "all_actions.left.xml") -+ rfile = os.path.join(here, "test_data", "all_actions.right.xml") - - formatter = formatting.XmlDiffFormatter() - result = main.diff_files(lfile, rfile, formatter=formatter) - expected = ( -- u'[move-after, /document/node[2], 
/document/tag[1]]\n' -- u'[insert-comment, /document[1], 0, Insert a new comment ]\n' -- u'[update, /document/node[1]/@name, "was updated"]\n' -- u'[remove, /document/node[1]/@attribute]\n' -- u'[insert, /document/node[1], \n' -- u'<@newtribute>\n' -- u'renamed\n' -- u']\n' -- u'[insert, /document/node[1], \n' -- u'<@this>\n' -- u'is new\n' -- u']\n' -- u'[remove, /document/node[1]/@attr]\n' -- u'[update, /document/node[1]/text()[1], "\\n Modified\\n "]\n' -- u'[update, /document/node[1]/text()[2], "\\n ' -- u'New tail content\\n "]\n' -- u'[rename, /document/node[2], nod]\n' -- u'[insert-after, /document/tail[1], \n' -- u']\n' -- u'[remove, /document/tail[1]]' -+ "[move-after, /document/node[2], /document/tag[1]]\n" -+ "[insert-comment, /document[1], 0, Insert a new comment ]\n" -+ '[update, /document/node[1]/@name, "was updated"]\n' -+ "[remove, /document/node[1]/@attribute]\n" -+ "[insert, /document/node[1], \n" -+ "<@newtribute>\n" -+ "renamed\n" -+ "]\n" -+ "[insert, /document/node[1], \n" -+ "<@this>\n" -+ "is new\n" -+ "]\n" -+ "[remove, /document/node[1]/@attr]\n" -+ '[update, /document/node[1]/text()[1], "\\n Modified\\n "]\n' -+ '[update, /document/node[1]/text()[2], "\\n ' -+ 'New tail content\\n "]\n' -+ "[rename, /document/node[2], nod]\n" -+ "[insert-after, /document/tail[1], \n" -+ "]\n" -+ "[remove, /document/tail[1]]" - ) - self.assertEqual(result, expected) - -@@ -537,12 +519,14 @@ class FormatterFileTests(unittest.TestCa - class XMLFormatterFileTests(FormatterFileTests): - - # The XMLFormatter has no text or formatting tags, so -- formatter = formatting.XMLFormatter(pretty_print=False, -- normalize=formatting.WS_TEXT) -+ formatter = formatting.XMLFormatter( -+ pretty_print=False, normalize=formatting.WS_TEXT -+ ) - - - # Also test the bits that handle text tags: - -+ - class HTMLFormatterFileTests(FormatterFileTests): - - # We use a few tags for the placeholder tests. 
-@@ -551,15 +535,27 @@ class HTMLFormatterFileTests(FormatterFi - formatter = formatting.XMLFormatter( - normalize=formatting.WS_BOTH, - pretty_print=True, -- text_tags=('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'), -- formatting_tags=('b', 'u', 'i', 'strike', 'em', 'super', -- 'sup', 'sub', 'link', 'a', 'span')) -+ text_tags=("p", "h1", "h2", "h3", "h4", "h5", "h6", "li"), -+ formatting_tags=( -+ "b", -+ "u", -+ "i", -+ "strike", -+ "em", -+ "super", -+ "sup", -+ "sub", -+ "link", -+ "a", -+ "span", -+ ), -+ ) - - - # Add tests that use no placeholder replacement (ie plain XML) --data_dir = os.path.join(os.path.dirname(__file__), 'test_data') -+data_dir = os.path.join(os.path.dirname(__file__), "test_data") - generate_filebased_cases(data_dir, XMLFormatterFileTests) - - # Add tests that use placeholder replacement (ie HTML) --data_dir = os.path.join(os.path.dirname(__file__), 'test_data') --generate_filebased_cases(data_dir, HTMLFormatterFileTests, suffix='html') -+data_dir = os.path.join(os.path.dirname(__file__), "test_data") -+generate_filebased_cases(data_dir, HTMLFormatterFileTests, suffix="html") -Index: xmldiff-2.4/tests/test_main.py -=================================================================== ---- xmldiff-2.4.orig/tests/test_main.py -+++ xmldiff-2.4/tests/test_main.py -@@ -7,44 +7,42 @@ from lxml import etree - from xmldiff import main, formatting - - CURDIR = os.path.split(__file__)[0] --LEFT_FILE = os.path.join(CURDIR, 'test_data', 'rmldoc.left.xml') --RIGHT_FILE = os.path.join(CURDIR, 'test_data', 'rmldoc.right.xml') --EXPECTED_FILE = os.path.join(CURDIR, 'test_data', 'rmldoc.expected.xml') -+LEFT_FILE = os.path.join(CURDIR, "test_data", "rmldoc.left.xml") -+RIGHT_FILE = os.path.join(CURDIR, "test_data", "rmldoc.right.xml") -+EXPECTED_FILE = os.path.join(CURDIR, "test_data", "rmldoc.expected.xml") - - - class MainAPITests(unittest.TestCase): -- - def test_api_diff_files(self): - # diff_files can take filenames - result1 = 
main.diff_files(LEFT_FILE, RIGHT_FILE) - - # Or open file streams: -- with open(LEFT_FILE, 'rb') as linfile: -- with open(RIGHT_FILE, 'rb') as rinfile: -+ with open(LEFT_FILE, "rb") as linfile: -+ with open(RIGHT_FILE, "rb") as rinfile: - result2 = main.diff_files(linfile, rinfile) - - self.assertEqual(result1, result2) - - # Give something else, and it fails: - with self.assertRaises(IOError): -- main.diff_files('', '') -+ main.diff_files("", "") - - def test_api_diff_texts(self): - # diff_text can take bytes -- with open(LEFT_FILE, 'rb') as linfile: -- with open(RIGHT_FILE, 'rb') as rinfile: -+ with open(LEFT_FILE, "rb") as linfile: -+ with open(RIGHT_FILE, "rb") as rinfile: - left = linfile.read() - right = rinfile.read() - result1 = main.diff_texts(left, right) - - # And unicode -- result2 = main.diff_texts(left.decode('utf8'), -- right.decode('utf8')) -+ result2 = main.diff_texts(left.decode("utf8"), right.decode("utf8")) - - self.assertEqual(result1, result2) - -- with open(LEFT_FILE, 'rb') as infile: -- with open(RIGHT_FILE, 'rb') as infile: -+ with open(LEFT_FILE, "rb") as infile: -+ with open(RIGHT_FILE, "rb") as infile: - # Give something else, and it fails: - with self.assertRaises(ValueError): - main.diff_texts(infile, infile) -@@ -72,7 +70,6 @@ class MainAPITests(unittest.TestCase): - - - class MainCLITests(unittest.TestCase): -- - def call_run(self, args, command=main.diff_command): - output = six.StringIO() - errors = six.StringIO() -@@ -97,75 +94,75 @@ class MainCLITests(unittest.TestCase): - - def test_diff_cli_simple(self): - curdir = os.path.dirname(__file__) -- filepath = os.path.join(curdir, 'test_data') -- file1 = os.path.join(filepath, 'insert-node.left.html') -- file2 = os.path.join(filepath, 'insert-node.right.html') -+ filepath = os.path.join(curdir, "test_data") -+ file1 = os.path.join(filepath, "insert-node.left.html") -+ file2 = os.path.join(filepath, "insert-node.right.html") - - output, errors = self.call_run([file1, file2]) - 
self.assertEqual(len(output.splitlines()), 3) - # This should default to the diff formatter: -- self.assertEqual(output[0], '[') -+ self.assertEqual(output[0], "[") - - def test_diff_cli_args(self): - curdir = os.path.dirname(__file__) -- filepath = os.path.join(curdir, 'test_data') -- file1 = os.path.join(filepath, 'insert-node.left.html') -- file2 = os.path.join(filepath, 'insert-node.right.html') -+ filepath = os.path.join(curdir, "test_data") -+ file1 = os.path.join(filepath, "insert-node.left.html") -+ file2 = os.path.join(filepath, "insert-node.right.html") - - # Select a formatter: -- output, errors = self.call_run([file1, file2, '--formatter', 'xml']) -+ output, errors = self.call_run([file1, file2, "--formatter", "xml"]) - # It gives a very compact output - self.assertEqual(len(output.splitlines()), 1) - # Now it's XML -- self.assertEqual(output[0], '<') -+ self.assertEqual(output[0], "<") - - # Don't strip the whitespace keeps the formatting from the source: -- output, errors = self.call_run([file1, file2, '--keep-whitespace', -- '--formatter', 'xml']) -+ output, errors = self.call_run( -+ [file1, file2, "--keep-whitespace", "--formatter", "xml"] -+ ) - self.assertEqual(len(output.splitlines()), 5) - - # And stripping and pretty printing gives a longer readable output -- output, errors = self.call_run([file1, file2, '--pretty-print', -- '--formatter', 'xml']) -+ output, errors = self.call_run( -+ [file1, file2, "--pretty-print", "--formatter", "xml"] -+ ) - self.assertEqual(len(output.splitlines()), 6) - - # The default output gives three lines for three actions -- output, errors = self.call_run([file1, file2, '--ratio-mode', 'fast']) -+ output, errors = self.call_run([file1, file2, "--ratio-mode", "fast"]) - self.assertEqual(len(output.splitlines()), 3) - - # 'fast' is default, so it's the same output -- output2, errors = self.call_run([file1, file2, '--ratio-mode', 'fast']) -+ output2, errors = self.call_run([file1, file2, "--ratio-mode", "fast"]) - 
self.assertEqual(output, output2) - - # Accurate is the same in this case, although sometimes it isn't -- output2, errors = self.call_run([file1, file2, '--ratio-mode', -- 'accurate']) -+ output2, errors = self.call_run([file1, file2, "--ratio-mode", "accurate"]) - self.assertEqual(output, output2) - - # But "faster" gives six actions instead of three -- output, errors = self.call_run([file1, file2, '--ratio-mode', -- 'faster']) -+ output, errors = self.call_run([file1, file2, "--ratio-mode", "faster"]) - self.assertEqual(len(output.splitlines()), 6) - - # You can specify unique attributes: -- output, errors = self.call_run([file1, file2, '--unique-attributes', -- 'id,foo,frotz']) -+ output, errors = self.call_run( -+ [file1, file2, "--unique-attributes", "id,foo,frotz"] -+ ) - self.assertEqual(len(output.splitlines()), 3) - - # Or none -- output, errors = self.call_run([file1, file2, '--unique-attributes']) -+ output, errors = self.call_run([file1, file2, "--unique-attributes"]) - self.assertEqual(len(output.splitlines()), 3) - - def test_patch_cli_simple(self): - curdir = os.path.dirname(__file__) -- filepath = os.path.join(curdir, 'test_data') -- patchfile = os.path.join(filepath, 'insert-node.diff') -- xmlfile = os.path.join(filepath, 'insert-node.left.html') -+ filepath = os.path.join(curdir, "test_data") -+ patchfile = os.path.join(filepath, "insert-node.diff") -+ xmlfile = os.path.join(filepath, "insert-node.left.html") - -- output, errors = self.call_run([patchfile, xmlfile], -- command=main.patch_command) -+ output, errors = self.call_run([patchfile, xmlfile], command=main.patch_command) - -- expectedfile = os.path.join(filepath, 'insert-node.right.html') -- with open(expectedfile, 'rt') as f: -+ expectedfile = os.path.join(filepath, "insert-node.right.html") -+ with open(expectedfile) as f: - expected = f.read() - self.assertEqual(output, expected) -Index: xmldiff-2.4/tests/test_patch.py -=================================================================== 
---- xmldiff-2.4.orig/tests/test_patch.py -+++ xmldiff-2.4/tests/test_patch.py -@@ -5,10 +5,19 @@ from lxml import etree - from xmldiff.formatting import DiffFormatter, WS_NONE - from xmldiff.main import diff_trees, diff_texts, patch_text, patch_file - from xmldiff.patch import Patcher, DiffParser --from xmldiff.actions import (UpdateTextIn, InsertNode, MoveNode, -- DeleteNode, UpdateAttrib, InsertAttrib, -- RenameAttrib, DeleteAttrib, UpdateTextAfter, -- RenameNode, InsertComment) -+from xmldiff.actions import ( -+ UpdateTextIn, -+ InsertNode, -+ MoveNode, -+ DeleteNode, -+ UpdateAttrib, -+ InsertAttrib, -+ RenameAttrib, -+ DeleteAttrib, -+ UpdateTextAfter, -+ RenameNode, -+ InsertComment, -+) - - from .testing import compare_elements - -@@ -23,67 +32,84 @@ class PatcherTests(unittest.TestCase): - self.assertEqual(etree.tounicode(tree), end) - - def test_delete_node(self): -- self._test('', -- DeleteNode('/root/deleteme'), -- '') -+ self._test("", DeleteNode("/root/deleteme"), "") - - def test_insert_node(self): -- self._test('', -- InsertNode('/root/anode', 'newnode', 0), -- '') -+ self._test( -+ "", -+ InsertNode("/root/anode", "newnode", 0), -+ "", -+ ) - - def test_rename_node(self): -- self._test('', -- RenameNode('/root/oldname', 'newname'), -- '') -+ self._test( -+ "", -+ RenameNode("/root/oldname", "newname"), -+ "", -+ ) - - def test_move_node(self): -- self._test('', -- MoveNode('/root/anode/moveme', '/root', 1), -- '') -+ self._test( -+ "", -+ MoveNode("/root/anode/moveme", "/root", 1), -+ "", -+ ) - - def test_update_text_in(self): -- self._test('', -- UpdateTextIn('/root/anode', 'New text'), -- 'New text') -+ self._test( -+ "", -+ UpdateTextIn("/root/anode", "New text"), -+ "New text", -+ ) - - def test_update_text_after(self): -- self._test('', -- UpdateTextAfter('/root/anode', 'New text'), -- 'New text') -+ self._test( -+ "", -+ UpdateTextAfter("/root/anode", "New text"), -+ "New text", -+ ) - - def test_update_attrib(self): -- self._test('', -- 
UpdateAttrib('/root/anode', 'attrib', 'newvalue'), -- '') -+ self._test( -+ '', -+ UpdateAttrib("/root/anode", "attrib", "newvalue"), -+ '', -+ ) - - def test_delete_attrib(self): -- self._test('', -- DeleteAttrib('/root/anode', 'attrib'), -- '') -+ self._test( -+ '', -+ DeleteAttrib("/root/anode", "attrib"), -+ "", -+ ) - - def test_insert_attrib(self): -- self._test('', -- InsertAttrib('/root/anode', 'attrib', 'value'), -- '') -+ self._test( -+ "", -+ InsertAttrib("/root/anode", "attrib", "value"), -+ '', -+ ) - - def test_rename_attrib(self): -- self._test('', -- RenameAttrib('/root/anode', 'oldname', 'newname'), -- '') -+ self._test( -+ '', -+ RenameAttrib("/root/anode", "oldname", "newname"), -+ '', -+ ) - - def test_insert_comment(self): -- self._test('', -- InsertComment('/root', 1, "This is a new comment"), -- '') -+ self._test( -+ "", -+ InsertComment("/root", 1, "This is a new comment"), -+ "", -+ ) - - - class DiffPatch(unittest.TestCase): -- - def test_diff_patch(self): - here = os.path.split(__file__)[0] -- lfile = os.path.join(here, 'test_data', 'all_actions.left.xml') -- rfile = os.path.join(here, 'test_data', 'all_actions.right.xml') -+ lfile = os.path.join(here, "test_data", "all_actions.left.xml") -+ rfile = os.path.join(here, "test_data", "all_actions.right.xml") - - left = etree.parse(lfile) - right = etree.parse(rfile) -@@ -96,7 +122,7 @@ class DiffPatch(unittest.TestCase): - compare_elements(result.getroot(), right.getroot()) - - --TEST_DIFF = '''[delete, node] -+TEST_DIFF = """[delete, node] - [insert, target, tag, 0] - [rename, node, tag] - [move, node, target, 0] -@@ -107,67 +133,61 @@ TEST_DIFF = '''[delete, node] - [insert-attribute, node, name, "value"] - [rename-attribute, node, oldname, newname] - [insert-comment, target, 0, "text"] --''' -+""" - - - class ParserTests(unittest.TestCase): -- - def test_make_action(self): - parser = DiffParser() - -- self.assertEqual( -- parser.make_action('[delete, node]'), -- DeleteNode('node') -- ) -+ 
self.assertEqual(parser.make_action("[delete, node]"), DeleteNode("node")) - - self.assertEqual( -- parser.make_action('[insert, target, tag, 0]'), -- InsertNode('target', 'tag', 0) -+ parser.make_action("[insert, target, tag, 0]"), -+ InsertNode("target", "tag", 0), - ) - - self.assertEqual( -- parser.make_action('[rename, node, tag]'), -- RenameNode('node', 'tag') -+ parser.make_action("[rename, node, tag]"), RenameNode("node", "tag") - ) - - self.assertEqual( -- parser.make_action('[move, node, target, 0]'), -- MoveNode('node', 'target', 0) -+ parser.make_action("[move, node, target, 0]"), MoveNode("node", "target", 0) - ) - - self.assertEqual( - parser.make_action('[update-text, node, "text"]'), -- UpdateTextIn('node', 'text') -+ UpdateTextIn("node", "text"), - ) - - self.assertEqual( - parser.make_action('[update-text-after, node, "text"]'), -- UpdateTextAfter('node', 'text') -+ UpdateTextAfter("node", "text"), - ) - - self.assertEqual( - parser.make_action('[update-attribute, node, name, "value"]'), -- UpdateAttrib('node', 'name', 'value') -+ UpdateAttrib("node", "name", "value"), - ) - - self.assertEqual( -- parser.make_action('[delete-attribute, node, name]'), -- DeleteAttrib('node', 'name') -+ parser.make_action("[delete-attribute, node, name]"), -+ DeleteAttrib("node", "name"), - ) - - self.assertEqual( - parser.make_action('[insert-attribute, node, name, "value"]'), -- InsertAttrib('node', 'name', 'value') -+ InsertAttrib("node", "name", "value"), - ) - - self.assertEqual( -- parser.make_action('[rename-attribute, node, oldname, newname]'), -- RenameAttrib('node', 'oldname', 'newname') -+ parser.make_action("[rename-attribute, node, oldname, newname]"), -+ RenameAttrib("node", "oldname", "newname"), - ) - - self.assertEqual( - parser.make_action('[insert-comment, target, 0, "text"]'), -- InsertComment('target', 0, 'text') -+ InsertComment("target", 0, "text"), - ) - - def test_parse(self): -@@ -180,43 +200,42 @@ class ParserTests(unittest.TestCase): - 
parser = DiffParser() - - # Empty file, nothing happens -- actions = list(parser.parse('')) -+ actions = list(parser.parse("")) - self.assertEqual(actions, []) - - # Not a diff raises error - with self.assertRaises(ValueError): -- actions = list(parser.parse('Not a diff')) -+ actions = list(parser.parse("Not a diff")) - - # It should handle lines that have been broken, say in an email - actions = list(parser.parse('[insert-comment, target,\n 0, "text"]')) -- self.assertEqual(actions, [InsertComment('target', 0, 'text')]) -+ self.assertEqual(actions, [InsertComment("target", 0, "text")]) - - # It should not handle broken files - with self.assertRaises(ValueError): -- actions = list(parser.parse('[insert-comment, target,\n')) -+ actions = list(parser.parse("[insert-comment, target,\n")) - - def test_diff_patch(self): - here = os.path.split(__file__)[0] -- lfile = os.path.join(here, 'test_data', 'all_actions.left.xml') -- rfile = os.path.join(here, 'test_data', 'all_actions.right.xml') -+ lfile = os.path.join(here, "test_data", "all_actions.left.xml") -+ rfile = os.path.join(here, "test_data", "all_actions.right.xml") - with open(lfile) as f: - left = f.read() - with open(rfile) as f: - right = f.read() - -- diff = diff_texts(left, right, -- formatter=DiffFormatter(normalize=WS_NONE)) -+ diff = diff_texts(left, right, formatter=DiffFormatter(normalize=WS_NONE)) - result = patch_text(diff, left) - compare_elements(etree.fromstring(result), etree.fromstring(right)) - - def test_patch_stream(self): -- here = os.path.join(os.path.split(__file__)[0], 'test_data') -- xmlfile = os.path.join(here, 'insert-node.left.html') -- patchfile = os.path.join(here, 'insert-node.diff') -+ here = os.path.join(os.path.split(__file__)[0], "test_data") -+ xmlfile = os.path.join(here, "insert-node.left.html") -+ patchfile = os.path.join(here, "insert-node.diff") - result = patch_file(patchfile, xmlfile) - -- expectedfile = os.path.join(here, 'insert-node.right.html') -- with 
open(expectedfile, 'rt') as f: -+ expectedfile = os.path.join(here, "insert-node.right.html") -+ with open(expectedfile) as f: - expected = f.read() - # lxml.etree.parse() will strip ending whitespace - self.assertEqual(result, expected.rstrip()) -Index: xmldiff-2.4/tests/test_utils.py -=================================================================== ---- xmldiff-2.4.orig/tests/test_utils.py -+++ xmldiff-2.4/tests/test_utils.py -@@ -5,9 +5,8 @@ from xmldiff import utils - - - class TraverseTests(unittest.TestCase): -- - def test_post_order(self): -- xml = u''' -+ xml = """ - -
    - First paragraph -@@ -17,19 +16,24 @@ class TraverseTests(unittest.TestCase): -
    -
    -
    --''' -+""" - root = etree.fromstring(xml) - tree = root.getroottree() - res = [tree.getpath(x) for x in utils.post_order_traverse(root)] -- self.assertEqual(res, ['/document/story/section[1]/para', -- '/document/story/section[1]', -- '/document/story/section[2]/para', -- '/document/story/section[2]', -- '/document/story', -- '/document']) -+ self.assertEqual( -+ res, -+ [ -+ "/document/story/section[1]/para", -+ "/document/story/section[1]", -+ "/document/story/section[2]/para", -+ "/document/story/section[2]", -+ "/document/story", -+ "/document", -+ ], -+ ) - - def test_reverse_post_order(self): -- xml = u''' -+ xml = """ - -
    - First paragraph -@@ -39,20 +43,24 @@ class TraverseTests(unittest.TestCase): -
    -
    -
    --''' -+""" - root = etree.fromstring(xml) - tree = root.getroottree() -- res = [tree.getpath(x) for x in -- utils.reverse_post_order_traverse(root)] -- self.assertEqual(res, ['/document/story/section[2]/para', -- '/document/story/section[2]', -- '/document/story/section[1]/para', -- '/document/story/section[1]', -- '/document/story', -- '/document']) -+ res = [tree.getpath(x) for x in utils.reverse_post_order_traverse(root)] -+ self.assertEqual( -+ res, -+ [ -+ "/document/story/section[2]/para", -+ "/document/story/section[2]", -+ "/document/story/section[1]/para", -+ "/document/story/section[1]", -+ "/document/story", -+ "/document", -+ ], -+ ) - - def test_breadth_first(self): -- xml = u''' -+ xml = """ - -
    - First paragraph -@@ -69,68 +77,70 @@ class TraverseTests(unittest.TestCase): -
    -
    -
    --''' -+""" - root = etree.fromstring(xml) - tree = root.getroottree() - res = [tree.getpath(x) for x in utils.breadth_first_traverse(root)] -- self.assertEqual(res, ['/document', -- '/document/story[1]', -- '/document/story[2]', -- '/document/story[1]/section[1]', -- '/document/story[1]/section[2]', -- '/document/story[2]/section', -- '/document/story[1]/section[1]/para[1]', -- '/document/story[1]/section[1]/para[2]', -- '/document/story[1]/section[2]/para[1]', -- '/document/story[1]/section[2]/para[2]', -- '/document/story[2]/section/para', -- '/document/story[1]/section[1]/para[1]/i', -- '/document/story[1]/section[2]/para[2]/b', -- ]) -+ self.assertEqual( -+ res, -+ [ -+ "/document", -+ "/document/story[1]", -+ "/document/story[2]", -+ "/document/story[1]/section[1]", -+ "/document/story[1]/section[2]", -+ "/document/story[2]/section", -+ "/document/story[1]/section[1]/para[1]", -+ "/document/story[1]/section[1]/para[2]", -+ "/document/story[1]/section[2]/para[1]", -+ "/document/story[1]/section[2]/para[2]", -+ "/document/story[2]/section/para", -+ "/document/story[1]/section[1]/para[1]/i", -+ "/document/story[1]/section[2]/para[2]/b", -+ ], -+ ) - - - class LongestCommonSubsequenceTests(unittest.TestCase): -- - def _diff(self, left, right, result): - res = [] - for x, y in utils.longest_common_subsequence(left, right): - self.assertEqual(left[x], right[y]) - res.append(left[x]) - -- self.assertEqual(''.join(res), result) -+ self.assertEqual("".join(res), result) - - def test_lcs(self): - -- self._diff('ABCDEF', 'ABCDEF', 'ABCDEF') -+ self._diff("ABCDEF", "ABCDEF", "ABCDEF") - -- self._diff('ABCDEF', 'GHIJKL', '') -+ self._diff("ABCDEF", "GHIJKL", "") - -- self._diff('ABCDEF', 'ACDQRB', 'ACD') -+ self._diff("ABCDEF", "ACDQRB", "ACD") - -- self._diff('CXCDEFX', 'CDEFX', 'CDEFX') -+ self._diff("CXCDEFX", "CDEFX", "CDEFX") - -- self._diff('HUMAN', 'CHIMPANZEE', 'HMAN') -+ self._diff("HUMAN", "CHIMPANZEE", "HMAN") - -- self._diff('ABCDEF', 'A', 'A') -+ 
self._diff("ABCDEF", "A", "A") - -- self._diff('123AAAAAAAAA', '123BBBBBBBBB', '123') -+ self._diff("123AAAAAAAAA", "123BBBBBBBBB", "123") - -- self._diff('AAAAAAAAA123', 'BBBBBBBBB123', '123') -+ self._diff("AAAAAAAAA123", "BBBBBBBBB123", "123") - -- self._diff('ABCDE1', '1FGHIJK', '1') -+ self._diff("ABCDE1", "1FGHIJK", "1") - - # There are several correct options here, make sure that doesn't - # confuse it, we want just one, and don't care which. -- self._diff('HORSEBACK', 'SNOWFLAKE', 'SAK') -+ self._diff("HORSEBACK", "SNOWFLAKE", "SAK") - - # Empty sequences: -- self._diff('', '', '') -+ self._diff("", "", "") - - - class MakeAsciiTreeTests(unittest.TestCase): -- - def test_make_ascii_tree(self): -- xml = u''' -+ xml = """ - -
    - First paragraph -@@ -140,11 +150,11 @@ class MakeAsciiTreeTests(unittest.TestCa -
    -
    -
    --''' -+""" - root = etree.fromstring(xml) - tree = utils.make_ascii_tree(root) - self.assertEqual( - tree, -- ' document \n story \n section \n para (delete)\n' -- ' section \n para \n diff:insert ' -+ " document \n story \n section \n para (delete)\n" -+ " section \n para \n diff:insert ", - ) -Index: xmldiff-2.4/tests/testing.py -=================================================================== ---- xmldiff-2.4.orig/tests/testing.py -+++ xmldiff-2.4/tests/testing.py -@@ -1,39 +1,37 @@ - import os - --from io import open -- - - def make_case_function(left_filename): -- right_filename = left_filename.replace('.left.', '.right.') -- expected_filename = left_filename.replace('.left.', '.expected.') -+ right_filename = left_filename.replace(".left.", ".right.") -+ expected_filename = left_filename.replace(".left.", ".expected.") - - def test(self): -- with open(expected_filename, 'rt', encoding='utf8') as input_file: -+ with open(expected_filename, encoding="utf8") as input_file: - expected_xml = input_file.read() - - try: - result_xml = self.process(left_filename, right_filename) - except Exception as err: -- if u'.err' not in left_filename: -+ if ".err" not in left_filename: - raise -- result_xml = u'%s: %s' % (err.__class__.__name__, err) -+ result_xml = f"{err.__class__.__name__}: {err}" - - self.assertEqual(expected_xml.strip(), result_xml.strip()) - - return test - - --def generate_filebased_cases(data_dir, test_class, suffix='xml', ignore=()): -+def generate_filebased_cases(data_dir, test_class, suffix="xml", ignore=()): - for left_filename in os.listdir(data_dir): -- if not left_filename.endswith('.left.' + suffix): -+ if not left_filename.endswith(".left." 
+ suffix): - continue - if left_filename in ignore: - continue - - left_filename = os.path.join(data_dir, left_filename) - test_function = make_case_function(left_filename) -- function_name = os.path.split(left_filename)[-1].replace('.', '-') -- test_name = 'test_' + function_name -+ function_name = os.path.split(left_filename)[-1].replace(".", "-") -+ test_name = "test_" + function_name - setattr(test_class, test_name, test_function) - - -Index: xmldiff-2.4/xmldiff/_diff_match_patch_py2.py -=================================================================== ---- xmldiff-2.4.orig/xmldiff/_diff_match_patch_py2.py -+++ xmldiff-2.4/xmldiff/_diff_match_patch_py2.py -@@ -1,6 +1,5 @@ - #!/usr/bin/python2.4 - --from __future__ import division - - """Diff Match and Patch - Copyright 2018 The diff-match-patch Authors. -@@ -25,7 +24,7 @@ Computes the difference between two text - Applies the patch onto another text, allowing for errors. - """ - --__author__ = 'fraser@google.com (Neil Fraser)' -+__author__ = "fraser@google.com (Neil Fraser)" - - import re - import sys -@@ -34,51 +33,51 @@ import urllib - - - class diff_match_patch: -- """Class containing the diff, match and patch methods. -+ """Class containing the diff, match and patch methods. - - Also contains the behaviour settings. - """ - -- def __init__(self): -- """Inits a diff_match_patch object with default settings. -+ def __init__(self): -+ """Inits a diff_match_patch object with default settings. - Redefine these in your program to override the defaults. - """ - -- # Number of seconds to map a diff before giving up (0 for infinity). -- self.Diff_Timeout = 1.0 -- # Cost of an empty edit operation in terms of edit characters. -- self.Diff_EditCost = 4 -- # At what point is no match declared (0.0 = perfection, 1.0 = very loose). -- self.Match_Threshold = 0.5 -- # How far to search for a match (0 = exact location, 1000+ = broad match). 
-- # A match this many characters away from the expected location will add -- # 1.0 to the score (0.0 is a perfect match). -- self.Match_Distance = 1000 -- # When deleting a large block of text (over ~64 characters), how close do -- # the contents have to be to match the expected contents. (0.0 = perfection, -- # 1.0 = very loose). Note that Match_Threshold controls how closely the -- # end points of a delete need to match. -- self.Patch_DeleteThreshold = 0.5 -- # Chunk size for context length. -- self.Patch_Margin = 4 -- -- # The number of bits in an int. -- # Python has no maximum, thus to disable patch splitting set to 0. -- # However to avoid long patches in certain pathological cases, use 32. -- # Multiple short patches (using native ints) are much faster than long ones. -- self.Match_MaxBits = 32 -- -- # DIFF FUNCTIONS -- -- # The data structure representing a diff is an array of tuples: -- # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")] -- # which means: delete "Hello", add "Goodbye" and keep " world." -- DIFF_DELETE = -1 -- DIFF_INSERT = 1 -- DIFF_EQUAL = 0 -+ # Number of seconds to map a diff before giving up (0 for infinity). -+ self.Diff_Timeout = 1.0 -+ # Cost of an empty edit operation in terms of edit characters. -+ self.Diff_EditCost = 4 -+ # At what point is no match declared (0.0 = perfection, 1.0 = very loose). -+ self.Match_Threshold = 0.5 -+ # How far to search for a match (0 = exact location, 1000+ = broad match). -+ # A match this many characters away from the expected location will add -+ # 1.0 to the score (0.0 is a perfect match). -+ self.Match_Distance = 1000 -+ # When deleting a large block of text (over ~64 characters), how close do -+ # the contents have to be to match the expected contents. (0.0 = perfection, -+ # 1.0 = very loose). Note that Match_Threshold controls how closely the -+ # end points of a delete need to match. -+ self.Patch_DeleteThreshold = 0.5 -+ # Chunk size for context length. 
-+ self.Patch_Margin = 4 -+ -+ # The number of bits in an int. -+ # Python has no maximum, thus to disable patch splitting set to 0. -+ # However to avoid long patches in certain pathological cases, use 32. -+ # Multiple short patches (using native ints) are much faster than long ones. -+ self.Match_MaxBits = 32 -+ -+ # DIFF FUNCTIONS -+ -+ # The data structure representing a diff is an array of tuples: -+ # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")] -+ # which means: delete "Hello", add "Goodbye" and keep " world." -+ DIFF_DELETE = -1 -+ DIFF_INSERT = 1 -+ DIFF_EQUAL = 0 - -- def diff_main(self, text1, text2, checklines=True, deadline=None): -- """Find the differences between two texts. Simplifies the problem by -+ def diff_main(self, text1, text2, checklines=True, deadline=None): -+ """Find the differences between two texts. Simplifies the problem by - stripping any common prefix or suffix off the texts before diffing. - - Args: -@@ -93,52 +92,52 @@ class diff_match_patch: - Returns: - Array of changes. - """ -- # Set a deadline by which time the diff must be complete. -- if deadline == None: -- # Unlike in most languages, Python counts time in seconds. -- if self.Diff_Timeout <= 0: -- deadline = sys.maxint -- else: -- deadline = time.time() + self.Diff_Timeout -- -- # Check for null inputs. -- if text1 == None or text2 == None: -- raise ValueError("Null inputs. (diff_main)") -- -- # Check for equality (speedup). -- if text1 == text2: -- if text1: -- return [(self.DIFF_EQUAL, text1)] -- return [] -- -- # Trim off common prefix (speedup). -- commonlength = self.diff_commonPrefix(text1, text2) -- commonprefix = text1[:commonlength] -- text1 = text1[commonlength:] -- text2 = text2[commonlength:] -- -- # Trim off common suffix (speedup). 
-- commonlength = self.diff_commonSuffix(text1, text2) -- if commonlength == 0: -- commonsuffix = '' -- else: -- commonsuffix = text1[-commonlength:] -- text1 = text1[:-commonlength] -- text2 = text2[:-commonlength] -- -- # Compute the diff on the middle block. -- diffs = self.diff_compute(text1, text2, checklines, deadline) -- -- # Restore the prefix and suffix. -- if commonprefix: -- diffs[:0] = [(self.DIFF_EQUAL, commonprefix)] -- if commonsuffix: -- diffs.append((self.DIFF_EQUAL, commonsuffix)) -- self.diff_cleanupMerge(diffs) -- return diffs -+ # Set a deadline by which time the diff must be complete. -+ if deadline == None: -+ # Unlike in most languages, Python counts time in seconds. -+ if self.Diff_Timeout <= 0: -+ deadline = sys.maxint -+ else: -+ deadline = time.time() + self.Diff_Timeout -+ -+ # Check for null inputs. -+ if text1 == None or text2 == None: -+ raise ValueError("Null inputs. (diff_main)") -+ -+ # Check for equality (speedup). -+ if text1 == text2: -+ if text1: -+ return [(self.DIFF_EQUAL, text1)] -+ return [] -+ -+ # Trim off common prefix (speedup). -+ commonlength = self.diff_commonPrefix(text1, text2) -+ commonprefix = text1[:commonlength] -+ text1 = text1[commonlength:] -+ text2 = text2[commonlength:] -+ -+ # Trim off common suffix (speedup). -+ commonlength = self.diff_commonSuffix(text1, text2) -+ if commonlength == 0: -+ commonsuffix = "" -+ else: -+ commonsuffix = text1[-commonlength:] -+ text1 = text1[:-commonlength] -+ text2 = text2[:-commonlength] -+ -+ # Compute the diff on the middle block. -+ diffs = self.diff_compute(text1, text2, checklines, deadline) -+ -+ # Restore the prefix and suffix. -+ if commonprefix: -+ diffs[:0] = [(self.DIFF_EQUAL, commonprefix)] -+ if commonsuffix: -+ diffs.append((self.DIFF_EQUAL, commonsuffix)) -+ self.diff_cleanupMerge(diffs) -+ return diffs - -- def diff_compute(self, text1, text2, checklines, deadline): -- """Find the differences between two texts. 
Assumes that the texts do not -+ def diff_compute(self, text1, text2, checklines, deadline): -+ """Find the differences between two texts. Assumes that the texts do not - have any common prefix or suffix. - - Args: -@@ -152,52 +151,55 @@ class diff_match_patch: - Returns: - Array of changes. - """ -- if not text1: -- # Just add some text (speedup). -- return [(self.DIFF_INSERT, text2)] -- -- if not text2: -- # Just delete some text (speedup). -- return [(self.DIFF_DELETE, text1)] -- -- if len(text1) > len(text2): -- (longtext, shorttext) = (text1, text2) -- else: -- (shorttext, longtext) = (text1, text2) -- i = longtext.find(shorttext) -- if i != -1: -- # Shorter text is inside the longer text (speedup). -- diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext), -- (self.DIFF_INSERT, longtext[i + len(shorttext):])] -- # Swap insertions for deletions if diff is reversed. -- if len(text1) > len(text2): -- diffs[0] = (self.DIFF_DELETE, diffs[0][1]) -- diffs[2] = (self.DIFF_DELETE, diffs[2][1]) -- return diffs -- -- if len(shorttext) == 1: -- # Single character string. -- # After the previous speedup, the character can't be an equality. -- return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] -- -- # Check to see if the problem can be split in two. -- hm = self.diff_halfMatch(text1, text2) -- if hm: -- # A half-match was found, sort out the return data. -- (text1_a, text1_b, text2_a, text2_b, mid_common) = hm -- # Send both pairs off for separate processing. -- diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline) -- diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline) -- # Merge the results. -- return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b -+ if not text1: -+ # Just add some text (speedup). -+ return [(self.DIFF_INSERT, text2)] -+ -+ if not text2: -+ # Just delete some text (speedup). 
-+ return [(self.DIFF_DELETE, text1)] - -- if checklines and len(text1) > 100 and len(text2) > 100: -- return self.diff_lineMode(text1, text2, deadline) -+ if len(text1) > len(text2): -+ (longtext, shorttext) = (text1, text2) -+ else: -+ (shorttext, longtext) = (text1, text2) -+ i = longtext.find(shorttext) -+ if i != -1: -+ # Shorter text is inside the longer text (speedup). -+ diffs = [ -+ (self.DIFF_INSERT, longtext[:i]), -+ (self.DIFF_EQUAL, shorttext), -+ (self.DIFF_INSERT, longtext[i + len(shorttext) :]), -+ ] -+ # Swap insertions for deletions if diff is reversed. -+ if len(text1) > len(text2): -+ diffs[0] = (self.DIFF_DELETE, diffs[0][1]) -+ diffs[2] = (self.DIFF_DELETE, diffs[2][1]) -+ return diffs -+ -+ if len(shorttext) == 1: -+ # Single character string. -+ # After the previous speedup, the character can't be an equality. -+ return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] -+ -+ # Check to see if the problem can be split in two. -+ hm = self.diff_halfMatch(text1, text2) -+ if hm: -+ # A half-match was found, sort out the return data. -+ (text1_a, text1_b, text2_a, text2_b, mid_common) = hm -+ # Send both pairs off for separate processing. -+ diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline) -+ diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline) -+ # Merge the results. -+ return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b -+ -+ if checklines and len(text1) > 100 and len(text2) > 100: -+ return self.diff_lineMode(text1, text2, deadline) - -- return self.diff_bisect(text1, text2, deadline) -+ return self.diff_bisect(text1, text2, deadline) - -- def diff_lineMode(self, text1, text2, deadline): -- """Do a quick line-level diff on both strings, then rediff the parts for -+ def diff_lineMode(self, text1, text2, deadline): -+ """Do a quick line-level diff on both strings, then rediff the parts for - greater accuracy. - This speedup can produce non-minimal diffs. 
- -@@ -210,51 +212,51 @@ class diff_match_patch: - Array of changes. - """ - -- # Scan the text on a line-by-line basis first. -- (text1, text2, linearray) = self.diff_linesToChars(text1, text2) -+ # Scan the text on a line-by-line basis first. -+ (text1, text2, linearray) = self.diff_linesToChars(text1, text2) - -- diffs = self.diff_main(text1, text2, False, deadline) -+ diffs = self.diff_main(text1, text2, False, deadline) - -- # Convert the diff back to original text. -- self.diff_charsToLines(diffs, linearray) -- # Eliminate freak matches (e.g. blank lines) -- self.diff_cleanupSemantic(diffs) -- -- # Rediff any replacement blocks, this time character-by-character. -- # Add a dummy entry at the end. -- diffs.append((self.DIFF_EQUAL, '')) -- pointer = 0 -- count_delete = 0 -- count_insert = 0 -- text_delete = '' -- text_insert = '' -- while pointer < len(diffs): -- if diffs[pointer][0] == self.DIFF_INSERT: -- count_insert += 1 -- text_insert += diffs[pointer][1] -- elif diffs[pointer][0] == self.DIFF_DELETE: -- count_delete += 1 -- text_delete += diffs[pointer][1] -- elif diffs[pointer][0] == self.DIFF_EQUAL: -- # Upon reaching an equality, check for prior redundancies. -- if count_delete >= 1 and count_insert >= 1: -- # Delete the offending records and add the merged ones. -- subDiff = self.diff_main(text_delete, text_insert, False, deadline) -- diffs[pointer - count_delete - count_insert : pointer] = subDiff -- pointer = pointer - count_delete - count_insert + len(subDiff) -- count_insert = 0 -+ # Convert the diff back to original text. -+ self.diff_charsToLines(diffs, linearray) -+ # Eliminate freak matches (e.g. blank lines) -+ self.diff_cleanupSemantic(diffs) -+ -+ # Rediff any replacement blocks, this time character-by-character. -+ # Add a dummy entry at the end. 
-+ diffs.append((self.DIFF_EQUAL, "")) -+ pointer = 0 - count_delete = 0 -- text_delete = '' -- text_insert = '' -+ count_insert = 0 -+ text_delete = "" -+ text_insert = "" -+ while pointer < len(diffs): -+ if diffs[pointer][0] == self.DIFF_INSERT: -+ count_insert += 1 -+ text_insert += diffs[pointer][1] -+ elif diffs[pointer][0] == self.DIFF_DELETE: -+ count_delete += 1 -+ text_delete += diffs[pointer][1] -+ elif diffs[pointer][0] == self.DIFF_EQUAL: -+ # Upon reaching an equality, check for prior redundancies. -+ if count_delete >= 1 and count_insert >= 1: -+ # Delete the offending records and add the merged ones. -+ subDiff = self.diff_main(text_delete, text_insert, False, deadline) -+ diffs[pointer - count_delete - count_insert : pointer] = subDiff -+ pointer = pointer - count_delete - count_insert + len(subDiff) -+ count_insert = 0 -+ count_delete = 0 -+ text_delete = "" -+ text_insert = "" - -- pointer += 1 -+ pointer += 1 - -- diffs.pop() # Remove the dummy entry at the end. -+ diffs.pop() # Remove the dummy entry at the end. - -- return diffs -+ return diffs - -- def diff_bisect(self, text1, text2, deadline): -- """Find the 'middle snake' of a diff, split the problem in two -+ def diff_bisect(self, text1, text2, deadline): -+ """Find the 'middle snake' of a diff, split the problem in two - and return the recursively constructed diff. - See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. - -@@ -267,96 +269,98 @@ class diff_match_patch: - Array of diff tuples. - """ - -- # Cache the text lengths to prevent multiple calls. -- text1_length = len(text1) -- text2_length = len(text2) -- max_d = (text1_length + text2_length + 1) // 2 -- v_offset = max_d -- v_length = 2 * max_d -- v1 = [-1] * v_length -- v1[v_offset + 1] = 0 -- v2 = v1[:] -- delta = text1_length - text2_length -- # If the total number of characters is odd, then the front path will -- # collide with the reverse path. 
-- front = (delta % 2 != 0) -- # Offsets for start and end of k loop. -- # Prevents mapping of space beyond the grid. -- k1start = 0 -- k1end = 0 -- k2start = 0 -- k2end = 0 -- for d in xrange(max_d): -- # Bail out if deadline is reached. -- if time.time() > deadline: -- break -- -- # Walk the front path one step. -- for k1 in xrange(-d + k1start, d + 1 - k1end, 2): -- k1_offset = v_offset + k1 -- if k1 == -d or (k1 != d and -- v1[k1_offset - 1] < v1[k1_offset + 1]): -- x1 = v1[k1_offset + 1] -- else: -- x1 = v1[k1_offset - 1] + 1 -- y1 = x1 - k1 -- while (x1 < text1_length and y1 < text2_length and -- text1[x1] == text2[y1]): -- x1 += 1 -- y1 += 1 -- v1[k1_offset] = x1 -- if x1 > text1_length: -- # Ran off the right of the graph. -- k1end += 2 -- elif y1 > text2_length: -- # Ran off the bottom of the graph. -- k1start += 2 -- elif front: -- k2_offset = v_offset + delta - k1 -- if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1: -- # Mirror x2 onto top-left coordinate system. -- x2 = text1_length - v2[k2_offset] -- if x1 >= x2: -- # Overlap detected. -- return self.diff_bisectSplit(text1, text2, x1, y1, deadline) -- -- # Walk the reverse path one step. -- for k2 in xrange(-d + k2start, d + 1 - k2end, 2): -- k2_offset = v_offset + k2 -- if k2 == -d or (k2 != d and -- v2[k2_offset - 1] < v2[k2_offset + 1]): -- x2 = v2[k2_offset + 1] -- else: -- x2 = v2[k2_offset - 1] + 1 -- y2 = x2 - k2 -- while (x2 < text1_length and y2 < text2_length and -- text1[-x2 - 1] == text2[-y2 - 1]): -- x2 += 1 -- y2 += 1 -- v2[k2_offset] = x2 -- if x2 > text1_length: -- # Ran off the left of the graph. -- k2end += 2 -- elif y2 > text2_length: -- # Ran off the top of the graph. -- k2start += 2 -- elif not front: -- k1_offset = v_offset + delta - k2 -- if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1: -- x1 = v1[k1_offset] -- y1 = v_offset + x1 - k1_offset -- # Mirror x2 onto top-left coordinate system. 
-- x2 = text1_length - x2 -- if x1 >= x2: -- # Overlap detected. -- return self.diff_bisectSplit(text1, text2, x1, y1, deadline) -- -- # Diff took too long and hit the deadline or -- # number of diffs equals number of characters, no commonality at all. -- return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] -+ # Cache the text lengths to prevent multiple calls. -+ text1_length = len(text1) -+ text2_length = len(text2) -+ max_d = (text1_length + text2_length + 1) // 2 -+ v_offset = max_d -+ v_length = 2 * max_d -+ v1 = [-1] * v_length -+ v1[v_offset + 1] = 0 -+ v2 = v1[:] -+ delta = text1_length - text2_length -+ # If the total number of characters is odd, then the front path will -+ # collide with the reverse path. -+ front = delta % 2 != 0 -+ # Offsets for start and end of k loop. -+ # Prevents mapping of space beyond the grid. -+ k1start = 0 -+ k1end = 0 -+ k2start = 0 -+ k2end = 0 -+ for d in xrange(max_d): -+ # Bail out if deadline is reached. -+ if time.time() > deadline: -+ break -+ -+ # Walk the front path one step. -+ for k1 in xrange(-d + k1start, d + 1 - k1end, 2): -+ k1_offset = v_offset + k1 -+ if k1 == -d or (k1 != d and v1[k1_offset - 1] < v1[k1_offset + 1]): -+ x1 = v1[k1_offset + 1] -+ else: -+ x1 = v1[k1_offset - 1] + 1 -+ y1 = x1 - k1 -+ while ( -+ x1 < text1_length and y1 < text2_length and text1[x1] == text2[y1] -+ ): -+ x1 += 1 -+ y1 += 1 -+ v1[k1_offset] = x1 -+ if x1 > text1_length: -+ # Ran off the right of the graph. -+ k1end += 2 -+ elif y1 > text2_length: -+ # Ran off the bottom of the graph. -+ k1start += 2 -+ elif front: -+ k2_offset = v_offset + delta - k1 -+ if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1: -+ # Mirror x2 onto top-left coordinate system. -+ x2 = text1_length - v2[k2_offset] -+ if x1 >= x2: -+ # Overlap detected. -+ return self.diff_bisectSplit(text1, text2, x1, y1, deadline) -+ -+ # Walk the reverse path one step. 
-+ for k2 in xrange(-d + k2start, d + 1 - k2end, 2): -+ k2_offset = v_offset + k2 -+ if k2 == -d or (k2 != d and v2[k2_offset - 1] < v2[k2_offset + 1]): -+ x2 = v2[k2_offset + 1] -+ else: -+ x2 = v2[k2_offset - 1] + 1 -+ y2 = x2 - k2 -+ while ( -+ x2 < text1_length -+ and y2 < text2_length -+ and text1[-x2 - 1] == text2[-y2 - 1] -+ ): -+ x2 += 1 -+ y2 += 1 -+ v2[k2_offset] = x2 -+ if x2 > text1_length: -+ # Ran off the left of the graph. -+ k2end += 2 -+ elif y2 > text2_length: -+ # Ran off the top of the graph. -+ k2start += 2 -+ elif not front: -+ k1_offset = v_offset + delta - k2 -+ if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1: -+ x1 = v1[k1_offset] -+ y1 = v_offset + x1 - k1_offset -+ # Mirror x2 onto top-left coordinate system. -+ x2 = text1_length - x2 -+ if x1 >= x2: -+ # Overlap detected. -+ return self.diff_bisectSplit(text1, text2, x1, y1, deadline) -+ -+ # Diff took too long and hit the deadline or -+ # number of diffs equals number of characters, no commonality at all. -+ return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] - -- def diff_bisectSplit(self, text1, text2, x, y, deadline): -- """Given the location of the 'middle snake', split the diff in two parts -+ def diff_bisectSplit(self, text1, text2, x, y, deadline): -+ """Given the location of the 'middle snake', split the diff in two parts - and recurse. - - Args: -@@ -369,19 +373,19 @@ class diff_match_patch: - Returns: - Array of diff tuples. - """ -- text1a = text1[:x] -- text2a = text2[:y] -- text1b = text1[x:] -- text2b = text2[y:] -- -- # Compute both diffs serially. -- diffs = self.diff_main(text1a, text2a, False, deadline) -- diffsb = self.diff_main(text1b, text2b, False, deadline) -+ text1a = text1[:x] -+ text2a = text2[:y] -+ text1b = text1[x:] -+ text2b = text2[y:] -+ -+ # Compute both diffs serially. 
-+ diffs = self.diff_main(text1a, text2a, False, deadline) -+ diffsb = self.diff_main(text1b, text2b, False, deadline) - -- return diffs + diffsb -+ return diffs + diffsb - -- def diff_linesToChars(self, text1, text2): -- """Split two texts into an array of strings. Reduce the texts to a string -+ def diff_linesToChars(self, text1, text2): -+ """Split two texts into an array of strings. Reduce the texts to a string - of hashes where each Unicode character represents one line. - - Args: -@@ -393,15 +397,15 @@ class diff_match_patch: - the array of unique strings. The zeroth element of the array of unique - strings is intentionally blank. - """ -- lineArray = [] # e.g. lineArray[4] == "Hello\n" -- lineHash = {} # e.g. lineHash["Hello\n"] == 4 -+ lineArray = [] # e.g. lineArray[4] == "Hello\n" -+ lineHash = {} # e.g. lineHash["Hello\n"] == 4 - -- # "\x00" is a valid character, but various debuggers don't like it. -- # So we'll insert a junk entry to avoid generating a null character. -- lineArray.append('') -+ # "\x00" is a valid character, but various debuggers don't like it. -+ # So we'll insert a junk entry to avoid generating a null character. -+ lineArray.append("") - -- def diff_linesToCharsMunge(text): -- """Split a text into an array of strings. Reduce the texts to a string -+ def diff_linesToCharsMunge(text): -+ """Split a text into an array of strings. Reduce the texts to a string - of hashes where each Unicode character represents one line. - Modifies linearray and linehash through being a closure. - -@@ -411,54 +415,54 @@ class diff_match_patch: - Returns: - Encoded string. - """ -- chars = [] -- # Walk the text, pulling out a substring for each line. -- # text.split('\n') would would temporarily double our memory footprint. -- # Modifying text would create many large strings to garbage collect. 
-- lineStart = 0 -- lineEnd = -1 -- while lineEnd < len(text) - 1: -- lineEnd = text.find('\n', lineStart) -- if lineEnd == -1: -- lineEnd = len(text) - 1 -- line = text[lineStart:lineEnd + 1] -- -- if line in lineHash: -- chars.append(unichr(lineHash[line])) -- else: -- if len(lineArray) == maxLines: -- # Bail out at 65535 because unichr(65536) throws. -- line = text[lineStart:] -- lineEnd = len(text) -- lineArray.append(line) -- lineHash[line] = len(lineArray) - 1 -- chars.append(unichr(len(lineArray) - 1)) -- lineStart = lineEnd + 1 -- return "".join(chars) -- -- # Allocate 2/3rds of the space for text1, the rest for text2. -- maxLines = 40000 -- chars1 = diff_linesToCharsMunge(text1) -- maxLines = 65535 -- chars2 = diff_linesToCharsMunge(text2) -- return (chars1, chars2, lineArray) -+ chars = [] -+ # Walk the text, pulling out a substring for each line. -+ # text.split('\n') would would temporarily double our memory footprint. -+ # Modifying text would create many large strings to garbage collect. -+ lineStart = 0 -+ lineEnd = -1 -+ while lineEnd < len(text) - 1: -+ lineEnd = text.find("\n", lineStart) -+ if lineEnd == -1: -+ lineEnd = len(text) - 1 -+ line = text[lineStart : lineEnd + 1] -+ -+ if line in lineHash: -+ chars.append(unichr(lineHash[line])) -+ else: -+ if len(lineArray) == maxLines: -+ # Bail out at 65535 because unichr(65536) throws. -+ line = text[lineStart:] -+ lineEnd = len(text) -+ lineArray.append(line) -+ lineHash[line] = len(lineArray) - 1 -+ chars.append(unichr(len(lineArray) - 1)) -+ lineStart = lineEnd + 1 -+ return "".join(chars) -+ -+ # Allocate 2/3rds of the space for text1, the rest for text2. 
-+ maxLines = 40000 -+ chars1 = diff_linesToCharsMunge(text1) -+ maxLines = 65535 -+ chars2 = diff_linesToCharsMunge(text2) -+ return (chars1, chars2, lineArray) - -- def diff_charsToLines(self, diffs, lineArray): -- """Rehydrate the text in a diff from a string of line hashes to real lines -+ def diff_charsToLines(self, diffs, lineArray): -+ """Rehydrate the text in a diff from a string of line hashes to real lines - of text. - - Args: - diffs: Array of diff tuples. - lineArray: Array of unique strings. - """ -- for i in xrange(len(diffs)): -- text = [] -- for char in diffs[i][1]: -- text.append(lineArray[ord(char)]) -- diffs[i] = (diffs[i][0], "".join(text)) -+ for i in xrange(len(diffs)): -+ text = [] -+ for char in diffs[i][1]: -+ text.append(lineArray[ord(char)]) -+ diffs[i] = (diffs[i][0], "".join(text)) - -- def diff_commonPrefix(self, text1, text2): -- """Determine the common prefix of two strings. -+ def diff_commonPrefix(self, text1, text2): -+ """Determine the common prefix of two strings. - - Args: - text1: First string. -@@ -467,26 +471,26 @@ class diff_match_patch: - Returns: - The number of characters common to the start of each string. - """ -- # Quick check for common null cases. -- if not text1 or not text2 or text1[0] != text2[0]: -- return 0 -- # Binary search. -- # Performance analysis: https://neil.fraser.name/news/2007/10/09/ -- pointermin = 0 -- pointermax = min(len(text1), len(text2)) -- pointermid = pointermax -- pointerstart = 0 -- while pointermin < pointermid: -- if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]: -- pointermin = pointermid -- pointerstart = pointermin -- else: -- pointermax = pointermid -- pointermid = (pointermax - pointermin) // 2 + pointermin -- return pointermid -+ # Quick check for common null cases. -+ if not text1 or not text2 or text1[0] != text2[0]: -+ return 0 -+ # Binary search. 
-+ # Performance analysis: https://neil.fraser.name/news/2007/10/09/ -+ pointermin = 0 -+ pointermax = min(len(text1), len(text2)) -+ pointermid = pointermax -+ pointerstart = 0 -+ while pointermin < pointermid: -+ if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]: -+ pointermin = pointermid -+ pointerstart = pointermin -+ else: -+ pointermax = pointermid -+ pointermid = (pointermax - pointermin) // 2 + pointermin -+ return pointermid - -- def diff_commonSuffix(self, text1, text2): -- """Determine the common suffix of two strings. -+ def diff_commonSuffix(self, text1, text2): -+ """Determine the common suffix of two strings. - - Args: - text1: First string. -@@ -495,27 +499,29 @@ class diff_match_patch: - Returns: - The number of characters common to the end of each string. - """ -- # Quick check for common null cases. -- if not text1 or not text2 or text1[-1] != text2[-1]: -- return 0 -- # Binary search. -- # Performance analysis: https://neil.fraser.name/news/2007/10/09/ -- pointermin = 0 -- pointermax = min(len(text1), len(text2)) -- pointermid = pointermax -- pointerend = 0 -- while pointermin < pointermid: -- if (text1[-pointermid:len(text1) - pointerend] == -- text2[-pointermid:len(text2) - pointerend]): -- pointermin = pointermid -- pointerend = pointermin -- else: -- pointermax = pointermid -- pointermid = (pointermax - pointermin) // 2 + pointermin -- return pointermid -+ # Quick check for common null cases. -+ if not text1 or not text2 or text1[-1] != text2[-1]: -+ return 0 -+ # Binary search. 
-+ # Performance analysis: https://neil.fraser.name/news/2007/10/09/ -+ pointermin = 0 -+ pointermax = min(len(text1), len(text2)) -+ pointermid = pointermax -+ pointerend = 0 -+ while pointermin < pointermid: -+ if ( -+ text1[-pointermid : len(text1) - pointerend] -+ == text2[-pointermid : len(text2) - pointerend] -+ ): -+ pointermin = pointermid -+ pointerend = pointermin -+ else: -+ pointermax = pointermid -+ pointermid = (pointermax - pointermin) // 2 + pointermin -+ return pointermid - -- def diff_commonOverlap(self, text1, text2): -- """Determine if the suffix of one string is the prefix of another. -+ def diff_commonOverlap(self, text1, text2): -+ """Determine if the suffix of one string is the prefix of another. - - Args: - text1 First string. -@@ -525,39 +531,39 @@ class diff_match_patch: - The number of characters common to the end of the first - string and the start of the second string. - """ -- # Cache the text lengths to prevent multiple calls. -- text1_length = len(text1) -- text2_length = len(text2) -- # Eliminate the null case. -- if text1_length == 0 or text2_length == 0: -- return 0 -- # Truncate the longer string. -- if text1_length > text2_length: -- text1 = text1[-text2_length:] -- elif text1_length < text2_length: -- text2 = text2[:text1_length] -- text_length = min(text1_length, text2_length) -- # Quick check for the worst case. -- if text1 == text2: -- return text_length -- -- # Start by looking for a single character match -- # and increase length until no match is found. -- # Performance analysis: https://neil.fraser.name/news/2010/11/04/ -- best = 0 -- length = 1 -- while True: -- pattern = text1[-length:] -- found = text2.find(pattern) -- if found == -1: -- return best -- length += found -- if found == 0 or text1[-length:] == text2[:length]: -- best = length -- length += 1 -+ # Cache the text lengths to prevent multiple calls. -+ text1_length = len(text1) -+ text2_length = len(text2) -+ # Eliminate the null case. 
-+ if text1_length == 0 or text2_length == 0: -+ return 0 -+ # Truncate the longer string. -+ if text1_length > text2_length: -+ text1 = text1[-text2_length:] -+ elif text1_length < text2_length: -+ text2 = text2[:text1_length] -+ text_length = min(text1_length, text2_length) -+ # Quick check for the worst case. -+ if text1 == text2: -+ return text_length -+ -+ # Start by looking for a single character match -+ # and increase length until no match is found. -+ # Performance analysis: https://neil.fraser.name/news/2010/11/04/ -+ best = 0 -+ length = 1 -+ while True: -+ pattern = text1[-length:] -+ found = text2.find(pattern) -+ if found == -1: -+ return best -+ length += found -+ if found == 0 or text1[-length:] == text2[:length]: -+ best = length -+ length += 1 - -- def diff_halfMatch(self, text1, text2): -- """Do the two texts share a substring which is at least half the length of -+ def diff_halfMatch(self, text1, text2): -+ """Do the two texts share a substring which is at least half the length of - the longer text? - This speedup can produce non-minimal diffs. - -@@ -570,18 +576,18 @@ class diff_match_patch: - the prefix of text2, the suffix of text2 and the common middle. Or None - if there was no match. - """ -- if self.Diff_Timeout <= 0: -- # Don't risk returning a non-optimal diff if we have unlimited time. -- return None -- if len(text1) > len(text2): -- (longtext, shorttext) = (text1, text2) -- else: -- (shorttext, longtext) = (text1, text2) -- if len(longtext) < 4 or len(shorttext) * 2 < len(longtext): -- return None # Pointless. -+ if self.Diff_Timeout <= 0: -+ # Don't risk returning a non-optimal diff if we have unlimited time. -+ return None -+ if len(text1) > len(text2): -+ (longtext, shorttext) = (text1, text2) -+ else: -+ (shorttext, longtext) = (text1, text2) -+ if len(longtext) < 4 or len(shorttext) * 2 < len(longtext): -+ return None # Pointless. 
- -- def diff_halfMatchI(longtext, shorttext, i): -- """Does a substring of shorttext exist within longtext such that the -+ def diff_halfMatchI(longtext, shorttext, i): -+ """Does a substring of shorttext exist within longtext such that the - substring is at least half the length of longtext? - Closure, but does not reference any external variables. - -@@ -595,148 +601,181 @@ class diff_match_patch: - longtext, the prefix of shorttext, the suffix of shorttext and the - common middle. Or None if there was no match. - """ -- seed = longtext[i:i + len(longtext) // 4] -- best_common = '' -- j = shorttext.find(seed) -- while j != -1: -- prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) -- suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) -- if len(best_common) < suffixLength + prefixLength: -- best_common = (shorttext[j - suffixLength:j] + -- shorttext[j:j + prefixLength]) -- best_longtext_a = longtext[:i - suffixLength] -- best_longtext_b = longtext[i + prefixLength:] -- best_shorttext_a = shorttext[:j - suffixLength] -- best_shorttext_b = shorttext[j + prefixLength:] -- j = shorttext.find(seed, j + 1) -- -- if len(best_common) * 2 >= len(longtext): -- return (best_longtext_a, best_longtext_b, -- best_shorttext_a, best_shorttext_b, best_common) -- else: -- return None -- -- # First check if the second quarter is the seed for a half-match. -- hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4) -- # Check again based on the third quarter. -- hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2) -- if not hm1 and not hm2: -- return None -- elif not hm2: -- hm = hm1 -- elif not hm1: -- hm = hm2 -- else: -- # Both matched. Select the longest. -- if len(hm1[4]) > len(hm2[4]): -- hm = hm1 -- else: -- hm = hm2 -- -- # A half-match was found, sort out the return data. 
-- if len(text1) > len(text2): -- (text1_a, text1_b, text2_a, text2_b, mid_common) = hm -- else: -- (text2_a, text2_b, text1_a, text1_b, mid_common) = hm -- return (text1_a, text1_b, text2_a, text2_b, mid_common) -+ seed = longtext[i : i + len(longtext) // 4] -+ best_common = "" -+ j = shorttext.find(seed) -+ while j != -1: -+ prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) -+ suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) -+ if len(best_common) < suffixLength + prefixLength: -+ best_common = ( -+ shorttext[j - suffixLength : j] -+ + shorttext[j : j + prefixLength] -+ ) -+ best_longtext_a = longtext[: i - suffixLength] -+ best_longtext_b = longtext[i + prefixLength :] -+ best_shorttext_a = shorttext[: j - suffixLength] -+ best_shorttext_b = shorttext[j + prefixLength :] -+ j = shorttext.find(seed, j + 1) -+ -+ if len(best_common) * 2 >= len(longtext): -+ return ( -+ best_longtext_a, -+ best_longtext_b, -+ best_shorttext_a, -+ best_shorttext_b, -+ best_common, -+ ) -+ else: -+ return None -+ -+ # First check if the second quarter is the seed for a half-match. -+ hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4) -+ # Check again based on the third quarter. -+ hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2) -+ if not hm1 and not hm2: -+ return None -+ elif not hm2: -+ hm = hm1 -+ elif not hm1: -+ hm = hm2 -+ else: -+ # Both matched. Select the longest. -+ if len(hm1[4]) > len(hm2[4]): -+ hm = hm1 -+ else: -+ hm = hm2 - -- def diff_cleanupSemantic(self, diffs): -- """Reduce the number of edits by eliminating semantically trivial -+ # A half-match was found, sort out the return data. 
-+ if len(text1) > len(text2): -+ (text1_a, text1_b, text2_a, text2_b, mid_common) = hm -+ else: -+ (text2_a, text2_b, text1_a, text1_b, mid_common) = hm -+ return (text1_a, text1_b, text2_a, text2_b, mid_common) -+ -+ def diff_cleanupSemantic(self, diffs): -+ """Reduce the number of edits by eliminating semantically trivial - equalities. - - Args: - diffs: Array of diff tuples. - """ -- changes = False -- equalities = [] # Stack of indices where equalities are found. -- lastEquality = None # Always equal to diffs[equalities[-1]][1] -- pointer = 0 # Index of current position. -- # Number of chars that changed prior to the equality. -- length_insertions1, length_deletions1 = 0, 0 -- # Number of chars that changed after the equality. -- length_insertions2, length_deletions2 = 0, 0 -- while pointer < len(diffs): -- if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. -- equalities.append(pointer) -- length_insertions1, length_insertions2 = length_insertions2, 0 -- length_deletions1, length_deletions2 = length_deletions2, 0 -- lastEquality = diffs[pointer][1] -- else: # An insertion or deletion. -- if diffs[pointer][0] == self.DIFF_INSERT: -- length_insertions2 += len(diffs[pointer][1]) -- else: -- length_deletions2 += len(diffs[pointer][1]) -- # Eliminate an equality that is smaller or equal to the edits on both -- # sides of it. -- if (lastEquality and (len(lastEquality) <= -- max(length_insertions1, length_deletions1)) and -- (len(lastEquality) <= max(length_insertions2, length_deletions2))): -- # Duplicate record. -- diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) -- # Change second copy to insert. -- diffs[equalities[-1] + 1] = (self.DIFF_INSERT, -- diffs[equalities[-1] + 1][1]) -- # Throw away the equality we just deleted. -- equalities.pop() -- # Throw away the previous equality (it needs to be reevaluated). 
-- if len(equalities): -- equalities.pop() -- if len(equalities): -- pointer = equalities[-1] -- else: -- pointer = -1 -- # Reset the counters. -- length_insertions1, length_deletions1 = 0, 0 -- length_insertions2, length_deletions2 = 0, 0 -- lastEquality = None -- changes = True -- pointer += 1 -- -- # Normalize the diff. -- if changes: -- self.diff_cleanupMerge(diffs) -- self.diff_cleanupSemanticLossless(diffs) -- -- # Find any overlaps between deletions and insertions. -- # e.g: abcxxxxxxdef -- # -> abcxxxdef -- # e.g: xxxabcdefxxx -- # -> defxxxabc -- # Only extract an overlap if it is as big as the edit ahead or behind it. -- pointer = 1 -- while pointer < len(diffs): -- if (diffs[pointer - 1][0] == self.DIFF_DELETE and -- diffs[pointer][0] == self.DIFF_INSERT): -- deletion = diffs[pointer - 1][1] -- insertion = diffs[pointer][1] -- overlap_length1 = self.diff_commonOverlap(deletion, insertion) -- overlap_length2 = self.diff_commonOverlap(insertion, deletion) -- if overlap_length1 >= overlap_length2: -- if (overlap_length1 >= len(deletion) / 2.0 or -- overlap_length1 >= len(insertion) / 2.0): -- # Overlap found. Insert an equality and trim the surrounding edits. -- diffs.insert(pointer, (self.DIFF_EQUAL, -- insertion[:overlap_length1])) -- diffs[pointer - 1] = (self.DIFF_DELETE, -- deletion[:len(deletion) - overlap_length1]) -- diffs[pointer + 1] = (self.DIFF_INSERT, -- insertion[overlap_length1:]) -+ changes = False -+ equalities = [] # Stack of indices where equalities are found. -+ lastEquality = None # Always equal to diffs[equalities[-1]][1] -+ pointer = 0 # Index of current position. -+ # Number of chars that changed prior to the equality. -+ length_insertions1, length_deletions1 = 0, 0 -+ # Number of chars that changed after the equality. -+ length_insertions2, length_deletions2 = 0, 0 -+ while pointer < len(diffs): -+ if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. 
-+ equalities.append(pointer) -+ length_insertions1, length_insertions2 = length_insertions2, 0 -+ length_deletions1, length_deletions2 = length_deletions2, 0 -+ lastEquality = diffs[pointer][1] -+ else: # An insertion or deletion. -+ if diffs[pointer][0] == self.DIFF_INSERT: -+ length_insertions2 += len(diffs[pointer][1]) -+ else: -+ length_deletions2 += len(diffs[pointer][1]) -+ # Eliminate an equality that is smaller or equal to the edits on both -+ # sides of it. -+ if ( -+ lastEquality -+ and ( -+ len(lastEquality) <= max(length_insertions1, length_deletions1) -+ ) -+ and ( -+ len(lastEquality) <= max(length_insertions2, length_deletions2) -+ ) -+ ): -+ # Duplicate record. -+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) -+ # Change second copy to insert. -+ diffs[equalities[-1] + 1] = ( -+ self.DIFF_INSERT, -+ diffs[equalities[-1] + 1][1], -+ ) -+ # Throw away the equality we just deleted. -+ equalities.pop() -+ # Throw away the previous equality (it needs to be reevaluated). -+ if len(equalities): -+ equalities.pop() -+ if len(equalities): -+ pointer = equalities[-1] -+ else: -+ pointer = -1 -+ # Reset the counters. -+ length_insertions1, length_deletions1 = 0, 0 -+ length_insertions2, length_deletions2 = 0, 0 -+ lastEquality = None -+ changes = True - pointer += 1 -- else: -- if (overlap_length2 >= len(deletion) / 2.0 or -- overlap_length2 >= len(insertion) / 2.0): -- # Reverse overlap found. -- # Insert an equality and swap and trim the surrounding edits. -- diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])) -- diffs[pointer - 1] = (self.DIFF_INSERT, -- insertion[:len(insertion) - overlap_length2]) -- diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:]) -+ -+ # Normalize the diff. -+ if changes: -+ self.diff_cleanupMerge(diffs) -+ self.diff_cleanupSemanticLossless(diffs) -+ -+ # Find any overlaps between deletions and insertions. 
-+ # e.g: abcxxxxxxdef -+ # -> abcxxxdef -+ # e.g: xxxabcdefxxx -+ # -> defxxxabc -+ # Only extract an overlap if it is as big as the edit ahead or behind it. -+ pointer = 1 -+ while pointer < len(diffs): -+ if ( -+ diffs[pointer - 1][0] == self.DIFF_DELETE -+ and diffs[pointer][0] == self.DIFF_INSERT -+ ): -+ deletion = diffs[pointer - 1][1] -+ insertion = diffs[pointer][1] -+ overlap_length1 = self.diff_commonOverlap(deletion, insertion) -+ overlap_length2 = self.diff_commonOverlap(insertion, deletion) -+ if overlap_length1 >= overlap_length2: -+ if ( -+ overlap_length1 >= len(deletion) / 2.0 -+ or overlap_length1 >= len(insertion) / 2.0 -+ ): -+ # Overlap found. Insert an equality and trim the surrounding edits. -+ diffs.insert( -+ pointer, (self.DIFF_EQUAL, insertion[:overlap_length1]) -+ ) -+ diffs[pointer - 1] = ( -+ self.DIFF_DELETE, -+ deletion[: len(deletion) - overlap_length1], -+ ) -+ diffs[pointer + 1] = ( -+ self.DIFF_INSERT, -+ insertion[overlap_length1:], -+ ) -+ pointer += 1 -+ else: -+ if ( -+ overlap_length2 >= len(deletion) / 2.0 -+ or overlap_length2 >= len(insertion) / 2.0 -+ ): -+ # Reverse overlap found. -+ # Insert an equality and swap and trim the surrounding edits. -+ diffs.insert( -+ pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]) -+ ) -+ diffs[pointer - 1] = ( -+ self.DIFF_INSERT, -+ insertion[: len(insertion) - overlap_length2], -+ ) -+ diffs[pointer + 1] = ( -+ self.DIFF_DELETE, -+ deletion[overlap_length2:], -+ ) -+ pointer += 1 -+ pointer += 1 - pointer += 1 -- pointer += 1 -- pointer += 1 - -- def diff_cleanupSemanticLossless(self, diffs): -- """Look for single edits surrounded on both sides by equalities -+ def diff_cleanupSemanticLossless(self, diffs): -+ """Look for single edits surrounded on both sides by equalities - which can be shifted sideways to align the edit to a word boundary. - e.g: The cat came. -> The cat came. - -@@ -744,8 +783,8 @@ class diff_match_patch: - diffs: Array of diff tuples. 
- """ - -- def diff_cleanupSemanticScore(one, two): -- """Given two strings, compute a score representing whether the -+ def diff_cleanupSemanticScore(one, two): -+ """Given two strings, compute a score representing whether the - internal boundary falls on logical boundaries. - Scores range from 6 (best) to 0 (worst). - Closure, but does not reference any external variables. -@@ -757,277 +796,306 @@ class diff_match_patch: - Returns: - The score. - """ -- if not one or not two: -- # Edges are the best. -- return 6 -- -- # Each port of this function behaves slightly differently due to -- # subtle differences in each language's definition of things like -- # 'whitespace'. Since this function's purpose is largely cosmetic, -- # the choice has been made to use each language's native features -- # rather than force total conformity. -- char1 = one[-1] -- char2 = two[0] -- nonAlphaNumeric1 = not char1.isalnum() -- nonAlphaNumeric2 = not char2.isalnum() -- whitespace1 = nonAlphaNumeric1 and char1.isspace() -- whitespace2 = nonAlphaNumeric2 and char2.isspace() -- lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n") -- lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n") -- blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one) -- blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two) -- -- if blankLine1 or blankLine2: -- # Five points for blank lines. -- return 5 -- elif lineBreak1 or lineBreak2: -- # Four points for line breaks. -- return 4 -- elif nonAlphaNumeric1 and not whitespace1 and whitespace2: -- # Three points for end of sentences. -- return 3 -- elif whitespace1 or whitespace2: -- # Two points for whitespace. -- return 2 -- elif nonAlphaNumeric1 or nonAlphaNumeric2: -- # One point for non-alphanumeric. -- return 1 -- return 0 -- -- pointer = 1 -- # Intentionally ignore the first and last element (don't need checking). 
-- while pointer < len(diffs) - 1: -- if (diffs[pointer - 1][0] == self.DIFF_EQUAL and -- diffs[pointer + 1][0] == self.DIFF_EQUAL): -- # This is a single edit surrounded by equalities. -- equality1 = diffs[pointer - 1][1] -- edit = diffs[pointer][1] -- equality2 = diffs[pointer + 1][1] -- -- # First, shift the edit as far left as possible. -- commonOffset = self.diff_commonSuffix(equality1, edit) -- if commonOffset: -- commonString = edit[-commonOffset:] -- equality1 = equality1[:-commonOffset] -- edit = commonString + edit[:-commonOffset] -- equality2 = commonString + equality2 -- -- # Second, step character by character right, looking for the best fit. -- bestEquality1 = equality1 -- bestEdit = edit -- bestEquality2 = equality2 -- bestScore = (diff_cleanupSemanticScore(equality1, edit) + -- diff_cleanupSemanticScore(edit, equality2)) -- while edit and equality2 and edit[0] == equality2[0]: -- equality1 += edit[0] -- edit = edit[1:] + equality2[0] -- equality2 = equality2[1:] -- score = (diff_cleanupSemanticScore(equality1, edit) + -- diff_cleanupSemanticScore(edit, equality2)) -- # The >= encourages trailing rather than leading whitespace on edits. -- if score >= bestScore: -- bestScore = score -- bestEquality1 = equality1 -- bestEdit = edit -- bestEquality2 = equality2 -- -- if diffs[pointer - 1][1] != bestEquality1: -- # We have an improvement, save it back to the diff. -- if bestEquality1: -- diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1) -- else: -- del diffs[pointer - 1] -- pointer -= 1 -- diffs[pointer] = (diffs[pointer][0], bestEdit) -- if bestEquality2: -- diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2) -- else: -- del diffs[pointer + 1] -- pointer -= 1 -- pointer += 1 -- -- # Define some regex patterns for matching boundaries. -- BLANKLINEEND = re.compile(r"\n\r?\n$") -- BLANKLINESTART = re.compile(r"^\r?\n\r?\n") -+ if not one or not two: -+ # Edges are the best. 
-+ return 6 -+ -+ # Each port of this function behaves slightly differently due to -+ # subtle differences in each language's definition of things like -+ # 'whitespace'. Since this function's purpose is largely cosmetic, -+ # the choice has been made to use each language's native features -+ # rather than force total conformity. -+ char1 = one[-1] -+ char2 = two[0] -+ nonAlphaNumeric1 = not char1.isalnum() -+ nonAlphaNumeric2 = not char2.isalnum() -+ whitespace1 = nonAlphaNumeric1 and char1.isspace() -+ whitespace2 = nonAlphaNumeric2 and char2.isspace() -+ lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n") -+ lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n") -+ blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one) -+ blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two) -+ -+ if blankLine1 or blankLine2: -+ # Five points for blank lines. -+ return 5 -+ elif lineBreak1 or lineBreak2: -+ # Four points for line breaks. -+ return 4 -+ elif nonAlphaNumeric1 and not whitespace1 and whitespace2: -+ # Three points for end of sentences. -+ return 3 -+ elif whitespace1 or whitespace2: -+ # Two points for whitespace. -+ return 2 -+ elif nonAlphaNumeric1 or nonAlphaNumeric2: -+ # One point for non-alphanumeric. -+ return 1 -+ return 0 -+ -+ pointer = 1 -+ # Intentionally ignore the first and last element (don't need checking). -+ while pointer < len(diffs) - 1: -+ if ( -+ diffs[pointer - 1][0] == self.DIFF_EQUAL -+ and diffs[pointer + 1][0] == self.DIFF_EQUAL -+ ): -+ # This is a single edit surrounded by equalities. -+ equality1 = diffs[pointer - 1][1] -+ edit = diffs[pointer][1] -+ equality2 = diffs[pointer + 1][1] -+ -+ # First, shift the edit as far left as possible. 
-+ commonOffset = self.diff_commonSuffix(equality1, edit) -+ if commonOffset: -+ commonString = edit[-commonOffset:] -+ equality1 = equality1[:-commonOffset] -+ edit = commonString + edit[:-commonOffset] -+ equality2 = commonString + equality2 -+ -+ # Second, step character by character right, looking for the best fit. -+ bestEquality1 = equality1 -+ bestEdit = edit -+ bestEquality2 = equality2 -+ bestScore = diff_cleanupSemanticScore( -+ equality1, edit -+ ) + diff_cleanupSemanticScore(edit, equality2) -+ while edit and equality2 and edit[0] == equality2[0]: -+ equality1 += edit[0] -+ edit = edit[1:] + equality2[0] -+ equality2 = equality2[1:] -+ score = diff_cleanupSemanticScore( -+ equality1, edit -+ ) + diff_cleanupSemanticScore(edit, equality2) -+ # The >= encourages trailing rather than leading whitespace on edits. -+ if score >= bestScore: -+ bestScore = score -+ bestEquality1 = equality1 -+ bestEdit = edit -+ bestEquality2 = equality2 -+ -+ if diffs[pointer - 1][1] != bestEquality1: -+ # We have an improvement, save it back to the diff. -+ if bestEquality1: -+ diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1) -+ else: -+ del diffs[pointer - 1] -+ pointer -= 1 -+ diffs[pointer] = (diffs[pointer][0], bestEdit) -+ if bestEquality2: -+ diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2) -+ else: -+ del diffs[pointer + 1] -+ pointer -= 1 -+ pointer += 1 -+ -+ # Define some regex patterns for matching boundaries. -+ BLANKLINEEND = re.compile(r"\n\r?\n$") -+ BLANKLINESTART = re.compile(r"^\r?\n\r?\n") - -- def diff_cleanupEfficiency(self, diffs): -- """Reduce the number of edits by eliminating operationally trivial -+ def diff_cleanupEfficiency(self, diffs): -+ """Reduce the number of edits by eliminating operationally trivial - equalities. - - Args: - diffs: Array of diff tuples. - """ -- changes = False -- equalities = [] # Stack of indices where equalities are found. 
-- lastEquality = None # Always equal to diffs[equalities[-1]][1] -- pointer = 0 # Index of current position. -- pre_ins = False # Is there an insertion operation before the last equality. -- pre_del = False # Is there a deletion operation before the last equality. -- post_ins = False # Is there an insertion operation after the last equality. -- post_del = False # Is there a deletion operation after the last equality. -- while pointer < len(diffs): -- if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. -- if (len(diffs[pointer][1]) < self.Diff_EditCost and -- (post_ins or post_del)): -- # Candidate found. -- equalities.append(pointer) -- pre_ins = post_ins -- pre_del = post_del -- lastEquality = diffs[pointer][1] -- else: -- # Not a candidate, and can never become one. -- equalities = [] -- lastEquality = None -- -- post_ins = post_del = False -- else: # An insertion or deletion. -- if diffs[pointer][0] == self.DIFF_DELETE: -- post_del = True -- else: -- post_ins = True -- -- # Five types to be split: -- # ABXYCD -- # AXCD -- # ABXC -- # AXCD -- # ABXC -- -- if lastEquality and ((pre_ins and pre_del and post_ins and post_del) or -- ((len(lastEquality) < self.Diff_EditCost / 2) and -- (pre_ins + pre_del + post_ins + post_del) == 3)): -- # Duplicate record. -- diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) -- # Change second copy to insert. -- diffs[equalities[-1] + 1] = (self.DIFF_INSERT, -- diffs[equalities[-1] + 1][1]) -- equalities.pop() # Throw away the equality we just deleted. -- lastEquality = None -- if pre_ins and pre_del: -- # No changes made which could affect previous entry, keep going. -- post_ins = post_del = True -- equalities = [] -- else: -- if len(equalities): -- equalities.pop() # Throw away the previous equality. 
-- if len(equalities): -- pointer = equalities[-1] -- else: -- pointer = -1 -- post_ins = post_del = False -- changes = True -- pointer += 1 -+ changes = False -+ equalities = [] # Stack of indices where equalities are found. -+ lastEquality = None # Always equal to diffs[equalities[-1]][1] -+ pointer = 0 # Index of current position. -+ pre_ins = False # Is there an insertion operation before the last equality. -+ pre_del = False # Is there a deletion operation before the last equality. -+ post_ins = False # Is there an insertion operation after the last equality. -+ post_del = False # Is there a deletion operation after the last equality. -+ while pointer < len(diffs): -+ if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. -+ if len(diffs[pointer][1]) < self.Diff_EditCost and ( -+ post_ins or post_del -+ ): -+ # Candidate found. -+ equalities.append(pointer) -+ pre_ins = post_ins -+ pre_del = post_del -+ lastEquality = diffs[pointer][1] -+ else: -+ # Not a candidate, and can never become one. -+ equalities = [] -+ lastEquality = None -+ -+ post_ins = post_del = False -+ else: # An insertion or deletion. -+ if diffs[pointer][0] == self.DIFF_DELETE: -+ post_del = True -+ else: -+ post_ins = True -+ -+ # Five types to be split: -+ # ABXYCD -+ # AXCD -+ # ABXC -+ # AXCD -+ # ABXC -+ -+ if lastEquality and ( -+ (pre_ins and pre_del and post_ins and post_del) -+ or ( -+ (len(lastEquality) < self.Diff_EditCost / 2) -+ and (pre_ins + pre_del + post_ins + post_del) == 3 -+ ) -+ ): -+ # Duplicate record. -+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) -+ # Change second copy to insert. -+ diffs[equalities[-1] + 1] = ( -+ self.DIFF_INSERT, -+ diffs[equalities[-1] + 1][1], -+ ) -+ equalities.pop() # Throw away the equality we just deleted. -+ lastEquality = None -+ if pre_ins and pre_del: -+ # No changes made which could affect previous entry, keep going. 
-+ post_ins = post_del = True -+ equalities = [] -+ else: -+ if len(equalities): -+ equalities.pop() # Throw away the previous equality. -+ if len(equalities): -+ pointer = equalities[-1] -+ else: -+ pointer = -1 -+ post_ins = post_del = False -+ changes = True -+ pointer += 1 - -- if changes: -- self.diff_cleanupMerge(diffs) -+ if changes: -+ self.diff_cleanupMerge(diffs) - -- def diff_cleanupMerge(self, diffs): -- """Reorder and merge like edit sections. Merge equalities. -+ def diff_cleanupMerge(self, diffs): -+ """Reorder and merge like edit sections. Merge equalities. - Any edit section can move as long as it doesn't cross an equality. - - Args: - diffs: Array of diff tuples. - """ -- diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end. -- pointer = 0 -- count_delete = 0 -- count_insert = 0 -- text_delete = '' -- text_insert = '' -- while pointer < len(diffs): -- if diffs[pointer][0] == self.DIFF_INSERT: -- count_insert += 1 -- text_insert += diffs[pointer][1] -- pointer += 1 -- elif diffs[pointer][0] == self.DIFF_DELETE: -- count_delete += 1 -- text_delete += diffs[pointer][1] -- pointer += 1 -- elif diffs[pointer][0] == self.DIFF_EQUAL: -- # Upon reaching an equality, check for prior redundancies. -- if count_delete + count_insert > 1: -- if count_delete != 0 and count_insert != 0: -- # Factor out any common prefixies. -- commonlength = self.diff_commonPrefix(text_insert, text_delete) -- if commonlength != 0: -- x = pointer - count_delete - count_insert - 1 -- if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: -- diffs[x] = (diffs[x][0], diffs[x][1] + -- text_insert[:commonlength]) -- else: -- diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength])) -- pointer += 1 -- text_insert = text_insert[commonlength:] -- text_delete = text_delete[commonlength:] -- # Factor out any common suffixes. 
-- commonlength = self.diff_commonSuffix(text_insert, text_delete) -- if commonlength != 0: -- diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] + -- diffs[pointer][1]) -- text_insert = text_insert[:-commonlength] -- text_delete = text_delete[:-commonlength] -- # Delete the offending records and add the merged ones. -- new_ops = [] -- if len(text_delete) != 0: -- new_ops.append((self.DIFF_DELETE, text_delete)) -- if len(text_insert) != 0: -- new_ops.append((self.DIFF_INSERT, text_insert)) -- pointer -= count_delete + count_insert -- diffs[pointer : pointer + count_delete + count_insert] = new_ops -- pointer += len(new_ops) + 1 -- elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL: -- # Merge this equality with the previous one. -- diffs[pointer - 1] = (diffs[pointer - 1][0], -- diffs[pointer - 1][1] + diffs[pointer][1]) -- del diffs[pointer] -- else: -- pointer += 1 -- -- count_insert = 0 -+ diffs.append((self.DIFF_EQUAL, "")) # Add a dummy entry at the end. -+ pointer = 0 - count_delete = 0 -- text_delete = '' -- text_insert = '' -- -- if diffs[-1][1] == '': -- diffs.pop() # Remove the dummy entry at the end. -+ count_insert = 0 -+ text_delete = "" -+ text_insert = "" -+ while pointer < len(diffs): -+ if diffs[pointer][0] == self.DIFF_INSERT: -+ count_insert += 1 -+ text_insert += diffs[pointer][1] -+ pointer += 1 -+ elif diffs[pointer][0] == self.DIFF_DELETE: -+ count_delete += 1 -+ text_delete += diffs[pointer][1] -+ pointer += 1 -+ elif diffs[pointer][0] == self.DIFF_EQUAL: -+ # Upon reaching an equality, check for prior redundancies. -+ if count_delete + count_insert > 1: -+ if count_delete != 0 and count_insert != 0: -+ # Factor out any common prefixies. 
-+ commonlength = self.diff_commonPrefix(text_insert, text_delete) -+ if commonlength != 0: -+ x = pointer - count_delete - count_insert - 1 -+ if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: -+ diffs[x] = ( -+ diffs[x][0], -+ diffs[x][1] + text_insert[:commonlength], -+ ) -+ else: -+ diffs.insert( -+ 0, (self.DIFF_EQUAL, text_insert[:commonlength]) -+ ) -+ pointer += 1 -+ text_insert = text_insert[commonlength:] -+ text_delete = text_delete[commonlength:] -+ # Factor out any common suffixes. -+ commonlength = self.diff_commonSuffix(text_insert, text_delete) -+ if commonlength != 0: -+ diffs[pointer] = ( -+ diffs[pointer][0], -+ text_insert[-commonlength:] + diffs[pointer][1], -+ ) -+ text_insert = text_insert[:-commonlength] -+ text_delete = text_delete[:-commonlength] -+ # Delete the offending records and add the merged ones. -+ new_ops = [] -+ if len(text_delete) != 0: -+ new_ops.append((self.DIFF_DELETE, text_delete)) -+ if len(text_insert) != 0: -+ new_ops.append((self.DIFF_INSERT, text_insert)) -+ pointer -= count_delete + count_insert -+ diffs[pointer : pointer + count_delete + count_insert] = new_ops -+ pointer += len(new_ops) + 1 -+ elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL: -+ # Merge this equality with the previous one. -+ diffs[pointer - 1] = ( -+ diffs[pointer - 1][0], -+ diffs[pointer - 1][1] + diffs[pointer][1], -+ ) -+ del diffs[pointer] -+ else: -+ pointer += 1 -+ -+ count_insert = 0 -+ count_delete = 0 -+ text_delete = "" -+ text_insert = "" -+ -+ if diffs[-1][1] == "": -+ diffs.pop() # Remove the dummy entry at the end. -+ -+ # Second pass: look for single edits surrounded on both sides by equalities -+ # which can be shifted sideways to eliminate an equality. -+ # e.g: ABAC -> ABAC -+ changes = False -+ pointer = 1 -+ # Intentionally ignore the first and last element (don't need checking). 
-+ while pointer < len(diffs) - 1: -+ if ( -+ diffs[pointer - 1][0] == self.DIFF_EQUAL -+ and diffs[pointer + 1][0] == self.DIFF_EQUAL -+ ): -+ # This is a single edit surrounded by equalities. -+ if diffs[pointer][1].endswith(diffs[pointer - 1][1]): -+ # Shift the edit over the previous equality. -+ if diffs[pointer - 1][1] != "": -+ diffs[pointer] = ( -+ diffs[pointer][0], -+ diffs[pointer - 1][1] -+ + diffs[pointer][1][: -len(diffs[pointer - 1][1])], -+ ) -+ diffs[pointer + 1] = ( -+ diffs[pointer + 1][0], -+ diffs[pointer - 1][1] + diffs[pointer + 1][1], -+ ) -+ del diffs[pointer - 1] -+ changes = True -+ elif diffs[pointer][1].startswith(diffs[pointer + 1][1]): -+ # Shift the edit over the next equality. -+ diffs[pointer - 1] = ( -+ diffs[pointer - 1][0], -+ diffs[pointer - 1][1] + diffs[pointer + 1][1], -+ ) -+ diffs[pointer] = ( -+ diffs[pointer][0], -+ diffs[pointer][1][len(diffs[pointer + 1][1]) :] -+ + diffs[pointer + 1][1], -+ ) -+ del diffs[pointer + 1] -+ changes = True -+ pointer += 1 - -- # Second pass: look for single edits surrounded on both sides by equalities -- # which can be shifted sideways to eliminate an equality. -- # e.g: ABAC -> ABAC -- changes = False -- pointer = 1 -- # Intentionally ignore the first and last element (don't need checking). -- while pointer < len(diffs) - 1: -- if (diffs[pointer - 1][0] == self.DIFF_EQUAL and -- diffs[pointer + 1][0] == self.DIFF_EQUAL): -- # This is a single edit surrounded by equalities. -- if diffs[pointer][1].endswith(diffs[pointer - 1][1]): -- # Shift the edit over the previous equality. -- if diffs[pointer - 1][1] != "": -- diffs[pointer] = (diffs[pointer][0], -- diffs[pointer - 1][1] + -- diffs[pointer][1][:-len(diffs[pointer - 1][1])]) -- diffs[pointer + 1] = (diffs[pointer + 1][0], -- diffs[pointer - 1][1] + diffs[pointer + 1][1]) -- del diffs[pointer - 1] -- changes = True -- elif diffs[pointer][1].startswith(diffs[pointer + 1][1]): -- # Shift the edit over the next equality. 
-- diffs[pointer - 1] = (diffs[pointer - 1][0], -- diffs[pointer - 1][1] + diffs[pointer + 1][1]) -- diffs[pointer] = (diffs[pointer][0], -- diffs[pointer][1][len(diffs[pointer + 1][1]):] + -- diffs[pointer + 1][1]) -- del diffs[pointer + 1] -- changes = True -- pointer += 1 -- -- # If shifts were made, the diff needs reordering and another shift sweep. -- if changes: -- self.diff_cleanupMerge(diffs) -+ # If shifts were made, the diff needs reordering and another shift sweep. -+ if changes: -+ self.diff_cleanupMerge(diffs) - -- def diff_xIndex(self, diffs, loc): -- """loc is a location in text1, compute and return the equivalent location -+ def diff_xIndex(self, diffs, loc): -+ """loc is a location in text1, compute and return the equivalent location - in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8 - - Args: -@@ -1037,29 +1105,29 @@ class diff_match_patch: - Returns: - Location within text2. - """ -- chars1 = 0 -- chars2 = 0 -- last_chars1 = 0 -- last_chars2 = 0 -- for x in xrange(len(diffs)): -- (op, text) = diffs[x] -- if op != self.DIFF_INSERT: # Equality or deletion. -- chars1 += len(text) -- if op != self.DIFF_DELETE: # Equality or insertion. -- chars2 += len(text) -- if chars1 > loc: # Overshot the location. -- break -- last_chars1 = chars1 -- last_chars2 = chars2 -- -- if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE: -- # The location was deleted. -- return last_chars2 -- # Add the remaining len(character). -- return last_chars2 + (loc - last_chars1) -+ chars1 = 0 -+ chars2 = 0 -+ last_chars1 = 0 -+ last_chars2 = 0 -+ for x in xrange(len(diffs)): -+ (op, text) = diffs[x] -+ if op != self.DIFF_INSERT: # Equality or deletion. -+ chars1 += len(text) -+ if op != self.DIFF_DELETE: # Equality or insertion. -+ chars2 += len(text) -+ if chars1 > loc: # Overshot the location. -+ break -+ last_chars1 = chars1 -+ last_chars2 = chars2 -+ -+ if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE: -+ # The location was deleted. 
-+ return last_chars2 -+ # Add the remaining len(character). -+ return last_chars2 + (loc - last_chars1) - -- def diff_prettyHtml(self, diffs): -- """Convert a diff array into a pretty HTML report. -+ def diff_prettyHtml(self, diffs): -+ """Convert a diff array into a pretty HTML report. - - Args: - diffs: Array of diff tuples. -@@ -1067,20 +1135,24 @@ class diff_match_patch: - Returns: - HTML representation. - """ -- html = [] -- for (op, data) in diffs: -- text = (data.replace("&", "&").replace("<", "<") -- .replace(">", ">").replace("\n", "¶
    ")) -- if op == self.DIFF_INSERT: -- html.append("%s" % text) -- elif op == self.DIFF_DELETE: -- html.append("%s" % text) -- elif op == self.DIFF_EQUAL: -- html.append("%s" % text) -- return "".join(html) -+ html = [] -+ for (op, data) in diffs: -+ text = ( -+ data.replace("&", "&") -+ .replace("<", "<") -+ .replace(">", ">") -+ .replace("\n", "¶
    ") -+ ) -+ if op == self.DIFF_INSERT: -+ html.append('%s' % text) -+ elif op == self.DIFF_DELETE: -+ html.append('%s' % text) -+ elif op == self.DIFF_EQUAL: -+ html.append("%s" % text) -+ return "".join(html) - -- def diff_text1(self, diffs): -- """Compute and return the source text (all equalities and deletions). -+ def diff_text1(self, diffs): -+ """Compute and return the source text (all equalities and deletions). - - Args: - diffs: Array of diff tuples. -@@ -1088,14 +1160,14 @@ class diff_match_patch: - Returns: - Source text. - """ -- text = [] -- for (op, data) in diffs: -- if op != self.DIFF_INSERT: -- text.append(data) -- return "".join(text) -+ text = [] -+ for (op, data) in diffs: -+ if op != self.DIFF_INSERT: -+ text.append(data) -+ return "".join(text) - -- def diff_text2(self, diffs): -- """Compute and return the destination text (all equalities and insertions). -+ def diff_text2(self, diffs): -+ """Compute and return the destination text (all equalities and insertions). - - Args: - diffs: Array of diff tuples. -@@ -1103,14 +1175,14 @@ class diff_match_patch: - Returns: - Destination text. - """ -- text = [] -- for (op, data) in diffs: -- if op != self.DIFF_DELETE: -- text.append(data) -- return "".join(text) -+ text = [] -+ for (op, data) in diffs: -+ if op != self.DIFF_DELETE: -+ text.append(data) -+ return "".join(text) - -- def diff_levenshtein(self, diffs): -- """Compute the Levenshtein distance; the number of inserted, deleted or -+ def diff_levenshtein(self, diffs): -+ """Compute the Levenshtein distance; the number of inserted, deleted or - substituted characters. - - Args: -@@ -1119,24 +1191,24 @@ class diff_match_patch: - Returns: - Number of changes. - """ -- levenshtein = 0 -- insertions = 0 -- deletions = 0 -- for (op, data) in diffs: -- if op == self.DIFF_INSERT: -- insertions += len(data) -- elif op == self.DIFF_DELETE: -- deletions += len(data) -- elif op == self.DIFF_EQUAL: -- # A deletion and an insertion is one substitution. 
-- levenshtein += max(insertions, deletions) -+ levenshtein = 0 - insertions = 0 - deletions = 0 -- levenshtein += max(insertions, deletions) -- return levenshtein -+ for (op, data) in diffs: -+ if op == self.DIFF_INSERT: -+ insertions += len(data) -+ elif op == self.DIFF_DELETE: -+ deletions += len(data) -+ elif op == self.DIFF_EQUAL: -+ # A deletion and an insertion is one substitution. -+ levenshtein += max(insertions, deletions) -+ insertions = 0 -+ deletions = 0 -+ levenshtein += max(insertions, deletions) -+ return levenshtein - -- def diff_toDelta(self, diffs): -- """Crush the diff into an encoded string which describes the operations -+ def diff_toDelta(self, diffs): -+ """Crush the diff into an encoded string which describes the operations - required to transform text1 into text2. - E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. - Operations are tab-separated. Inserted text is escaped using %xx notation. -@@ -1147,20 +1219,20 @@ class diff_match_patch: - Returns: - Delta text. - """ -- text = [] -- for (op, data) in diffs: -- if op == self.DIFF_INSERT: -- # High ascii will raise UnicodeDecodeError. Use Unicode instead. -- data = data.encode("utf-8") -- text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# ")) -- elif op == self.DIFF_DELETE: -- text.append("-%d" % len(data)) -- elif op == self.DIFF_EQUAL: -- text.append("=%d" % len(data)) -- return "\t".join(text) -+ text = [] -+ for (op, data) in diffs: -+ if op == self.DIFF_INSERT: -+ # High ascii will raise UnicodeDecodeError. Use Unicode instead. 
-+ data = data.encode("utf-8") -+ text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# ")) -+ elif op == self.DIFF_DELETE: -+ text.append("-%d" % len(data)) -+ elif op == self.DIFF_EQUAL: -+ text.append("=%d" % len(data)) -+ return "\t".join(text) - -- def diff_fromDelta(self, text1, delta): -- """Given the original text1, and an encoded string which describes the -+ def diff_fromDelta(self, text1, delta): -+ """Given the original text1, and an encoded string which describes the - operations required to transform text1 into text2, compute the full diff. - - Args: -@@ -1173,50 +1245,52 @@ class diff_match_patch: - Raises: - ValueError: If invalid input. - """ -- if type(delta) == unicode: -- # Deltas should be composed of a subset of ascii chars, Unicode not -- # required. If this encode raises UnicodeEncodeError, delta is invalid. -- delta = delta.encode("ascii") -- diffs = [] -- pointer = 0 # Cursor in text1 -- tokens = delta.split("\t") -- for token in tokens: -- if token == "": -- # Blank tokens are ok (from a trailing \t). -- continue -- # Each token begins with a one character parameter which specifies the -- # operation of this token (delete, insert, equality). -- param = token[1:] -- if token[0] == "+": -- param = urllib.unquote(param).decode("utf-8") -- diffs.append((self.DIFF_INSERT, param)) -- elif token[0] == "-" or token[0] == "=": -- try: -- n = int(param) -- except ValueError: -- raise ValueError("Invalid number in diff_fromDelta: " + param) -- if n < 0: -- raise ValueError("Negative number in diff_fromDelta: " + param) -- text = text1[pointer : pointer + n] -- pointer += n -- if token[0] == "=": -- diffs.append((self.DIFF_EQUAL, text)) -- else: -- diffs.append((self.DIFF_DELETE, text)) -- else: -- # Anything else is an error. -- raise ValueError("Invalid diff operation in diff_fromDelta: " + -- token[0]) -- if pointer != len(text1): -- raise ValueError( -- "Delta length (%d) does not equal source text length (%d)." 
% -- (pointer, len(text1))) -- return diffs -+ if type(delta) == unicode: -+ # Deltas should be composed of a subset of ascii chars, Unicode not -+ # required. If this encode raises UnicodeEncodeError, delta is invalid. -+ delta = delta.encode("ascii") -+ diffs = [] -+ pointer = 0 # Cursor in text1 -+ tokens = delta.split("\t") -+ for token in tokens: -+ if token == "": -+ # Blank tokens are ok (from a trailing \t). -+ continue -+ # Each token begins with a one character parameter which specifies the -+ # operation of this token (delete, insert, equality). -+ param = token[1:] -+ if token[0] == "+": -+ param = urllib.unquote(param).decode("utf-8") -+ diffs.append((self.DIFF_INSERT, param)) -+ elif token[0] == "-" or token[0] == "=": -+ try: -+ n = int(param) -+ except ValueError: -+ raise ValueError("Invalid number in diff_fromDelta: " + param) -+ if n < 0: -+ raise ValueError("Negative number in diff_fromDelta: " + param) -+ text = text1[pointer : pointer + n] -+ pointer += n -+ if token[0] == "=": -+ diffs.append((self.DIFF_EQUAL, text)) -+ else: -+ diffs.append((self.DIFF_DELETE, text)) -+ else: -+ # Anything else is an error. -+ raise ValueError( -+ "Invalid diff operation in diff_fromDelta: " + token[0] -+ ) -+ if pointer != len(text1): -+ raise ValueError( -+ "Delta length (%d) does not equal source text length (%d)." -+ % (pointer, len(text1)) -+ ) -+ return diffs - -- # MATCH FUNCTIONS -+ # MATCH FUNCTIONS - -- def match_main(self, text, pattern, loc): -- """Locate the best instance of 'pattern' in 'text' near 'loc'. -+ def match_main(self, text, pattern, loc): -+ """Locate the best instance of 'pattern' in 'text' near 'loc'. - - Args: - text: The text to search. -@@ -1226,27 +1300,27 @@ class diff_match_patch: - Returns: - Best match index or -1. - """ -- # Check for null inputs. -- if text == None or pattern == None: -- raise ValueError("Null inputs. 
(match_main)") -- -- loc = max(0, min(loc, len(text))) -- if text == pattern: -- # Shortcut (potentially not guaranteed by the algorithm) -- return 0 -- elif not text: -- # Nothing to match. -- return -1 -- elif text[loc:loc + len(pattern)] == pattern: -- # Perfect match at the perfect spot! (Includes case of null pattern) -- return loc -- else: -- # Do a fuzzy compare. -- match = self.match_bitap(text, pattern, loc) -- return match -+ # Check for null inputs. -+ if text == None or pattern == None: -+ raise ValueError("Null inputs. (match_main)") -+ -+ loc = max(0, min(loc, len(text))) -+ if text == pattern: -+ # Shortcut (potentially not guaranteed by the algorithm) -+ return 0 -+ elif not text: -+ # Nothing to match. -+ return -1 -+ elif text[loc : loc + len(pattern)] == pattern: -+ # Perfect match at the perfect spot! (Includes case of null pattern) -+ return loc -+ else: -+ # Do a fuzzy compare. -+ match = self.match_bitap(text, pattern, loc) -+ return match - -- def match_bitap(self, text, pattern, loc): -- """Locate the best instance of 'pattern' in 'text' near 'loc' using the -+ def match_bitap(self, text, pattern, loc): -+ """Locate the best instance of 'pattern' in 'text' near 'loc' using the - Bitap algorithm. - - Args: -@@ -1257,15 +1331,15 @@ class diff_match_patch: - Returns: - Best match index or -1. - """ -- # Python doesn't have a maxint limit, so ignore this check. -- #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits: -- # raise ValueError("Pattern too long for this application.") -+ # Python doesn't have a maxint limit, so ignore this check. -+ # if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits: -+ # raise ValueError("Pattern too long for this application.") - -- # Initialise the alphabet. -- s = self.match_alphabet(pattern) -+ # Initialise the alphabet. -+ s = self.match_alphabet(pattern) - -- def match_bitapScore(e, x): -- """Compute and return the score for a match with e errors and x location. 
-+ def match_bitapScore(e, x): -+ """Compute and return the score for a match with e errors and x location. - Accesses loc and pattern through being a closure. - - Args: -@@ -1275,84 +1349,87 @@ class diff_match_patch: - Returns: - Overall score for match (0.0 = good, 1.0 = bad). - """ -- accuracy = float(e) / len(pattern) -- proximity = abs(loc - x) -- if not self.Match_Distance: -- # Dodge divide by zero error. -- return proximity and 1.0 or accuracy -- return accuracy + (proximity / float(self.Match_Distance)) -- -- # Highest score beyond which we give up. -- score_threshold = self.Match_Threshold -- # Is there a nearby exact match? (speedup) -- best_loc = text.find(pattern, loc) -- if best_loc != -1: -- score_threshold = min(match_bitapScore(0, best_loc), score_threshold) -- # What about in the other direction? (speedup) -- best_loc = text.rfind(pattern, loc + len(pattern)) -- if best_loc != -1: -- score_threshold = min(match_bitapScore(0, best_loc), score_threshold) -- -- # Initialise the bit arrays. -- matchmask = 1 << (len(pattern) - 1) -- best_loc = -1 -- -- bin_max = len(pattern) + len(text) -- # Empty initialization added to appease pychecker. -- last_rd = None -- for d in xrange(len(pattern)): -- # Scan for the best match each iteration allows for one more error. -- # Run a binary search to determine how far from 'loc' we can stray at -- # this error level. -- bin_min = 0 -- bin_mid = bin_max -- while bin_min < bin_mid: -- if match_bitapScore(d, loc + bin_mid) <= score_threshold: -- bin_min = bin_mid -- else: -- bin_max = bin_mid -- bin_mid = (bin_max - bin_min) // 2 + bin_min -- -- # Use the result from this iteration as the maximum for the next. -- bin_max = bin_mid -- start = max(1, loc - bin_mid + 1) -- finish = min(loc + bin_mid, len(text)) + len(pattern) -- -- rd = [0] * (finish + 2) -- rd[finish + 1] = (1 << d) - 1 -- for j in xrange(finish, start - 1, -1): -- if len(text) <= j - 1: -- # Out of range. 
-- charMatch = 0 -- else: -- charMatch = s.get(text[j - 1], 0) -- if d == 0: # First pass: exact match. -- rd[j] = ((rd[j + 1] << 1) | 1) & charMatch -- else: # Subsequent passes: fuzzy match. -- rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | ( -- ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1] -- if rd[j] & matchmask: -- score = match_bitapScore(d, j - 1) -- # This match will almost certainly be better than any existing match. -- # But check anyway. -- if score <= score_threshold: -- # Told you so. -- score_threshold = score -- best_loc = j - 1 -- if best_loc > loc: -- # When passing loc, don't exceed our current distance from loc. -- start = max(1, 2 * loc - best_loc) -- else: -- # Already passed loc, downhill from here on in. -- break -- # No hope for a (better) match at greater error levels. -- if match_bitapScore(d + 1, loc) > score_threshold: -- break -- last_rd = rd -- return best_loc -+ accuracy = float(e) / len(pattern) -+ proximity = abs(loc - x) -+ if not self.Match_Distance: -+ # Dodge divide by zero error. -+ return proximity and 1.0 or accuracy -+ return accuracy + (proximity / float(self.Match_Distance)) -+ -+ # Highest score beyond which we give up. -+ score_threshold = self.Match_Threshold -+ # Is there a nearby exact match? (speedup) -+ best_loc = text.find(pattern, loc) -+ if best_loc != -1: -+ score_threshold = min(match_bitapScore(0, best_loc), score_threshold) -+ # What about in the other direction? (speedup) -+ best_loc = text.rfind(pattern, loc + len(pattern)) -+ if best_loc != -1: -+ score_threshold = min(match_bitapScore(0, best_loc), score_threshold) -+ -+ # Initialise the bit arrays. -+ matchmask = 1 << (len(pattern) - 1) -+ best_loc = -1 -+ -+ bin_max = len(pattern) + len(text) -+ # Empty initialization added to appease pychecker. -+ last_rd = None -+ for d in xrange(len(pattern)): -+ # Scan for the best match each iteration allows for one more error. 
-+ # Run a binary search to determine how far from 'loc' we can stray at -+ # this error level. -+ bin_min = 0 -+ bin_mid = bin_max -+ while bin_min < bin_mid: -+ if match_bitapScore(d, loc + bin_mid) <= score_threshold: -+ bin_min = bin_mid -+ else: -+ bin_max = bin_mid -+ bin_mid = (bin_max - bin_min) // 2 + bin_min -+ -+ # Use the result from this iteration as the maximum for the next. -+ bin_max = bin_mid -+ start = max(1, loc - bin_mid + 1) -+ finish = min(loc + bin_mid, len(text)) + len(pattern) -+ -+ rd = [0] * (finish + 2) -+ rd[finish + 1] = (1 << d) - 1 -+ for j in xrange(finish, start - 1, -1): -+ if len(text) <= j - 1: -+ # Out of range. -+ charMatch = 0 -+ else: -+ charMatch = s.get(text[j - 1], 0) -+ if d == 0: # First pass: exact match. -+ rd[j] = ((rd[j + 1] << 1) | 1) & charMatch -+ else: # Subsequent passes: fuzzy match. -+ rd[j] = ( -+ (((rd[j + 1] << 1) | 1) & charMatch) -+ | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) -+ | last_rd[j + 1] -+ ) -+ if rd[j] & matchmask: -+ score = match_bitapScore(d, j - 1) -+ # This match will almost certainly be better than any existing match. -+ # But check anyway. -+ if score <= score_threshold: -+ # Told you so. -+ score_threshold = score -+ best_loc = j - 1 -+ if best_loc > loc: -+ # When passing loc, don't exceed our current distance from loc. -+ start = max(1, 2 * loc - best_loc) -+ else: -+ # Already passed loc, downhill from here on in. -+ break -+ # No hope for a (better) match at greater error levels. -+ if match_bitapScore(d + 1, loc) > score_threshold: -+ break -+ last_rd = rd -+ return best_loc - -- def match_alphabet(self, pattern): -- """Initialise the alphabet for the Bitap algorithm. -+ def match_alphabet(self, pattern): -+ """Initialise the alphabet for the Bitap algorithm. - - Args: - pattern: The text to encode. -@@ -1360,58 +1437,61 @@ class diff_match_patch: - Returns: - Hash of character locations. 
- """ -- s = {} -- for char in pattern: -- s[char] = 0 -- for i in xrange(len(pattern)): -- s[pattern[i]] |= 1 << (len(pattern) - i - 1) -- return s -+ s = {} -+ for char in pattern: -+ s[char] = 0 -+ for i in xrange(len(pattern)): -+ s[pattern[i]] |= 1 << (len(pattern) - i - 1) -+ return s - -- # PATCH FUNCTIONS -+ # PATCH FUNCTIONS - -- def patch_addContext(self, patch, text): -- """Increase the context until it is unique, -+ def patch_addContext(self, patch, text): -+ """Increase the context until it is unique, - but don't let the pattern expand beyond Match_MaxBits. - - Args: - patch: The patch to grow. - text: Source text. - """ -- if len(text) == 0: -- return -- pattern = text[patch.start2 : patch.start2 + patch.length1] -- padding = 0 -- -- # Look for the first and last matches of pattern in text. If two different -- # matches are found, increase the pattern length. -- while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits == -- 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin - -- self.Patch_Margin)): -- padding += self.Patch_Margin -- pattern = text[max(0, patch.start2 - padding) : -- patch.start2 + patch.length1 + padding] -- # Add one chunk for good luck. -- padding += self.Patch_Margin -- -- # Add the prefix. -- prefix = text[max(0, patch.start2 - padding) : patch.start2] -- if prefix: -- patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] -- # Add the suffix. -- suffix = text[patch.start2 + patch.length1 : -- patch.start2 + patch.length1 + padding] -- if suffix: -- patch.diffs.append((self.DIFF_EQUAL, suffix)) -- -- # Roll back the start points. -- patch.start1 -= len(prefix) -- patch.start2 -= len(prefix) -- # Extend lengths. -- patch.length1 += len(prefix) + len(suffix) -- patch.length2 += len(prefix) + len(suffix) -+ if len(text) == 0: -+ return -+ pattern = text[patch.start2 : patch.start2 + patch.length1] -+ padding = 0 -+ -+ # Look for the first and last matches of pattern in text. 
If two different -+ # matches are found, increase the pattern length. -+ while text.find(pattern) != text.rfind(pattern) and ( -+ self.Match_MaxBits == 0 -+ or len(pattern) < self.Match_MaxBits - self.Patch_Margin - self.Patch_Margin -+ ): -+ padding += self.Patch_Margin -+ pattern = text[ -+ max(0, patch.start2 - padding) : patch.start2 + patch.length1 + padding -+ ] -+ # Add one chunk for good luck. -+ padding += self.Patch_Margin -+ -+ # Add the prefix. -+ prefix = text[max(0, patch.start2 - padding) : patch.start2] -+ if prefix: -+ patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] -+ # Add the suffix. -+ suffix = text[ -+ patch.start2 + patch.length1 : patch.start2 + patch.length1 + padding -+ ] -+ if suffix: -+ patch.diffs.append((self.DIFF_EQUAL, suffix)) -+ -+ # Roll back the start points. -+ patch.start1 -= len(prefix) -+ patch.start2 -= len(prefix) -+ # Extend lengths. -+ patch.length1 += len(prefix) + len(suffix) -+ patch.length2 += len(prefix) + len(suffix) - -- def patch_make(self, a, b=None, c=None): -- """Compute a list of patches to turn text1 into text2. -+ def patch_make(self, a, b=None, c=None): -+ """Compute a list of patches to turn text1 into text2. - Use diffs if provided, otherwise compute it ourselves. - There are four ways to call this function, depending on what data is - available to the caller: -@@ -1435,97 +1515,107 @@ class diff_match_patch: - Returns: - Array of Patch objects. - """ -- text1 = None -- diffs = None -- # Note that texts may arrive as 'str' or 'unicode'. -- if isinstance(a, basestring) and isinstance(b, basestring) and c is None: -- # Method 1: text1, text2 -- # Compute diffs from text1 and text2. -- text1 = a -- diffs = self.diff_main(text1, b, True) -- if len(diffs) > 2: -- self.diff_cleanupSemantic(diffs) -- self.diff_cleanupEfficiency(diffs) -- elif isinstance(a, list) and b is None and c is None: -- # Method 2: diffs -- # Compute text1 from diffs. 
-- diffs = a -- text1 = self.diff_text1(diffs) -- elif isinstance(a, basestring) and isinstance(b, list) and c is None: -- # Method 3: text1, diffs -- text1 = a -- diffs = b -- elif (isinstance(a, basestring) and isinstance(b, basestring) and -- isinstance(c, list)): -- # Method 4: text1, text2, diffs -- # text2 is not used. -- text1 = a -- diffs = c -- else: -- raise ValueError("Unknown call format to patch_make.") -- -- if not diffs: -- return [] # Get rid of the None case. -- patches = [] -- patch = patch_obj() -- char_count1 = 0 # Number of characters into the text1 string. -- char_count2 = 0 # Number of characters into the text2 string. -- prepatch_text = text1 # Recreate the patches to determine context info. -- postpatch_text = text1 -- for x in xrange(len(diffs)): -- (diff_type, diff_text) = diffs[x] -- if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL: -- # A new patch starts here. -- patch.start1 = char_count1 -- patch.start2 = char_count2 -- if diff_type == self.DIFF_INSERT: -- # Insertion -- patch.diffs.append(diffs[x]) -- patch.length2 += len(diff_text) -- postpatch_text = (postpatch_text[:char_count2] + diff_text + -- postpatch_text[char_count2:]) -- elif diff_type == self.DIFF_DELETE: -- # Deletion. -- patch.length1 += len(diff_text) -- patch.diffs.append(diffs[x]) -- postpatch_text = (postpatch_text[:char_count2] + -- postpatch_text[char_count2 + len(diff_text):]) -- elif (diff_type == self.DIFF_EQUAL and -- len(diff_text) <= 2 * self.Patch_Margin and -- len(patch.diffs) != 0 and len(diffs) != x + 1): -- # Small equality inside a patch. -- patch.diffs.append(diffs[x]) -- patch.length1 += len(diff_text) -- patch.length2 += len(diff_text) -- -- if (diff_type == self.DIFF_EQUAL and -- len(diff_text) >= 2 * self.Patch_Margin): -- # Time for a new patch. -+ text1 = None -+ diffs = None -+ # Note that texts may arrive as 'str' or 'unicode'. 
-+ if isinstance(a, basestring) and isinstance(b, basestring) and c is None: -+ # Method 1: text1, text2 -+ # Compute diffs from text1 and text2. -+ text1 = a -+ diffs = self.diff_main(text1, b, True) -+ if len(diffs) > 2: -+ self.diff_cleanupSemantic(diffs) -+ self.diff_cleanupEfficiency(diffs) -+ elif isinstance(a, list) and b is None and c is None: -+ # Method 2: diffs -+ # Compute text1 from diffs. -+ diffs = a -+ text1 = self.diff_text1(diffs) -+ elif isinstance(a, basestring) and isinstance(b, list) and c is None: -+ # Method 3: text1, diffs -+ text1 = a -+ diffs = b -+ elif ( -+ isinstance(a, basestring) -+ and isinstance(b, basestring) -+ and isinstance(c, list) -+ ): -+ # Method 4: text1, text2, diffs -+ # text2 is not used. -+ text1 = a -+ diffs = c -+ else: -+ raise ValueError("Unknown call format to patch_make.") -+ -+ if not diffs: -+ return [] # Get rid of the None case. -+ patches = [] -+ patch = patch_obj() -+ char_count1 = 0 # Number of characters into the text1 string. -+ char_count2 = 0 # Number of characters into the text2 string. -+ prepatch_text = text1 # Recreate the patches to determine context info. -+ postpatch_text = text1 -+ for x in xrange(len(diffs)): -+ (diff_type, diff_text) = diffs[x] -+ if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL: -+ # A new patch starts here. -+ patch.start1 = char_count1 -+ patch.start2 = char_count2 -+ if diff_type == self.DIFF_INSERT: -+ # Insertion -+ patch.diffs.append(diffs[x]) -+ patch.length2 += len(diff_text) -+ postpatch_text = ( -+ postpatch_text[:char_count2] -+ + diff_text -+ + postpatch_text[char_count2:] -+ ) -+ elif diff_type == self.DIFF_DELETE: -+ # Deletion. 
-+ patch.length1 += len(diff_text) -+ patch.diffs.append(diffs[x]) -+ postpatch_text = ( -+ postpatch_text[:char_count2] -+ + postpatch_text[char_count2 + len(diff_text) :] -+ ) -+ elif ( -+ diff_type == self.DIFF_EQUAL -+ and len(diff_text) <= 2 * self.Patch_Margin -+ and len(patch.diffs) != 0 -+ and len(diffs) != x + 1 -+ ): -+ # Small equality inside a patch. -+ patch.diffs.append(diffs[x]) -+ patch.length1 += len(diff_text) -+ patch.length2 += len(diff_text) -+ -+ if diff_type == self.DIFF_EQUAL and len(diff_text) >= 2 * self.Patch_Margin: -+ # Time for a new patch. -+ if len(patch.diffs) != 0: -+ self.patch_addContext(patch, prepatch_text) -+ patches.append(patch) -+ patch = patch_obj() -+ # Unlike Unidiff, our patch lists have a rolling context. -+ # https://github.com/google/diff-match-patch/wiki/Unidiff -+ # Update prepatch text & pos to reflect the application of the -+ # just completed patch. -+ prepatch_text = postpatch_text -+ char_count1 = char_count2 -+ -+ # Update the current character count. -+ if diff_type != self.DIFF_INSERT: -+ char_count1 += len(diff_text) -+ if diff_type != self.DIFF_DELETE: -+ char_count2 += len(diff_text) -+ -+ # Pick up the leftover patch if not empty. - if len(patch.diffs) != 0: -- self.patch_addContext(patch, prepatch_text) -- patches.append(patch) -- patch = patch_obj() -- # Unlike Unidiff, our patch lists have a rolling context. -- # https://github.com/google/diff-match-patch/wiki/Unidiff -- # Update prepatch text & pos to reflect the application of the -- # just completed patch. -- prepatch_text = postpatch_text -- char_count1 = char_count2 -- -- # Update the current character count. -- if diff_type != self.DIFF_INSERT: -- char_count1 += len(diff_text) -- if diff_type != self.DIFF_DELETE: -- char_count2 += len(diff_text) -- -- # Pick up the leftover patch if not empty. 
-- if len(patch.diffs) != 0: -- self.patch_addContext(patch, prepatch_text) -- patches.append(patch) -- return patches -+ self.patch_addContext(patch, prepatch_text) -+ patches.append(patch) -+ return patches - -- def patch_deepCopy(self, patches): -- """Given an array of patches, return another array that is identical. -+ def patch_deepCopy(self, patches): -+ """Given an array of patches, return another array that is identical. - - Args: - patches: Array of Patch objects. -@@ -1533,20 +1623,20 @@ class diff_match_patch: - Returns: - Array of Patch objects. - """ -- patchesCopy = [] -- for patch in patches: -- patchCopy = patch_obj() -- # No need to deep copy the tuples since they are immutable. -- patchCopy.diffs = patch.diffs[:] -- patchCopy.start1 = patch.start1 -- patchCopy.start2 = patch.start2 -- patchCopy.length1 = patch.length1 -- patchCopy.length2 = patch.length2 -- patchesCopy.append(patchCopy) -- return patchesCopy -+ patchesCopy = [] -+ for patch in patches: -+ patchCopy = patch_obj() -+ # No need to deep copy the tuples since they are immutable. -+ patchCopy.diffs = patch.diffs[:] -+ patchCopy.start1 = patch.start1 -+ patchCopy.start2 = patch.start2 -+ patchCopy.length1 = patch.length1 -+ patchCopy.length2 = patch.length2 -+ patchesCopy.append(patchCopy) -+ return patchesCopy - -- def patch_apply(self, patches, text): -- """Merge a set of patches onto the text. Return a patched text, as well -+ def patch_apply(self, patches, text): -+ """Merge a set of patches onto the text. Return a patched text, as well - as a list of true/false values indicating which patches were applied. - - Args: -@@ -1556,85 +1646,102 @@ class diff_match_patch: - Returns: - Two element Array, containing the new text and an array of boolean values. - """ -- if not patches: -- return (text, []) -+ if not patches: -+ return (text, []) - -- # Deep copy the patches so that no changes are made to originals. 
-- patches = self.patch_deepCopy(patches) -+ # Deep copy the patches so that no changes are made to originals. -+ patches = self.patch_deepCopy(patches) - -- nullPadding = self.patch_addPadding(patches) -- text = nullPadding + text + nullPadding -- self.patch_splitMax(patches) -- -- # delta keeps track of the offset between the expected and actual location -- # of the previous patch. If there are patches expected at positions 10 and -- # 20, but the first patch was found at 12, delta is 2 and the second patch -- # has an effective expected position of 22. -- delta = 0 -- results = [] -- for patch in patches: -- expected_loc = patch.start2 + delta -- text1 = self.diff_text1(patch.diffs) -- end_loc = -1 -- if len(text1) > self.Match_MaxBits: -- # patch_splitMax will only provide an oversized pattern in the case of -- # a monster delete. -- start_loc = self.match_main(text, text1[:self.Match_MaxBits], -- expected_loc) -- if start_loc != -1: -- end_loc = self.match_main(text, text1[-self.Match_MaxBits:], -- expected_loc + len(text1) - self.Match_MaxBits) -- if end_loc == -1 or start_loc >= end_loc: -- # Can't find valid trailing context. Drop this patch. -- start_loc = -1 -- else: -- start_loc = self.match_main(text, text1, expected_loc) -- if start_loc == -1: -- # No match found. :( -- results.append(False) -- # Subtract the delta for this failed patch from subsequent patches. -- delta -= patch.length2 - patch.length1 -- else: -- # Found a match. :) -- results.append(True) -- delta = start_loc - expected_loc -- if end_loc == -1: -- text2 = text[start_loc : start_loc + len(text1)] -- else: -- text2 = text[start_loc : end_loc + self.Match_MaxBits] -- if text1 == text2: -- # Perfect match, just shove the replacement text in. -- text = (text[:start_loc] + self.diff_text2(patch.diffs) + -- text[start_loc + len(text1):]) -- else: -- # Imperfect match. -- # Run a diff to get a framework of equivalent indices. 
-- diffs = self.diff_main(text1, text2, False) -- if (len(text1) > self.Match_MaxBits and -- self.diff_levenshtein(diffs) / float(len(text1)) > -- self.Patch_DeleteThreshold): -- # The end points match, but the content is unacceptably bad. -- results[-1] = False -- else: -- self.diff_cleanupSemanticLossless(diffs) -- index1 = 0 -- for (op, data) in patch.diffs: -- if op != self.DIFF_EQUAL: -- index2 = self.diff_xIndex(diffs, index1) -- if op == self.DIFF_INSERT: # Insertion -- text = text[:start_loc + index2] + data + text[start_loc + -- index2:] -- elif op == self.DIFF_DELETE: # Deletion -- text = text[:start_loc + index2] + text[start_loc + -- self.diff_xIndex(diffs, index1 + len(data)):] -- if op != self.DIFF_DELETE: -- index1 += len(data) -- # Strip the padding off. -- text = text[len(nullPadding):-len(nullPadding)] -- return (text, results) -+ nullPadding = self.patch_addPadding(patches) -+ text = nullPadding + text + nullPadding -+ self.patch_splitMax(patches) -+ -+ # delta keeps track of the offset between the expected and actual location -+ # of the previous patch. If there are patches expected at positions 10 and -+ # 20, but the first patch was found at 12, delta is 2 and the second patch -+ # has an effective expected position of 22. -+ delta = 0 -+ results = [] -+ for patch in patches: -+ expected_loc = patch.start2 + delta -+ text1 = self.diff_text1(patch.diffs) -+ end_loc = -1 -+ if len(text1) > self.Match_MaxBits: -+ # patch_splitMax will only provide an oversized pattern in the case of -+ # a monster delete. -+ start_loc = self.match_main( -+ text, text1[: self.Match_MaxBits], expected_loc -+ ) -+ if start_loc != -1: -+ end_loc = self.match_main( -+ text, -+ text1[-self.Match_MaxBits :], -+ expected_loc + len(text1) - self.Match_MaxBits, -+ ) -+ if end_loc == -1 or start_loc >= end_loc: -+ # Can't find valid trailing context. Drop this patch. 
-+ start_loc = -1 -+ else: -+ start_loc = self.match_main(text, text1, expected_loc) -+ if start_loc == -1: -+ # No match found. :( -+ results.append(False) -+ # Subtract the delta for this failed patch from subsequent patches. -+ delta -= patch.length2 - patch.length1 -+ else: -+ # Found a match. :) -+ results.append(True) -+ delta = start_loc - expected_loc -+ if end_loc == -1: -+ text2 = text[start_loc : start_loc + len(text1)] -+ else: -+ text2 = text[start_loc : end_loc + self.Match_MaxBits] -+ if text1 == text2: -+ # Perfect match, just shove the replacement text in. -+ text = ( -+ text[:start_loc] -+ + self.diff_text2(patch.diffs) -+ + text[start_loc + len(text1) :] -+ ) -+ else: -+ # Imperfect match. -+ # Run a diff to get a framework of equivalent indices. -+ diffs = self.diff_main(text1, text2, False) -+ if ( -+ len(text1) > self.Match_MaxBits -+ and self.diff_levenshtein(diffs) / float(len(text1)) -+ > self.Patch_DeleteThreshold -+ ): -+ # The end points match, but the content is unacceptably bad. -+ results[-1] = False -+ else: -+ self.diff_cleanupSemanticLossless(diffs) -+ index1 = 0 -+ for (op, data) in patch.diffs: -+ if op != self.DIFF_EQUAL: -+ index2 = self.diff_xIndex(diffs, index1) -+ if op == self.DIFF_INSERT: # Insertion -+ text = ( -+ text[: start_loc + index2] -+ + data -+ + text[start_loc + index2 :] -+ ) -+ elif op == self.DIFF_DELETE: # Deletion -+ text = ( -+ text[: start_loc + index2] -+ + text[ -+ start_loc -+ + self.diff_xIndex(diffs, index1 + len(data)) : -+ ] -+ ) -+ if op != self.DIFF_DELETE: -+ index1 += len(data) -+ # Strip the padding off. -+ text = text[len(nullPadding) : -len(nullPadding)] -+ return (text, results) - -- def patch_addPadding(self, patches): -- """Add some padding on text start and end so that edges can match -+ def patch_addPadding(self, patches): -+ """Add some padding on text start and end so that edges can match - something. Intended to be called only from within patch_apply. 
- - Args: -@@ -1643,144 +1750,154 @@ class diff_match_patch: - Returns: - The padding string added to each side. - """ -- paddingLength = self.Patch_Margin -- nullPadding = "" -- for x in xrange(1, paddingLength + 1): -- nullPadding += chr(x) -- -- # Bump all the patches forward. -- for patch in patches: -- patch.start1 += paddingLength -- patch.start2 += paddingLength -- -- # Add some padding on start of first diff. -- patch = patches[0] -- diffs = patch.diffs -- if not diffs or diffs[0][0] != self.DIFF_EQUAL: -- # Add nullPadding equality. -- diffs.insert(0, (self.DIFF_EQUAL, nullPadding)) -- patch.start1 -= paddingLength # Should be 0. -- patch.start2 -= paddingLength # Should be 0. -- patch.length1 += paddingLength -- patch.length2 += paddingLength -- elif paddingLength > len(diffs[0][1]): -- # Grow first equality. -- extraLength = paddingLength - len(diffs[0][1]) -- newText = nullPadding[len(diffs[0][1]):] + diffs[0][1] -- diffs[0] = (diffs[0][0], newText) -- patch.start1 -= extraLength -- patch.start2 -= extraLength -- patch.length1 += extraLength -- patch.length2 += extraLength -- -- # Add some padding on end of last diff. -- patch = patches[-1] -- diffs = patch.diffs -- if not diffs or diffs[-1][0] != self.DIFF_EQUAL: -- # Add nullPadding equality. -- diffs.append((self.DIFF_EQUAL, nullPadding)) -- patch.length1 += paddingLength -- patch.length2 += paddingLength -- elif paddingLength > len(diffs[-1][1]): -- # Grow last equality. -- extraLength = paddingLength - len(diffs[-1][1]) -- newText = diffs[-1][1] + nullPadding[:extraLength] -- diffs[-1] = (diffs[-1][0], newText) -- patch.length1 += extraLength -- patch.length2 += extraLength -+ paddingLength = self.Patch_Margin -+ nullPadding = "" -+ for x in xrange(1, paddingLength + 1): -+ nullPadding += chr(x) -+ -+ # Bump all the patches forward. -+ for patch in patches: -+ patch.start1 += paddingLength -+ patch.start2 += paddingLength -+ -+ # Add some padding on start of first diff. 
-+ patch = patches[0] -+ diffs = patch.diffs -+ if not diffs or diffs[0][0] != self.DIFF_EQUAL: -+ # Add nullPadding equality. -+ diffs.insert(0, (self.DIFF_EQUAL, nullPadding)) -+ patch.start1 -= paddingLength # Should be 0. -+ patch.start2 -= paddingLength # Should be 0. -+ patch.length1 += paddingLength -+ patch.length2 += paddingLength -+ elif paddingLength > len(diffs[0][1]): -+ # Grow first equality. -+ extraLength = paddingLength - len(diffs[0][1]) -+ newText = nullPadding[len(diffs[0][1]) :] + diffs[0][1] -+ diffs[0] = (diffs[0][0], newText) -+ patch.start1 -= extraLength -+ patch.start2 -= extraLength -+ patch.length1 += extraLength -+ patch.length2 += extraLength -+ -+ # Add some padding on end of last diff. -+ patch = patches[-1] -+ diffs = patch.diffs -+ if not diffs or diffs[-1][0] != self.DIFF_EQUAL: -+ # Add nullPadding equality. -+ diffs.append((self.DIFF_EQUAL, nullPadding)) -+ patch.length1 += paddingLength -+ patch.length2 += paddingLength -+ elif paddingLength > len(diffs[-1][1]): -+ # Grow last equality. -+ extraLength = paddingLength - len(diffs[-1][1]) -+ newText = diffs[-1][1] + nullPadding[:extraLength] -+ diffs[-1] = (diffs[-1][0], newText) -+ patch.length1 += extraLength -+ patch.length2 += extraLength - -- return nullPadding -+ return nullPadding - -- def patch_splitMax(self, patches): -- """Look through the patches and break up any which are longer than the -+ def patch_splitMax(self, patches): -+ """Look through the patches and break up any which are longer than the - maximum limit of the match algorithm. - Intended to be called only from within patch_apply. - - Args: - patches: Array of Patch objects. - """ -- patch_size = self.Match_MaxBits -- if patch_size == 0: -- # Python has the option of not splitting strings due to its ability -- # to handle integers of arbitrary precision. -- return -- for x in xrange(len(patches)): -- if patches[x].length1 <= patch_size: -- continue -- bigpatch = patches[x] -- # Remove the big old patch. 
-- del patches[x] -- x -= 1 -- start1 = bigpatch.start1 -- start2 = bigpatch.start2 -- precontext = '' -- while len(bigpatch.diffs) != 0: -- # Create one of several smaller patches. -- patch = patch_obj() -- empty = True -- patch.start1 = start1 - len(precontext) -- patch.start2 = start2 - len(precontext) -- if precontext: -- patch.length1 = patch.length2 = len(precontext) -- patch.diffs.append((self.DIFF_EQUAL, precontext)) -- -- while (len(bigpatch.diffs) != 0 and -- patch.length1 < patch_size - self.Patch_Margin): -- (diff_type, diff_text) = bigpatch.diffs[0] -- if diff_type == self.DIFF_INSERT: -- # Insertions are harmless. -- patch.length2 += len(diff_text) -- start2 += len(diff_text) -- patch.diffs.append(bigpatch.diffs.pop(0)) -- empty = False -- elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and -- patch.diffs[0][0] == self.DIFF_EQUAL and -- len(diff_text) > 2 * patch_size): -- # This is a large deletion. Let it pass in one chunk. -- patch.length1 += len(diff_text) -- start1 += len(diff_text) -- empty = False -- patch.diffs.append((diff_type, diff_text)) -- del bigpatch.diffs[0] -- else: -- # Deletion or equality. Only take as much as we can stomach. -- diff_text = diff_text[:patch_size - patch.length1 - -- self.Patch_Margin] -- patch.length1 += len(diff_text) -- start1 += len(diff_text) -- if diff_type == self.DIFF_EQUAL: -- patch.length2 += len(diff_text) -- start2 += len(diff_text) -- else: -- empty = False -- -- patch.diffs.append((diff_type, diff_text)) -- if diff_text == bigpatch.diffs[0][1]: -- del bigpatch.diffs[0] -- else: -- bigpatch.diffs[0] = (bigpatch.diffs[0][0], -- bigpatch.diffs[0][1][len(diff_text):]) -+ patch_size = self.Match_MaxBits -+ if patch_size == 0: -+ # Python has the option of not splitting strings due to its ability -+ # to handle integers of arbitrary precision. -+ return -+ for x in xrange(len(patches)): -+ if patches[x].length1 <= patch_size: -+ continue -+ bigpatch = patches[x] -+ # Remove the big old patch. 
-+ del patches[x] -+ x -= 1 -+ start1 = bigpatch.start1 -+ start2 = bigpatch.start2 -+ precontext = "" -+ while len(bigpatch.diffs) != 0: -+ # Create one of several smaller patches. -+ patch = patch_obj() -+ empty = True -+ patch.start1 = start1 - len(precontext) -+ patch.start2 = start2 - len(precontext) -+ if precontext: -+ patch.length1 = patch.length2 = len(precontext) -+ patch.diffs.append((self.DIFF_EQUAL, precontext)) -+ -+ while ( -+ len(bigpatch.diffs) != 0 -+ and patch.length1 < patch_size - self.Patch_Margin -+ ): -+ (diff_type, diff_text) = bigpatch.diffs[0] -+ if diff_type == self.DIFF_INSERT: -+ # Insertions are harmless. -+ patch.length2 += len(diff_text) -+ start2 += len(diff_text) -+ patch.diffs.append(bigpatch.diffs.pop(0)) -+ empty = False -+ elif ( -+ diff_type == self.DIFF_DELETE -+ and len(patch.diffs) == 1 -+ and patch.diffs[0][0] == self.DIFF_EQUAL -+ and len(diff_text) > 2 * patch_size -+ ): -+ # This is a large deletion. Let it pass in one chunk. -+ patch.length1 += len(diff_text) -+ start1 += len(diff_text) -+ empty = False -+ patch.diffs.append((diff_type, diff_text)) -+ del bigpatch.diffs[0] -+ else: -+ # Deletion or equality. Only take as much as we can stomach. -+ diff_text = diff_text[ -+ : patch_size - patch.length1 - self.Patch_Margin -+ ] -+ patch.length1 += len(diff_text) -+ start1 += len(diff_text) -+ if diff_type == self.DIFF_EQUAL: -+ patch.length2 += len(diff_text) -+ start2 += len(diff_text) -+ else: -+ empty = False -+ -+ patch.diffs.append((diff_type, diff_text)) -+ if diff_text == bigpatch.diffs[0][1]: -+ del bigpatch.diffs[0] -+ else: -+ bigpatch.diffs[0] = ( -+ bigpatch.diffs[0][0], -+ bigpatch.diffs[0][1][len(diff_text) :], -+ ) -+ -+ # Compute the head context for the next patch. -+ precontext = self.diff_text2(patch.diffs) -+ precontext = precontext[-self.Patch_Margin :] -+ # Append the end context for this patch. 
-+ postcontext = self.diff_text1(bigpatch.diffs)[: self.Patch_Margin] -+ if postcontext: -+ patch.length1 += len(postcontext) -+ patch.length2 += len(postcontext) -+ if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL: -+ patch.diffs[-1] = ( -+ self.DIFF_EQUAL, -+ patch.diffs[-1][1] + postcontext, -+ ) -+ else: -+ patch.diffs.append((self.DIFF_EQUAL, postcontext)) -+ -+ if not empty: -+ x += 1 -+ patches.insert(x, patch) - -- # Compute the head context for the next patch. -- precontext = self.diff_text2(patch.diffs) -- precontext = precontext[-self.Patch_Margin:] -- # Append the end context for this patch. -- postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin] -- if postcontext: -- patch.length1 += len(postcontext) -- patch.length2 += len(postcontext) -- if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL: -- patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] + -- postcontext) -- else: -- patch.diffs.append((self.DIFF_EQUAL, postcontext)) -- -- if not empty: -- x += 1 -- patches.insert(x, patch) -- -- def patch_toText(self, patches): -- """Take a list of patches and return a textual representation. -+ def patch_toText(self, patches): -+ """Take a list of patches and return a textual representation. - - Args: - patches: Array of Patch objects. -@@ -1788,13 +1905,13 @@ class diff_match_patch: - Returns: - Text representation of patches. - """ -- text = [] -- for patch in patches: -- text.append(str(patch)) -- return "".join(text) -+ text = [] -+ for patch in patches: -+ text.append(str(patch)) -+ return "".join(text) - -- def patch_fromText(self, textline): -- """Parse a textual representation of patches and return a list of patch -+ def patch_fromText(self, textline): -+ """Parse a textual representation of patches and return a list of patch - objects. - - Args: -@@ -1806,114 +1923,114 @@ class diff_match_patch: - Raises: - ValueError: If invalid input. 
- """ -- if type(textline) == unicode: -- # Patches should be composed of a subset of ascii chars, Unicode not -- # required. If this encode raises UnicodeEncodeError, patch is invalid. -- textline = textline.encode("ascii") -- patches = [] -- if not textline: -- return patches -- text = textline.split('\n') -- while len(text) != 0: -- m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) -- if not m: -- raise ValueError("Invalid patch string: " + text[0]) -- patch = patch_obj() -- patches.append(patch) -- patch.start1 = int(m.group(1)) -- if m.group(2) == '': -- patch.start1 -= 1 -- patch.length1 = 1 -- elif m.group(2) == '0': -- patch.length1 = 0 -- else: -- patch.start1 -= 1 -- patch.length1 = int(m.group(2)) -- -- patch.start2 = int(m.group(3)) -- if m.group(4) == '': -- patch.start2 -= 1 -- patch.length2 = 1 -- elif m.group(4) == '0': -- patch.length2 = 0 -- else: -- patch.start2 -= 1 -- patch.length2 = int(m.group(4)) -- -- del text[0] -- -- while len(text) != 0: -- if text[0]: -- sign = text[0][0] -- else: -- sign = '' -- line = urllib.unquote(text[0][1:]) -- line = line.decode("utf-8") -- if sign == '+': -- # Insertion. -- patch.diffs.append((self.DIFF_INSERT, line)) -- elif sign == '-': -- # Deletion. -- patch.diffs.append((self.DIFF_DELETE, line)) -- elif sign == ' ': -- # Minor equality. -- patch.diffs.append((self.DIFF_EQUAL, line)) -- elif sign == '@': -- # Start of next patch. -- break -- elif sign == '': -- # Blank line? Whatever. -- pass -- else: -- # WTF? -- raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line)) -- del text[0] -- return patches -+ if type(textline) == unicode: -+ # Patches should be composed of a subset of ascii chars, Unicode not -+ # required. If this encode raises UnicodeEncodeError, patch is invalid. 
-+ textline = textline.encode("ascii") -+ patches = [] -+ if not textline: -+ return patches -+ text = textline.split("\n") -+ while len(text) != 0: -+ m = re.match(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) -+ if not m: -+ raise ValueError("Invalid patch string: " + text[0]) -+ patch = patch_obj() -+ patches.append(patch) -+ patch.start1 = int(m.group(1)) -+ if m.group(2) == "": -+ patch.start1 -= 1 -+ patch.length1 = 1 -+ elif m.group(2) == "0": -+ patch.length1 = 0 -+ else: -+ patch.start1 -= 1 -+ patch.length1 = int(m.group(2)) -+ -+ patch.start2 = int(m.group(3)) -+ if m.group(4) == "": -+ patch.start2 -= 1 -+ patch.length2 = 1 -+ elif m.group(4) == "0": -+ patch.length2 = 0 -+ else: -+ patch.start2 -= 1 -+ patch.length2 = int(m.group(4)) -+ -+ del text[0] -+ -+ while len(text) != 0: -+ if text[0]: -+ sign = text[0][0] -+ else: -+ sign = "" -+ line = urllib.unquote(text[0][1:]) -+ line = line.decode("utf-8") -+ if sign == "+": -+ # Insertion. -+ patch.diffs.append((self.DIFF_INSERT, line)) -+ elif sign == "-": -+ # Deletion. -+ patch.diffs.append((self.DIFF_DELETE, line)) -+ elif sign == " ": -+ # Minor equality. -+ patch.diffs.append((self.DIFF_EQUAL, line)) -+ elif sign == "@": -+ # Start of next patch. -+ break -+ elif sign == "": -+ # Blank line? Whatever. -+ pass -+ else: -+ # WTF? -+ raise ValueError(f"Invalid patch mode: '{sign}'\n{line}") -+ del text[0] -+ return patches - - - class patch_obj: -- """Class representing one patch operation. -+ """Class representing one patch operation. - """ - -- def __init__(self): -- """Initializes with an empty list of diffs. -+ def __init__(self): -+ """Initializes with an empty list of diffs. - """ -- self.diffs = [] -- self.start1 = None -- self.start2 = None -- self.length1 = 0 -- self.length2 = 0 -+ self.diffs = [] -+ self.start1 = None -+ self.start2 = None -+ self.length1 = 0 -+ self.length2 = 0 - -- def __str__(self): -- """Emulate GNU diff's format. 
-+ def __str__(self): -+ """Emulate GNU diff's format. - Header: @@ -382,8 +481,9 @@ - Indices are printed as 1-based, not 0-based. - - Returns: - The GNU diff string. - """ -- if self.length1 == 0: -- coords1 = str(self.start1) + ",0" -- elif self.length1 == 1: -- coords1 = str(self.start1 + 1) -- else: -- coords1 = str(self.start1 + 1) + "," + str(self.length1) -- if self.length2 == 0: -- coords2 = str(self.start2) + ",0" -- elif self.length2 == 1: -- coords2 = str(self.start2 + 1) -- else: -- coords2 = str(self.start2 + 1) + "," + str(self.length2) -- text = ["@@ -", coords1, " +", coords2, " @@\n"] -- # Escape the body of the patch with %xx notation. -- for (op, data) in self.diffs: -- if op == diff_match_patch.DIFF_INSERT: -- text.append("+") -- elif op == diff_match_patch.DIFF_DELETE: -- text.append("-") -- elif op == diff_match_patch.DIFF_EQUAL: -- text.append(" ") -- # High ascii will raise UnicodeDecodeError. Use Unicode instead. -- data = data.encode("utf-8") -- text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n") -- return "".join(text) -+ if self.length1 == 0: -+ coords1 = str(self.start1) + ",0" -+ elif self.length1 == 1: -+ coords1 = str(self.start1 + 1) -+ else: -+ coords1 = str(self.start1 + 1) + "," + str(self.length1) -+ if self.length2 == 0: -+ coords2 = str(self.start2) + ",0" -+ elif self.length2 == 1: -+ coords2 = str(self.start2 + 1) -+ else: -+ coords2 = str(self.start2 + 1) + "," + str(self.length2) -+ text = ["@@ -", coords1, " +", coords2, " @@\n"] -+ # Escape the body of the patch with %xx notation. -+ for (op, data) in self.diffs: -+ if op == diff_match_patch.DIFF_INSERT: -+ text.append("+") -+ elif op == diff_match_patch.DIFF_DELETE: -+ text.append("-") -+ elif op == diff_match_patch.DIFF_EQUAL: -+ text.append(" ") -+ # High ascii will raise UnicodeDecodeError. Use Unicode instead. 
-+ data = data.encode("utf-8") -+ text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n") -+ return "".join(text) -Index: xmldiff-2.4/xmldiff/_diff_match_patch_py3.py -=================================================================== ---- xmldiff-2.4.orig/xmldiff/_diff_match_patch_py3.py -+++ xmldiff-2.4/xmldiff/_diff_match_patch_py3.py -@@ -23,7 +23,7 @@ Computes the difference between two text - Applies the patch onto another text, allowing for errors. - """ - --__author__ = 'fraser@google.com (Neil Fraser)' -+__author__ = "fraser@google.com (Neil Fraser)" - - import re - import sys -@@ -32,51 +32,51 @@ import urllib.parse - - - class diff_match_patch: -- """Class containing the diff, match and patch methods. -+ """Class containing the diff, match and patch methods. - - Also contains the behaviour settings. - """ - -- def __init__(self): -- """Inits a diff_match_patch object with default settings. -+ def __init__(self): -+ """Inits a diff_match_patch object with default settings. - Redefine these in your program to override the defaults. - """ - -- # Number of seconds to map a diff before giving up (0 for infinity). -- self.Diff_Timeout = 1.0 -- # Cost of an empty edit operation in terms of edit characters. -- self.Diff_EditCost = 4 -- # At what point is no match declared (0.0 = perfection, 1.0 = very loose). -- self.Match_Threshold = 0.5 -- # How far to search for a match (0 = exact location, 1000+ = broad match). -- # A match this many characters away from the expected location will add -- # 1.0 to the score (0.0 is a perfect match). -- self.Match_Distance = 1000 -- # When deleting a large block of text (over ~64 characters), how close do -- # the contents have to be to match the expected contents. (0.0 = perfection, -- # 1.0 = very loose). Note that Match_Threshold controls how closely the -- # end points of a delete need to match. -- self.Patch_DeleteThreshold = 0.5 -- # Chunk size for context length. 
-- self.Patch_Margin = 4 -- -- # The number of bits in an int. -- # Python has no maximum, thus to disable patch splitting set to 0. -- # However to avoid long patches in certain pathological cases, use 32. -- # Multiple short patches (using native ints) are much faster than long ones. -- self.Match_MaxBits = 32 -- -- # DIFF FUNCTIONS -- -- # The data structure representing a diff is an array of tuples: -- # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")] -- # which means: delete "Hello", add "Goodbye" and keep " world." -- DIFF_DELETE = -1 -- DIFF_INSERT = 1 -- DIFF_EQUAL = 0 -+ # Number of seconds to map a diff before giving up (0 for infinity). -+ self.Diff_Timeout = 1.0 -+ # Cost of an empty edit operation in terms of edit characters. -+ self.Diff_EditCost = 4 -+ # At what point is no match declared (0.0 = perfection, 1.0 = very loose). -+ self.Match_Threshold = 0.5 -+ # How far to search for a match (0 = exact location, 1000+ = broad match). -+ # A match this many characters away from the expected location will add -+ # 1.0 to the score (0.0 is a perfect match). -+ self.Match_Distance = 1000 -+ # When deleting a large block of text (over ~64 characters), how close do -+ # the contents have to be to match the expected contents. (0.0 = perfection, -+ # 1.0 = very loose). Note that Match_Threshold controls how closely the -+ # end points of a delete need to match. -+ self.Patch_DeleteThreshold = 0.5 -+ # Chunk size for context length. -+ self.Patch_Margin = 4 -+ -+ # The number of bits in an int. -+ # Python has no maximum, thus to disable patch splitting set to 0. -+ # However to avoid long patches in certain pathological cases, use 32. -+ # Multiple short patches (using native ints) are much faster than long ones. 
-+ self.Match_MaxBits = 32 -+ -+ # DIFF FUNCTIONS -+ -+ # The data structure representing a diff is an array of tuples: -+ # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")] -+ # which means: delete "Hello", add "Goodbye" and keep " world." -+ DIFF_DELETE = -1 -+ DIFF_INSERT = 1 -+ DIFF_EQUAL = 0 - -- def diff_main(self, text1, text2, checklines=True, deadline=None): -- """Find the differences between two texts. Simplifies the problem by -+ def diff_main(self, text1, text2, checklines=True, deadline=None): -+ """Find the differences between two texts. Simplifies the problem by - stripping any common prefix or suffix off the texts before diffing. - - Args: -@@ -91,52 +91,52 @@ class diff_match_patch: - Returns: - Array of changes. - """ -- # Set a deadline by which time the diff must be complete. -- if deadline == None: -- # Unlike in most languages, Python counts time in seconds. -- if self.Diff_Timeout <= 0: -- deadline = sys.maxsize -- else: -- deadline = time.time() + self.Diff_Timeout -- -- # Check for null inputs. -- if text1 == None or text2 == None: -- raise ValueError("Null inputs. (diff_main)") -- -- # Check for equality (speedup). -- if text1 == text2: -- if text1: -- return [(self.DIFF_EQUAL, text1)] -- return [] -- -- # Trim off common prefix (speedup). -- commonlength = self.diff_commonPrefix(text1, text2) -- commonprefix = text1[:commonlength] -- text1 = text1[commonlength:] -- text2 = text2[commonlength:] -- -- # Trim off common suffix (speedup). -- commonlength = self.diff_commonSuffix(text1, text2) -- if commonlength == 0: -- commonsuffix = '' -- else: -- commonsuffix = text1[-commonlength:] -- text1 = text1[:-commonlength] -- text2 = text2[:-commonlength] -- -- # Compute the diff on the middle block. -- diffs = self.diff_compute(text1, text2, checklines, deadline) -- -- # Restore the prefix and suffix. 
-- if commonprefix: -- diffs[:0] = [(self.DIFF_EQUAL, commonprefix)] -- if commonsuffix: -- diffs.append((self.DIFF_EQUAL, commonsuffix)) -- self.diff_cleanupMerge(diffs) -- return diffs -+ # Set a deadline by which time the diff must be complete. -+ if deadline == None: -+ # Unlike in most languages, Python counts time in seconds. -+ if self.Diff_Timeout <= 0: -+ deadline = sys.maxsize -+ else: -+ deadline = time.time() + self.Diff_Timeout -+ -+ # Check for null inputs. -+ if text1 == None or text2 == None: -+ raise ValueError("Null inputs. (diff_main)") -+ -+ # Check for equality (speedup). -+ if text1 == text2: -+ if text1: -+ return [(self.DIFF_EQUAL, text1)] -+ return [] -+ -+ # Trim off common prefix (speedup). -+ commonlength = self.diff_commonPrefix(text1, text2) -+ commonprefix = text1[:commonlength] -+ text1 = text1[commonlength:] -+ text2 = text2[commonlength:] -+ -+ # Trim off common suffix (speedup). -+ commonlength = self.diff_commonSuffix(text1, text2) -+ if commonlength == 0: -+ commonsuffix = "" -+ else: -+ commonsuffix = text1[-commonlength:] -+ text1 = text1[:-commonlength] -+ text2 = text2[:-commonlength] -+ -+ # Compute the diff on the middle block. -+ diffs = self.diff_compute(text1, text2, checklines, deadline) -+ -+ # Restore the prefix and suffix. -+ if commonprefix: -+ diffs[:0] = [(self.DIFF_EQUAL, commonprefix)] -+ if commonsuffix: -+ diffs.append((self.DIFF_EQUAL, commonsuffix)) -+ self.diff_cleanupMerge(diffs) -+ return diffs - -- def diff_compute(self, text1, text2, checklines, deadline): -- """Find the differences between two texts. Assumes that the texts do not -+ def diff_compute(self, text1, text2, checklines, deadline): -+ """Find the differences between two texts. Assumes that the texts do not - have any common prefix or suffix. - - Args: -@@ -150,52 +150,55 @@ class diff_match_patch: - Returns: - Array of changes. - """ -- if not text1: -- # Just add some text (speedup). 
-- return [(self.DIFF_INSERT, text2)] -- -- if not text2: -- # Just delete some text (speedup). -- return [(self.DIFF_DELETE, text1)] -- -- if len(text1) > len(text2): -- (longtext, shorttext) = (text1, text2) -- else: -- (shorttext, longtext) = (text1, text2) -- i = longtext.find(shorttext) -- if i != -1: -- # Shorter text is inside the longer text (speedup). -- diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext), -- (self.DIFF_INSERT, longtext[i + len(shorttext):])] -- # Swap insertions for deletions if diff is reversed. -- if len(text1) > len(text2): -- diffs[0] = (self.DIFF_DELETE, diffs[0][1]) -- diffs[2] = (self.DIFF_DELETE, diffs[2][1]) -- return diffs -- -- if len(shorttext) == 1: -- # Single character string. -- # After the previous speedup, the character can't be an equality. -- return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] -- -- # Check to see if the problem can be split in two. -- hm = self.diff_halfMatch(text1, text2) -- if hm: -- # A half-match was found, sort out the return data. -- (text1_a, text1_b, text2_a, text2_b, mid_common) = hm -- # Send both pairs off for separate processing. -- diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline) -- diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline) -- # Merge the results. -- return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b -+ if not text1: -+ # Just add some text (speedup). -+ return [(self.DIFF_INSERT, text2)] -+ -+ if not text2: -+ # Just delete some text (speedup). -+ return [(self.DIFF_DELETE, text1)] - -- if checklines and len(text1) > 100 and len(text2) > 100: -- return self.diff_lineMode(text1, text2, deadline) -+ if len(text1) > len(text2): -+ (longtext, shorttext) = (text1, text2) -+ else: -+ (shorttext, longtext) = (text1, text2) -+ i = longtext.find(shorttext) -+ if i != -1: -+ # Shorter text is inside the longer text (speedup). 
-+ diffs = [ -+ (self.DIFF_INSERT, longtext[:i]), -+ (self.DIFF_EQUAL, shorttext), -+ (self.DIFF_INSERT, longtext[i + len(shorttext) :]), -+ ] -+ # Swap insertions for deletions if diff is reversed. -+ if len(text1) > len(text2): -+ diffs[0] = (self.DIFF_DELETE, diffs[0][1]) -+ diffs[2] = (self.DIFF_DELETE, diffs[2][1]) -+ return diffs -+ -+ if len(shorttext) == 1: -+ # Single character string. -+ # After the previous speedup, the character can't be an equality. -+ return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] -+ -+ # Check to see if the problem can be split in two. -+ hm = self.diff_halfMatch(text1, text2) -+ if hm: -+ # A half-match was found, sort out the return data. -+ (text1_a, text1_b, text2_a, text2_b, mid_common) = hm -+ # Send both pairs off for separate processing. -+ diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline) -+ diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline) -+ # Merge the results. -+ return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b -+ -+ if checklines and len(text1) > 100 and len(text2) > 100: -+ return self.diff_lineMode(text1, text2, deadline) - -- return self.diff_bisect(text1, text2, deadline) -+ return self.diff_bisect(text1, text2, deadline) - -- def diff_lineMode(self, text1, text2, deadline): -- """Do a quick line-level diff on both strings, then rediff the parts for -+ def diff_lineMode(self, text1, text2, deadline): -+ """Do a quick line-level diff on both strings, then rediff the parts for - greater accuracy. - This speedup can produce non-minimal diffs. - -@@ -208,51 +211,51 @@ class diff_match_patch: - Array of changes. - """ - -- # Scan the text on a line-by-line basis first. -- (text1, text2, linearray) = self.diff_linesToChars(text1, text2) -+ # Scan the text on a line-by-line basis first. 
-+ (text1, text2, linearray) = self.diff_linesToChars(text1, text2) - -- diffs = self.diff_main(text1, text2, False, deadline) -+ diffs = self.diff_main(text1, text2, False, deadline) - -- # Convert the diff back to original text. -- self.diff_charsToLines(diffs, linearray) -- # Eliminate freak matches (e.g. blank lines) -- self.diff_cleanupSemantic(diffs) -- -- # Rediff any replacement blocks, this time character-by-character. -- # Add a dummy entry at the end. -- diffs.append((self.DIFF_EQUAL, '')) -- pointer = 0 -- count_delete = 0 -- count_insert = 0 -- text_delete = '' -- text_insert = '' -- while pointer < len(diffs): -- if diffs[pointer][0] == self.DIFF_INSERT: -- count_insert += 1 -- text_insert += diffs[pointer][1] -- elif diffs[pointer][0] == self.DIFF_DELETE: -- count_delete += 1 -- text_delete += diffs[pointer][1] -- elif diffs[pointer][0] == self.DIFF_EQUAL: -- # Upon reaching an equality, check for prior redundancies. -- if count_delete >= 1 and count_insert >= 1: -- # Delete the offending records and add the merged ones. -- subDiff = self.diff_main(text_delete, text_insert, False, deadline) -- diffs[pointer - count_delete - count_insert : pointer] = subDiff -- pointer = pointer - count_delete - count_insert + len(subDiff) -- count_insert = 0 -+ # Convert the diff back to original text. -+ self.diff_charsToLines(diffs, linearray) -+ # Eliminate freak matches (e.g. blank lines) -+ self.diff_cleanupSemantic(diffs) -+ -+ # Rediff any replacement blocks, this time character-by-character. -+ # Add a dummy entry at the end. 
-+ diffs.append((self.DIFF_EQUAL, "")) -+ pointer = 0 - count_delete = 0 -- text_delete = '' -- text_insert = '' -+ count_insert = 0 -+ text_delete = "" -+ text_insert = "" -+ while pointer < len(diffs): -+ if diffs[pointer][0] == self.DIFF_INSERT: -+ count_insert += 1 -+ text_insert += diffs[pointer][1] -+ elif diffs[pointer][0] == self.DIFF_DELETE: -+ count_delete += 1 -+ text_delete += diffs[pointer][1] -+ elif diffs[pointer][0] == self.DIFF_EQUAL: -+ # Upon reaching an equality, check for prior redundancies. -+ if count_delete >= 1 and count_insert >= 1: -+ # Delete the offending records and add the merged ones. -+ subDiff = self.diff_main(text_delete, text_insert, False, deadline) -+ diffs[pointer - count_delete - count_insert : pointer] = subDiff -+ pointer = pointer - count_delete - count_insert + len(subDiff) -+ count_insert = 0 -+ count_delete = 0 -+ text_delete = "" -+ text_insert = "" - -- pointer += 1 -+ pointer += 1 - -- diffs.pop() # Remove the dummy entry at the end. -+ diffs.pop() # Remove the dummy entry at the end. - -- return diffs -+ return diffs - -- def diff_bisect(self, text1, text2, deadline): -- """Find the 'middle snake' of a diff, split the problem in two -+ def diff_bisect(self, text1, text2, deadline): -+ """Find the 'middle snake' of a diff, split the problem in two - and return the recursively constructed diff. - See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. - -@@ -265,96 +268,98 @@ class diff_match_patch: - Array of diff tuples. - """ - -- # Cache the text lengths to prevent multiple calls. -- text1_length = len(text1) -- text2_length = len(text2) -- max_d = (text1_length + text2_length + 1) // 2 -- v_offset = max_d -- v_length = 2 * max_d -- v1 = [-1] * v_length -- v1[v_offset + 1] = 0 -- v2 = v1[:] -- delta = text1_length - text2_length -- # If the total number of characters is odd, then the front path will -- # collide with the reverse path. 
-- front = (delta % 2 != 0) -- # Offsets for start and end of k loop. -- # Prevents mapping of space beyond the grid. -- k1start = 0 -- k1end = 0 -- k2start = 0 -- k2end = 0 -- for d in range(max_d): -- # Bail out if deadline is reached. -- if time.time() > deadline: -- break -- -- # Walk the front path one step. -- for k1 in range(-d + k1start, d + 1 - k1end, 2): -- k1_offset = v_offset + k1 -- if k1 == -d or (k1 != d and -- v1[k1_offset - 1] < v1[k1_offset + 1]): -- x1 = v1[k1_offset + 1] -- else: -- x1 = v1[k1_offset - 1] + 1 -- y1 = x1 - k1 -- while (x1 < text1_length and y1 < text2_length and -- text1[x1] == text2[y1]): -- x1 += 1 -- y1 += 1 -- v1[k1_offset] = x1 -- if x1 > text1_length: -- # Ran off the right of the graph. -- k1end += 2 -- elif y1 > text2_length: -- # Ran off the bottom of the graph. -- k1start += 2 -- elif front: -- k2_offset = v_offset + delta - k1 -- if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1: -- # Mirror x2 onto top-left coordinate system. -- x2 = text1_length - v2[k2_offset] -- if x1 >= x2: -- # Overlap detected. -- return self.diff_bisectSplit(text1, text2, x1, y1, deadline) -- -- # Walk the reverse path one step. -- for k2 in range(-d + k2start, d + 1 - k2end, 2): -- k2_offset = v_offset + k2 -- if k2 == -d or (k2 != d and -- v2[k2_offset - 1] < v2[k2_offset + 1]): -- x2 = v2[k2_offset + 1] -- else: -- x2 = v2[k2_offset - 1] + 1 -- y2 = x2 - k2 -- while (x2 < text1_length and y2 < text2_length and -- text1[-x2 - 1] == text2[-y2 - 1]): -- x2 += 1 -- y2 += 1 -- v2[k2_offset] = x2 -- if x2 > text1_length: -- # Ran off the left of the graph. -- k2end += 2 -- elif y2 > text2_length: -- # Ran off the top of the graph. -- k2start += 2 -- elif not front: -- k1_offset = v_offset + delta - k2 -- if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1: -- x1 = v1[k1_offset] -- y1 = v_offset + x1 - k1_offset -- # Mirror x2 onto top-left coordinate system. 
-- x2 = text1_length - x2 -- if x1 >= x2: -- # Overlap detected. -- return self.diff_bisectSplit(text1, text2, x1, y1, deadline) -- -- # Diff took too long and hit the deadline or -- # number of diffs equals number of characters, no commonality at all. -- return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] -+ # Cache the text lengths to prevent multiple calls. -+ text1_length = len(text1) -+ text2_length = len(text2) -+ max_d = (text1_length + text2_length + 1) // 2 -+ v_offset = max_d -+ v_length = 2 * max_d -+ v1 = [-1] * v_length -+ v1[v_offset + 1] = 0 -+ v2 = v1[:] -+ delta = text1_length - text2_length -+ # If the total number of characters is odd, then the front path will -+ # collide with the reverse path. -+ front = delta % 2 != 0 -+ # Offsets for start and end of k loop. -+ # Prevents mapping of space beyond the grid. -+ k1start = 0 -+ k1end = 0 -+ k2start = 0 -+ k2end = 0 -+ for d in range(max_d): -+ # Bail out if deadline is reached. -+ if time.time() > deadline: -+ break -+ -+ # Walk the front path one step. -+ for k1 in range(-d + k1start, d + 1 - k1end, 2): -+ k1_offset = v_offset + k1 -+ if k1 == -d or (k1 != d and v1[k1_offset - 1] < v1[k1_offset + 1]): -+ x1 = v1[k1_offset + 1] -+ else: -+ x1 = v1[k1_offset - 1] + 1 -+ y1 = x1 - k1 -+ while ( -+ x1 < text1_length and y1 < text2_length and text1[x1] == text2[y1] -+ ): -+ x1 += 1 -+ y1 += 1 -+ v1[k1_offset] = x1 -+ if x1 > text1_length: -+ # Ran off the right of the graph. -+ k1end += 2 -+ elif y1 > text2_length: -+ # Ran off the bottom of the graph. -+ k1start += 2 -+ elif front: -+ k2_offset = v_offset + delta - k1 -+ if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1: -+ # Mirror x2 onto top-left coordinate system. -+ x2 = text1_length - v2[k2_offset] -+ if x1 >= x2: -+ # Overlap detected. -+ return self.diff_bisectSplit(text1, text2, x1, y1, deadline) -+ -+ # Walk the reverse path one step. 
-+ for k2 in range(-d + k2start, d + 1 - k2end, 2): -+ k2_offset = v_offset + k2 -+ if k2 == -d or (k2 != d and v2[k2_offset - 1] < v2[k2_offset + 1]): -+ x2 = v2[k2_offset + 1] -+ else: -+ x2 = v2[k2_offset - 1] + 1 -+ y2 = x2 - k2 -+ while ( -+ x2 < text1_length -+ and y2 < text2_length -+ and text1[-x2 - 1] == text2[-y2 - 1] -+ ): -+ x2 += 1 -+ y2 += 1 -+ v2[k2_offset] = x2 -+ if x2 > text1_length: -+ # Ran off the left of the graph. -+ k2end += 2 -+ elif y2 > text2_length: -+ # Ran off the top of the graph. -+ k2start += 2 -+ elif not front: -+ k1_offset = v_offset + delta - k2 -+ if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1: -+ x1 = v1[k1_offset] -+ y1 = v_offset + x1 - k1_offset -+ # Mirror x2 onto top-left coordinate system. -+ x2 = text1_length - x2 -+ if x1 >= x2: -+ # Overlap detected. -+ return self.diff_bisectSplit(text1, text2, x1, y1, deadline) -+ -+ # Diff took too long and hit the deadline or -+ # number of diffs equals number of characters, no commonality at all. -+ return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] - -- def diff_bisectSplit(self, text1, text2, x, y, deadline): -- """Given the location of the 'middle snake', split the diff in two parts -+ def diff_bisectSplit(self, text1, text2, x, y, deadline): -+ """Given the location of the 'middle snake', split the diff in two parts - and recurse. - - Args: -@@ -367,19 +372,19 @@ class diff_match_patch: - Returns: - Array of diff tuples. - """ -- text1a = text1[:x] -- text2a = text2[:y] -- text1b = text1[x:] -- text2b = text2[y:] -- -- # Compute both diffs serially. -- diffs = self.diff_main(text1a, text2a, False, deadline) -- diffsb = self.diff_main(text1b, text2b, False, deadline) -+ text1a = text1[:x] -+ text2a = text2[:y] -+ text1b = text1[x:] -+ text2b = text2[y:] -+ -+ # Compute both diffs serially. 
-+ diffs = self.diff_main(text1a, text2a, False, deadline) -+ diffsb = self.diff_main(text1b, text2b, False, deadline) - -- return diffs + diffsb -+ return diffs + diffsb - -- def diff_linesToChars(self, text1, text2): -- """Split two texts into an array of strings. Reduce the texts to a string -+ def diff_linesToChars(self, text1, text2): -+ """Split two texts into an array of strings. Reduce the texts to a string - of hashes where each Unicode character represents one line. - - Args: -@@ -391,15 +396,15 @@ class diff_match_patch: - the array of unique strings. The zeroth element of the array of unique - strings is intentionally blank. - """ -- lineArray = [] # e.g. lineArray[4] == "Hello\n" -- lineHash = {} # e.g. lineHash["Hello\n"] == 4 -+ lineArray = [] # e.g. lineArray[4] == "Hello\n" -+ lineHash = {} # e.g. lineHash["Hello\n"] == 4 - -- # "\x00" is a valid character, but various debuggers don't like it. -- # So we'll insert a junk entry to avoid generating a null character. -- lineArray.append('') -+ # "\x00" is a valid character, but various debuggers don't like it. -+ # So we'll insert a junk entry to avoid generating a null character. -+ lineArray.append("") - -- def diff_linesToCharsMunge(text): -- """Split a text into an array of strings. Reduce the texts to a string -+ def diff_linesToCharsMunge(text): -+ """Split a text into an array of strings. Reduce the texts to a string - of hashes where each Unicode character represents one line. - Modifies linearray and linehash through being a closure. - -@@ -409,54 +414,54 @@ class diff_match_patch: - Returns: - Encoded string. - """ -- chars = [] -- # Walk the text, pulling out a substring for each line. -- # text.split('\n') would would temporarily double our memory footprint. -- # Modifying text would create many large strings to garbage collect. 
-- lineStart = 0 -- lineEnd = -1 -- while lineEnd < len(text) - 1: -- lineEnd = text.find('\n', lineStart) -- if lineEnd == -1: -- lineEnd = len(text) - 1 -- line = text[lineStart:lineEnd + 1] -- -- if line in lineHash: -- chars.append(chr(lineHash[line])) -- else: -- if len(lineArray) == maxLines: -- # Bail out at 1114111 because chr(1114112) throws. -- line = text[lineStart:] -- lineEnd = len(text) -- lineArray.append(line) -- lineHash[line] = len(lineArray) - 1 -- chars.append(chr(len(lineArray) - 1)) -- lineStart = lineEnd + 1 -- return "".join(chars) -- -- # Allocate 2/3rds of the space for text1, the rest for text2. -- maxLines = 666666 -- chars1 = diff_linesToCharsMunge(text1) -- maxLines = 1114111 -- chars2 = diff_linesToCharsMunge(text2) -- return (chars1, chars2, lineArray) -+ chars = [] -+ # Walk the text, pulling out a substring for each line. -+ # text.split('\n') would would temporarily double our memory footprint. -+ # Modifying text would create many large strings to garbage collect. -+ lineStart = 0 -+ lineEnd = -1 -+ while lineEnd < len(text) - 1: -+ lineEnd = text.find("\n", lineStart) -+ if lineEnd == -1: -+ lineEnd = len(text) - 1 -+ line = text[lineStart : lineEnd + 1] -+ -+ if line in lineHash: -+ chars.append(chr(lineHash[line])) -+ else: -+ if len(lineArray) == maxLines: -+ # Bail out at 1114111 because chr(1114112) throws. -+ line = text[lineStart:] -+ lineEnd = len(text) -+ lineArray.append(line) -+ lineHash[line] = len(lineArray) - 1 -+ chars.append(chr(len(lineArray) - 1)) -+ lineStart = lineEnd + 1 -+ return "".join(chars) -+ -+ # Allocate 2/3rds of the space for text1, the rest for text2. 
-+ maxLines = 666666 -+ chars1 = diff_linesToCharsMunge(text1) -+ maxLines = 1114111 -+ chars2 = diff_linesToCharsMunge(text2) -+ return (chars1, chars2, lineArray) - -- def diff_charsToLines(self, diffs, lineArray): -- """Rehydrate the text in a diff from a string of line hashes to real lines -+ def diff_charsToLines(self, diffs, lineArray): -+ """Rehydrate the text in a diff from a string of line hashes to real lines - of text. - - Args: - diffs: Array of diff tuples. - lineArray: Array of unique strings. - """ -- for i in range(len(diffs)): -- text = [] -- for char in diffs[i][1]: -- text.append(lineArray[ord(char)]) -- diffs[i] = (diffs[i][0], "".join(text)) -+ for i in range(len(diffs)): -+ text = [] -+ for char in diffs[i][1]: -+ text.append(lineArray[ord(char)]) -+ diffs[i] = (diffs[i][0], "".join(text)) - -- def diff_commonPrefix(self, text1, text2): -- """Determine the common prefix of two strings. -+ def diff_commonPrefix(self, text1, text2): -+ """Determine the common prefix of two strings. - - Args: - text1: First string. -@@ -465,26 +470,26 @@ class diff_match_patch: - Returns: - The number of characters common to the start of each string. - """ -- # Quick check for common null cases. -- if not text1 or not text2 or text1[0] != text2[0]: -- return 0 -- # Binary search. -- # Performance analysis: https://neil.fraser.name/news/2007/10/09/ -- pointermin = 0 -- pointermax = min(len(text1), len(text2)) -- pointermid = pointermax -- pointerstart = 0 -- while pointermin < pointermid: -- if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]: -- pointermin = pointermid -- pointerstart = pointermin -- else: -- pointermax = pointermid -- pointermid = (pointermax - pointermin) // 2 + pointermin -- return pointermid -+ # Quick check for common null cases. -+ if not text1 or not text2 or text1[0] != text2[0]: -+ return 0 -+ # Binary search. 
-+ # Performance analysis: https://neil.fraser.name/news/2007/10/09/ -+ pointermin = 0 -+ pointermax = min(len(text1), len(text2)) -+ pointermid = pointermax -+ pointerstart = 0 -+ while pointermin < pointermid: -+ if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]: -+ pointermin = pointermid -+ pointerstart = pointermin -+ else: -+ pointermax = pointermid -+ pointermid = (pointermax - pointermin) // 2 + pointermin -+ return pointermid - -- def diff_commonSuffix(self, text1, text2): -- """Determine the common suffix of two strings. -+ def diff_commonSuffix(self, text1, text2): -+ """Determine the common suffix of two strings. - - Args: - text1: First string. -@@ -493,27 +498,29 @@ class diff_match_patch: - Returns: - The number of characters common to the end of each string. - """ -- # Quick check for common null cases. -- if not text1 or not text2 or text1[-1] != text2[-1]: -- return 0 -- # Binary search. -- # Performance analysis: https://neil.fraser.name/news/2007/10/09/ -- pointermin = 0 -- pointermax = min(len(text1), len(text2)) -- pointermid = pointermax -- pointerend = 0 -- while pointermin < pointermid: -- if (text1[-pointermid:len(text1) - pointerend] == -- text2[-pointermid:len(text2) - pointerend]): -- pointermin = pointermid -- pointerend = pointermin -- else: -- pointermax = pointermid -- pointermid = (pointermax - pointermin) // 2 + pointermin -- return pointermid -+ # Quick check for common null cases. -+ if not text1 or not text2 or text1[-1] != text2[-1]: -+ return 0 -+ # Binary search. 
-+ # Performance analysis: https://neil.fraser.name/news/2007/10/09/ -+ pointermin = 0 -+ pointermax = min(len(text1), len(text2)) -+ pointermid = pointermax -+ pointerend = 0 -+ while pointermin < pointermid: -+ if ( -+ text1[-pointermid : len(text1) - pointerend] -+ == text2[-pointermid : len(text2) - pointerend] -+ ): -+ pointermin = pointermid -+ pointerend = pointermin -+ else: -+ pointermax = pointermid -+ pointermid = (pointermax - pointermin) // 2 + pointermin -+ return pointermid - -- def diff_commonOverlap(self, text1, text2): -- """Determine if the suffix of one string is the prefix of another. -+ def diff_commonOverlap(self, text1, text2): -+ """Determine if the suffix of one string is the prefix of another. - - Args: - text1 First string. -@@ -523,39 +530,39 @@ class diff_match_patch: - The number of characters common to the end of the first - string and the start of the second string. - """ -- # Cache the text lengths to prevent multiple calls. -- text1_length = len(text1) -- text2_length = len(text2) -- # Eliminate the null case. -- if text1_length == 0 or text2_length == 0: -- return 0 -- # Truncate the longer string. -- if text1_length > text2_length: -- text1 = text1[-text2_length:] -- elif text1_length < text2_length: -- text2 = text2[:text1_length] -- text_length = min(text1_length, text2_length) -- # Quick check for the worst case. -- if text1 == text2: -- return text_length -- -- # Start by looking for a single character match -- # and increase length until no match is found. -- # Performance analysis: https://neil.fraser.name/news/2010/11/04/ -- best = 0 -- length = 1 -- while True: -- pattern = text1[-length:] -- found = text2.find(pattern) -- if found == -1: -- return best -- length += found -- if found == 0 or text1[-length:] == text2[:length]: -- best = length -- length += 1 -+ # Cache the text lengths to prevent multiple calls. -+ text1_length = len(text1) -+ text2_length = len(text2) -+ # Eliminate the null case. 
-+ if text1_length == 0 or text2_length == 0: -+ return 0 -+ # Truncate the longer string. -+ if text1_length > text2_length: -+ text1 = text1[-text2_length:] -+ elif text1_length < text2_length: -+ text2 = text2[:text1_length] -+ text_length = min(text1_length, text2_length) -+ # Quick check for the worst case. -+ if text1 == text2: -+ return text_length -+ -+ # Start by looking for a single character match -+ # and increase length until no match is found. -+ # Performance analysis: https://neil.fraser.name/news/2010/11/04/ -+ best = 0 -+ length = 1 -+ while True: -+ pattern = text1[-length:] -+ found = text2.find(pattern) -+ if found == -1: -+ return best -+ length += found -+ if found == 0 or text1[-length:] == text2[:length]: -+ best = length -+ length += 1 - -- def diff_halfMatch(self, text1, text2): -- """Do the two texts share a substring which is at least half the length of -+ def diff_halfMatch(self, text1, text2): -+ """Do the two texts share a substring which is at least half the length of - the longer text? - This speedup can produce non-minimal diffs. - -@@ -568,18 +575,18 @@ class diff_match_patch: - the prefix of text2, the suffix of text2 and the common middle. Or None - if there was no match. - """ -- if self.Diff_Timeout <= 0: -- # Don't risk returning a non-optimal diff if we have unlimited time. -- return None -- if len(text1) > len(text2): -- (longtext, shorttext) = (text1, text2) -- else: -- (shorttext, longtext) = (text1, text2) -- if len(longtext) < 4 or len(shorttext) * 2 < len(longtext): -- return None # Pointless. -+ if self.Diff_Timeout <= 0: -+ # Don't risk returning a non-optimal diff if we have unlimited time. -+ return None -+ if len(text1) > len(text2): -+ (longtext, shorttext) = (text1, text2) -+ else: -+ (shorttext, longtext) = (text1, text2) -+ if len(longtext) < 4 or len(shorttext) * 2 < len(longtext): -+ return None # Pointless. 
- -- def diff_halfMatchI(longtext, shorttext, i): -- """Does a substring of shorttext exist within longtext such that the -+ def diff_halfMatchI(longtext, shorttext, i): -+ """Does a substring of shorttext exist within longtext such that the - substring is at least half the length of longtext? - Closure, but does not reference any external variables. - -@@ -593,148 +600,181 @@ class diff_match_patch: - longtext, the prefix of shorttext, the suffix of shorttext and the - common middle. Or None if there was no match. - """ -- seed = longtext[i:i + len(longtext) // 4] -- best_common = '' -- j = shorttext.find(seed) -- while j != -1: -- prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) -- suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) -- if len(best_common) < suffixLength + prefixLength: -- best_common = (shorttext[j - suffixLength:j] + -- shorttext[j:j + prefixLength]) -- best_longtext_a = longtext[:i - suffixLength] -- best_longtext_b = longtext[i + prefixLength:] -- best_shorttext_a = shorttext[:j - suffixLength] -- best_shorttext_b = shorttext[j + prefixLength:] -- j = shorttext.find(seed, j + 1) -- -- if len(best_common) * 2 >= len(longtext): -- return (best_longtext_a, best_longtext_b, -- best_shorttext_a, best_shorttext_b, best_common) -- else: -- return None -- -- # First check if the second quarter is the seed for a half-match. -- hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4) -- # Check again based on the third quarter. -- hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2) -- if not hm1 and not hm2: -- return None -- elif not hm2: -- hm = hm1 -- elif not hm1: -- hm = hm2 -- else: -- # Both matched. Select the longest. -- if len(hm1[4]) > len(hm2[4]): -- hm = hm1 -- else: -- hm = hm2 -- -- # A half-match was found, sort out the return data. 
-- if len(text1) > len(text2): -- (text1_a, text1_b, text2_a, text2_b, mid_common) = hm -- else: -- (text2_a, text2_b, text1_a, text1_b, mid_common) = hm -- return (text1_a, text1_b, text2_a, text2_b, mid_common) -+ seed = longtext[i : i + len(longtext) // 4] -+ best_common = "" -+ j = shorttext.find(seed) -+ while j != -1: -+ prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) -+ suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) -+ if len(best_common) < suffixLength + prefixLength: -+ best_common = ( -+ shorttext[j - suffixLength : j] -+ + shorttext[j : j + prefixLength] -+ ) -+ best_longtext_a = longtext[: i - suffixLength] -+ best_longtext_b = longtext[i + prefixLength :] -+ best_shorttext_a = shorttext[: j - suffixLength] -+ best_shorttext_b = shorttext[j + prefixLength :] -+ j = shorttext.find(seed, j + 1) -+ -+ if len(best_common) * 2 >= len(longtext): -+ return ( -+ best_longtext_a, -+ best_longtext_b, -+ best_shorttext_a, -+ best_shorttext_b, -+ best_common, -+ ) -+ else: -+ return None -+ -+ # First check if the second quarter is the seed for a half-match. -+ hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4) -+ # Check again based on the third quarter. -+ hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2) -+ if not hm1 and not hm2: -+ return None -+ elif not hm2: -+ hm = hm1 -+ elif not hm1: -+ hm = hm2 -+ else: -+ # Both matched. Select the longest. -+ if len(hm1[4]) > len(hm2[4]): -+ hm = hm1 -+ else: -+ hm = hm2 - -- def diff_cleanupSemantic(self, diffs): -- """Reduce the number of edits by eliminating semantically trivial -+ # A half-match was found, sort out the return data. 
-+ if len(text1) > len(text2): -+ (text1_a, text1_b, text2_a, text2_b, mid_common) = hm -+ else: -+ (text2_a, text2_b, text1_a, text1_b, mid_common) = hm -+ return (text1_a, text1_b, text2_a, text2_b, mid_common) -+ -+ def diff_cleanupSemantic(self, diffs): -+ """Reduce the number of edits by eliminating semantically trivial - equalities. - - Args: - diffs: Array of diff tuples. - """ -- changes = False -- equalities = [] # Stack of indices where equalities are found. -- lastEquality = None # Always equal to diffs[equalities[-1]][1] -- pointer = 0 # Index of current position. -- # Number of chars that changed prior to the equality. -- length_insertions1, length_deletions1 = 0, 0 -- # Number of chars that changed after the equality. -- length_insertions2, length_deletions2 = 0, 0 -- while pointer < len(diffs): -- if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. -- equalities.append(pointer) -- length_insertions1, length_insertions2 = length_insertions2, 0 -- length_deletions1, length_deletions2 = length_deletions2, 0 -- lastEquality = diffs[pointer][1] -- else: # An insertion or deletion. -- if diffs[pointer][0] == self.DIFF_INSERT: -- length_insertions2 += len(diffs[pointer][1]) -- else: -- length_deletions2 += len(diffs[pointer][1]) -- # Eliminate an equality that is smaller or equal to the edits on both -- # sides of it. -- if (lastEquality and (len(lastEquality) <= -- max(length_insertions1, length_deletions1)) and -- (len(lastEquality) <= max(length_insertions2, length_deletions2))): -- # Duplicate record. -- diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) -- # Change second copy to insert. -- diffs[equalities[-1] + 1] = (self.DIFF_INSERT, -- diffs[equalities[-1] + 1][1]) -- # Throw away the equality we just deleted. -- equalities.pop() -- # Throw away the previous equality (it needs to be reevaluated). 
-- if len(equalities): -- equalities.pop() -- if len(equalities): -- pointer = equalities[-1] -- else: -- pointer = -1 -- # Reset the counters. -- length_insertions1, length_deletions1 = 0, 0 -- length_insertions2, length_deletions2 = 0, 0 -- lastEquality = None -- changes = True -- pointer += 1 -- -- # Normalize the diff. -- if changes: -- self.diff_cleanupMerge(diffs) -- self.diff_cleanupSemanticLossless(diffs) -- -- # Find any overlaps between deletions and insertions. -- # e.g: abcxxxxxxdef -- # -> abcxxxdef -- # e.g: xxxabcdefxxx -- # -> defxxxabc -- # Only extract an overlap if it is as big as the edit ahead or behind it. -- pointer = 1 -- while pointer < len(diffs): -- if (diffs[pointer - 1][0] == self.DIFF_DELETE and -- diffs[pointer][0] == self.DIFF_INSERT): -- deletion = diffs[pointer - 1][1] -- insertion = diffs[pointer][1] -- overlap_length1 = self.diff_commonOverlap(deletion, insertion) -- overlap_length2 = self.diff_commonOverlap(insertion, deletion) -- if overlap_length1 >= overlap_length2: -- if (overlap_length1 >= len(deletion) / 2.0 or -- overlap_length1 >= len(insertion) / 2.0): -- # Overlap found. Insert an equality and trim the surrounding edits. -- diffs.insert(pointer, (self.DIFF_EQUAL, -- insertion[:overlap_length1])) -- diffs[pointer - 1] = (self.DIFF_DELETE, -- deletion[:len(deletion) - overlap_length1]) -- diffs[pointer + 1] = (self.DIFF_INSERT, -- insertion[overlap_length1:]) -+ changes = False -+ equalities = [] # Stack of indices where equalities are found. -+ lastEquality = None # Always equal to diffs[equalities[-1]][1] -+ pointer = 0 # Index of current position. -+ # Number of chars that changed prior to the equality. -+ length_insertions1, length_deletions1 = 0, 0 -+ # Number of chars that changed after the equality. -+ length_insertions2, length_deletions2 = 0, 0 -+ while pointer < len(diffs): -+ if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. 
-+ equalities.append(pointer) -+ length_insertions1, length_insertions2 = length_insertions2, 0 -+ length_deletions1, length_deletions2 = length_deletions2, 0 -+ lastEquality = diffs[pointer][1] -+ else: # An insertion or deletion. -+ if diffs[pointer][0] == self.DIFF_INSERT: -+ length_insertions2 += len(diffs[pointer][1]) -+ else: -+ length_deletions2 += len(diffs[pointer][1]) -+ # Eliminate an equality that is smaller or equal to the edits on both -+ # sides of it. -+ if ( -+ lastEquality -+ and ( -+ len(lastEquality) <= max(length_insertions1, length_deletions1) -+ ) -+ and ( -+ len(lastEquality) <= max(length_insertions2, length_deletions2) -+ ) -+ ): -+ # Duplicate record. -+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) -+ # Change second copy to insert. -+ diffs[equalities[-1] + 1] = ( -+ self.DIFF_INSERT, -+ diffs[equalities[-1] + 1][1], -+ ) -+ # Throw away the equality we just deleted. -+ equalities.pop() -+ # Throw away the previous equality (it needs to be reevaluated). -+ if len(equalities): -+ equalities.pop() -+ if len(equalities): -+ pointer = equalities[-1] -+ else: -+ pointer = -1 -+ # Reset the counters. -+ length_insertions1, length_deletions1 = 0, 0 -+ length_insertions2, length_deletions2 = 0, 0 -+ lastEquality = None -+ changes = True - pointer += 1 -- else: -- if (overlap_length2 >= len(deletion) / 2.0 or -- overlap_length2 >= len(insertion) / 2.0): -- # Reverse overlap found. -- # Insert an equality and swap and trim the surrounding edits. -- diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])) -- diffs[pointer - 1] = (self.DIFF_INSERT, -- insertion[:len(insertion) - overlap_length2]) -- diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:]) -+ -+ # Normalize the diff. -+ if changes: -+ self.diff_cleanupMerge(diffs) -+ self.diff_cleanupSemanticLossless(diffs) -+ -+ # Find any overlaps between deletions and insertions. 
-+ # e.g: abcxxxxxxdef -+ # -> abcxxxdef -+ # e.g: xxxabcdefxxx -+ # -> defxxxabc -+ # Only extract an overlap if it is as big as the edit ahead or behind it. -+ pointer = 1 -+ while pointer < len(diffs): -+ if ( -+ diffs[pointer - 1][0] == self.DIFF_DELETE -+ and diffs[pointer][0] == self.DIFF_INSERT -+ ): -+ deletion = diffs[pointer - 1][1] -+ insertion = diffs[pointer][1] -+ overlap_length1 = self.diff_commonOverlap(deletion, insertion) -+ overlap_length2 = self.diff_commonOverlap(insertion, deletion) -+ if overlap_length1 >= overlap_length2: -+ if ( -+ overlap_length1 >= len(deletion) / 2.0 -+ or overlap_length1 >= len(insertion) / 2.0 -+ ): -+ # Overlap found. Insert an equality and trim the surrounding edits. -+ diffs.insert( -+ pointer, (self.DIFF_EQUAL, insertion[:overlap_length1]) -+ ) -+ diffs[pointer - 1] = ( -+ self.DIFF_DELETE, -+ deletion[: len(deletion) - overlap_length1], -+ ) -+ diffs[pointer + 1] = ( -+ self.DIFF_INSERT, -+ insertion[overlap_length1:], -+ ) -+ pointer += 1 -+ else: -+ if ( -+ overlap_length2 >= len(deletion) / 2.0 -+ or overlap_length2 >= len(insertion) / 2.0 -+ ): -+ # Reverse overlap found. -+ # Insert an equality and swap and trim the surrounding edits. -+ diffs.insert( -+ pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]) -+ ) -+ diffs[pointer - 1] = ( -+ self.DIFF_INSERT, -+ insertion[: len(insertion) - overlap_length2], -+ ) -+ diffs[pointer + 1] = ( -+ self.DIFF_DELETE, -+ deletion[overlap_length2:], -+ ) -+ pointer += 1 -+ pointer += 1 - pointer += 1 -- pointer += 1 -- pointer += 1 - -- def diff_cleanupSemanticLossless(self, diffs): -- """Look for single edits surrounded on both sides by equalities -+ def diff_cleanupSemanticLossless(self, diffs): -+ """Look for single edits surrounded on both sides by equalities - which can be shifted sideways to align the edit to a word boundary. - e.g: The cat came. -> The cat came. - -@@ -742,8 +782,8 @@ class diff_match_patch: - diffs: Array of diff tuples. 
- """ - -- def diff_cleanupSemanticScore(one, two): -- """Given two strings, compute a score representing whether the -+ def diff_cleanupSemanticScore(one, two): -+ """Given two strings, compute a score representing whether the - internal boundary falls on logical boundaries. - Scores range from 6 (best) to 0 (worst). - Closure, but does not reference any external variables. -@@ -755,277 +795,306 @@ class diff_match_patch: - Returns: - The score. - """ -- if not one or not two: -- # Edges are the best. -- return 6 -- -- # Each port of this function behaves slightly differently due to -- # subtle differences in each language's definition of things like -- # 'whitespace'. Since this function's purpose is largely cosmetic, -- # the choice has been made to use each language's native features -- # rather than force total conformity. -- char1 = one[-1] -- char2 = two[0] -- nonAlphaNumeric1 = not char1.isalnum() -- nonAlphaNumeric2 = not char2.isalnum() -- whitespace1 = nonAlphaNumeric1 and char1.isspace() -- whitespace2 = nonAlphaNumeric2 and char2.isspace() -- lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n") -- lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n") -- blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one) -- blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two) -- -- if blankLine1 or blankLine2: -- # Five points for blank lines. -- return 5 -- elif lineBreak1 or lineBreak2: -- # Four points for line breaks. -- return 4 -- elif nonAlphaNumeric1 and not whitespace1 and whitespace2: -- # Three points for end of sentences. -- return 3 -- elif whitespace1 or whitespace2: -- # Two points for whitespace. -- return 2 -- elif nonAlphaNumeric1 or nonAlphaNumeric2: -- # One point for non-alphanumeric. -- return 1 -- return 0 -- -- pointer = 1 -- # Intentionally ignore the first and last element (don't need checking). 
-- while pointer < len(diffs) - 1: -- if (diffs[pointer - 1][0] == self.DIFF_EQUAL and -- diffs[pointer + 1][0] == self.DIFF_EQUAL): -- # This is a single edit surrounded by equalities. -- equality1 = diffs[pointer - 1][1] -- edit = diffs[pointer][1] -- equality2 = diffs[pointer + 1][1] -- -- # First, shift the edit as far left as possible. -- commonOffset = self.diff_commonSuffix(equality1, edit) -- if commonOffset: -- commonString = edit[-commonOffset:] -- equality1 = equality1[:-commonOffset] -- edit = commonString + edit[:-commonOffset] -- equality2 = commonString + equality2 -- -- # Second, step character by character right, looking for the best fit. -- bestEquality1 = equality1 -- bestEdit = edit -- bestEquality2 = equality2 -- bestScore = (diff_cleanupSemanticScore(equality1, edit) + -- diff_cleanupSemanticScore(edit, equality2)) -- while edit and equality2 and edit[0] == equality2[0]: -- equality1 += edit[0] -- edit = edit[1:] + equality2[0] -- equality2 = equality2[1:] -- score = (diff_cleanupSemanticScore(equality1, edit) + -- diff_cleanupSemanticScore(edit, equality2)) -- # The >= encourages trailing rather than leading whitespace on edits. -- if score >= bestScore: -- bestScore = score -- bestEquality1 = equality1 -- bestEdit = edit -- bestEquality2 = equality2 -- -- if diffs[pointer - 1][1] != bestEquality1: -- # We have an improvement, save it back to the diff. -- if bestEquality1: -- diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1) -- else: -- del diffs[pointer - 1] -- pointer -= 1 -- diffs[pointer] = (diffs[pointer][0], bestEdit) -- if bestEquality2: -- diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2) -- else: -- del diffs[pointer + 1] -- pointer -= 1 -- pointer += 1 -- -- # Define some regex patterns for matching boundaries. -- BLANKLINEEND = re.compile(r"\n\r?\n$") -- BLANKLINESTART = re.compile(r"^\r?\n\r?\n") -+ if not one or not two: -+ # Edges are the best. 
-+ return 6 -+ -+ # Each port of this function behaves slightly differently due to -+ # subtle differences in each language's definition of things like -+ # 'whitespace'. Since this function's purpose is largely cosmetic, -+ # the choice has been made to use each language's native features -+ # rather than force total conformity. -+ char1 = one[-1] -+ char2 = two[0] -+ nonAlphaNumeric1 = not char1.isalnum() -+ nonAlphaNumeric2 = not char2.isalnum() -+ whitespace1 = nonAlphaNumeric1 and char1.isspace() -+ whitespace2 = nonAlphaNumeric2 and char2.isspace() -+ lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n") -+ lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n") -+ blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one) -+ blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two) -+ -+ if blankLine1 or blankLine2: -+ # Five points for blank lines. -+ return 5 -+ elif lineBreak1 or lineBreak2: -+ # Four points for line breaks. -+ return 4 -+ elif nonAlphaNumeric1 and not whitespace1 and whitespace2: -+ # Three points for end of sentences. -+ return 3 -+ elif whitespace1 or whitespace2: -+ # Two points for whitespace. -+ return 2 -+ elif nonAlphaNumeric1 or nonAlphaNumeric2: -+ # One point for non-alphanumeric. -+ return 1 -+ return 0 -+ -+ pointer = 1 -+ # Intentionally ignore the first and last element (don't need checking). -+ while pointer < len(diffs) - 1: -+ if ( -+ diffs[pointer - 1][0] == self.DIFF_EQUAL -+ and diffs[pointer + 1][0] == self.DIFF_EQUAL -+ ): -+ # This is a single edit surrounded by equalities. -+ equality1 = diffs[pointer - 1][1] -+ edit = diffs[pointer][1] -+ equality2 = diffs[pointer + 1][1] -+ -+ # First, shift the edit as far left as possible. 
-+ commonOffset = self.diff_commonSuffix(equality1, edit) -+ if commonOffset: -+ commonString = edit[-commonOffset:] -+ equality1 = equality1[:-commonOffset] -+ edit = commonString + edit[:-commonOffset] -+ equality2 = commonString + equality2 -+ -+ # Second, step character by character right, looking for the best fit. -+ bestEquality1 = equality1 -+ bestEdit = edit -+ bestEquality2 = equality2 -+ bestScore = diff_cleanupSemanticScore( -+ equality1, edit -+ ) + diff_cleanupSemanticScore(edit, equality2) -+ while edit and equality2 and edit[0] == equality2[0]: -+ equality1 += edit[0] -+ edit = edit[1:] + equality2[0] -+ equality2 = equality2[1:] -+ score = diff_cleanupSemanticScore( -+ equality1, edit -+ ) + diff_cleanupSemanticScore(edit, equality2) -+ # The >= encourages trailing rather than leading whitespace on edits. -+ if score >= bestScore: -+ bestScore = score -+ bestEquality1 = equality1 -+ bestEdit = edit -+ bestEquality2 = equality2 -+ -+ if diffs[pointer - 1][1] != bestEquality1: -+ # We have an improvement, save it back to the diff. -+ if bestEquality1: -+ diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1) -+ else: -+ del diffs[pointer - 1] -+ pointer -= 1 -+ diffs[pointer] = (diffs[pointer][0], bestEdit) -+ if bestEquality2: -+ diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2) -+ else: -+ del diffs[pointer + 1] -+ pointer -= 1 -+ pointer += 1 -+ -+ # Define some regex patterns for matching boundaries. -+ BLANKLINEEND = re.compile(r"\n\r?\n$") -+ BLANKLINESTART = re.compile(r"^\r?\n\r?\n") - -- def diff_cleanupEfficiency(self, diffs): -- """Reduce the number of edits by eliminating operationally trivial -+ def diff_cleanupEfficiency(self, diffs): -+ """Reduce the number of edits by eliminating operationally trivial - equalities. - - Args: - diffs: Array of diff tuples. - """ -- changes = False -- equalities = [] # Stack of indices where equalities are found. 
-- lastEquality = None # Always equal to diffs[equalities[-1]][1] -- pointer = 0 # Index of current position. -- pre_ins = False # Is there an insertion operation before the last equality. -- pre_del = False # Is there a deletion operation before the last equality. -- post_ins = False # Is there an insertion operation after the last equality. -- post_del = False # Is there a deletion operation after the last equality. -- while pointer < len(diffs): -- if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. -- if (len(diffs[pointer][1]) < self.Diff_EditCost and -- (post_ins or post_del)): -- # Candidate found. -- equalities.append(pointer) -- pre_ins = post_ins -- pre_del = post_del -- lastEquality = diffs[pointer][1] -- else: -- # Not a candidate, and can never become one. -- equalities = [] -- lastEquality = None -- -- post_ins = post_del = False -- else: # An insertion or deletion. -- if diffs[pointer][0] == self.DIFF_DELETE: -- post_del = True -- else: -- post_ins = True -- -- # Five types to be split: -- # ABXYCD -- # AXCD -- # ABXC -- # AXCD -- # ABXC -- -- if lastEquality and ((pre_ins and pre_del and post_ins and post_del) or -- ((len(lastEquality) < self.Diff_EditCost / 2) and -- (pre_ins + pre_del + post_ins + post_del) == 3)): -- # Duplicate record. -- diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) -- # Change second copy to insert. -- diffs[equalities[-1] + 1] = (self.DIFF_INSERT, -- diffs[equalities[-1] + 1][1]) -- equalities.pop() # Throw away the equality we just deleted. -- lastEquality = None -- if pre_ins and pre_del: -- # No changes made which could affect previous entry, keep going. -- post_ins = post_del = True -- equalities = [] -- else: -- if len(equalities): -- equalities.pop() # Throw away the previous equality. 
-- if len(equalities): -- pointer = equalities[-1] -- else: -- pointer = -1 -- post_ins = post_del = False -- changes = True -- pointer += 1 -+ changes = False -+ equalities = [] # Stack of indices where equalities are found. -+ lastEquality = None # Always equal to diffs[equalities[-1]][1] -+ pointer = 0 # Index of current position. -+ pre_ins = False # Is there an insertion operation before the last equality. -+ pre_del = False # Is there a deletion operation before the last equality. -+ post_ins = False # Is there an insertion operation after the last equality. -+ post_del = False # Is there a deletion operation after the last equality. -+ while pointer < len(diffs): -+ if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. -+ if len(diffs[pointer][1]) < self.Diff_EditCost and ( -+ post_ins or post_del -+ ): -+ # Candidate found. -+ equalities.append(pointer) -+ pre_ins = post_ins -+ pre_del = post_del -+ lastEquality = diffs[pointer][1] -+ else: -+ # Not a candidate, and can never become one. -+ equalities = [] -+ lastEquality = None -+ -+ post_ins = post_del = False -+ else: # An insertion or deletion. -+ if diffs[pointer][0] == self.DIFF_DELETE: -+ post_del = True -+ else: -+ post_ins = True -+ -+ # Five types to be split: -+ # ABXYCD -+ # AXCD -+ # ABXC -+ # AXCD -+ # ABXC -+ -+ if lastEquality and ( -+ (pre_ins and pre_del and post_ins and post_del) -+ or ( -+ (len(lastEquality) < self.Diff_EditCost / 2) -+ and (pre_ins + pre_del + post_ins + post_del) == 3 -+ ) -+ ): -+ # Duplicate record. -+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) -+ # Change second copy to insert. -+ diffs[equalities[-1] + 1] = ( -+ self.DIFF_INSERT, -+ diffs[equalities[-1] + 1][1], -+ ) -+ equalities.pop() # Throw away the equality we just deleted. -+ lastEquality = None -+ if pre_ins and pre_del: -+ # No changes made which could affect previous entry, keep going. 
-+ post_ins = post_del = True -+ equalities = [] -+ else: -+ if len(equalities): -+ equalities.pop() # Throw away the previous equality. -+ if len(equalities): -+ pointer = equalities[-1] -+ else: -+ pointer = -1 -+ post_ins = post_del = False -+ changes = True -+ pointer += 1 - -- if changes: -- self.diff_cleanupMerge(diffs) -+ if changes: -+ self.diff_cleanupMerge(diffs) - -- def diff_cleanupMerge(self, diffs): -- """Reorder and merge like edit sections. Merge equalities. -+ def diff_cleanupMerge(self, diffs): -+ """Reorder and merge like edit sections. Merge equalities. - Any edit section can move as long as it doesn't cross an equality. - - Args: - diffs: Array of diff tuples. - """ -- diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end. -- pointer = 0 -- count_delete = 0 -- count_insert = 0 -- text_delete = '' -- text_insert = '' -- while pointer < len(diffs): -- if diffs[pointer][0] == self.DIFF_INSERT: -- count_insert += 1 -- text_insert += diffs[pointer][1] -- pointer += 1 -- elif diffs[pointer][0] == self.DIFF_DELETE: -- count_delete += 1 -- text_delete += diffs[pointer][1] -- pointer += 1 -- elif diffs[pointer][0] == self.DIFF_EQUAL: -- # Upon reaching an equality, check for prior redundancies. -- if count_delete + count_insert > 1: -- if count_delete != 0 and count_insert != 0: -- # Factor out any common prefixies. -- commonlength = self.diff_commonPrefix(text_insert, text_delete) -- if commonlength != 0: -- x = pointer - count_delete - count_insert - 1 -- if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: -- diffs[x] = (diffs[x][0], diffs[x][1] + -- text_insert[:commonlength]) -- else: -- diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength])) -- pointer += 1 -- text_insert = text_insert[commonlength:] -- text_delete = text_delete[commonlength:] -- # Factor out any common suffixes. 
-- commonlength = self.diff_commonSuffix(text_insert, text_delete) -- if commonlength != 0: -- diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] + -- diffs[pointer][1]) -- text_insert = text_insert[:-commonlength] -- text_delete = text_delete[:-commonlength] -- # Delete the offending records and add the merged ones. -- new_ops = [] -- if len(text_delete) != 0: -- new_ops.append((self.DIFF_DELETE, text_delete)) -- if len(text_insert) != 0: -- new_ops.append((self.DIFF_INSERT, text_insert)) -- pointer -= count_delete + count_insert -- diffs[pointer : pointer + count_delete + count_insert] = new_ops -- pointer += len(new_ops) + 1 -- elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL: -- # Merge this equality with the previous one. -- diffs[pointer - 1] = (diffs[pointer - 1][0], -- diffs[pointer - 1][1] + diffs[pointer][1]) -- del diffs[pointer] -- else: -- pointer += 1 -- -- count_insert = 0 -+ diffs.append((self.DIFF_EQUAL, "")) # Add a dummy entry at the end. -+ pointer = 0 - count_delete = 0 -- text_delete = '' -- text_insert = '' -- -- if diffs[-1][1] == '': -- diffs.pop() # Remove the dummy entry at the end. -+ count_insert = 0 -+ text_delete = "" -+ text_insert = "" -+ while pointer < len(diffs): -+ if diffs[pointer][0] == self.DIFF_INSERT: -+ count_insert += 1 -+ text_insert += diffs[pointer][1] -+ pointer += 1 -+ elif diffs[pointer][0] == self.DIFF_DELETE: -+ count_delete += 1 -+ text_delete += diffs[pointer][1] -+ pointer += 1 -+ elif diffs[pointer][0] == self.DIFF_EQUAL: -+ # Upon reaching an equality, check for prior redundancies. -+ if count_delete + count_insert > 1: -+ if count_delete != 0 and count_insert != 0: -+ # Factor out any common prefixies. 
-+ commonlength = self.diff_commonPrefix(text_insert, text_delete) -+ if commonlength != 0: -+ x = pointer - count_delete - count_insert - 1 -+ if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: -+ diffs[x] = ( -+ diffs[x][0], -+ diffs[x][1] + text_insert[:commonlength], -+ ) -+ else: -+ diffs.insert( -+ 0, (self.DIFF_EQUAL, text_insert[:commonlength]) -+ ) -+ pointer += 1 -+ text_insert = text_insert[commonlength:] -+ text_delete = text_delete[commonlength:] -+ # Factor out any common suffixes. -+ commonlength = self.diff_commonSuffix(text_insert, text_delete) -+ if commonlength != 0: -+ diffs[pointer] = ( -+ diffs[pointer][0], -+ text_insert[-commonlength:] + diffs[pointer][1], -+ ) -+ text_insert = text_insert[:-commonlength] -+ text_delete = text_delete[:-commonlength] -+ # Delete the offending records and add the merged ones. -+ new_ops = [] -+ if len(text_delete) != 0: -+ new_ops.append((self.DIFF_DELETE, text_delete)) -+ if len(text_insert) != 0: -+ new_ops.append((self.DIFF_INSERT, text_insert)) -+ pointer -= count_delete + count_insert -+ diffs[pointer : pointer + count_delete + count_insert] = new_ops -+ pointer += len(new_ops) + 1 -+ elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL: -+ # Merge this equality with the previous one. -+ diffs[pointer - 1] = ( -+ diffs[pointer - 1][0], -+ diffs[pointer - 1][1] + diffs[pointer][1], -+ ) -+ del diffs[pointer] -+ else: -+ pointer += 1 -+ -+ count_insert = 0 -+ count_delete = 0 -+ text_delete = "" -+ text_insert = "" -+ -+ if diffs[-1][1] == "": -+ diffs.pop() # Remove the dummy entry at the end. -+ -+ # Second pass: look for single edits surrounded on both sides by equalities -+ # which can be shifted sideways to eliminate an equality. -+ # e.g: ABAC -> ABAC -+ changes = False -+ pointer = 1 -+ # Intentionally ignore the first and last element (don't need checking). 
-+ while pointer < len(diffs) - 1: -+ if ( -+ diffs[pointer - 1][0] == self.DIFF_EQUAL -+ and diffs[pointer + 1][0] == self.DIFF_EQUAL -+ ): -+ # This is a single edit surrounded by equalities. -+ if diffs[pointer][1].endswith(diffs[pointer - 1][1]): -+ # Shift the edit over the previous equality. -+ if diffs[pointer - 1][1] != "": -+ diffs[pointer] = ( -+ diffs[pointer][0], -+ diffs[pointer - 1][1] -+ + diffs[pointer][1][: -len(diffs[pointer - 1][1])], -+ ) -+ diffs[pointer + 1] = ( -+ diffs[pointer + 1][0], -+ diffs[pointer - 1][1] + diffs[pointer + 1][1], -+ ) -+ del diffs[pointer - 1] -+ changes = True -+ elif diffs[pointer][1].startswith(diffs[pointer + 1][1]): -+ # Shift the edit over the next equality. -+ diffs[pointer - 1] = ( -+ diffs[pointer - 1][0], -+ diffs[pointer - 1][1] + diffs[pointer + 1][1], -+ ) -+ diffs[pointer] = ( -+ diffs[pointer][0], -+ diffs[pointer][1][len(diffs[pointer + 1][1]) :] -+ + diffs[pointer + 1][1], -+ ) -+ del diffs[pointer + 1] -+ changes = True -+ pointer += 1 - -- # Second pass: look for single edits surrounded on both sides by equalities -- # which can be shifted sideways to eliminate an equality. -- # e.g: ABAC -> ABAC -- changes = False -- pointer = 1 -- # Intentionally ignore the first and last element (don't need checking). -- while pointer < len(diffs) - 1: -- if (diffs[pointer - 1][0] == self.DIFF_EQUAL and -- diffs[pointer + 1][0] == self.DIFF_EQUAL): -- # This is a single edit surrounded by equalities. -- if diffs[pointer][1].endswith(diffs[pointer - 1][1]): -- # Shift the edit over the previous equality. -- if diffs[pointer - 1][1] != "": -- diffs[pointer] = (diffs[pointer][0], -- diffs[pointer - 1][1] + -- diffs[pointer][1][:-len(diffs[pointer - 1][1])]) -- diffs[pointer + 1] = (diffs[pointer + 1][0], -- diffs[pointer - 1][1] + diffs[pointer + 1][1]) -- del diffs[pointer - 1] -- changes = True -- elif diffs[pointer][1].startswith(diffs[pointer + 1][1]): -- # Shift the edit over the next equality. 
-- diffs[pointer - 1] = (diffs[pointer - 1][0], -- diffs[pointer - 1][1] + diffs[pointer + 1][1]) -- diffs[pointer] = (diffs[pointer][0], -- diffs[pointer][1][len(diffs[pointer + 1][1]):] + -- diffs[pointer + 1][1]) -- del diffs[pointer + 1] -- changes = True -- pointer += 1 -- -- # If shifts were made, the diff needs reordering and another shift sweep. -- if changes: -- self.diff_cleanupMerge(diffs) -+ # If shifts were made, the diff needs reordering and another shift sweep. -+ if changes: -+ self.diff_cleanupMerge(diffs) - -- def diff_xIndex(self, diffs, loc): -- """loc is a location in text1, compute and return the equivalent location -+ def diff_xIndex(self, diffs, loc): -+ """loc is a location in text1, compute and return the equivalent location - in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8 - - Args: -@@ -1035,29 +1104,29 @@ class diff_match_patch: - Returns: - Location within text2. - """ -- chars1 = 0 -- chars2 = 0 -- last_chars1 = 0 -- last_chars2 = 0 -- for x in range(len(diffs)): -- (op, text) = diffs[x] -- if op != self.DIFF_INSERT: # Equality or deletion. -- chars1 += len(text) -- if op != self.DIFF_DELETE: # Equality or insertion. -- chars2 += len(text) -- if chars1 > loc: # Overshot the location. -- break -- last_chars1 = chars1 -- last_chars2 = chars2 -- -- if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE: -- # The location was deleted. -- return last_chars2 -- # Add the remaining len(character). -- return last_chars2 + (loc - last_chars1) -+ chars1 = 0 -+ chars2 = 0 -+ last_chars1 = 0 -+ last_chars2 = 0 -+ for x in range(len(diffs)): -+ (op, text) = diffs[x] -+ if op != self.DIFF_INSERT: # Equality or deletion. -+ chars1 += len(text) -+ if op != self.DIFF_DELETE: # Equality or insertion. -+ chars2 += len(text) -+ if chars1 > loc: # Overshot the location. -+ break -+ last_chars1 = chars1 -+ last_chars2 = chars2 -+ -+ if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE: -+ # The location was deleted. 
-+ return last_chars2 -+ # Add the remaining len(character). -+ return last_chars2 + (loc - last_chars1) - -- def diff_prettyHtml(self, diffs): -- """Convert a diff array into a pretty HTML report. -+ def diff_prettyHtml(self, diffs): -+ """Convert a diff array into a pretty HTML report. - - Args: - diffs: Array of diff tuples. -@@ -1065,20 +1134,24 @@ class diff_match_patch: - Returns: - HTML representation. - """ -- html = [] -- for (op, data) in diffs: -- text = (data.replace("&", "&").replace("<", "<") -- .replace(">", ">").replace("\n", "¶
    ")) -- if op == self.DIFF_INSERT: -- html.append("%s" % text) -- elif op == self.DIFF_DELETE: -- html.append("%s" % text) -- elif op == self.DIFF_EQUAL: -- html.append("%s" % text) -- return "".join(html) -+ html = [] -+ for (op, data) in diffs: -+ text = ( -+ data.replace("&", "&") -+ .replace("<", "<") -+ .replace(">", ">") -+ .replace("\n", "¶
    ") -+ ) -+ if op == self.DIFF_INSERT: -+ html.append('%s' % text) -+ elif op == self.DIFF_DELETE: -+ html.append('%s' % text) -+ elif op == self.DIFF_EQUAL: -+ html.append("%s" % text) -+ return "".join(html) - -- def diff_text1(self, diffs): -- """Compute and return the source text (all equalities and deletions). -+ def diff_text1(self, diffs): -+ """Compute and return the source text (all equalities and deletions). - - Args: - diffs: Array of diff tuples. -@@ -1086,14 +1159,14 @@ class diff_match_patch: - Returns: - Source text. - """ -- text = [] -- for (op, data) in diffs: -- if op != self.DIFF_INSERT: -- text.append(data) -- return "".join(text) -+ text = [] -+ for (op, data) in diffs: -+ if op != self.DIFF_INSERT: -+ text.append(data) -+ return "".join(text) - -- def diff_text2(self, diffs): -- """Compute and return the destination text (all equalities and insertions). -+ def diff_text2(self, diffs): -+ """Compute and return the destination text (all equalities and insertions). - - Args: - diffs: Array of diff tuples. -@@ -1101,14 +1174,14 @@ class diff_match_patch: - Returns: - Destination text. - """ -- text = [] -- for (op, data) in diffs: -- if op != self.DIFF_DELETE: -- text.append(data) -- return "".join(text) -+ text = [] -+ for (op, data) in diffs: -+ if op != self.DIFF_DELETE: -+ text.append(data) -+ return "".join(text) - -- def diff_levenshtein(self, diffs): -- """Compute the Levenshtein distance; the number of inserted, deleted or -+ def diff_levenshtein(self, diffs): -+ """Compute the Levenshtein distance; the number of inserted, deleted or - substituted characters. - - Args: -@@ -1117,24 +1190,24 @@ class diff_match_patch: - Returns: - Number of changes. - """ -- levenshtein = 0 -- insertions = 0 -- deletions = 0 -- for (op, data) in diffs: -- if op == self.DIFF_INSERT: -- insertions += len(data) -- elif op == self.DIFF_DELETE: -- deletions += len(data) -- elif op == self.DIFF_EQUAL: -- # A deletion and an insertion is one substitution. 
-- levenshtein += max(insertions, deletions) -+ levenshtein = 0 - insertions = 0 - deletions = 0 -- levenshtein += max(insertions, deletions) -- return levenshtein -+ for (op, data) in diffs: -+ if op == self.DIFF_INSERT: -+ insertions += len(data) -+ elif op == self.DIFF_DELETE: -+ deletions += len(data) -+ elif op == self.DIFF_EQUAL: -+ # A deletion and an insertion is one substitution. -+ levenshtein += max(insertions, deletions) -+ insertions = 0 -+ deletions = 0 -+ levenshtein += max(insertions, deletions) -+ return levenshtein - -- def diff_toDelta(self, diffs): -- """Crush the diff into an encoded string which describes the operations -+ def diff_toDelta(self, diffs): -+ """Crush the diff into an encoded string which describes the operations - required to transform text1 into text2. - E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. - Operations are tab-separated. Inserted text is escaped using %xx notation. -@@ -1145,20 +1218,20 @@ class diff_match_patch: - Returns: - Delta text. - """ -- text = [] -- for (op, data) in diffs: -- if op == self.DIFF_INSERT: -- # High ascii will raise UnicodeDecodeError. Use Unicode instead. -- data = data.encode("utf-8") -- text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# ")) -- elif op == self.DIFF_DELETE: -- text.append("-%d" % len(data)) -- elif op == self.DIFF_EQUAL: -- text.append("=%d" % len(data)) -- return "\t".join(text) -+ text = [] -+ for (op, data) in diffs: -+ if op == self.DIFF_INSERT: -+ # High ascii will raise UnicodeDecodeError. Use Unicode instead. 
-+ data = data.encode("utf-8") -+ text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# ")) -+ elif op == self.DIFF_DELETE: -+ text.append("-%d" % len(data)) -+ elif op == self.DIFF_EQUAL: -+ text.append("=%d" % len(data)) -+ return "\t".join(text) - -- def diff_fromDelta(self, text1, delta): -- """Given the original text1, and an encoded string which describes the -+ def diff_fromDelta(self, text1, delta): -+ """Given the original text1, and an encoded string which describes the - operations required to transform text1 into text2, compute the full diff. - - Args: -@@ -1171,46 +1244,48 @@ class diff_match_patch: - Raises: - ValueError: If invalid input. - """ -- diffs = [] -- pointer = 0 # Cursor in text1 -- tokens = delta.split("\t") -- for token in tokens: -- if token == "": -- # Blank tokens are ok (from a trailing \t). -- continue -- # Each token begins with a one character parameter which specifies the -- # operation of this token (delete, insert, equality). -- param = token[1:] -- if token[0] == "+": -- param = urllib.parse.unquote(param) -- diffs.append((self.DIFF_INSERT, param)) -- elif token[0] == "-" or token[0] == "=": -- try: -- n = int(param) -- except ValueError: -- raise ValueError("Invalid number in diff_fromDelta: " + param) -- if n < 0: -- raise ValueError("Negative number in diff_fromDelta: " + param) -- text = text1[pointer : pointer + n] -- pointer += n -- if token[0] == "=": -- diffs.append((self.DIFF_EQUAL, text)) -- else: -- diffs.append((self.DIFF_DELETE, text)) -- else: -- # Anything else is an error. -- raise ValueError("Invalid diff operation in diff_fromDelta: " + -- token[0]) -- if pointer != len(text1): -- raise ValueError( -- "Delta length (%d) does not equal source text length (%d)." % -- (pointer, len(text1))) -- return diffs -+ diffs = [] -+ pointer = 0 # Cursor in text1 -+ tokens = delta.split("\t") -+ for token in tokens: -+ if token == "": -+ # Blank tokens are ok (from a trailing \t). 
-+ continue -+ # Each token begins with a one character parameter which specifies the -+ # operation of this token (delete, insert, equality). -+ param = token[1:] -+ if token[0] == "+": -+ param = urllib.parse.unquote(param) -+ diffs.append((self.DIFF_INSERT, param)) -+ elif token[0] == "-" or token[0] == "=": -+ try: -+ n = int(param) -+ except ValueError: -+ raise ValueError("Invalid number in diff_fromDelta: " + param) -+ if n < 0: -+ raise ValueError("Negative number in diff_fromDelta: " + param) -+ text = text1[pointer : pointer + n] -+ pointer += n -+ if token[0] == "=": -+ diffs.append((self.DIFF_EQUAL, text)) -+ else: -+ diffs.append((self.DIFF_DELETE, text)) -+ else: -+ # Anything else is an error. -+ raise ValueError( -+ "Invalid diff operation in diff_fromDelta: " + token[0] -+ ) -+ if pointer != len(text1): -+ raise ValueError( -+ "Delta length (%d) does not equal source text length (%d)." -+ % (pointer, len(text1)) -+ ) -+ return diffs - -- # MATCH FUNCTIONS -+ # MATCH FUNCTIONS - -- def match_main(self, text, pattern, loc): -- """Locate the best instance of 'pattern' in 'text' near 'loc'. -+ def match_main(self, text, pattern, loc): -+ """Locate the best instance of 'pattern' in 'text' near 'loc'. - - Args: - text: The text to search. -@@ -1220,27 +1295,27 @@ class diff_match_patch: - Returns: - Best match index or -1. - """ -- # Check for null inputs. -- if text == None or pattern == None: -- raise ValueError("Null inputs. (match_main)") -- -- loc = max(0, min(loc, len(text))) -- if text == pattern: -- # Shortcut (potentially not guaranteed by the algorithm) -- return 0 -- elif not text: -- # Nothing to match. -- return -1 -- elif text[loc:loc + len(pattern)] == pattern: -- # Perfect match at the perfect spot! (Includes case of null pattern) -- return loc -- else: -- # Do a fuzzy compare. -- match = self.match_bitap(text, pattern, loc) -- return match -+ # Check for null inputs. 
-+ if text == None or pattern == None: -+ raise ValueError("Null inputs. (match_main)") -+ -+ loc = max(0, min(loc, len(text))) -+ if text == pattern: -+ # Shortcut (potentially not guaranteed by the algorithm) -+ return 0 -+ elif not text: -+ # Nothing to match. -+ return -1 -+ elif text[loc : loc + len(pattern)] == pattern: -+ # Perfect match at the perfect spot! (Includes case of null pattern) -+ return loc -+ else: -+ # Do a fuzzy compare. -+ match = self.match_bitap(text, pattern, loc) -+ return match - -- def match_bitap(self, text, pattern, loc): -- """Locate the best instance of 'pattern' in 'text' near 'loc' using the -+ def match_bitap(self, text, pattern, loc): -+ """Locate the best instance of 'pattern' in 'text' near 'loc' using the - Bitap algorithm. - - Args: -@@ -1251,15 +1326,15 @@ class diff_match_patch: - Returns: - Best match index or -1. - """ -- # Python doesn't have a maxint limit, so ignore this check. -- #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits: -- # raise ValueError("Pattern too long for this application.") -+ # Python doesn't have a maxint limit, so ignore this check. -+ # if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits: -+ # raise ValueError("Pattern too long for this application.") - -- # Initialise the alphabet. -- s = self.match_alphabet(pattern) -+ # Initialise the alphabet. -+ s = self.match_alphabet(pattern) - -- def match_bitapScore(e, x): -- """Compute and return the score for a match with e errors and x location. -+ def match_bitapScore(e, x): -+ """Compute and return the score for a match with e errors and x location. - Accesses loc and pattern through being a closure. - - Args: -@@ -1269,84 +1344,87 @@ class diff_match_patch: - Returns: - Overall score for match (0.0 = good, 1.0 = bad). - """ -- accuracy = float(e) / len(pattern) -- proximity = abs(loc - x) -- if not self.Match_Distance: -- # Dodge divide by zero error. 
-- return proximity and 1.0 or accuracy -- return accuracy + (proximity / float(self.Match_Distance)) -- -- # Highest score beyond which we give up. -- score_threshold = self.Match_Threshold -- # Is there a nearby exact match? (speedup) -- best_loc = text.find(pattern, loc) -- if best_loc != -1: -- score_threshold = min(match_bitapScore(0, best_loc), score_threshold) -- # What about in the other direction? (speedup) -- best_loc = text.rfind(pattern, loc + len(pattern)) -- if best_loc != -1: -- score_threshold = min(match_bitapScore(0, best_loc), score_threshold) -- -- # Initialise the bit arrays. -- matchmask = 1 << (len(pattern) - 1) -- best_loc = -1 -- -- bin_max = len(pattern) + len(text) -- # Empty initialization added to appease pychecker. -- last_rd = None -- for d in range(len(pattern)): -- # Scan for the best match each iteration allows for one more error. -- # Run a binary search to determine how far from 'loc' we can stray at -- # this error level. -- bin_min = 0 -- bin_mid = bin_max -- while bin_min < bin_mid: -- if match_bitapScore(d, loc + bin_mid) <= score_threshold: -- bin_min = bin_mid -- else: -- bin_max = bin_mid -- bin_mid = (bin_max - bin_min) // 2 + bin_min -- -- # Use the result from this iteration as the maximum for the next. -- bin_max = bin_mid -- start = max(1, loc - bin_mid + 1) -- finish = min(loc + bin_mid, len(text)) + len(pattern) -- -- rd = [0] * (finish + 2) -- rd[finish + 1] = (1 << d) - 1 -- for j in range(finish, start - 1, -1): -- if len(text) <= j - 1: -- # Out of range. -- charMatch = 0 -- else: -- charMatch = s.get(text[j - 1], 0) -- if d == 0: # First pass: exact match. -- rd[j] = ((rd[j + 1] << 1) | 1) & charMatch -- else: # Subsequent passes: fuzzy match. -- rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | ( -- ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1] -- if rd[j] & matchmask: -- score = match_bitapScore(d, j - 1) -- # This match will almost certainly be better than any existing match. -- # But check anyway. 
-- if score <= score_threshold: -- # Told you so. -- score_threshold = score -- best_loc = j - 1 -- if best_loc > loc: -- # When passing loc, don't exceed our current distance from loc. -- start = max(1, 2 * loc - best_loc) -- else: -- # Already passed loc, downhill from here on in. -- break -- # No hope for a (better) match at greater error levels. -- if match_bitapScore(d + 1, loc) > score_threshold: -- break -- last_rd = rd -- return best_loc -+ accuracy = float(e) / len(pattern) -+ proximity = abs(loc - x) -+ if not self.Match_Distance: -+ # Dodge divide by zero error. -+ return proximity and 1.0 or accuracy -+ return accuracy + (proximity / float(self.Match_Distance)) -+ -+ # Highest score beyond which we give up. -+ score_threshold = self.Match_Threshold -+ # Is there a nearby exact match? (speedup) -+ best_loc = text.find(pattern, loc) -+ if best_loc != -1: -+ score_threshold = min(match_bitapScore(0, best_loc), score_threshold) -+ # What about in the other direction? (speedup) -+ best_loc = text.rfind(pattern, loc + len(pattern)) -+ if best_loc != -1: -+ score_threshold = min(match_bitapScore(0, best_loc), score_threshold) -+ -+ # Initialise the bit arrays. -+ matchmask = 1 << (len(pattern) - 1) -+ best_loc = -1 -+ -+ bin_max = len(pattern) + len(text) -+ # Empty initialization added to appease pychecker. -+ last_rd = None -+ for d in range(len(pattern)): -+ # Scan for the best match each iteration allows for one more error. -+ # Run a binary search to determine how far from 'loc' we can stray at -+ # this error level. -+ bin_min = 0 -+ bin_mid = bin_max -+ while bin_min < bin_mid: -+ if match_bitapScore(d, loc + bin_mid) <= score_threshold: -+ bin_min = bin_mid -+ else: -+ bin_max = bin_mid -+ bin_mid = (bin_max - bin_min) // 2 + bin_min -+ -+ # Use the result from this iteration as the maximum for the next. 
-+ bin_max = bin_mid -+ start = max(1, loc - bin_mid + 1) -+ finish = min(loc + bin_mid, len(text)) + len(pattern) -+ -+ rd = [0] * (finish + 2) -+ rd[finish + 1] = (1 << d) - 1 -+ for j in range(finish, start - 1, -1): -+ if len(text) <= j - 1: -+ # Out of range. -+ charMatch = 0 -+ else: -+ charMatch = s.get(text[j - 1], 0) -+ if d == 0: # First pass: exact match. -+ rd[j] = ((rd[j + 1] << 1) | 1) & charMatch -+ else: # Subsequent passes: fuzzy match. -+ rd[j] = ( -+ (((rd[j + 1] << 1) | 1) & charMatch) -+ | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) -+ | last_rd[j + 1] -+ ) -+ if rd[j] & matchmask: -+ score = match_bitapScore(d, j - 1) -+ # This match will almost certainly be better than any existing match. -+ # But check anyway. -+ if score <= score_threshold: -+ # Told you so. -+ score_threshold = score -+ best_loc = j - 1 -+ if best_loc > loc: -+ # When passing loc, don't exceed our current distance from loc. -+ start = max(1, 2 * loc - best_loc) -+ else: -+ # Already passed loc, downhill from here on in. -+ break -+ # No hope for a (better) match at greater error levels. -+ if match_bitapScore(d + 1, loc) > score_threshold: -+ break -+ last_rd = rd -+ return best_loc - -- def match_alphabet(self, pattern): -- """Initialise the alphabet for the Bitap algorithm. -+ def match_alphabet(self, pattern): -+ """Initialise the alphabet for the Bitap algorithm. - - Args: - pattern: The text to encode. -@@ -1354,58 +1432,61 @@ class diff_match_patch: - Returns: - Hash of character locations. 
- """ -- s = {} -- for char in pattern: -- s[char] = 0 -- for i in range(len(pattern)): -- s[pattern[i]] |= 1 << (len(pattern) - i - 1) -- return s -+ s = {} -+ for char in pattern: -+ s[char] = 0 -+ for i in range(len(pattern)): -+ s[pattern[i]] |= 1 << (len(pattern) - i - 1) -+ return s - -- # PATCH FUNCTIONS -+ # PATCH FUNCTIONS - -- def patch_addContext(self, patch, text): -- """Increase the context until it is unique, -+ def patch_addContext(self, patch, text): -+ """Increase the context until it is unique, - but don't let the pattern expand beyond Match_MaxBits. - - Args: - patch: The patch to grow. - text: Source text. - """ -- if len(text) == 0: -- return -- pattern = text[patch.start2 : patch.start2 + patch.length1] -- padding = 0 -- -- # Look for the first and last matches of pattern in text. If two different -- # matches are found, increase the pattern length. -- while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits == -- 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin - -- self.Patch_Margin)): -- padding += self.Patch_Margin -- pattern = text[max(0, patch.start2 - padding) : -- patch.start2 + patch.length1 + padding] -- # Add one chunk for good luck. -- padding += self.Patch_Margin -- -- # Add the prefix. -- prefix = text[max(0, patch.start2 - padding) : patch.start2] -- if prefix: -- patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] -- # Add the suffix. -- suffix = text[patch.start2 + patch.length1 : -- patch.start2 + patch.length1 + padding] -- if suffix: -- patch.diffs.append((self.DIFF_EQUAL, suffix)) -- -- # Roll back the start points. -- patch.start1 -= len(prefix) -- patch.start2 -= len(prefix) -- # Extend lengths. -- patch.length1 += len(prefix) + len(suffix) -- patch.length2 += len(prefix) + len(suffix) -+ if len(text) == 0: -+ return -+ pattern = text[patch.start2 : patch.start2 + patch.length1] -+ padding = 0 -+ -+ # Look for the first and last matches of pattern in text. 
If two different -+ # matches are found, increase the pattern length. -+ while text.find(pattern) != text.rfind(pattern) and ( -+ self.Match_MaxBits == 0 -+ or len(pattern) < self.Match_MaxBits - self.Patch_Margin - self.Patch_Margin -+ ): -+ padding += self.Patch_Margin -+ pattern = text[ -+ max(0, patch.start2 - padding) : patch.start2 + patch.length1 + padding -+ ] -+ # Add one chunk for good luck. -+ padding += self.Patch_Margin -+ -+ # Add the prefix. -+ prefix = text[max(0, patch.start2 - padding) : patch.start2] -+ if prefix: -+ patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] -+ # Add the suffix. -+ suffix = text[ -+ patch.start2 + patch.length1 : patch.start2 + patch.length1 + padding -+ ] -+ if suffix: -+ patch.diffs.append((self.DIFF_EQUAL, suffix)) -+ -+ # Roll back the start points. -+ patch.start1 -= len(prefix) -+ patch.start2 -= len(prefix) -+ # Extend lengths. -+ patch.length1 += len(prefix) + len(suffix) -+ patch.length2 += len(prefix) + len(suffix) - -- def patch_make(self, a, b=None, c=None): -- """Compute a list of patches to turn text1 into text2. -+ def patch_make(self, a, b=None, c=None): -+ """Compute a list of patches to turn text1 into text2. - Use diffs if provided, otherwise compute it ourselves. - There are four ways to call this function, depending on what data is - available to the caller: -@@ -1429,96 +1510,102 @@ class diff_match_patch: - Returns: - Array of Patch objects. - """ -- text1 = None -- diffs = None -- if isinstance(a, str) and isinstance(b, str) and c is None: -- # Method 1: text1, text2 -- # Compute diffs from text1 and text2. -- text1 = a -- diffs = self.diff_main(text1, b, True) -- if len(diffs) > 2: -- self.diff_cleanupSemantic(diffs) -- self.diff_cleanupEfficiency(diffs) -- elif isinstance(a, list) and b is None and c is None: -- # Method 2: diffs -- # Compute text1 from diffs. 
-- diffs = a -- text1 = self.diff_text1(diffs) -- elif isinstance(a, str) and isinstance(b, list) and c is None: -- # Method 3: text1, diffs -- text1 = a -- diffs = b -- elif (isinstance(a, str) and isinstance(b, str) and -- isinstance(c, list)): -- # Method 4: text1, text2, diffs -- # text2 is not used. -- text1 = a -- diffs = c -- else: -- raise ValueError("Unknown call format to patch_make.") -- -- if not diffs: -- return [] # Get rid of the None case. -- patches = [] -- patch = patch_obj() -- char_count1 = 0 # Number of characters into the text1 string. -- char_count2 = 0 # Number of characters into the text2 string. -- prepatch_text = text1 # Recreate the patches to determine context info. -- postpatch_text = text1 -- for x in range(len(diffs)): -- (diff_type, diff_text) = diffs[x] -- if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL: -- # A new patch starts here. -- patch.start1 = char_count1 -- patch.start2 = char_count2 -- if diff_type == self.DIFF_INSERT: -- # Insertion -- patch.diffs.append(diffs[x]) -- patch.length2 += len(diff_text) -- postpatch_text = (postpatch_text[:char_count2] + diff_text + -- postpatch_text[char_count2:]) -- elif diff_type == self.DIFF_DELETE: -- # Deletion. -- patch.length1 += len(diff_text) -- patch.diffs.append(diffs[x]) -- postpatch_text = (postpatch_text[:char_count2] + -- postpatch_text[char_count2 + len(diff_text):]) -- elif (diff_type == self.DIFF_EQUAL and -- len(diff_text) <= 2 * self.Patch_Margin and -- len(patch.diffs) != 0 and len(diffs) != x + 1): -- # Small equality inside a patch. -- patch.diffs.append(diffs[x]) -- patch.length1 += len(diff_text) -- patch.length2 += len(diff_text) -- -- if (diff_type == self.DIFF_EQUAL and -- len(diff_text) >= 2 * self.Patch_Margin): -- # Time for a new patch. -+ text1 = None -+ diffs = None -+ if isinstance(a, str) and isinstance(b, str) and c is None: -+ # Method 1: text1, text2 -+ # Compute diffs from text1 and text2. 
-+ text1 = a -+ diffs = self.diff_main(text1, b, True) -+ if len(diffs) > 2: -+ self.diff_cleanupSemantic(diffs) -+ self.diff_cleanupEfficiency(diffs) -+ elif isinstance(a, list) and b is None and c is None: -+ # Method 2: diffs -+ # Compute text1 from diffs. -+ diffs = a -+ text1 = self.diff_text1(diffs) -+ elif isinstance(a, str) and isinstance(b, list) and c is None: -+ # Method 3: text1, diffs -+ text1 = a -+ diffs = b -+ elif isinstance(a, str) and isinstance(b, str) and isinstance(c, list): -+ # Method 4: text1, text2, diffs -+ # text2 is not used. -+ text1 = a -+ diffs = c -+ else: -+ raise ValueError("Unknown call format to patch_make.") -+ -+ if not diffs: -+ return [] # Get rid of the None case. -+ patches = [] -+ patch = patch_obj() -+ char_count1 = 0 # Number of characters into the text1 string. -+ char_count2 = 0 # Number of characters into the text2 string. -+ prepatch_text = text1 # Recreate the patches to determine context info. -+ postpatch_text = text1 -+ for x in range(len(diffs)): -+ (diff_type, diff_text) = diffs[x] -+ if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL: -+ # A new patch starts here. -+ patch.start1 = char_count1 -+ patch.start2 = char_count2 -+ if diff_type == self.DIFF_INSERT: -+ # Insertion -+ patch.diffs.append(diffs[x]) -+ patch.length2 += len(diff_text) -+ postpatch_text = ( -+ postpatch_text[:char_count2] -+ + diff_text -+ + postpatch_text[char_count2:] -+ ) -+ elif diff_type == self.DIFF_DELETE: -+ # Deletion. -+ patch.length1 += len(diff_text) -+ patch.diffs.append(diffs[x]) -+ postpatch_text = ( -+ postpatch_text[:char_count2] -+ + postpatch_text[char_count2 + len(diff_text) :] -+ ) -+ elif ( -+ diff_type == self.DIFF_EQUAL -+ and len(diff_text) <= 2 * self.Patch_Margin -+ and len(patch.diffs) != 0 -+ and len(diffs) != x + 1 -+ ): -+ # Small equality inside a patch. 
-+ patch.diffs.append(diffs[x]) -+ patch.length1 += len(diff_text) -+ patch.length2 += len(diff_text) -+ -+ if diff_type == self.DIFF_EQUAL and len(diff_text) >= 2 * self.Patch_Margin: -+ # Time for a new patch. -+ if len(patch.diffs) != 0: -+ self.patch_addContext(patch, prepatch_text) -+ patches.append(patch) -+ patch = patch_obj() -+ # Unlike Unidiff, our patch lists have a rolling context. -+ # https://github.com/google/diff-match-patch/wiki/Unidiff -+ # Update prepatch text & pos to reflect the application of the -+ # just completed patch. -+ prepatch_text = postpatch_text -+ char_count1 = char_count2 -+ -+ # Update the current character count. -+ if diff_type != self.DIFF_INSERT: -+ char_count1 += len(diff_text) -+ if diff_type != self.DIFF_DELETE: -+ char_count2 += len(diff_text) -+ -+ # Pick up the leftover patch if not empty. - if len(patch.diffs) != 0: -- self.patch_addContext(patch, prepatch_text) -- patches.append(patch) -- patch = patch_obj() -- # Unlike Unidiff, our patch lists have a rolling context. -- # https://github.com/google/diff-match-patch/wiki/Unidiff -- # Update prepatch text & pos to reflect the application of the -- # just completed patch. -- prepatch_text = postpatch_text -- char_count1 = char_count2 -- -- # Update the current character count. -- if diff_type != self.DIFF_INSERT: -- char_count1 += len(diff_text) -- if diff_type != self.DIFF_DELETE: -- char_count2 += len(diff_text) -- -- # Pick up the leftover patch if not empty. -- if len(patch.diffs) != 0: -- self.patch_addContext(patch, prepatch_text) -- patches.append(patch) -- return patches -+ self.patch_addContext(patch, prepatch_text) -+ patches.append(patch) -+ return patches - -- def patch_deepCopy(self, patches): -- """Given an array of patches, return another array that is identical. -+ def patch_deepCopy(self, patches): -+ """Given an array of patches, return another array that is identical. - - Args: - patches: Array of Patch objects. 
-@@ -1526,20 +1613,20 @@ class diff_match_patch: - Returns: - Array of Patch objects. - """ -- patchesCopy = [] -- for patch in patches: -- patchCopy = patch_obj() -- # No need to deep copy the tuples since they are immutable. -- patchCopy.diffs = patch.diffs[:] -- patchCopy.start1 = patch.start1 -- patchCopy.start2 = patch.start2 -- patchCopy.length1 = patch.length1 -- patchCopy.length2 = patch.length2 -- patchesCopy.append(patchCopy) -- return patchesCopy -+ patchesCopy = [] -+ for patch in patches: -+ patchCopy = patch_obj() -+ # No need to deep copy the tuples since they are immutable. -+ patchCopy.diffs = patch.diffs[:] -+ patchCopy.start1 = patch.start1 -+ patchCopy.start2 = patch.start2 -+ patchCopy.length1 = patch.length1 -+ patchCopy.length2 = patch.length2 -+ patchesCopy.append(patchCopy) -+ return patchesCopy - -- def patch_apply(self, patches, text): -- """Merge a set of patches onto the text. Return a patched text, as well -+ def patch_apply(self, patches, text): -+ """Merge a set of patches onto the text. Return a patched text, as well - as a list of true/false values indicating which patches were applied. - - Args: -@@ -1549,85 +1636,102 @@ class diff_match_patch: - Returns: - Two element Array, containing the new text and an array of boolean values. - """ -- if not patches: -- return (text, []) -+ if not patches: -+ return (text, []) - -- # Deep copy the patches so that no changes are made to originals. -- patches = self.patch_deepCopy(patches) -+ # Deep copy the patches so that no changes are made to originals. -+ patches = self.patch_deepCopy(patches) - -- nullPadding = self.patch_addPadding(patches) -- text = nullPadding + text + nullPadding -- self.patch_splitMax(patches) -- -- # delta keeps track of the offset between the expected and actual location -- # of the previous patch. 
If there are patches expected at positions 10 and -- # 20, but the first patch was found at 12, delta is 2 and the second patch -- # has an effective expected position of 22. -- delta = 0 -- results = [] -- for patch in patches: -- expected_loc = patch.start2 + delta -- text1 = self.diff_text1(patch.diffs) -- end_loc = -1 -- if len(text1) > self.Match_MaxBits: -- # patch_splitMax will only provide an oversized pattern in the case of -- # a monster delete. -- start_loc = self.match_main(text, text1[:self.Match_MaxBits], -- expected_loc) -- if start_loc != -1: -- end_loc = self.match_main(text, text1[-self.Match_MaxBits:], -- expected_loc + len(text1) - self.Match_MaxBits) -- if end_loc == -1 or start_loc >= end_loc: -- # Can't find valid trailing context. Drop this patch. -- start_loc = -1 -- else: -- start_loc = self.match_main(text, text1, expected_loc) -- if start_loc == -1: -- # No match found. :( -- results.append(False) -- # Subtract the delta for this failed patch from subsequent patches. -- delta -= patch.length2 - patch.length1 -- else: -- # Found a match. :) -- results.append(True) -- delta = start_loc - expected_loc -- if end_loc == -1: -- text2 = text[start_loc : start_loc + len(text1)] -- else: -- text2 = text[start_loc : end_loc + self.Match_MaxBits] -- if text1 == text2: -- # Perfect match, just shove the replacement text in. -- text = (text[:start_loc] + self.diff_text2(patch.diffs) + -- text[start_loc + len(text1):]) -- else: -- # Imperfect match. -- # Run a diff to get a framework of equivalent indices. -- diffs = self.diff_main(text1, text2, False) -- if (len(text1) > self.Match_MaxBits and -- self.diff_levenshtein(diffs) / float(len(text1)) > -- self.Patch_DeleteThreshold): -- # The end points match, but the content is unacceptably bad. 
-- results[-1] = False -- else: -- self.diff_cleanupSemanticLossless(diffs) -- index1 = 0 -- for (op, data) in patch.diffs: -- if op != self.DIFF_EQUAL: -- index2 = self.diff_xIndex(diffs, index1) -- if op == self.DIFF_INSERT: # Insertion -- text = text[:start_loc + index2] + data + text[start_loc + -- index2:] -- elif op == self.DIFF_DELETE: # Deletion -- text = text[:start_loc + index2] + text[start_loc + -- self.diff_xIndex(diffs, index1 + len(data)):] -- if op != self.DIFF_DELETE: -- index1 += len(data) -- # Strip the padding off. -- text = text[len(nullPadding):-len(nullPadding)] -- return (text, results) -+ nullPadding = self.patch_addPadding(patches) -+ text = nullPadding + text + nullPadding -+ self.patch_splitMax(patches) -+ -+ # delta keeps track of the offset between the expected and actual location -+ # of the previous patch. If there are patches expected at positions 10 and -+ # 20, but the first patch was found at 12, delta is 2 and the second patch -+ # has an effective expected position of 22. -+ delta = 0 -+ results = [] -+ for patch in patches: -+ expected_loc = patch.start2 + delta -+ text1 = self.diff_text1(patch.diffs) -+ end_loc = -1 -+ if len(text1) > self.Match_MaxBits: -+ # patch_splitMax will only provide an oversized pattern in the case of -+ # a monster delete. -+ start_loc = self.match_main( -+ text, text1[: self.Match_MaxBits], expected_loc -+ ) -+ if start_loc != -1: -+ end_loc = self.match_main( -+ text, -+ text1[-self.Match_MaxBits :], -+ expected_loc + len(text1) - self.Match_MaxBits, -+ ) -+ if end_loc == -1 or start_loc >= end_loc: -+ # Can't find valid trailing context. Drop this patch. -+ start_loc = -1 -+ else: -+ start_loc = self.match_main(text, text1, expected_loc) -+ if start_loc == -1: -+ # No match found. :( -+ results.append(False) -+ # Subtract the delta for this failed patch from subsequent patches. -+ delta -= patch.length2 - patch.length1 -+ else: -+ # Found a match. 
:) -+ results.append(True) -+ delta = start_loc - expected_loc -+ if end_loc == -1: -+ text2 = text[start_loc : start_loc + len(text1)] -+ else: -+ text2 = text[start_loc : end_loc + self.Match_MaxBits] -+ if text1 == text2: -+ # Perfect match, just shove the replacement text in. -+ text = ( -+ text[:start_loc] -+ + self.diff_text2(patch.diffs) -+ + text[start_loc + len(text1) :] -+ ) -+ else: -+ # Imperfect match. -+ # Run a diff to get a framework of equivalent indices. -+ diffs = self.diff_main(text1, text2, False) -+ if ( -+ len(text1) > self.Match_MaxBits -+ and self.diff_levenshtein(diffs) / float(len(text1)) -+ > self.Patch_DeleteThreshold -+ ): -+ # The end points match, but the content is unacceptably bad. -+ results[-1] = False -+ else: -+ self.diff_cleanupSemanticLossless(diffs) -+ index1 = 0 -+ for (op, data) in patch.diffs: -+ if op != self.DIFF_EQUAL: -+ index2 = self.diff_xIndex(diffs, index1) -+ if op == self.DIFF_INSERT: # Insertion -+ text = ( -+ text[: start_loc + index2] -+ + data -+ + text[start_loc + index2 :] -+ ) -+ elif op == self.DIFF_DELETE: # Deletion -+ text = ( -+ text[: start_loc + index2] -+ + text[ -+ start_loc -+ + self.diff_xIndex(diffs, index1 + len(data)) : -+ ] -+ ) -+ if op != self.DIFF_DELETE: -+ index1 += len(data) -+ # Strip the padding off. -+ text = text[len(nullPadding) : -len(nullPadding)] -+ return (text, results) - -- def patch_addPadding(self, patches): -- """Add some padding on text start and end so that edges can match -+ def patch_addPadding(self, patches): -+ """Add some padding on text start and end so that edges can match - something. Intended to be called only from within patch_apply. - - Args: -@@ -1636,144 +1740,154 @@ class diff_match_patch: - Returns: - The padding string added to each side. - """ -- paddingLength = self.Patch_Margin -- nullPadding = "" -- for x in range(1, paddingLength + 1): -- nullPadding += chr(x) -- -- # Bump all the patches forward. 
-- for patch in patches: -- patch.start1 += paddingLength -- patch.start2 += paddingLength -- -- # Add some padding on start of first diff. -- patch = patches[0] -- diffs = patch.diffs -- if not diffs or diffs[0][0] != self.DIFF_EQUAL: -- # Add nullPadding equality. -- diffs.insert(0, (self.DIFF_EQUAL, nullPadding)) -- patch.start1 -= paddingLength # Should be 0. -- patch.start2 -= paddingLength # Should be 0. -- patch.length1 += paddingLength -- patch.length2 += paddingLength -- elif paddingLength > len(diffs[0][1]): -- # Grow first equality. -- extraLength = paddingLength - len(diffs[0][1]) -- newText = nullPadding[len(diffs[0][1]):] + diffs[0][1] -- diffs[0] = (diffs[0][0], newText) -- patch.start1 -= extraLength -- patch.start2 -= extraLength -- patch.length1 += extraLength -- patch.length2 += extraLength -- -- # Add some padding on end of last diff. -- patch = patches[-1] -- diffs = patch.diffs -- if not diffs or diffs[-1][0] != self.DIFF_EQUAL: -- # Add nullPadding equality. -- diffs.append((self.DIFF_EQUAL, nullPadding)) -- patch.length1 += paddingLength -- patch.length2 += paddingLength -- elif paddingLength > len(diffs[-1][1]): -- # Grow last equality. -- extraLength = paddingLength - len(diffs[-1][1]) -- newText = diffs[-1][1] + nullPadding[:extraLength] -- diffs[-1] = (diffs[-1][0], newText) -- patch.length1 += extraLength -- patch.length2 += extraLength -+ paddingLength = self.Patch_Margin -+ nullPadding = "" -+ for x in range(1, paddingLength + 1): -+ nullPadding += chr(x) -+ -+ # Bump all the patches forward. -+ for patch in patches: -+ patch.start1 += paddingLength -+ patch.start2 += paddingLength -+ -+ # Add some padding on start of first diff. -+ patch = patches[0] -+ diffs = patch.diffs -+ if not diffs or diffs[0][0] != self.DIFF_EQUAL: -+ # Add nullPadding equality. -+ diffs.insert(0, (self.DIFF_EQUAL, nullPadding)) -+ patch.start1 -= paddingLength # Should be 0. -+ patch.start2 -= paddingLength # Should be 0. 
-+ patch.length1 += paddingLength -+ patch.length2 += paddingLength -+ elif paddingLength > len(diffs[0][1]): -+ # Grow first equality. -+ extraLength = paddingLength - len(diffs[0][1]) -+ newText = nullPadding[len(diffs[0][1]) :] + diffs[0][1] -+ diffs[0] = (diffs[0][0], newText) -+ patch.start1 -= extraLength -+ patch.start2 -= extraLength -+ patch.length1 += extraLength -+ patch.length2 += extraLength -+ -+ # Add some padding on end of last diff. -+ patch = patches[-1] -+ diffs = patch.diffs -+ if not diffs or diffs[-1][0] != self.DIFF_EQUAL: -+ # Add nullPadding equality. -+ diffs.append((self.DIFF_EQUAL, nullPadding)) -+ patch.length1 += paddingLength -+ patch.length2 += paddingLength -+ elif paddingLength > len(diffs[-1][1]): -+ # Grow last equality. -+ extraLength = paddingLength - len(diffs[-1][1]) -+ newText = diffs[-1][1] + nullPadding[:extraLength] -+ diffs[-1] = (diffs[-1][0], newText) -+ patch.length1 += extraLength -+ patch.length2 += extraLength - -- return nullPadding -+ return nullPadding - -- def patch_splitMax(self, patches): -- """Look through the patches and break up any which are longer than the -+ def patch_splitMax(self, patches): -+ """Look through the patches and break up any which are longer than the - maximum limit of the match algorithm. - Intended to be called only from within patch_apply. - - Args: - patches: Array of Patch objects. - """ -- patch_size = self.Match_MaxBits -- if patch_size == 0: -- # Python has the option of not splitting strings due to its ability -- # to handle integers of arbitrary precision. -- return -- for x in range(len(patches)): -- if patches[x].length1 <= patch_size: -- continue -- bigpatch = patches[x] -- # Remove the big old patch. -- del patches[x] -- x -= 1 -- start1 = bigpatch.start1 -- start2 = bigpatch.start2 -- precontext = '' -- while len(bigpatch.diffs) != 0: -- # Create one of several smaller patches. 
-- patch = patch_obj() -- empty = True -- patch.start1 = start1 - len(precontext) -- patch.start2 = start2 - len(precontext) -- if precontext: -- patch.length1 = patch.length2 = len(precontext) -- patch.diffs.append((self.DIFF_EQUAL, precontext)) -- -- while (len(bigpatch.diffs) != 0 and -- patch.length1 < patch_size - self.Patch_Margin): -- (diff_type, diff_text) = bigpatch.diffs[0] -- if diff_type == self.DIFF_INSERT: -- # Insertions are harmless. -- patch.length2 += len(diff_text) -- start2 += len(diff_text) -- patch.diffs.append(bigpatch.diffs.pop(0)) -- empty = False -- elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and -- patch.diffs[0][0] == self.DIFF_EQUAL and -- len(diff_text) > 2 * patch_size): -- # This is a large deletion. Let it pass in one chunk. -- patch.length1 += len(diff_text) -- start1 += len(diff_text) -- empty = False -- patch.diffs.append((diff_type, diff_text)) -- del bigpatch.diffs[0] -- else: -- # Deletion or equality. Only take as much as we can stomach. -- diff_text = diff_text[:patch_size - patch.length1 - -- self.Patch_Margin] -- patch.length1 += len(diff_text) -- start1 += len(diff_text) -- if diff_type == self.DIFF_EQUAL: -- patch.length2 += len(diff_text) -- start2 += len(diff_text) -- else: -- empty = False -- -- patch.diffs.append((diff_type, diff_text)) -- if diff_text == bigpatch.diffs[0][1]: -- del bigpatch.diffs[0] -- else: -- bigpatch.diffs[0] = (bigpatch.diffs[0][0], -- bigpatch.diffs[0][1][len(diff_text):]) -+ patch_size = self.Match_MaxBits -+ if patch_size == 0: -+ # Python has the option of not splitting strings due to its ability -+ # to handle integers of arbitrary precision. -+ return -+ for x in range(len(patches)): -+ if patches[x].length1 <= patch_size: -+ continue -+ bigpatch = patches[x] -+ # Remove the big old patch. -+ del patches[x] -+ x -= 1 -+ start1 = bigpatch.start1 -+ start2 = bigpatch.start2 -+ precontext = "" -+ while len(bigpatch.diffs) != 0: -+ # Create one of several smaller patches. 
-+ patch = patch_obj() -+ empty = True -+ patch.start1 = start1 - len(precontext) -+ patch.start2 = start2 - len(precontext) -+ if precontext: -+ patch.length1 = patch.length2 = len(precontext) -+ patch.diffs.append((self.DIFF_EQUAL, precontext)) -+ -+ while ( -+ len(bigpatch.diffs) != 0 -+ and patch.length1 < patch_size - self.Patch_Margin -+ ): -+ (diff_type, diff_text) = bigpatch.diffs[0] -+ if diff_type == self.DIFF_INSERT: -+ # Insertions are harmless. -+ patch.length2 += len(diff_text) -+ start2 += len(diff_text) -+ patch.diffs.append(bigpatch.diffs.pop(0)) -+ empty = False -+ elif ( -+ diff_type == self.DIFF_DELETE -+ and len(patch.diffs) == 1 -+ and patch.diffs[0][0] == self.DIFF_EQUAL -+ and len(diff_text) > 2 * patch_size -+ ): -+ # This is a large deletion. Let it pass in one chunk. -+ patch.length1 += len(diff_text) -+ start1 += len(diff_text) -+ empty = False -+ patch.diffs.append((diff_type, diff_text)) -+ del bigpatch.diffs[0] -+ else: -+ # Deletion or equality. Only take as much as we can stomach. -+ diff_text = diff_text[ -+ : patch_size - patch.length1 - self.Patch_Margin -+ ] -+ patch.length1 += len(diff_text) -+ start1 += len(diff_text) -+ if diff_type == self.DIFF_EQUAL: -+ patch.length2 += len(diff_text) -+ start2 += len(diff_text) -+ else: -+ empty = False -+ -+ patch.diffs.append((diff_type, diff_text)) -+ if diff_text == bigpatch.diffs[0][1]: -+ del bigpatch.diffs[0] -+ else: -+ bigpatch.diffs[0] = ( -+ bigpatch.diffs[0][0], -+ bigpatch.diffs[0][1][len(diff_text) :], -+ ) -+ -+ # Compute the head context for the next patch. -+ precontext = self.diff_text2(patch.diffs) -+ precontext = precontext[-self.Patch_Margin :] -+ # Append the end context for this patch. 
-+ postcontext = self.diff_text1(bigpatch.diffs)[: self.Patch_Margin] -+ if postcontext: -+ patch.length1 += len(postcontext) -+ patch.length2 += len(postcontext) -+ if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL: -+ patch.diffs[-1] = ( -+ self.DIFF_EQUAL, -+ patch.diffs[-1][1] + postcontext, -+ ) -+ else: -+ patch.diffs.append((self.DIFF_EQUAL, postcontext)) -+ -+ if not empty: -+ x += 1 -+ patches.insert(x, patch) - -- # Compute the head context for the next patch. -- precontext = self.diff_text2(patch.diffs) -- precontext = precontext[-self.Patch_Margin:] -- # Append the end context for this patch. -- postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin] -- if postcontext: -- patch.length1 += len(postcontext) -- patch.length2 += len(postcontext) -- if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL: -- patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] + -- postcontext) -- else: -- patch.diffs.append((self.DIFF_EQUAL, postcontext)) -- -- if not empty: -- x += 1 -- patches.insert(x, patch) -- -- def patch_toText(self, patches): -- """Take a list of patches and return a textual representation. -+ def patch_toText(self, patches): -+ """Take a list of patches and return a textual representation. - - Args: - patches: Array of Patch objects. -@@ -1781,13 +1895,13 @@ class diff_match_patch: - Returns: - Text representation of patches. - """ -- text = [] -- for patch in patches: -- text.append(str(patch)) -- return "".join(text) -+ text = [] -+ for patch in patches: -+ text.append(str(patch)) -+ return "".join(text) - -- def patch_fromText(self, textline): -- """Parse a textual representation of patches and return a list of patch -+ def patch_fromText(self, textline): -+ """Parse a textual representation of patches and return a list of patch - objects. - - Args: -@@ -1799,109 +1913,109 @@ class diff_match_patch: - Raises: - ValueError: If invalid input. 
- """ -- patches = [] -- if not textline: -- return patches -- text = textline.split('\n') -- while len(text) != 0: -- m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) -- if not m: -- raise ValueError("Invalid patch string: " + text[0]) -- patch = patch_obj() -- patches.append(patch) -- patch.start1 = int(m.group(1)) -- if m.group(2) == '': -- patch.start1 -= 1 -- patch.length1 = 1 -- elif m.group(2) == '0': -- patch.length1 = 0 -- else: -- patch.start1 -= 1 -- patch.length1 = int(m.group(2)) -- -- patch.start2 = int(m.group(3)) -- if m.group(4) == '': -- patch.start2 -= 1 -- patch.length2 = 1 -- elif m.group(4) == '0': -- patch.length2 = 0 -- else: -- patch.start2 -= 1 -- patch.length2 = int(m.group(4)) -- -- del text[0] -- -- while len(text) != 0: -- if text[0]: -- sign = text[0][0] -- else: -- sign = '' -- line = urllib.parse.unquote(text[0][1:]) -- if sign == '+': -- # Insertion. -- patch.diffs.append((self.DIFF_INSERT, line)) -- elif sign == '-': -- # Deletion. -- patch.diffs.append((self.DIFF_DELETE, line)) -- elif sign == ' ': -- # Minor equality. -- patch.diffs.append((self.DIFF_EQUAL, line)) -- elif sign == '@': -- # Start of next patch. -- break -- elif sign == '': -- # Blank line? Whatever. -- pass -- else: -- # WTF? 
-- raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line)) -- del text[0] -- return patches -+ patches = [] -+ if not textline: -+ return patches -+ text = textline.split("\n") -+ while len(text) != 0: -+ m = re.match(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) -+ if not m: -+ raise ValueError("Invalid patch string: " + text[0]) -+ patch = patch_obj() -+ patches.append(patch) -+ patch.start1 = int(m.group(1)) -+ if m.group(2) == "": -+ patch.start1 -= 1 -+ patch.length1 = 1 -+ elif m.group(2) == "0": -+ patch.length1 = 0 -+ else: -+ patch.start1 -= 1 -+ patch.length1 = int(m.group(2)) -+ -+ patch.start2 = int(m.group(3)) -+ if m.group(4) == "": -+ patch.start2 -= 1 -+ patch.length2 = 1 -+ elif m.group(4) == "0": -+ patch.length2 = 0 -+ else: -+ patch.start2 -= 1 -+ patch.length2 = int(m.group(4)) -+ -+ del text[0] -+ -+ while len(text) != 0: -+ if text[0]: -+ sign = text[0][0] -+ else: -+ sign = "" -+ line = urllib.parse.unquote(text[0][1:]) -+ if sign == "+": -+ # Insertion. -+ patch.diffs.append((self.DIFF_INSERT, line)) -+ elif sign == "-": -+ # Deletion. -+ patch.diffs.append((self.DIFF_DELETE, line)) -+ elif sign == " ": -+ # Minor equality. -+ patch.diffs.append((self.DIFF_EQUAL, line)) -+ elif sign == "@": -+ # Start of next patch. -+ break -+ elif sign == "": -+ # Blank line? Whatever. -+ pass -+ else: -+ # WTF? -+ raise ValueError(f"Invalid patch mode: '{sign}'\n{line}") -+ del text[0] -+ return patches - - - class patch_obj: -- """Class representing one patch operation. -+ """Class representing one patch operation. - """ - -- def __init__(self): -- """Initializes with an empty list of diffs. -+ def __init__(self): -+ """Initializes with an empty list of diffs. - """ -- self.diffs = [] -- self.start1 = None -- self.start2 = None -- self.length1 = 0 -- self.length2 = 0 -+ self.diffs = [] -+ self.start1 = None -+ self.start2 = None -+ self.length1 = 0 -+ self.length2 = 0 - -- def __str__(self): -- """Emulate GNU diff's format. 
-+ def __str__(self): -+ """Emulate GNU diff's format. - Header: @@ -382,8 +481,9 @@ - Indices are printed as 1-based, not 0-based. - - Returns: - The GNU diff string. - """ -- if self.length1 == 0: -- coords1 = str(self.start1) + ",0" -- elif self.length1 == 1: -- coords1 = str(self.start1 + 1) -- else: -- coords1 = str(self.start1 + 1) + "," + str(self.length1) -- if self.length2 == 0: -- coords2 = str(self.start2) + ",0" -- elif self.length2 == 1: -- coords2 = str(self.start2 + 1) -- else: -- coords2 = str(self.start2 + 1) + "," + str(self.length2) -- text = ["@@ -", coords1, " +", coords2, " @@\n"] -- # Escape the body of the patch with %xx notation. -- for (op, data) in self.diffs: -- if op == diff_match_patch.DIFF_INSERT: -- text.append("+") -- elif op == diff_match_patch.DIFF_DELETE: -- text.append("-") -- elif op == diff_match_patch.DIFF_EQUAL: -- text.append(" ") -- # High ascii will raise UnicodeDecodeError. Use Unicode instead. -- data = data.encode("utf-8") -- text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n") -- return "".join(text) -+ if self.length1 == 0: -+ coords1 = str(self.start1) + ",0" -+ elif self.length1 == 1: -+ coords1 = str(self.start1 + 1) -+ else: -+ coords1 = str(self.start1 + 1) + "," + str(self.length1) -+ if self.length2 == 0: -+ coords2 = str(self.start2) + ",0" -+ elif self.length2 == 1: -+ coords2 = str(self.start2 + 1) -+ else: -+ coords2 = str(self.start2 + 1) + "," + str(self.length2) -+ text = ["@@ -", coords1, " +", coords2, " @@\n"] -+ # Escape the body of the patch with %xx notation. -+ for (op, data) in self.diffs: -+ if op == diff_match_patch.DIFF_INSERT: -+ text.append("+") -+ elif op == diff_match_patch.DIFF_DELETE: -+ text.append("-") -+ elif op == diff_match_patch.DIFF_EQUAL: -+ text.append(" ") -+ # High ascii will raise UnicodeDecodeError. Use Unicode instead. 
-+ data = data.encode("utf-8") -+ text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n") -+ return "".join(text) -Index: xmldiff-2.4/xmldiff/actions.py -=================================================================== ---- xmldiff-2.4.orig/xmldiff/actions.py -+++ xmldiff-2.4/xmldiff/actions.py -@@ -1,17 +1,17 @@ - from collections import namedtuple - - # The edit script actions used in xmldiff --DeleteNode = namedtuple('DeleteNode', 'node') --InsertNode = namedtuple('InsertNode', 'target tag position') --RenameNode = namedtuple('RenameNode', 'node tag') --MoveNode = namedtuple('MoveNode', 'node target position') -+DeleteNode = namedtuple("DeleteNode", "node") -+InsertNode = namedtuple("InsertNode", "target tag position") -+RenameNode = namedtuple("RenameNode", "node tag") -+MoveNode = namedtuple("MoveNode", "node target position") - --UpdateTextIn = namedtuple('UpdateTextIn', 'node text') --UpdateTextAfter = namedtuple('UpdateTextAfter', 'node text') -+UpdateTextIn = namedtuple("UpdateTextIn", "node text") -+UpdateTextAfter = namedtuple("UpdateTextAfter", "node text") - --UpdateAttrib = namedtuple('UpdateAttrib', 'node name value') --DeleteAttrib = namedtuple('DeleteAttrib', 'node name') --InsertAttrib = namedtuple('InsertAttrib', 'node name value') --RenameAttrib = namedtuple('RenameAttrib', 'node oldname newname') -+UpdateAttrib = namedtuple("UpdateAttrib", "node name value") -+DeleteAttrib = namedtuple("DeleteAttrib", "node name") -+InsertAttrib = namedtuple("InsertAttrib", "node name value") -+RenameAttrib = namedtuple("RenameAttrib", "node oldname newname") - --InsertComment = namedtuple('InsertComment', 'target position text') -+InsertComment = namedtuple("InsertComment", "target position text") -Index: xmldiff-2.4/xmldiff/diff.py -=================================================================== ---- xmldiff-2.4.orig/xmldiff/diff.py -+++ xmldiff-2.4/xmldiff/diff.py -@@ -1,15 +1,11 @@ --from __future__ import division -- - from copy import 
deepcopy - from difflib import SequenceMatcher - from lxml import etree - from xmldiff import utils, actions - - --class Differ(object): -- -- def __init__(self, F=None, uniqueattrs=None, ratio_mode='fast', -- fast_match=False): -+class Differ: -+ def __init__(self, F=None, uniqueattrs=None, ratio_mode="fast", fast_match=False): - # The minimum similarity between two nodes to consider them equal - if F is None: - F = 0.5 -@@ -18,17 +14,17 @@ class Differ(object): - # that uniquely identifies a node inside a document. Defaults - # to 'xml:id'. - if uniqueattrs is None: -- uniqueattrs = ['{http://www.w3.org/XML/1998/namespace}id'] -+ uniqueattrs = ["{http://www.w3.org/XML/1998/namespace}id"] - self.uniqueattrs = uniqueattrs - self.fast_match = fast_match - - # Avoid recreating this for every node - self._sequencematcher = SequenceMatcher() -- if ratio_mode == 'fast': -+ if ratio_mode == "fast": - self._sequence_ratio = self._sequencematcher.quick_ratio -- elif ratio_mode == 'accurate': -+ elif ratio_mode == "accurate": - self._sequence_ratio = self._sequencematcher.ratio -- elif ratio_mode == 'faster': -+ elif ratio_mode == "faster": - self._sequence_ratio = self._sequencematcher.real_quick_ratio - else: - raise ValueError("Unknown ratio_mode '%s'" % ratio_mode) -@@ -57,8 +53,9 @@ class Differ(object): - right = right.getroot() - - if not (etree.iselement(left) and etree.iselement(right)): -- raise TypeError("The 'left' and 'right' parameters must be " -- "lxml Elements.") -+ raise TypeError( -+ "The 'left' and 'right' parameters must be " "lxml Elements." -+ ) - - # Left gets modified as a part of the diff, deepcopy it first. 
- self.left = deepcopy(left) -@@ -111,14 +108,15 @@ class Differ(object): - - if self.fast_match: - # First find matches with longest_common_subsequence: -- matches = list(utils.longest_common_subsequence( -- lnodes, rnodes, lambda x, y: self.node_ratio(x, y) >= 0.5)) -+ matches = list( -+ utils.longest_common_subsequence( -+ lnodes, rnodes, lambda x, y: self.node_ratio(x, y) >= 0.5 -+ ) -+ ) - - # Add the matches (I prefer this from start to finish): - for left_match, right_match in matches: -- self.append_match(lnodes[left_match], -- rnodes[right_match], -- None) -+ self.append_match(lnodes[left_match], rnodes[right_match], None) - - # Then remove the nodes (needs to be done backwards): - for left_match, right_match in reversed(matches): -@@ -185,16 +183,16 @@ class Differ(object): - if node in self._text_cache: - return self._text_cache[node] - # Get the texts and the tag as a start -- texts = node.xpath('text()') -+ texts = node.xpath("text()") - - # Then add attributes and values - for tag, value in sorted(node.attrib.items()): -- if tag[0] == '{': -- tag = tag.split('}',)[-1] -- texts.append('%s:%s' % (tag, value)) -+ if tag[0] == "{": -+ tag = tag.split("}",)[-1] -+ texts.append(f"{tag}:{value}") - - # Finally make one string, useful to see how similar two nodes are -- text = u' '.join(texts).strip() -+ text = " ".join(texts).strip() - result = utils.cleanup_whitespace(text) - self._text_cache[node] = result - return result -@@ -256,8 +254,7 @@ class Differ(object): - # Move: Check if any of the new attributes have the same value - # as the removed attributes. 
If they do, it's actually - # a renaming, and a move is one action instead of remove + insert -- newattrmap = {v: k for (k, v) in right.attrib.items() -- if k in new_keys} -+ newattrmap = {v: k for (k, v) in right.attrib.items() if k in new_keys} - for lk in sorted(removed_keys): - value = left.attrib[lk] - if value in newattrmap: -@@ -332,19 +329,23 @@ class Differ(object): - return i - - def align_children(self, left, right): -- lchildren = [c for c in left.getchildren() -- if (id(c) in self._l2rmap and -- self._l2rmap[id(c)].getparent() is right)] -- rchildren = [c for c in right.getchildren() -- if (id(c) in self._r2lmap and -- self._r2lmap[id(c)].getparent() is left)] -+ lchildren = [ -+ c -+ for c in left.getchildren() -+ if (id(c) in self._l2rmap and self._l2rmap[id(c)].getparent() is right) -+ ] -+ rchildren = [ -+ c -+ for c in right.getchildren() -+ if (id(c) in self._r2lmap and self._r2lmap[id(c)].getparent() is left) -+ ] - if not lchildren or not rchildren: - # Nothing to align - return - - lcs = utils.longest_common_subsequence( -- lchildren, rchildren, -- lambda x, y: self._l2rmap[id(x)] is y) -+ lchildren, rchildren, lambda x, y: self._l2rmap[id(x)] is y -+ ) - - for x, y in lcs: - # Mark these as in order -@@ -362,9 +363,8 @@ class Differ(object): - rtarget = rchild.getparent() - ltarget = self._r2lmap[id(rtarget)] - yield actions.MoveNode( -- utils.getpath(lchild), -- utils.getpath(ltarget), -- right_pos) -+ utils.getpath(lchild), utils.getpath(ltarget), right_pos -+ ) - # Do the actual move: - left.remove(lchild) - ltarget.insert(right_pos, lchild) -@@ -394,11 +394,13 @@ class Differ(object): - # (ii) - if rnode.tag is etree.Comment: - yield actions.InsertComment( -- utils.getpath(ltarget, ltree), pos, rnode.text) -+ utils.getpath(ltarget, ltree), pos, rnode.text -+ ) - lnode = etree.Comment(rnode.text) - else: -- yield actions.InsertNode(utils.getpath(ltarget, ltree), -- rnode.tag, pos) -+ yield actions.InsertNode( -+ utils.getpath(ltarget, 
ltree), rnode.tag, pos -+ ) - lnode = ltarget.makeelement(rnode.tag) - - # (iii) -@@ -409,8 +411,7 @@ class Differ(object): - # And then we update attributes. This is different from the - # paper, because the paper assumes nodes only has labels and - # values. Nodes also has texts, we do them later. -- for action in self.update_node_attr(lnode, rnode): -- yield action -+ yield from self.update_node_attr(lnode, rnode) - - # (c) - else: -@@ -426,9 +427,8 @@ class Differ(object): - if ltarget is not lparent: - pos = self.find_pos(rnode) - yield actions.MoveNode( -- utils.getpath(lnode, ltree), -- utils.getpath(ltarget, ltree), -- pos) -+ utils.getpath(lnode, ltree), utils.getpath(ltarget, ltree), pos -+ ) - # Move the node from current parent to target - lparent.remove(lnode) - ltarget.insert(pos, lnode) -@@ -436,26 +436,22 @@ class Differ(object): - self._inorder.add(rnode) - - # Rename -- for action in self.update_node_tag(lnode, rnode): -- yield action -+ yield from self.update_node_tag(lnode, rnode) - - # (ii) Update - # XXX If they are exactly equal, we can skip this, - # maybe store match results in a cache? -- for action in self.update_node_attr(lnode, rnode): -- yield action -+ yield from self.update_node_attr(lnode, rnode) - - # (d) Align -- for action in self.align_children(lnode, rnode): -- yield action -+ yield from self.align_children(lnode, rnode) - - # And lastly, we update all node texts. We do this after - # aligning children, because when you generate an XML diff - # from this, that XML diff update generates more children, - # confusing later inserts or deletes. 
- lnode = self._r2lmap[id(rnode)] -- for action in self.update_node_text(lnode, rnode): -- yield action -+ yield from self.update_node_text(lnode, rnode) - - for lnode in utils.reverse_post_order_traverse(self.left): - if id(lnode) not in self._l2rmap: -Index: xmldiff-2.4/xmldiff/diff_match_patch.py -=================================================================== ---- xmldiff-2.4.orig/xmldiff/diff_match_patch.py -+++ xmldiff-2.4/xmldiff/diff_match_patch.py -@@ -1,4 +1,5 @@ - import sys -+ - if sys.version_info[0] == 3: - from xmldiff._diff_match_patch_py3 import * - else: -Index: xmldiff-2.4/xmldiff/formatting.py -=================================================================== ---- xmldiff-2.4.orig/xmldiff/formatting.py -+++ xmldiff-2.4/xmldiff/formatting.py -@@ -1,6 +1,5 @@ - import json - import re --import six - - from collections import namedtuple - from copy import deepcopy -@@ -9,12 +8,12 @@ from xmldiff.diff_match_patch import dif - from xmldiff import utils - - --DIFF_NS = 'http://namespaces.shoobx.com/diff' --DIFF_PREFIX = 'diff' -+DIFF_NS = "http://namespaces.shoobx.com/diff" -+DIFF_PREFIX = "diff" - --INSERT_NAME = '{%s}insert' % DIFF_NS --DELETE_NAME = '{%s}delete' % DIFF_NS --RENAME_NAME = '{%s}rename' % DIFF_NS -+INSERT_NAME = "{%s}insert" % DIFF_NS -+DELETE_NAME = "{%s}delete" % DIFF_NS -+RENAME_NAME = "{%s}rename" % DIFF_NS - - # Flags for whitespace handling in the text aware formatters: - WS_BOTH = 3 # Normalize ignorable whitespace and text whitespace -@@ -34,15 +33,15 @@ T_SINGLE = 2 - # that have narrow builds, we can change this to 0xf00000, which is - # the start of two 64,000 private use blocks. - # PY3: Once Python 2.7 support is dropped we should change this to 0xf00000 --PLACEHOLDER_START = 0xe000 -+PLACEHOLDER_START = 0xE000 - - - # These Bases can be abstract baseclasses, but it's a pain to support - # Python 2.7 in that case, because there is no abc.ABC. Right now this - # is just a description of the API. 
- --class BaseFormatter(object): - -+class BaseFormatter: - def __init__(self, normalize=WS_TAGS, pretty_print=False): - """Formatters must as a minimum have a normalize parameter - -@@ -75,10 +74,10 @@ class BaseFormatter(object): - """ - - --PlaceholderEntry = namedtuple('PlaceholderEntry', 'element ttype close_ph') -+PlaceholderEntry = namedtuple("PlaceholderEntry", "element ttype close_ph") - - --class PlaceholderMaker(object): -+class PlaceholderMaker: - """Replace tags with unicode placeholders - - This class searches for certain tags in an XML tree and replaces them -@@ -98,20 +97,17 @@ class PlaceholderMaker(object): - self.placeholder = PLACEHOLDER_START - - insert_elem = etree.Element(INSERT_NAME) -- insert_close = self.get_placeholder( -- insert_elem, T_CLOSE, None) -- insert_open = self.get_placeholder( -- insert_elem, T_OPEN, insert_close) -+ insert_close = self.get_placeholder(insert_elem, T_CLOSE, None) -+ insert_open = self.get_placeholder(insert_elem, T_OPEN, insert_close) - - delete_elem = etree.Element(DELETE_NAME) -- delete_close = self.get_placeholder( -- delete_elem, T_CLOSE, None) -- delete_open = self.get_placeholder( -- delete_elem, T_OPEN, delete_close) -+ delete_close = self.get_placeholder(delete_elem, T_CLOSE, None) -+ delete_open = self.get_placeholder(delete_elem, T_OPEN, delete_close) - - self.diff_tags = { -- 'insert': (insert_open, insert_close), -- 'delete': (delete_open, delete_close)} -+ "insert": (insert_open, insert_close), -+ "delete": (delete_open, delete_close), -+ } - - def get_placeholder(self, element, ttype, close_ph): - tag = etree.tounicode(element) -@@ -120,7 +116,7 @@ class PlaceholderMaker(object): - return ph - - self.placeholder += 1 -- ph = six.unichr(self.placeholder) -+ ph = chr(self.placeholder) - self.placeholder2tag[ph] = PlaceholderEntry(element, ttype, close_ph) - self.tag2placeholder[tag, ttype, close_ph] = ph - return ph -@@ -135,17 +131,17 @@ class PlaceholderMaker(object): - for child in element: - # 
Resolve all formatting text by allowing the inside text to - # participate in the text diffing. -- tail = child.tail or u'' -- child.tail = u'' -- new_text = element.text or u'' -+ tail = child.tail or "" -+ child.tail = "" -+ new_text = element.text or "" - - if self.is_formatting(child): - ph_close = self.get_placeholder(child, T_CLOSE, None) - ph_open = self.get_placeholder(child, T_OPEN, ph_close) - # If it's known text formatting tags, do this hierarchically - self.do_element(child) -- text = child.text or u'' -- child.text = u'' -+ text = child.text or "" -+ child.text = "" - # Stick the placeholder in instead of the start and end tags: - element.text = new_text + ph_open + text + ph_close + tail - else: -@@ -159,15 +155,15 @@ class PlaceholderMaker(object): - - def do_tree(self, tree): - if self.text_tags: -- for elem in tree.xpath('//'+'|//'.join(self.text_tags)): -+ for elem in tree.xpath("//" + "|//".join(self.text_tags)): - self.do_element(elem) - - def split_string(self, text): -- regexp = u'([%s])' % u''.join(self.placeholder2tag) -+ regexp = "([%s])" % "".join(self.placeholder2tag) - return re.split(regexp, text, flags=re.MULTILINE) - - def undo_string(self, text): -- result = etree.Element('wrap') -+ result = etree.Element("wrap") - element = None - - segments = self.split_string(text) -@@ -184,7 +180,7 @@ class PlaceholderMaker(object): - if entry.ttype == T_OPEN: - # Yup - next_seg = segments.pop(0) -- new_text = u'' -+ new_text = "" - while next_seg != entry.close_ph: - new_text += next_seg - next_seg = segments.pop(0) -@@ -195,9 +191,9 @@ class PlaceholderMaker(object): - result.append(element) - else: - if element is not None: -- element.tail = element.tail or u'' + seg -+ element.tail = element.tail or "" + seg - else: -- result.text = result.text or u'' + seg -+ result.text = result.text or "" + seg - - return result - -@@ -244,8 +240,8 @@ class PlaceholderMaker(object): - elem = deepcopy(elem) - if self.is_formatting(elem): - # Formatting 
element, add a diff attribute -- action += '-formatting' -- elem.attrib['{%s}%s' % (DIFF_NS, action)] = '' -+ action += "-formatting" -+ elem.attrib[f"{{{DIFF_NS}}}{action}"] = "" - - # And make a new placeholder for this new entry: - return self.get_placeholder(elem, entry.ttype, entry.close_ph) -@@ -301,15 +297,17 @@ class XMLFormatter(BaseFormatter): - all whitespace. - """ - -- def __init__(self, normalize=WS_NONE, pretty_print=True, -- text_tags=(), formatting_tags=()): -+ def __init__( -+ self, normalize=WS_NONE, pretty_print=True, text_tags=(), formatting_tags=() -+ ): - # Mapping from placeholders -> structural content and vice versa. - self.normalize = normalize - self.pretty_print = pretty_print - self.text_tags = text_tags - self.formatting_tags = formatting_tags - self.placeholderer = PlaceholderMaker( -- text_tags=text_tags, formatting_tags=formatting_tags) -+ text_tags=text_tags, formatting_tags=formatting_tags -+ ) - - def prepare(self, left_tree, right_tree): - """prepare() is run on the trees before diffing -@@ -352,11 +350,11 @@ class XMLFormatter(BaseFormatter): - - def handle_action(self, action, result): - action_type = type(action) -- method = getattr(self, '_handle_' + action_type.__name__) -+ method = getattr(self, "_handle_" + action_type.__name__) - method(action, result) - - def _remove_comments(self, tree): -- comments = tree.xpath('//comment()') -+ comments = tree.xpath("//comment()") - - for element in comments: - parent = element.getparent() -@@ -371,20 +369,20 @@ class XMLFormatter(BaseFormatter): - # one and exactly one element is found. This is to protect against - # formatting a diff on the wrong tree, or against using ambiguous - # edit script xpaths. 
-- if xpath[0] == '/': -+ if xpath[0] == "/": - root = True - xpath = xpath[1:] - else: - root = False - -- if '/' in xpath: -- path, rest = xpath.split('/', 1) -+ if "/" in xpath: -+ path, rest = xpath.split("/", 1) - else: - path = xpath -- rest = '' -+ rest = "" - -- if '[' in path: -- path, index = path[:-1].split('[') -+ if "[" in path: -+ path, index = path[:-1].split("[") - index = int(index) - 1 - multiple = False - else: -@@ -392,7 +390,7 @@ class XMLFormatter(BaseFormatter): - multiple = True - - if root: -- path = '/' + path -+ path = "/" + path - - matches = [] - for match in node.xpath(path, namespaces=node.nsmap): -@@ -401,33 +399,39 @@ class XMLFormatter(BaseFormatter): - matches.append(match) - - if index >= len(matches): -- raise ValueError('xpath %s[%s] not found at %s.' % ( -- path, index + 1, utils.getpath(node))) -+ raise ValueError( -+ "xpath {}[{}] not found at {}.".format( -+ path, index + 1, utils.getpath(node) -+ ) -+ ) - if len(matches) > 1 and multiple: -- raise ValueError('Multiple nodes found for xpath %s at %s.' 
% ( -- path, utils.getpath(node))) -+ raise ValueError( -+ "Multiple nodes found for xpath {} at {}.".format( -+ path, utils.getpath(node) -+ ) -+ ) - match = matches[index] - if rest: - return self._xpath(match, rest) - return match - - def _extend_diff_attr(self, node, action, value): -- diffattr = '{%s}%s-attr' % (DIFF_NS, action) -- oldvalue = node.attrib.get(diffattr, '') -+ diffattr = f"{{{DIFF_NS}}}{action}-attr" -+ oldvalue = node.attrib.get(diffattr, "") - if oldvalue: -- value = oldvalue + ';' + value -+ value = oldvalue + ";" + value - node.attrib[diffattr] = value - - def _delete_attrib(self, node, name): - del node.attrib[name] -- self._extend_diff_attr(node, 'delete', name) -+ self._extend_diff_attr(node, "delete", name) - - def _handle_DeleteAttrib(self, action, tree): - node = self._xpath(tree, action.node) - self._delete_attrib(node, action.name) - - def _delete_node(self, node): -- node.attrib[DELETE_NAME] = '' -+ node.attrib[DELETE_NAME] = "" - - def _handle_DeleteNode(self, action, tree): - node = self._xpath(tree, action.node) -@@ -435,14 +439,14 @@ class XMLFormatter(BaseFormatter): - - def _insert_attrib(self, node, name, value): - node.attrib[name] = value -- self._extend_diff_attr(node, 'add', name) -+ self._extend_diff_attr(node, "add", name) - - def _handle_InsertAttrib(self, action, tree): - node = self._xpath(tree, action.node) - self._insert_attrib(node, action.name, action.value) - - def _insert_node(self, target, node, position): -- node.attrib[INSERT_NAME] = '' -+ node.attrib[INSERT_NAME] = "" - target.insert(position, node) - - def _get_real_insert_position(self, target, position): -@@ -472,7 +476,7 @@ class XMLFormatter(BaseFormatter): - def _rename_attrib(self, node, oldname, newname): - node.attrib[newname] = node.attrib[oldname] - del node.attrib[oldname] -- self._extend_diff_attr(node, 'rename', '%s:%s' % (oldname, newname)) -+ self._extend_diff_attr(node, "rename", f"{oldname}:{newname}") - - def _handle_RenameAttrib(self, 
action, tree): - node = self._xpath(tree, action.node) -@@ -494,7 +498,7 @@ class XMLFormatter(BaseFormatter): - def _update_attrib(self, node, name, value): - oldval = node.attrib[name] - node.attrib[name] = value -- self._extend_diff_attr(node, 'update', '%s:%s' % (name, oldval)) -+ self._extend_diff_attr(node, "update", f"{name}:{oldval}") - - def _handle_UpdateAttrib(self, action, tree): - node = self._xpath(tree, action.node) -@@ -540,10 +544,7 @@ class XMLFormatter(BaseFormatter): - # needs to be closed before the requested node closure can - # happen. - stack_op, stack_entry = _stack_pop() -- while ( -- stack_entry is not None and -- stack_entry.close_ph != seg -- ): -+ while stack_entry is not None and stack_entry.close_ph != seg: - new_diff.append((stack_op, stack_entry.close_ph)) - stack_op, stack_entry = _stack_pop() - -@@ -564,11 +565,11 @@ class XMLFormatter(BaseFormatter): - - def _make_diff_tags(self, left_value, right_value, node, target=None): - if bool(self.normalize & WS_TEXT): -- left_value = utils.cleanup_whitespace(left_value or u'').strip() -- right_value = utils.cleanup_whitespace(right_value or u'').strip() -+ left_value = utils.cleanup_whitespace(left_value or "").strip() -+ right_value = utils.cleanup_whitespace(right_value or "").strip() - - text_diff = diff_match_patch() -- diff = text_diff.diff_main(left_value or '', right_value or '') -+ diff = text_diff.diff_main(left_value or "", right_value or "") - text_diff.diff_cleanupSemantic(diff) - - diff = self._realign_placeholders(diff) -@@ -582,29 +583,29 @@ class XMLFormatter(BaseFormatter): - for op, text in diff: - if op == 0: - if cur_child is None: -- node.text = (node.text or u'') + text -+ node.text = (node.text or "") + text - else: -- cur_child.tail = (cur_child.tail or u'') + text -+ cur_child.tail = (cur_child.tail or "") + text - continue - - if op == -1: -- action = 'delete' -+ action = "delete" - elif op == 1: -- action = 'insert' -+ action = "insert" - - if 
self.placeholderer.is_placeholder(text): - ph = self.placeholderer.mark_diff(text, action) - - if cur_child is None: -- node.text = (node.text or u'') + ph -+ node.text = (node.text or "") + ph - - else: - new_text = self.placeholderer.wrap_diff(text, action) - - if cur_child is None: -- node.text = (node.text or u'') + new_text -+ node.text = (node.text or "") + new_text - else: -- cur_child.tail = (cur_child.tail or u'') + new_text -+ cur_child.tail = (cur_child.tail or "") + new_text - - def _handle_UpdateTextIn(self, action, tree): - node = self._xpath(tree, action.node) -@@ -635,66 +636,70 @@ class XMLFormatter(BaseFormatter): - - - class DiffFormatter(BaseFormatter): -- - def __init__(self, normalize=WS_TAGS, pretty_print=False): - self.normalize = normalize - # No pretty print support, nothing to be pretty about - - # Nothing to prepare or finalize (one-liners for code coverage) -- def prepare(self, left, right): return -+ def prepare(self, left, right): -+ return - -- def finalize(self, left, right): return -+ def finalize(self, left, right): -+ return - - def format(self, diff, orig_tree): - # This Formatter don't need the left tree, but the XMLFormatter - # does, so the parameter is required. 
-- res = u'\n'.join(self._format_action(action) for action in diff) -+ res = "\n".join(self._format_action(action) for action in diff) - return res - -- def _format_action(self, action, ): -- return u'[%s]' % self.handle_action(action) -+ def _format_action( -+ self, action, -+ ): -+ return "[%s]" % self.handle_action(action) - - def handle_action(self, action): - action_type = type(action) -- method = getattr(self, '_handle_' + action_type.__name__) -- return u', '.join(method(action)) -+ method = getattr(self, "_handle_" + action_type.__name__) -+ return ", ".join(method(action)) - - def _handle_DeleteAttrib(self, action): -- return u"delete-attribute", action.node, action.name -+ return "delete-attribute", action.node, action.name - - def _handle_DeleteNode(self, action): -- return u"delete", action.node -+ return "delete", action.node - - def _handle_InsertAttrib(self, action): -- return (u"insert-attribute", action.node, action.name, -- json.dumps(action.value)) -+ return ("insert-attribute", action.node, action.name, json.dumps(action.value)) - - def _handle_InsertNode(self, action): -- return u"insert", action.target, action.tag, str(action.position) -+ return "insert", action.target, action.tag, str(action.position) - - def _handle_RenameAttrib(self, action): -- return (u"rename-attribute", action.node, action.oldname, -- action.newname) -+ return ("rename-attribute", action.node, action.oldname, action.newname) - - def _handle_MoveNode(self, action): -- return u"move", action.node, action.target, str(action.position) -+ return "move", action.node, action.target, str(action.position) - - def _handle_UpdateAttrib(self, action): -- return (u"update-attribute", action.node, action.name, -- json.dumps(action.value)) -+ return ("update-attribute", action.node, action.name, json.dumps(action.value)) - - def _handle_UpdateTextIn(self, action): -- return u"update-text", action.node, json.dumps(action.text) -+ return "update-text", action.node, 
json.dumps(action.text) - - def _handle_UpdateTextAfter(self, action): -- return u"update-text-after", action.node, json.dumps(action.text) -+ return "update-text-after", action.node, json.dumps(action.text) - - def _handle_RenameNode(self, action): -- return u"rename", action.node, action.tag -+ return "rename", action.node, action.tag - - def _handle_InsertComment(self, action): -- return (u"insert-comment", action.target, str(action.position), -- json.dumps(action.text)) -+ return ( -+ "insert-comment", -+ action.target, -+ str(action.position), -+ json.dumps(action.text), -+ ) - - - class XmlDiffFormatter(BaseFormatter): -@@ -705,9 +710,11 @@ class XmlDiffFormatter(BaseFormatter): - # No pretty print support, nothing to be pretty about - - # Nothing to prepare or finalize (one-liners for code coverage) -- def prepare(self, left, right): return -+ def prepare(self, left, right): -+ return - -- def finalize(self, left, right): return -+ def finalize(self, left, right): -+ return - - def format(self, diff, orig_tree): - # This Formatter don't need the left tree, but the XMLFormatter -@@ -715,45 +722,44 @@ class XmlDiffFormatter(BaseFormatter): - actions = [] - for action in diff: - actions.extend(self.handle_action(action, orig_tree)) -- res = u'\n'.join(self._format_action(action) for action in actions) -+ res = "\n".join(self._format_action(action) for action in actions) - return res - - def _format_action(self, action): -- return u'[%s]' % ', '.join(action) -+ return "[%s]" % ", ".join(action) - - def handle_action(self, action, orig_tree): - action_type = type(action) -- method = getattr(self, '_handle_' + action_type.__name__) -- for item in method(action, orig_tree): -- yield item -+ method = getattr(self, "_handle_" + action_type.__name__) -+ yield from method(action, orig_tree) - - def _handle_DeleteAttrib(self, action, orig_tree): -- yield u"remove", '%s/@%s' % (action.node, action.name) -+ yield "remove", f"{action.node}/@{action.name}" - - def 
_handle_DeleteNode(self, action, orig_tree): -- yield u"remove", action.node -+ yield "remove", action.node - - def _handle_InsertAttrib(self, action, orig_tree): - value_text = "\n<@{0}>\n{1}\n".format(action.name, action.value) -- yield u"insert", action.node, value_text -+ yield "insert", action.node, value_text - - def _handle_InsertNode(self, action, orig_tree): - if action.position == 0: -- yield u"insert-first", action.target, '\n<%s/>' % action.tag -+ yield "insert-first", action.target, "\n<%s/>" % action.tag - return - sibling = orig_tree.xpath(action.target)[0][action.position - 1] -- yield u"insert-after", utils.getpath(sibling), '\n<%s/>' % action.tag -+ yield "insert-after", utils.getpath(sibling), "\n<%s/>" % action.tag - - def _handle_RenameAttrib(self, action, orig_tree): - node = orig_tree.xpath(action.node)[0] - value = node.attrib[action.oldname] - value_text = "\n<@{0}>\n{1}\n".format(action.newname, value) -- yield u"remove", '%s/@%s' % (action.node, action.oldname) -- yield u"insert", action.node, value_text -+ yield "remove", f"{action.node}/@{action.oldname}" -+ yield "insert", action.node, value_text - - def _handle_MoveNode(self, action, orig_tree): - if action.position == 0: -- yield u"move-first", action.node, action.target -+ yield "move-first", action.node, action.target - return - node = orig_tree.xpath(action.node)[0] - target = orig_tree.xpath(action.target)[0] -@@ -766,21 +772,23 @@ class XmlDiffFormatter(BaseFormatter): - position += 1 - - sibling = target[position] -- yield u"move-after", action.node, utils.getpath(sibling) -+ yield "move-after", action.node, utils.getpath(sibling) - - def _handle_UpdateAttrib(self, action, orig_tree): -- yield (u"update", '%s/@%s' % (action.node, action.name), -- json.dumps(action.value)) -+ yield ( -+ "update", -+ f"{action.node}/@{action.name}", -+ json.dumps(action.value), -+ ) - - def _handle_UpdateTextIn(self, action, orig_tree): -- yield u"update", action.node + '/text()[1]', 
json.dumps(action.text) -+ yield "update", action.node + "/text()[1]", json.dumps(action.text) - - def _handle_UpdateTextAfter(self, action, orig_tree): -- yield u"update", action.node + '/text()[2]', json.dumps(action.text) -+ yield "update", action.node + "/text()[2]", json.dumps(action.text) - - def _handle_RenameNode(self, action, orig_tree): -- yield u"rename", action.node, action.tag -+ yield "rename", action.node, action.tag - - def _handle_InsertComment(self, action, orig_tree): -- yield (u"insert-comment", action.target, str(action.position), -- action.text) -+ yield ("insert-comment", action.target, str(action.position), action.text) -Index: xmldiff-2.4/xmldiff/main.py -=================================================================== ---- xmldiff-2.4.orig/xmldiff/main.py -+++ xmldiff-2.4/xmldiff/main.py -@@ -1,6 +1,5 @@ - """All major API points and command-line tools""" - import pkg_resources --import six - - from argparse import ArgumentParser, FileType - from lxml import etree -@@ -9,9 +8,9 @@ from xmldiff import diff, formatting, pa - __version__ = pkg_resources.require("xmldiff")[0].version - - FORMATTERS = { -- 'diff': formatting.DiffFormatter, -- 'xml': formatting.XMLFormatter, -- 'old': formatting.XmlDiffFormatter, -+ "diff": formatting.DiffFormatter, -+ "xml": formatting.XMLFormatter, -+ "old": formatting.XmlDiffFormatter, - } - - -@@ -31,59 +30,89 @@ def diff_trees(left, right, diff_options - - - def _diff(parse_method, left, right, diff_options=None, formatter=None): -- normalize = bool(getattr(formatter, 'normalize', 1) & formatting.WS_TAGS) -+ normalize = bool(getattr(formatter, "normalize", 1) & formatting.WS_TAGS) - parser = etree.XMLParser(remove_blank_text=normalize) - left_tree = parse_method(left, parser) - right_tree = parse_method(right, parser) -- return diff_trees(left_tree, right_tree, diff_options=diff_options, -- formatter=formatter) -+ return diff_trees( -+ left_tree, right_tree, diff_options=diff_options, formatter=formatter 
-+ ) - - - def diff_texts(left, right, diff_options=None, formatter=None): - """Takes two Unicode strings containing XML""" -- return _diff(etree.fromstring, left, right, -- diff_options=diff_options, formatter=formatter) -+ return _diff( -+ etree.fromstring, left, right, diff_options=diff_options, formatter=formatter -+ ) - - - def diff_files(left, right, diff_options=None, formatter=None): - """Takes two filenames or streams, and diffs the XML in those files""" -- return _diff(etree.parse, left, right, -- diff_options=diff_options, formatter=formatter) -+ return _diff( -+ etree.parse, left, right, diff_options=diff_options, formatter=formatter -+ ) - - - def make_diff_parser(): -- parser = ArgumentParser(description='Create a diff for two XML files.', -- add_help=False) -- parser.add_argument('file1', type=FileType('r'), -- help='The first input file.') -- parser.add_argument('file2', type=FileType('r'), -- help='The second input file.') -- parser.add_argument('-h', '--help', action='help', -- help='Show this help message and exit.') -- parser.add_argument('-v', '--version', action='version', -- help='Display version and exit.', -- version='xmldiff %s' % __version__) -- parser.add_argument('-f', '--formatter', default='diff', -- choices=list(FORMATTERS.keys()), -- help='Formatter selection.') -- parser.add_argument('-w', '--keep-whitespace', action='store_true', -- help='Do not strip ignorable whitespace.') -- parser.add_argument('-p', '--pretty-print', action='store_true', -- help='Try to make XML output more readable.') -- parser.add_argument('-F', type=float, -- help='A value between 0 and 1 that determines how ' -- 'similar nodes must be to match.') -- parser.add_argument('--unique-attributes', type=str, nargs='?', -- default='{http://www.w3.org/XML/1998/namespace}id', -- help='A comma separated list of attributes ' -- 'that uniquely identify a node. Can be empty. 
' -- 'Unique attributes for certain elements can ' -- 'be specified in the format {NS}element@attr.') -- parser.add_argument('--ratio-mode', default='fast', -- choices={'accurate', 'fast', 'faster'}, -- help='Choose the node comparison optimization.') -- parser.add_argument('--fast-match', action='store_true', -- help='A faster, less optimal match run.') -+ parser = ArgumentParser( -+ description="Create a diff for two XML files.", add_help=False -+ ) -+ parser.add_argument("file1", type=FileType("r"), help="The first input file.") -+ parser.add_argument("file2", type=FileType("r"), help="The second input file.") -+ parser.add_argument( -+ "-h", "--help", action="help", help="Show this help message and exit." -+ ) -+ parser.add_argument( -+ "-v", -+ "--version", -+ action="version", -+ help="Display version and exit.", -+ version="xmldiff %s" % __version__, -+ ) -+ parser.add_argument( -+ "-f", -+ "--formatter", -+ default="diff", -+ choices=list(FORMATTERS.keys()), -+ help="Formatter selection.", -+ ) -+ parser.add_argument( -+ "-w", -+ "--keep-whitespace", -+ action="store_true", -+ help="Do not strip ignorable whitespace.", -+ ) -+ parser.add_argument( -+ "-p", -+ "--pretty-print", -+ action="store_true", -+ help="Try to make XML output more readable.", -+ ) -+ parser.add_argument( -+ "-F", -+ type=float, -+ help="A value between 0 and 1 that determines how " -+ "similar nodes must be to match.", -+ ) -+ parser.add_argument( -+ "--unique-attributes", -+ type=str, -+ nargs="?", -+ default="{http://www.w3.org/XML/1998/namespace}id", -+ help="A comma separated list of attributes " -+ "that uniquely identify a node. Can be empty. 
" -+ "Unique attributes for certain elements can " -+ "be specified in the format {NS}element@attr.", -+ ) -+ parser.add_argument( -+ "--ratio-mode", -+ default="fast", -+ choices={"accurate", "fast", "faster"}, -+ help="Choose the node comparison optimization.", -+ ) -+ parser.add_argument( -+ "--fast-match", action="store_true", help="A faster, less optimal match run." -+ ) - return parser - - -@@ -91,8 +120,8 @@ def _parse_uniqueattrs(uniqueattrs): - if uniqueattrs is None: - return [] - return [ -- attr if '@' not in attr else attr.split('@', 1) -- for attr in uniqueattrs.split(',') -+ attr if "@" not in attr else attr.split("@", 1) -+ for attr in uniqueattrs.split(",") - ] - - -@@ -105,16 +134,19 @@ def diff_command(args=None): - else: - normalize = formatting.WS_BOTH - -- formatter = FORMATTERS[args.formatter](normalize=normalize, -- pretty_print=args.pretty_print) -- -- diff_options = {'ratio_mode': args.ratio_mode, -- 'F': args.F, -- 'fast_match': args.fast_match, -- 'uniqueattrs': _parse_uniqueattrs(args.unique_attributes), -- } -- result = diff_files(args.file1, args.file2, diff_options=diff_options, -- formatter=formatter) -+ formatter = FORMATTERS[args.formatter]( -+ normalize=normalize, pretty_print=args.pretty_print -+ ) -+ -+ diff_options = { -+ "ratio_mode": args.ratio_mode, -+ "F": args.F, -+ "fast_match": args.fast_match, -+ "uniqueattrs": _parse_uniqueattrs(args.unique_attributes), -+ } -+ result = diff_files( -+ args.file1, args.file2, diff_options=diff_options, formatter=formatter -+ ) - print(result) - - -@@ -136,7 +168,7 @@ def patch_file(actions, tree): - """Takes two filenames or streams, one with XML the other a diff""" - tree = etree.parse(tree) - -- if isinstance(actions, six.string_types): -+ if isinstance(actions, str): - # It's a string, so it's a filename - with open(actions) as f: - actions = f.read() -@@ -150,17 +182,21 @@ def patch_file(actions, tree): - - - def make_patch_parser(): -- parser = ArgumentParser(description='Patch an 
XML file with an xmldiff', -- add_help=False) -- parser.add_argument('patchfile', type=FileType('r'), -- help='An xmldiff diff file.') -- parser.add_argument('xmlfile', type=FileType('r'), -- help='An unpatched XML file.') -- parser.add_argument('-h', '--help', action='help', -- help='Show this help message and exit.') -- parser.add_argument('-v', '--version', action='version', -- help='Display version and exit.', -- version='xmldiff %s' % __version__) -+ parser = ArgumentParser( -+ description="Patch an XML file with an xmldiff", add_help=False -+ ) -+ parser.add_argument("patchfile", type=FileType("r"), help="An xmldiff diff file.") -+ parser.add_argument("xmlfile", type=FileType("r"), help="An unpatched XML file.") -+ parser.add_argument( -+ "-h", "--help", action="help", help="Show this help message and exit." -+ ) -+ parser.add_argument( -+ "-v", -+ "--version", -+ action="version", -+ help="Display version and exit.", -+ version="xmldiff %s" % __version__, -+ ) - return parser - - -Index: xmldiff-2.4/xmldiff/patch.py -=================================================================== ---- xmldiff-2.4.orig/xmldiff/patch.py -+++ xmldiff-2.4/xmldiff/patch.py -@@ -5,8 +5,7 @@ from lxml import etree - from xmldiff import actions - - --class Patcher(object): -- -+class Patcher: - def patch(self, actions, tree): - # Copy the tree so we don't modify the original - result = deepcopy(tree) -@@ -18,7 +17,7 @@ class Patcher(object): - - def handle_action(self, action, tree): - action_type = type(action) -- method = getattr(self, '_handle_' + action_type.__name__) -+ method = getattr(self, "_handle_" + action_type.__name__) - method(action, tree) - - def _handle_DeleteNode(self, action, tree): -@@ -72,25 +71,25 @@ class Patcher(object): - target.insert(action.position, etree.Comment(action.text)) - - --class DiffParser(object): -+class DiffParser: - """Makes a text diff into a list of actions""" - - def parse(self, diff): -- incomplete = '' -+ incomplete = "" - - for 
line in diff.splitlines(): - line = incomplete + line - -- if line[0] != '[': -+ if line[0] != "[": - # All actions should start with "[" - raise ValueError("Unknown diff format") -- if line[-1] != ']': -+ if line[-1] != "]": - # This line has been broken into several lines - incomplete = line - continue - - # OK, we found an action -- incomplete = '' -+ incomplete = "" - yield self.make_action(line) - - if incomplete: -@@ -105,7 +104,7 @@ class DiffParser(object): - action = parts[0] - params = parts[1:] - # Get the method, and return the result of calling it -- method = getattr(self, '_handle_' + action.replace('-', '_')) -+ method = getattr(self, "_handle_" + action.replace("-", "_")) - return method(*params) - - def _handle_delete(self, node): -Index: xmldiff-2.4/xmldiff/utils.py -=================================================================== ---- xmldiff-2.4.orig/xmldiff/utils.py -+++ xmldiff-2.4/xmldiff/utils.py -@@ -1,5 +1,3 @@ --from __future__ import division -- - import re - - from operator import eq -@@ -8,16 +6,14 @@ from operator import eq - def post_order_traverse(node): - for child in node.getchildren(): - # PY3: Man, I want yield from! -- for item in post_order_traverse(child): -- yield item -+ yield from post_order_traverse(child) - yield node - - - def reverse_post_order_traverse(node): - for child in reversed(node.getchildren()): - # PY3: Man, I want yield from! 
-- for item in reverse_post_order_traverse(child): -- yield item -+ yield from reverse_post_order_traverse(child) - yield node - - -@@ -43,13 +39,19 @@ def longest_common_subsequence(left_sequ - rend = rslen = len(right_sequence) - - # Trim off the matching items at the beginning -- while (start < lend and start < rend and -- eqfn(left_sequence[start], right_sequence[start])): -+ while ( -+ start < lend -+ and start < rend -+ and eqfn(left_sequence[start], right_sequence[start]) -+ ): - start += 1 - - # trim off the matching items at the end -- while (start < lend and start < rend and -- eqfn(left_sequence[lend - 1], right_sequence[rend - 1])): -+ while ( -+ start < lend -+ and start < rend -+ and eqfn(left_sequence[lend - 1], right_sequence[rend - 1]) -+ ): - lend -= 1 - rend -= 1 - -@@ -67,8 +69,7 @@ def longest_common_subsequence(left_sequ - - for d in range(0, lmax + rmax + 1): - for k in range(-d, d + 1, 2): -- if (k == -d or -- (k != d and furthest[k - 1][0] < furthest[k + 1][0])): -+ if k == -d or (k != d and furthest[k - 1][0] < furthest[k + 1][0]): - # Go down - old_x, history = furthest[k + 1] - x = old_x -@@ -89,47 +90,51 @@ def longest_common_subsequence(left_sequ - - if x >= lmax and y >= rmax: - # This is the best match -- return [(e, e) for e in range(start)] + history + \ -- list(zip(range(lend, lslen), range(rend, rslen))) -+ return ( -+ [(e, e) for e in range(start)] -+ + history -+ + list(zip(range(lend, lslen), range(rend, rslen))) -+ ) - else: - furthest[k] = (x, history) - - --WHITESPACE = re.compile(u'\\s+', flags=re.MULTILINE) -+WHITESPACE = re.compile("\\s+", flags=re.MULTILINE) - - - def cleanup_whitespace(text): -- return WHITESPACE.sub(' ', text) -+ return WHITESPACE.sub(" ", text) - - - def getpath(element, tree=None): - if tree is None: - tree = element.getroottree() - xpath = tree.getpath(element) -- if xpath[-1] != ']': -+ if xpath[-1] != "]": - # The path is unique without specifying a count. 
However, we always - # want that count, so we add [1]. -- xpath = xpath + '[1]' -+ xpath = xpath + "[1]" - return xpath - - - # The remainder of the functions here are helpful when debugging. - # They aren't documented, nor very well tested. --def _make_ascii_tree(element, indent=''): -+def _make_ascii_tree(element, indent=""): - from xmldiff.formatting import DIFF_NS # Avoid circular imports -- diffns = '{%s}' % DIFF_NS -+ -+ diffns = "{%s}" % DIFF_NS - if element.prefix: -- name = '%s:%s' % (element.prefix, element.tag.split('}')[1]) -+ name = "{}:{}".format(element.prefix, element.tag.split("}")[1]) - else: - name = element.tag - diff_attrs = [attr for attr in element.attrib if attr.startswith(diffns)] - if diff_attrs: -- diff = '(%s)' % ', '.join(attr.split('}')[1] for attr in diff_attrs) -+ diff = "(%s)" % ", ".join(attr.split("}")[1] for attr in diff_attrs) - else: -- diff = '' -+ diff = "" - -- result = [' '.join((indent, name, diff))] -- indent = ' ' + indent -+ result = [" ".join((indent, name, diff))] -+ indent = " " + indent - - for child in element.getchildren(): - result.extend(_make_ascii_tree(child, indent)) -@@ -138,4 +143,4 @@ def _make_ascii_tree(element, indent='') - - def make_ascii_tree(element): - result = _make_ascii_tree(element) -- return '\n'.join(result) -+ return "\n".join(result) diff --git a/python-xmldiff.changes b/python-xmldiff.changes index 5193445..c09a76f 100644 --- a/python-xmldiff.changes +++ b/python-xmldiff.changes @@ -1,3 +1,32 @@ +------------------------------------------------------------------- +Tue Mar 5 04:33:13 UTC 2024 - Steve Kowalik + +- Update to 2.6.3: + * Solved an error in the xmlformatter when using default namespaces. #89 + * #108: Fixed an error that happens if using namespaces like ns0 or ns1. + * Added `InsertNamespace` and `DeleteNamespace` actions for better handling + of changing namespaces. Should improve any "Unknown namespace prefix" + errors. 
Changing the URI of a namespace prefix is not supported, and + will raise an error. + * Used geometric mean for the node_ratio, for better handling of simple nodes. + * Added an experimental --best-match method that is slower, but generates + smaller diffs when you have many nodes that are similar. + * The -F argument now also affects the --fast-match stage. + * Make it possible to adjust the attributes considered when comparing nodes. + * Python versions 3.7 to 3.11 are now supported. + * Improved node matching method, that puts more emphasis on similarities than + differences when weighing attributes vs children. + * Added a parameter to return error code 1 when there are differences + between the files + * Added a parameter for ignoring attributes in comparison. + * Solved a bug in xmlpatch in certain namespace situations. + * Added a --diff-encoding parameter to xmlpatch, to support diff-files + that are not in your system default encoding. +- Switch to autosetup and pyproject macros. +- No more greedy globs in %files. +- Drop python-xmldiff-src-upgrades.patch, now included upstream. +- Drop python-xmldiff-no-six-remains.patch, no longer required. 
+ ------------------------------------------------------------------- Fri Oct 14 12:10:39 UTC 2022 - pgajdos@suse.com diff --git a/python-xmldiff.spec b/python-xmldiff.spec index 8450d05..4033489 100644 --- a/python-xmldiff.spec +++ b/python-xmldiff.spec @@ -1,7 +1,7 @@ # # spec file for package python-xmldiff # -# Copyright (c) 2022 SUSE LLC +# Copyright (c) 2024 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,30 +17,26 @@ %define oldpython python -%{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-xmldiff -Version: 2.4 +Version: 2.6.3 Release: 0 Summary: Tree to tree correction between XML documents License: MIT URL: https://github.com/Shoobx/xmldiff Source: https://files.pythonhosted.org/packages/source/x/xmldiff/xmldiff-%{version}.tar.gz -# https://github.com/Shoobx/xmldiff/commit/34b810f4394965aadeca31204e6b76eb023fd11a -Patch0: python-xmldiff-src-upgrades.patch -# https://github.com/Shoobx/xmldiff/issues/93 -Patch1: python-xmldiff-no-six-remains.patch +BuildRequires: %{python_module pip} BuildRequires: %{python_module setuptools} +BuildRequires: %{python_module wheel} BuildRequires: fdupes BuildRequires: python-rpm-macros Requires: python-lxml >= 3.1.0 Requires: python-setuptools Requires(post): update-alternatives -Requires(postun):update-alternatives +Requires(postun): update-alternatives Conflicts: %{oldpython}-xmldiff < %{version} BuildArch: noarch # SECTION test requirements BuildRequires: %{python_module lxml >= 3.1.0} -BuildRequires: %{python_module setuptools} # /SECTION %python_subpackages @@ -53,15 +49,13 @@ structured information", by S. Chawathe, A. Rajaraman, H. Garcia-Molina, and J. Widom, Stanford University, 1996. 
%prep -%setup -q -n xmldiff-%{version} -%patch0 -p1 -%patch1 -p1 +%autosetup -p1 -n xmldiff-%{version} %build -%python_build +%pyproject_wheel %install -%python_install +%pyproject_install %python_clone -a %{buildroot}%{_bindir}/xmlpatch %python_clone -a %{buildroot}%{_bindir}/xmldiff %python_expand %fdupes %{buildroot}%{$python_sitelib} @@ -82,6 +76,7 @@ Garcia-Molina, and J. Widom, Stanford University, 1996. %license LICENSE.txt %python_alternative %{_bindir}/xmldiff %python_alternative %{_bindir}/xmlpatch -%{python_sitelib}/* +%{python_sitelib}/xmldiff +%{python_sitelib}/xmldiff-%{version}.dist-info %changelog diff --git a/xmldiff-2.4.tar.gz b/xmldiff-2.4.tar.gz deleted file mode 100644 index 2743e39..0000000 --- a/xmldiff-2.4.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05bea20ce1f2c9678683bcce0c3ba9981f87d92b709d190e018bcbf047eccf63 -size 94826 diff --git a/xmldiff-2.6.3.tar.gz b/xmldiff-2.6.3.tar.gz new file mode 100644 index 0000000..520f8bb --- /dev/null +++ b/xmldiff-2.6.3.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19b030b3fa37d1f0b5c5ad9ada9059884c3bf2c751c5dd8f1eb4ed49cfe3fc60 +size 86132