commit ed220dfd8eb019a6ae4d3e27900b4488dcb2e3482e045610d66b39ab2b2cd0fc
Author: Todd R <toddrme2178@gmail.com>
Date:   Thu May 31 15:18:39 2018 +0000

    Accepting request 613293 from devel:languages:python:misc
    
    Try to convert buggy unicode text to a less broken variant.
    
    OBS-URL: https://build.opensuse.org/request/show/613293
    OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-ftfy?expand=0&rev=1

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..9b03811
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,23 @@
+## Default LFS
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.bsp filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.gem filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.jar filter=lfs diff=lfs merge=lfs -text
+*.lz filter=lfs diff=lfs merge=lfs -text
+*.lzma filter=lfs diff=lfs merge=lfs -text
+*.obscpio filter=lfs diff=lfs merge=lfs -text
+*.oxt filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.rpm filter=lfs diff=lfs merge=lfs -text
+*.tbz filter=lfs diff=lfs merge=lfs -text
+*.tbz2 filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.ttf filter=lfs diff=lfs merge=lfs -text
+*.txz filter=lfs diff=lfs merge=lfs -text
+*.whl filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..57affb6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.osc
diff --git a/ftfy-5.3.0.tar.gz b/ftfy-5.3.0.tar.gz
new file mode 100644
index 0000000..49a1843
--- /dev/null
+++ b/ftfy-5.3.0.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ba702d5138f9b35df32b55920c9466208608108f1f3d5de1a68c17e3d68cb7f
+size 53827
diff --git a/python-ftfy.changes b/python-ftfy.changes
new file mode 100644
index 0000000..bf4a95c
--- /dev/null
+++ b/python-ftfy.changes
@@ -0,0 +1,206 @@
+-------------------------------------------------------------------
+Wed May 16 16:10:48 UTC 2018 - toddrme2178@gmail.com
+
+- Update to Version 5.3 (January 25, 2018)
+    * A heuristic has been too conservative since version 4.2, causing a regression
+      compared to previous versions: ftfy would fail to fix mojibake of common
+      characters such as `á` when seen in isolation. A new heuristic now makes it
+      possible to fix more of these common cases with less evidence.
+- Update to Version 5.2 (November 27, 2017)
+    * The command-line tool will not accept the same filename as its input
+      and output. (Previously, this would write a zero-length file.)
+    * The `uncurl_quotes` fixer, which replaces curly quotes with straight quotes,
+      now also replaces MODIFIER LETTER APOSTROPHE.
+    * Codepoints that contain two Latin characters crammed together for legacy
+      encoding reasons are replaced by those two separate characters, even in NFC
+      mode. We formerly did this just with ligatures such as `ﬁ` and `Ĳ`, but now
+      this includes the Afrikaans digraph `ŉ` and Serbian/Croatian digraphs such as
+      `ǆ`.
+- Update to Version 5.1.1 and 4.4.3 (May 15, 2017)
+  - These releases fix two unrelated problems with the tests, one in each version.
+    * v5.1.1: fixed the CLI tests (which are new in v5) so that they pass
+      on Windows, as long as the Python output encoding is UTF-8.
+    * v4.4.3: added the `# coding: utf-8` declaration to two files that were
+      missing it, so that tests can run on Python 2.
+- Update to Version 5.1 (April 7, 2017)
+    * Removed the dependency on `html5lib` by dropping support for Python 3.2.
+      We previously used the dictionary `html5lib.constants.entities` to decode
+      HTML entities.  In Python 3.3 and later, that exact dictionary is now in the
+      standard library as `html.entities.html5`.
+    * Moved many test cases about how particular text should be fixed into
+      `test_cases.json`, which may ease porting to other languages.
+- Update to Version 5.0.2 and 4.4.2 (March 21, 2017)
+    * Added a `MANIFEST.in` that puts files such as the license file and this
+      changelog inside the source distribution.
+- Update to Version 5.0.1 and 4.4.1 (March 10, 2017)
+  - Bug fix:
+    * The `unescape_html` fixer will decode entities between `&#128;` and `&#159;`
+      as what they would be in Windows-1252, even without the help of
+      `fix_encoding`.
+      This better matches what Web browsers do, and fixes a regression that version
+      4.4 introduced in an example that uses `&#133;` as an ellipsis.
+- Update to Version 5.0 (February 17, 2017)
+  - Breaking changes:
+    * Dropped support for Python 2. If you need Python 2 support, you should get
+      version 4.4, which has the same features as this version.
+    * The top-level functions require their arguments to be given as keyword
+      arguments.
+- Update to Version 4.4.0 (February 17, 2017)
+  - Heuristic changes:
+    * ftfy can now fix mojibake involving the Windows-1250 or ISO-8859-2 encodings.
+    * The `fix_entities` fixer is now applied after `fix_encoding`. This makes
+      more situations resolvable when both fixes are needed.
+    * With a few exceptions for commonly-used characters such as `^`, it is now
+      considered "weird" whenever a diacritic appears in non-combining form,
+      such as the diaeresis character `¨`.
+    * It is also now weird when IPA phonetic letters, besides `ə`, appear next to
+      capital letters.
+    * These changes to the heuristics, and others we've made in recent versions,
+      let us lower the "cost" for fixing mojibake in some encodings, causing them
+      to be fixed in more cases.
+- Update to Version 4.3.1 (January 12, 2017)
+  - Bug fix:
+    * `remove_control_chars` was removing U+0D ('\r') prematurely. That's the
+      job of `fix_line_breaks`.
+- Update to Version 4.3.0 (December 29, 2016)
+    * This version now depends on the `html5lib` and `wcwidth` libraries.
+  - Feature changes:
+    * The `remove_control_chars` fixer will now remove some non-ASCII control
+      characters as well, such as deprecated Arabic control characters and
+      byte-order marks. Bidirectional controls are still left as is.
+      This should have no impact on well-formed text, while cleaning up many
+      characters that the Unicode Consortium deems "not suitable for markup"
+      (see Unicode Technical Report #20).
+    * The `unescape_html` fixer uses a more thorough list of HTML entities,
+      which it imports from `html5lib`.
+    * `ftfy.formatting` now uses `wcwidth` to compute the width that a string
+      will occupy in a text console.
+  - Heuristic changes:
+    * Updated the data file of Unicode character categories to Unicode 9, as used
+      in Python 3.6.0. (No matter what version of Python you're on, ftfy uses the
+      same data.)
+  - Pending deprecations:
+    * The `remove_bom` option will become deprecated in 5.0, because it has been
+      superseded by `remove_control_chars`.
+    * ftfy 5.0 will remove the previously deprecated name `fix_text_encoding`. It
+      was renamed to `fix_encoding` in 4.0.
+    * ftfy 5.0 will require Python 3.2 or later, as planned. Python 2 users, please
+      specify `ftfy < 5` in your dependencies if you haven't already.
+- Update to Version 4.2.0 (September 28, 2016)
+  - Heuristic changes:
+    * Math symbols next to currency symbols are no longer considered 'weird' by the
+      heuristic. This fixes a false positive where text that involved the
+      multiplication sign and British pounds or euros (as in '5×£35') could turn
+      into Hebrew letters.
+    * A heuristic that used to be a bonus for certain punctuation now also gives a
+      bonus to successfully decoding other common codepoints, such as the
+      non-breaking space, the degree sign, and the byte order mark.
+    * In version 4.0, we tried to "future-proof" the categorization of emoji (as a
+      kind of symbol) to include codepoints that would likely be assigned to emoji
+      later. The future happened, and there are even more emoji than we expected.
+      We have expanded the range to include those emoji, too.
+      ftfy is still mostly based on information from Unicode 8 (as Python 3.5 is),
+      but this expanded range should include the emoji from Unicode 9 and 10.
+    * Emoji are increasingly being modified by variation selectors and skin-tone
+      modifiers. Those codepoints are now grouped with 'symbols' in ftfy, so they
+      fit right in with emoji, instead of being considered 'marks' as their Unicode
+      category would suggest.
+      This enables fixing mojibake that involves iOS's new diverse emoji.
+    * An old heuristic that wasn't necessary anymore considered Latin text with
+      high-numbered codepoints to be 'weird', but this is normal in languages such
+      as Vietnamese and Azerbaijani. This does not seem to have caused any false
+      positives, but it caused ftfy to be too reluctant to fix some cases of broken
+      text in those languages.
+      The heuristic has been changed, and all languages that use Latin letters
+      should be on even footing now.
+- Update to Version 4.1.1 (April 13, 2016)
+    * Bug fix: in the command-line interface, the `-e` option had no effect on
+      Python 3 when using standard input. Now, it correctly lets you specify
+      a different encoding for standard input.
+- Update to Version 4.1.0 (February 25, 2016)
+  - Heuristic changes:
+    * ftfy can now deal with "lossy" mojibake. If your text has been run through
+      a strict Windows-1252 decoder, such as the one in Python, it may contain
+      the replacement character � (U+FFFD) where there were bytes that are
+      unassigned in Windows-1252.
+      Although ftfy won't recover the lost information, it can now detect this
+      situation, replace the entire lossy character with �, and decode the rest of
+      the characters. Previous versions would be unable to fix any string that
+      contained U+FFFD.
+      As an example, text in curly quotes that gets corrupted `â€œ like this â€�`
+      now gets fixed to be `“ like this �`.
+    * Updated the data file of Unicode character categories to Unicode 8.0, as used
+      in Python 3.5.0. (No matter what version of Python you're on, ftfy uses the
+      same data.)
+    * Heuristics now count characters such as `~` and `^` as punctuation instead
+      of wacky math symbols, improving the detection of mojibake in some edge cases.
+  - New features:
+    * A new module, `ftfy.formatting`, can be used to justify Unicode text in a
+      monospaced terminal. It takes into account that each character can take up
+      anywhere from 0 to 2 character cells.
+    * Internally, the `utf-8-variants` codec was simplified and optimized.
+- Update to Version 4.0.0 (April 10, 2015)
+  - Breaking changes:
+    * The default normalization form is now NFC, not NFKC. NFKC replaces a large
+      number of characters with 'equivalent' characters, and some of these
+      replacements are useful, but some are not desirable to do by default.
+    * The `fix_text` function has some new options that perform more targeted
+      operations that are part of NFKC normalization, such as
+      `fix_character_width`, without requiring hitting all your text with the huge
+      mallet that is NFKC.
+    * The `remove_unsafe_private_use` parameter has been removed entirely, after
+      two versions of deprecation. The function name `fix_bad_encoding` is also
+      gone.
+  - New features:
+    * Fixers for strange new forms of mojibake, including particularly clear cases
+      of mixed UTF-8 and Windows-1252.
+    * New heuristics, so that ftfy can fix more stuff, while maintaining
+      approximately zero false positives.
+    * The command-line tool trusts you to know what encoding your *input* is in,
+      and assumes UTF-8 by default. You can still tell it to guess with the `-g`
+      option.
+    * The command-line tool can be configured with options, and can be used as a
+      pipe.
+    * Recognizes characters that are new in Unicode 7.0, as well as emoji from
+      Unicode 8.0+ that may already be in use on iOS.
+  - Deprecations:
+    * `fix_text_encoding` is being renamed again, for conciseness and consistency.
+      It's now simply called `fix_encoding`. The name `fix_text_encoding` is
+      available but emits a warning.
+  - Pending deprecations:
+    * Python 2.6 support is largely coincidental.
+    * Python 2.7 support is on notice. If you use Python 2, be sure to pin a
+      version of ftfy less than 5.0 in your requirements.
+
+- Implement single-spec version
+
+-------------------------------------------------------------------
+Mon Jul 13 13:12:38 UTC 2015 - toddrme2178@gmail.com
+
+- Fix building on SLES 11
+
+-------------------------------------------------------------------
+Thu May  7 07:07:50 UTC 2015 - jweberhofer@weberhofer.at
+
+- Use the tar-ball from pypi.python.org
+
+-------------------------------------------------------------------
+Mon May  4 15:04:36 UTC 2015 - jweberhofer@weberhofer.at
+
+- Updated to version 3.4.0
+
+  * ftfy.fixes.fix_surrogates will fix all 16-bit surrogate codepoints, which
+    would otherwise break various encoding and output functions.
+
+  * remove_unsafe_private_use emits a warning, and will disappear in the next
+    minor or major version.
+
+- Updated to version 3.3.1
+
+  * restores compatibility with Python 2.6.
+
+-------------------------------------------------------------------
+Mon Aug 18 12:59:42 UTC 2014 - jweberhofer@weberhofer.at
+
+- Initial RPM package for version 3.3.0
+
diff --git a/python-ftfy.spec b/python-ftfy.spec
new file mode 100644
index 0000000..0853482
--- /dev/null
+++ b/python-ftfy.spec
@@ -0,0 +1,83 @@
+#
+# spec file for package python-ftfy
+#
+# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany.
+#
+# All modifications and additions to the file contributed by third parties
+# remain the property of their copyright owners, unless otherwise agreed
+# upon. The license for this file, and modifications and additions to the
+# file, is the same license as for the pristine package itself (unless the
+# license for the pristine package is not an Open Source License, in which
+# case the license is the MIT License). An "Open Source License" is a
+# license that conforms to the Open Source Definition (Version 1.9)
+# published by the Open Source Initiative.
+
+# Please submit bugfixes or comments via https://bugs.opensuse.org/
+
+
+# Fallback used only when python_module is not already defined: expand each
+# requested module name to its python- and python3- prefixed package names.
+%{?!python_module:%define python_module() python-%{**} python3-%{**}}
+# Do not build a Python 2 flavor; upstream dropped Python 2 support in ftfy 5.0.
+%define         skip_python2 1
+Name:           python-ftfy
+Version:        5.3.0
+Release:        0
+License:        MIT
+Summary:        Fixes some problems with Unicode text after the fact
+Url:            https://github.com/LuminosoInsight/python-ftfy
+Group:          Development/Languages/Python
+Source:         https://files.pythonhosted.org/packages/source/f/ftfy/ftfy-%{version}.tar.gz
+BuildRequires:  %{python_module devel}
+BuildRequires:  %{python_module setuptools}
+BuildRequires:  fdupes
+BuildRequires:  python-rpm-macros
+# SECTION test requirements
+BuildRequires:  %{python_module nose}
+BuildRequires:  %{python_module pytest}
+BuildRequires:  %{python_module pytest-runner}
+BuildRequires:  %{python_module wcwidth}
+# /SECTION
+Requires:       python-wcwidth
+BuildArch:      noarch
+
+%python_subpackages
+
+%description
+Ftfy makes Unicode text less broken and more consistent.
+
+The most interesting kind of brokenness that this resolves
+is when someone has encoded Unicode with one standard and
+decoded it with a different one.
+
+
+%prep
+%setup -q -n ftfy-%{version}
+
+%build
+%python_build
+
+%install
+%python_install
+# Run fdupes over the site-packages directory of each flavor in the buildroot.
+%python_expand %fdupes %{buildroot}%{$python_sitelib}
+
+%check
+# Run py.test for each Python flavor against the installed tree: PYTHONPATH
+# points at the buildroot site-packages directory and PATH is extended with
+# the buildroot bin directory; bytecode writing is disabled and LANG is UTF-8.
+%{python_expand export PYTHONDONTWRITEBYTECODE=1
+export LANG=en_US.UTF-8
+export PYTHONPATH=%{buildroot}%{$python_sitelib}
+export PATH="$PATH:%{buildroot}%{_bindir}"
+py.test-%{$python_bin_suffix}
+}
+
+%files %{python_files}
+%doc CHANGELOG.md README.md
+%license LICENSE.txt
+# The ftfy executable is listed for the python3 flavor only.
+%python3_only %{_bindir}/ftfy
+%{python_sitelib}/*
+
+%changelog