diff --git a/coerce_comments_to_work_with_lxml.patch b/coerce_comments_to_work_with_lxml.patch new file mode 100644 index 0000000..034d5ac --- /dev/null +++ b/coerce_comments_to_work_with_lxml.patch @@ -0,0 +1,65 @@ +From 0c551c9519e47f76f8f185089ed71cb9539b6e00 Mon Sep 17 00:00:00 2001 +From: Geoffrey Sneddon +Date: Mon, 23 Nov 2015 15:17:07 +0000 +Subject: [PATCH] Make lxml tree-builder coerce comments to work with lxml 3.5. + +--- + html5lib/ihatexml.py | 3 +++ + html5lib/treebuilders/etree_lxml.py | 9 +++++---- + 2 files changed, 8 insertions(+), 4 deletions(-) + +diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py +index 0fc7930..5da5d93 100644 +--- a/html5lib/ihatexml.py ++++ b/html5lib/ihatexml.py +@@ -225,6 +225,9 @@ def coerceComment(self, data): + while "--" in data: + warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) + data = data.replace("--", "- -") ++ if data.endswith("-"): ++ warnings.warn("Comments cannot end in a dash", DataLossWarning) ++ data += " " + return data + + def coerceCharacters(self, data): +diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py +index 35d08ef..c6c981f 100644 +--- a/html5lib/treebuilders/etree_lxml.py ++++ b/html5lib/treebuilders/etree_lxml.py +@@ -54,7 +54,7 @@ def _getChildNodes(self): + def testSerializer(element): + rv = [] + finalText = None +- infosetFilter = ihatexml.InfosetFilter() ++ infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True) + + def serializeElement(element, indent=0): + if not hasattr(element, "tag"): +@@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder): + + def __init__(self, namespaceHTMLElements, fullTree=False): + builder = etree_builders.getETreeModule(etree, fullTree=fullTree) +- infosetFilter = self.infosetFilter = ihatexml.InfosetFilter() ++ infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True) + self.namespaceHTMLElements = namespaceHTMLElements + + class 
Attributes(dict): +@@ -257,7 +257,7 @@ def _getData(self): + data = property(_getData, _setData) + + self.elementClass = Element +- self.commentClass = builder.Comment ++ self.commentClass = Comment + # self.fragmentClass = builder.DocumentFragment + _base.TreeBuilder.__init__(self, namespaceHTMLElements) + +@@ -344,7 +344,8 @@ def insertRoot(self, token): + + # Append the initial comments: + for comment_token in self.initial_comments: +- root.addprevious(etree.Comment(comment_token["data"])) ++ comment = self.commentClass(comment_token["data"]) ++ root.addprevious(comment._element) + + # Create the root document and add the ElementTree to it + self.document = self.documentClass() diff --git a/html5lib-0.999999.tar.gz b/html5lib-0.999999.tar.gz deleted file mode 100644 index b2f0232..0000000 --- a/html5lib-0.999999.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e372b66f4997f8e1de970ea755d0a528d7222d2aa9bd55aac078c7ef39b8f6c3 -size 889015 diff --git a/html5lib-0.9999999.tar.gz b/html5lib-0.9999999.tar.gz new file mode 100644 index 0000000..7bf2de8 --- /dev/null +++ b/html5lib-0.9999999.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2612a191a8d5842bfa057e41ba50bbb9dcb722419d2408c78cff4758d0754868 +size 889312 diff --git a/python-html5lib.changes b/python-html5lib.changes index e97d8b7..3f808ee 100644 --- a/python-html5lib.changes +++ b/python-html5lib.changes @@ -1,3 +1,25 @@ +------------------------------------------------------------------- +Fri Feb 12 14:54:04 UTC 2016 - toddrme2178@gmail.com + +- Add coerce_comments_to_work_with_lxml.patch + Fixes compatibility with python-lxml 3.5+, which adds validation + for xml comments. + Should be in next release. +- Re-enable tests. + +------------------------------------------------------------------- +Tue Feb 2 11:46:24 UTC 2016 - toddrme2178@gmail.com + +- Disable broken tests. + Check if they are working again in the next release. 
+ +------------------------------------------------------------------- +Mon Feb 1 10:33:59 UTC 2016 - toddrme2178@gmail.com + +- update to version 0.9999999: + * Fix #195: fix the sanitizer to drop broken URLs (it threw an + exception between 0.9999 and 0.999999). + ------------------------------------------------------------------- Mon Aug 3 16:30:05 UTC 2015 - tbechtold@suse.com diff --git a/python-html5lib.spec b/python-html5lib.spec index c87659e..a2d2409 100644 --- a/python-html5lib.spec +++ b/python-html5lib.spec @@ -1,7 +1,7 @@ # # spec file for package python-html5lib # -# Copyright (c) 2015 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,17 +17,20 @@ Name: python-html5lib -Version: 0.999999 +Version: 0.9999999 Release: 0 Summary: HTML parser based on the WHAT-WG Web Applications 1 License: MIT Group: Development/Languages/Python Url: https://github.com/html5lib/html5lib-python Source: http://pypi.python.org/packages/source/h/html5lib/html5lib-%{version}.tar.gz +# PATCH-FIX-UPSTREAM coerce_comments_to_work_with_lxml.patch - fix comments for new lxml comment validation - https://github.com/html5lib/html5lib-python/issues/224 +Patch0: coerce_comments_to_work_with_lxml.patch BuildRequires: python-Genshi >= 0.7 BuildRequires: python-devel BuildRequires: python-lxml -BuildRequires: python-nose >= 1.3.7 +BuildRequires: python-mock +BuildRequires: python-pytest BuildRequires: python-setuptools >= 18.0.1 BuildRequires: python-six >= 1.9.0 Requires: python-six >= 1.9.0 @@ -55,6 +58,7 @@ simple custom format %prep %setup -q -n html5lib-%{version} +%patch0 -p1 %build python setup.py build @@ -64,7 +68,8 @@ python setup.py install --prefix=%{_prefix} --root=%{buildroot} %if 0%{?suse_version} && 0%{?suse_version} > 1110 %check -nosetests +export LANG=en_US.UTF-8 
+py.test %endif %files