642aa315f9
Add coerce_comments_to_work_with_lxml.patch

OBS-URL: https://build.opensuse.org/request/show/359049
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-html5lib?expand=0&rev=20
66 lines
2.7 KiB
Diff
66 lines
2.7 KiB
Diff
From 0c551c9519e47f76f8f185089ed71cb9539b6e00 Mon Sep 17 00:00:00 2001
|
|
From: Geoffrey Sneddon <geoffers@gmail.com>
|
|
Date: Mon, 23 Nov 2015 15:17:07 +0000
|
|
Subject: [PATCH] Make lxml tree-builder coerce comments to work with lxml 3.5.
|
|
|
|
---
|
|
html5lib/ihatexml.py | 3 +++
|
|
html5lib/treebuilders/etree_lxml.py | 9 +++++----
|
|
2 files changed, 8 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
|
|
index 0fc7930..5da5d93 100644
|
|
--- a/html5lib/ihatexml.py
|
|
+++ b/html5lib/ihatexml.py
|
|
@@ -225,6 +225,9 @@ def coerceComment(self, data):
|
|
while "--" in data:
|
|
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
|
|
data = data.replace("--", "- -")
|
|
+ if data.endswith("-"):
|
|
+ warnings.warn("Comments cannot end in a dash", DataLossWarning)
|
|
+ data += " "
|
|
return data
|
|
|
|
def coerceCharacters(self, data):
|
|
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
|
|
index 35d08ef..c6c981f 100644
|
|
--- a/html5lib/treebuilders/etree_lxml.py
|
|
+++ b/html5lib/treebuilders/etree_lxml.py
|
|
@@ -54,7 +54,7 @@ def _getChildNodes(self):
|
|
def testSerializer(element):
|
|
rv = []
|
|
finalText = None
|
|
- infosetFilter = ihatexml.InfosetFilter()
|
|
+ infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
|
|
|
|
def serializeElement(element, indent=0):
|
|
if not hasattr(element, "tag"):
|
|
@@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder):
|
|
|
|
def __init__(self, namespaceHTMLElements, fullTree=False):
|
|
builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
|
|
- infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
|
|
+ infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
|
|
self.namespaceHTMLElements = namespaceHTMLElements
|
|
|
|
class Attributes(dict):
|
|
@@ -257,7 +257,7 @@ def _getData(self):
|
|
data = property(_getData, _setData)
|
|
|
|
self.elementClass = Element
|
|
- self.commentClass = builder.Comment
|
|
+ self.commentClass = Comment
|
|
# self.fragmentClass = builder.DocumentFragment
|
|
_base.TreeBuilder.__init__(self, namespaceHTMLElements)
|
|
|
|
@@ -344,7 +344,8 @@ def insertRoot(self, token):
|
|
|
|
# Append the initial comments:
|
|
for comment_token in self.initial_comments:
|
|
- root.addprevious(etree.Comment(comment_token["data"]))
|
|
+ comment = self.commentClass(comment_token["data"])
|
|
+ root.addprevious(comment._element)
|
|
|
|
# Create the root document and add the ElementTree to it
|
|
self.document = self.documentClass()
|