66 lines
2.7 KiB
Diff
|
From 0c551c9519e47f76f8f185089ed71cb9539b6e00 Mon Sep 17 00:00:00 2001
|
||
|
From: Geoffrey Sneddon <geoffers@gmail.com>
|
||
|
Date: Mon, 23 Nov 2015 15:17:07 +0000
|
||
|
Subject: [PATCH] Make lxml tree-builder coerce comments to work with lxml 3.5.
|
||
|
|
||
|
---
|
||
|
html5lib/ihatexml.py | 3 +++
|
||
|
html5lib/treebuilders/etree_lxml.py | 9 +++++----
|
||
|
2 files changed, 8 insertions(+), 4 deletions(-)
|
||
|
|
||
|
diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
|
||
|
index 0fc7930..5da5d93 100644
|
||
|
--- a/html5lib/ihatexml.py
|
||
|
+++ b/html5lib/ihatexml.py
|
||
|
@@ -225,6 +225,9 @@ def coerceComment(self, data):
|
||
|
while "--" in data:
|
||
|
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
|
||
|
data = data.replace("--", "- -")
|
||
|
+ if data.endswith("-"):
|
||
|
+ warnings.warn("Comments cannot end in a dash", DataLossWarning)
|
||
|
+ data += " "
|
||
|
return data
|
||
|
|
||
|
def coerceCharacters(self, data):
|
||
|
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
|
||
|
index 35d08ef..c6c981f 100644
|
||
|
--- a/html5lib/treebuilders/etree_lxml.py
|
||
|
+++ b/html5lib/treebuilders/etree_lxml.py
|
||
|
@@ -54,7 +54,7 @@ def _getChildNodes(self):
|
||
|
def testSerializer(element):
|
||
|
rv = []
|
||
|
finalText = None
|
||
|
- infosetFilter = ihatexml.InfosetFilter()
|
||
|
+ infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
|
||
|
|
||
|
def serializeElement(element, indent=0):
|
||
|
if not hasattr(element, "tag"):
|
||
|
@@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder):
|
||
|
|
||
|
def __init__(self, namespaceHTMLElements, fullTree=False):
|
||
|
builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
|
||
|
- infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
|
||
|
+ infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
|
||
|
self.namespaceHTMLElements = namespaceHTMLElements
|
||
|
|
||
|
class Attributes(dict):
|
||
|
@@ -257,7 +257,7 @@ def _getData(self):
|
||
|
data = property(_getData, _setData)
|
||
|
|
||
|
self.elementClass = Element
|
||
|
- self.commentClass = builder.Comment
|
||
|
+ self.commentClass = Comment
|
||
|
# self.fragmentClass = builder.DocumentFragment
|
||
|
_base.TreeBuilder.__init__(self, namespaceHTMLElements)
|
||
|
|
||
|
@@ -344,7 +344,8 @@ def insertRoot(self, token):
|
||
|
|
||
|
# Append the initial comments:
|
||
|
for comment_token in self.initial_comments:
|
||
|
- root.addprevious(etree.Comment(comment_token["data"]))
|
||
|
+ comment = self.commentClass(comment_token["data"])
|
||
|
+ root.addprevious(comment._element)
|
||
|
|
||
|
# Create the root document and add the ElementTree to it
|
||
|
self.document = self.documentClass()
|