python-lxml/lxml-fix-attribute-quoting.patch
Jan Matejek cb3e4ae097 Accepting request 487430 from home:matejcik:branches:devel:languages:python
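- lxml-fix-attribute-quoting.patch - stabilize attribute entity encoding
  across platforms: read UTF-8 bytes of attribute values as unsigned char,
  so non-ASCII characters are written as hexadecimal character references
  (e.g. &#xF6;) independently of the signedness of plain char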
- force-regenerate C code from Cython sources

OBS-URL: https://build.opensuse.org/request/show/487430
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-lxml?expand=0&rev=94
2017-04-11 17:15:01 +00:00

96 lines
3.6 KiB
Diff

Index: lxml-3.7.3/src/lxml/tests/test_incremental_xmlfile.py
===================================================================
--- lxml-3.7.3.orig/src/lxml/tests/test_incremental_xmlfile.py
+++ lxml-3.7.3/src/lxml/tests/test_incremental_xmlfile.py
@@ -430,7 +430,7 @@ class HtmlFileTestCase(_XmlFileTestCaseB
with xf.element("tagname", attrib={"attr": _str('"misquöted\\u3344\\U00013344"')}):
xf.write("foo")
- self.assertXml('<tagname attr="&quot;misqu&#246;ted&#13124;&#78660;&quot;">foo</tagname>')
+ self.assertXml('<tagname attr="&quot;misqu&#xF6;ted&#x3344;&#x13344;&quot;">foo</tagname>')
def test_unescaped_script(self):
with etree.htmlfile(self._file) as xf:
Index: lxml-3.7.3/src/lxml/serializer.pxi
===================================================================
--- lxml-3.7.3.orig/src/lxml/serializer.pxi
+++ lxml-3.7.3/src/lxml/serializer.pxi
@@ -481,6 +481,7 @@ cdef unsigned char *xmlSerializeHexCharR
cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
cdef const char *base
cdef const char *cur
+ cdef const unsigned char *ucur
cdef unsigned char tmp[12]
cdef int val = 0
@@ -546,42 +547,44 @@ cdef _write_attr_string(tree.xmlOutputBu
cur += 1
base = cur
- elif (cur[0] >= 0x80) and (cur[1] != 0):
+ elif (<const unsigned char>cur[0] >= 0x80) and (cur[1] != 0):
if (base != cur):
tree.xmlOutputBufferWrite(buf, cur - base, base)
- if (cur[0] < 0xC0):
+ ucur = <const unsigned char *>cur
+
+ if (ucur[0] < 0xC0):
# invalid UTF-8 sequence
- val = cur[0]
+ val = ucur[0]
l = 1
- elif (cur[0] < 0xE0):
- val = (cur[0]) & 0x1F
+ elif (ucur[0] < 0xE0):
+ val = (ucur[0]) & 0x1F
val <<= 6
- val |= (cur[1]) & 0x3F
+ val |= (ucur[1]) & 0x3F
l = 2
- elif ((cur[0] < 0xF0) and (cur[2] != 0)):
- val = (cur[0]) & 0x0F
+ elif ((ucur[0] < 0xF0) and (ucur[2] != 0)):
+ val = (ucur[0]) & 0x0F
val <<= 6
- val |= (cur[1]) & 0x3F
+ val |= (ucur[1]) & 0x3F
val <<= 6
- val |= (cur[2]) & 0x3F
+ val |= (ucur[2]) & 0x3F
l = 3
- elif ((cur[0] < 0xF8) and (cur[2] != 0) and (cur[3] != 0)):
- val = (cur[0]) & 0x07
+ elif ((ucur[0] < 0xF8) and (ucur[2] != 0) and (ucur[3] != 0)):
+ val = (ucur[0]) & 0x07
val <<= 6
- val |= (cur[1]) & 0x3F
+ val |= (ucur[1]) & 0x3F
val <<= 6
- val |= (cur[2]) & 0x3F
+ val |= (ucur[2]) & 0x3F
val <<= 6
- val |= (cur[3]) & 0x3F
+ val |= (ucur[3]) & 0x3F
l = 4
else:
# invalid UTF-8 sequence
- val = cur[0]
+ val = ucur[0]
l = 1
if ((l == 1) or (not tree.xmlIsCharQ(val))):
@@ -590,7 +593,7 @@ cdef _write_attr_string(tree.xmlOutputBu
# We could do multiple things here. Just save
# as a char ref
xmlSerializeHexCharRef(tmp, val)
- tree.xmlOutputBufferWrite(buf, -1, <const char*> tmp)
+ tree.xmlOutputBufferWrite(buf, len(tmp), <const char*> tmp)
cur += l
base = cur