1
0
mirror of https://github.com/openSUSE/osc.git synced 2025-02-22 18:22:12 +01:00

141 lines
3.5 KiB
Python
Raw Normal View History

"""
Functions for manipulating XML.
"""
import io
import xml.sax.saxutils
from typing import Union
from xml.etree import ElementTree as ET
def xml_escape(string: Union[str, bytes]) -> Union[str, bytes]:
    """
    Escape the string so it's safe to use in XML and xpath.

    In addition to ``&``, ``<`` and ``>`` (handled by
    ``xml.sax.saxutils.escape()``), both quote characters are replaced with
    their predefined XML entities.

    :param string: Text to escape. ``bytes`` are decoded as UTF-8, escaped
                   and encoded back to UTF-8.
    :return: Escaped text of the same type as the input.
    """
    entities = {
        '"': "&quot;",
        "'": "&apos;",
    }
    if isinstance(string, bytes):
        escaped = xml.sax.saxutils.escape(string.decode("utf-8"), entities=entities)
        return escaped.encode("utf-8")
    return xml.sax.saxutils.escape(string, entities=entities)
def xml_unescape(string: Union[str, bytes]) -> Union[str, bytes]:
    """
    Decode XML entities in the string.

    Reverses ``&lt;``, ``&gt;`` and ``&amp;`` (handled by
    ``xml.sax.saxutils.unescape()``) as well as the ``&quot;`` and ``&apos;``
    entities.

    :param string: Text to decode. ``bytes`` are decoded as UTF-8, unescaped
                   and encoded back to UTF-8.
    :return: Unescaped text of the same type as the input.
    """
    entities = {
        "&quot;": '"',
        "&apos;": "'",
    }
    if isinstance(string, bytes):
        unescaped = xml.sax.saxutils.unescape(string.decode("utf-8"), entities=entities)
        return unescaped.encode("utf-8")
    return xml.sax.saxutils.unescape(string, entities=entities)
def xml_strip_text(node):
    """
    Recursively strip inner text in nodes:
    - if text contains only whitespaces
    - if node contains child nodes

    The tree is modified in place; ``tail`` texts are left untouched.

    :param node: An ``Element``, or an ``ElementTree`` whose root element is
                 then processed.
    """
    if isinstance(node, ET.ElementTree):
        # An ElementTree has no ``text`` attribute; work on its root instead.
        node = node.getroot()
        if node is None:
            return

    has_children = len(node) != 0
    whitespace_only = bool(node.text) and not node.text.strip()
    if has_children or whitespace_only:
        node.text = None

    for child in node:
        xml_strip_text(child)
def xml_indent_compat(elem, level=0):
    """
    XML indentation code for python < 3.9.
    Source: http://effbot.org/zone/element-lib.htm#prettyprint

    Rewrites whitespace-only (or missing) ``text`` and ``tail`` values in
    place so that every nesting level is indented by two spaces, which is
    also the default of ``ET.indent()``. Non-blank text is never modified.

    :param elem: An ``Element`` or an ``ElementTree`` to indent.
    :param level: Nesting depth of ``elem``; used by the recursion.
    """
    if isinstance(elem, ET.ElementTree):
        elem = elem.getroot()

    indent_unit = "  "
    # Whitespace that precedes the closing tag of ``elem`` (and follows ``elem``
    # itself when it is a leaf), and whitespace that precedes each child.
    own_indent = "\n" + level * indent_unit
    child_indent = own_indent + indent_unit

    def is_blank(value):
        return not value or not value.strip()

    if not len(elem):
        # Leaf: only the tail matters; the top-level element keeps its tail.
        if level and is_blank(elem.tail):
            elem.tail = own_indent
        return

    if is_blank(elem.text):
        elem.text = child_indent

    last_child = None
    for child in elem:
        xml_indent_compat(child, level + 1)
        if is_blank(child.tail):
            child.tail = child_indent
        last_child = child

    # The last child is followed by the closing tag of ``elem``, one level up.
    if is_blank(last_child.tail):
        last_child.tail = own_indent
def xml_indent(root):
    """
    Pretty-print helper: indent the XML in place so it reads well when
    printed or written to a file.
    """
    native_indent = getattr(ET, "indent", None)
    if native_indent is None:
        # Python < 3.9 has no ElementTree.indent(); use the bundled fallback
        xml_indent_compat(root)
        return

    # ElementTree supports indent() in Python 3.9 and newer
    xml_strip_text(root)
    native_indent(root)
def _extend_parser_error_msg(e: ET.ParseError, text: Union[str, bytes]):
    """
    Append a snippet of the broken XML to the message of a ``ParseError``.

    The snippet is the remainder of the line reported in ``e.position``,
    starting one character before the reported column. Characters below 0x20
    are replaced with their ``0xNN`` code highlighted via ``tty.colorize()``.

    The function never raises because of the document contents: if the
    reported line does not exist in ``text`` the message is left unchanged,
    and undecodable bytes are rendered as backslash escapes.

    :param e: The exception to extend; ``e.msg`` is modified in place.
    :param text: The XML document that failed to parse.
    """
    from ..output import tty

    line_no, column = e.position

    # Split on bytes: bytes.splitlines() breaks only on \n, \r and \r\n, which
    # are the XML line breaks. str.splitlines() additionally breaks on control
    # characters such as \x0b, \x0c, \x1c-\x1e and \x85 and would therefore
    # pick the wrong line in documents that contain them.
    raw = text.encode("utf-8", errors="replace") if isinstance(text, str) else text
    lines = raw.splitlines()
    if not 0 < line_no <= len(lines):
        # Empty document or an error reported past the last line: there is
        # nothing to quote, and the original ParseError must not be masked.
        return

    # Invalid UTF-8 is a likely reason for the parse error; show such bytes
    # as \xNN instead of failing with UnicodeDecodeError.
    line = lines[line_no - 1].decode("utf-8", errors="backslashreplace")

    # Clamp the start: column 0 would otherwise become index -1 and select
    # only the last character of the line.
    snippet = line[max(column - 1, 0):]

    parts = []
    for char in snippet:
        if char >= " ":
            parts.append(char)
        else:
            parts.append(tty.colorize(f"0x{ord(char):0>2X}", "bg_red"))
    e.msg += ": " + "".join(parts)
def xml_fromstring(text: str):
    """
    Parse an XML document from ``text`` the same way as
    xml.etree.ElementTree.fromstring() and return its root element.

    If parsing fails, the ParseError is re-raised after its message has been
    extended with a snippet of the broken XML.
    """
    try:
        root = ET.fromstring(text)
    except ET.ParseError as parse_error:
        _extend_parser_error_msg(parse_error, text)
        raise
    return root
def xml_parse(source):
    """
    xml.etree.ElementTree.parse() wrapper that extends error message in ParseError
    exceptions with a snippet of the broken XML.

    The whole document is read into memory first so that the snippet can be
    taken from it when parsing fails.

    :param source: A file name (``str``, ``bytes`` or path-like object) or an
                   IO object providing ``read()``.
    :return: The parsed ``ElementTree``.
    :raises xml.etree.ElementTree.ParseError: If the document is not
            well-formed; the message includes a snippet of the broken XML.
    """
    if hasattr(source, "read"):
        # source is an IO object
        data = source.read()
    else:
        # source is a file name; anything without read() is opened, which is
        # the same rule ET.parse() applies and also covers pathlib.Path
        with open(source, "rb") as f:
            data = f.read()

    # IO objects may yield either bytes or text; wrap the data accordingly
    buffer = io.BytesIO(data) if isinstance(data, bytes) else io.StringIO(data)

    try:
        return ET.parse(buffer)
    except ET.ParseError as e:
        _extend_parser_error_msg(e, data)
        raise