1
0
mirror of https://github.com/openSUSE/osc.git synced 2025-02-22 18:22:12 +01:00

141 lines
3.5 KiB
Python

"""
Functions for manipulating XML.
"""
import io
import xml.sax.saxutils
from typing import Union
from xml.etree import ElementTree as ET
def xml_escape(string):
    """
    Escape the string so it's safe to use in XML and xpath.

    On top of the characters handled by ``xml.sax.saxutils.escape()``,
    both quote characters are replaced with their named entities.

    :param string: Text to escape; ``str`` or UTF-8 encoded ``bytes``.
    :return: Escaped text of the same type as the input.
    """
    # quotes are not escaped by saxutils by default, but they matter in attribute values and xpath
    entities = {
        '"': "&quot;",
        "'": "&apos;",
    }
    if isinstance(string, bytes):
        # saxutils works on str only; round-trip bytes through UTF-8
        return xml.sax.saxutils.escape(string.decode("utf-8"), entities=entities).encode("utf-8")
    return xml.sax.saxutils.escape(string, entities=entities)
def xml_unescape(string):
    """
    Decode XML entities in the string.

    On top of the entities handled by ``xml.sax.saxutils.unescape()``,
    the named entities of both quote characters are decoded.

    :param string: Text to decode; ``str`` or UTF-8 encoded ``bytes``.
    :return: Decoded text of the same type as the input.
    """
    # quote entities are not decoded by saxutils by default
    entities = {
        "&quot;": '"',
        "&apos;": "'",
    }
    if isinstance(string, bytes):
        # saxutils works on str only; round-trip bytes through UTF-8
        return xml.sax.saxutils.unescape(string.decode("utf-8"), entities=entities).encode("utf-8")
    return xml.sax.saxutils.unescape(string, entities=entities)
def xml_strip_text(node):
    """
    Recursively drop inner text of nodes, in place.

    The text of a node is set to ``None``:
    - if it consists of whitespace only
    - if the node has any child nodes
    """
    blank_text = bool(node.text) and not node.text.strip()
    if blank_text or len(node) != 0:
        node.text = None

    # descend into the whole subtree
    for child in node:
        xml_strip_text(child)
def xml_indent_compat(elem, level=0):
    """
    XML indentation code for python < 3.9.
    Source: http://effbot.org/zone/element-lib.htm#prettyprint

    Whitespace-only (or missing) ``text`` and ``tail`` values are rewritten in place
    so that each nesting level is indented by two spaces, which matches the default
    of ``ElementTree.indent()``. Text and tails with other content are left untouched.

    :param elem: ``Element`` or ``ElementTree`` to indent.
    :param level: Nesting depth of ``elem``; callers normally leave it at 0.
    """
    indent_unit = "  "
    own_indent = "\n" + level * indent_unit
    child_indent = own_indent + indent_unit

    if isinstance(elem, ET.ElementTree):
        elem = elem.getroot()

    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = child_indent
        for child in elem:
            xml_indent_compat(child, level + 1)
            if not child.tail or not child.tail.strip():
                child.tail = child_indent
        # the closing tag of ``elem`` follows the last child, so it goes one level back
        last_child = elem[-1]
        if not last_child.tail or not last_child.tail.strip():
            last_child.tail = own_indent
    else:
        # the tail of the top-level element is never touched
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = own_indent
def xml_indent(root):
    """
    Indent XML so it looks pretty after printing or saving to file.

    :param root: ``Element`` or ``ElementTree`` to indent; modified in place.
    """
    if not hasattr(ET, "indent"):
        xml_indent_compat(root)
        return

    # ElementTree supports indent() in Python 3.9 and newer
    if isinstance(root, ET.ElementTree):
        # xml_strip_text() reads ``.text`` and iterates over children,
        # which works on elements only; unwrap the tree first
        root = root.getroot()
    xml_strip_text(root)
    ET.indent(root)
def _extend_parser_error_msg(e: ET.ParseError, text: Union[str, bytes]):
from ..output import tty
y, x = e.position
text = text.splitlines()[y-1][x-1:]
if isinstance(text, bytes):
text = text.decode("utf-8")
new_text = ""
for char in text:
if char >= " ":
new_text += char
continue
byte = ord(char)
char = f"0x{byte:0>2X}"
char = tty.colorize(char, "bg_red")
new_text += char
e.msg += ": " + new_text
def xml_fromstring(text: str):
    """
    Parse XML from a string like xml.etree.ElementTree.fromstring() does.

    When parsing fails, the ParseError message is extended with a snippet
    of the broken XML before the exception is re-raised.
    """
    try:
        root = ET.fromstring(text)
    except ET.ParseError as exc:
        _extend_parser_error_msg(exc, text)
        raise
    return root
def xml_parse(source):
    """
    xml.etree.ElementTree.parse() wrapper that extends error message in ParseError
    exceptions with a snippet of the broken XML.

    :param source: File name (``str``, ``bytes`` or path-like) or an object with
                   a ``read()`` method that returns ``str`` or ``bytes``.
    :return: Parsed ``ElementTree``.
    :raises xml.etree.ElementTree.ParseError: If the document is not well-formed.
    """
    if hasattr(source, "read"):
        # source is an IO object
        data = source.read()
    else:
        # source is a file name; anything open() accepts works, including pathlib.Path
        with open(source, "rb") as source_file:
            data = source_file.read()

    # The whole document is kept in memory so a snippet can be shown on error;
    # the parser gets a fresh in-memory stream of the matching type.
    if isinstance(data, bytes):
        stream = io.BytesIO(data)
    else:
        stream = io.StringIO(data)

    try:
        return ET.parse(stream)
    except ET.ParseError as e:
        _extend_parser_error_msg(e, data)
        raise