mirror of
https://github.com/openSUSE/osc.git
synced 2025-02-22 18:22:12 +01:00
141 lines
3.5 KiB
Python
141 lines
3.5 KiB
Python
"""
|
|
Functions that manipulate with XML.
|
|
"""
|
|
|
|
import io
|
|
import xml.sax.saxutils
|
|
from typing import Union
|
|
from xml.etree import ElementTree as ET
|
|
|
|
|
|
def xml_escape(string):
|
|
"""
|
|
Escape the string so it's safe to use in XML and xpath.
|
|
"""
|
|
entities = {
|
|
'"': """,
|
|
"'": "'",
|
|
}
|
|
if isinstance(string, bytes):
|
|
return xml.sax.saxutils.escape(string.decode("utf-8"), entities=entities).encode("utf-8")
|
|
return xml.sax.saxutils.escape(string, entities=entities)
|
|
|
|
|
|
def xml_unescape(string):
|
|
"""
|
|
Decode XML entities in the string.
|
|
"""
|
|
entities = {
|
|
""": '"',
|
|
"'": "'",
|
|
}
|
|
if isinstance(string, bytes):
|
|
return xml.sax.saxutils.unescape(string.decode("utf-8"), entities=entities).encode("utf-8")
|
|
return xml.sax.saxutils.unescape(string, entities=entities)
|
|
|
|
|
|
def xml_strip_text(node):
|
|
"""
|
|
Recursively strip inner text in nodes:
|
|
- if text contains only whitespaces
|
|
- if node contains child nodes
|
|
"""
|
|
if node.text and not node.text.strip():
|
|
node.text = None
|
|
elif len(node) != 0:
|
|
node.text = None
|
|
for child in node:
|
|
xml_strip_text(child)
|
|
|
|
|
|
def xml_indent_compat(elem, level=0):
|
|
"""
|
|
XML indentation code for python < 3.9.
|
|
Source: http://effbot.org/zone/element-lib.htm#prettyprint
|
|
"""
|
|
i = "\n" + level * " "
|
|
if isinstance(elem, ET.ElementTree):
|
|
elem = elem.getroot()
|
|
if len(elem):
|
|
if not elem.text or not elem.text.strip():
|
|
elem.text = i + " "
|
|
for e in elem:
|
|
xml_indent_compat(e, level + 1)
|
|
if not e.tail or not e.tail.strip():
|
|
e.tail = i + " "
|
|
if not e.tail or not e.tail.strip():
|
|
e.tail = i
|
|
else:
|
|
if level and (not elem.tail or not elem.tail.strip()):
|
|
elem.tail = i
|
|
|
|
|
|
def xml_indent(root):
|
|
"""
|
|
Indent XML so it looks pretty after printing or saving to file.
|
|
"""
|
|
if hasattr(ET, "indent"):
|
|
# ElementTree supports indent() in Python 3.9 and newer
|
|
xml_strip_text(root)
|
|
ET.indent(root)
|
|
else:
|
|
xml_indent_compat(root)
|
|
|
|
|
|
def _extend_parser_error_msg(e: ET.ParseError, text: Union[str, bytes]):
|
|
from ..output import tty
|
|
|
|
y, x = e.position
|
|
text = text.splitlines()[y-1][x-1:]
|
|
|
|
if isinstance(text, bytes):
|
|
text = text.decode("utf-8")
|
|
|
|
new_text = ""
|
|
for char in text:
|
|
if char >= " ":
|
|
new_text += char
|
|
continue
|
|
byte = ord(char)
|
|
char = f"0x{byte:0>2X}"
|
|
char = tty.colorize(char, "bg_red")
|
|
new_text += char
|
|
e.msg += ": " + new_text
|
|
|
|
|
|
def xml_fromstring(text: str):
|
|
"""
|
|
xml.etree.ElementTree.fromstring() wrapper that extends error message in ParseError
|
|
exceptions with a snippet of the broken XML.
|
|
"""
|
|
try:
|
|
return ET.fromstring(text)
|
|
except ET.ParseError as e:
|
|
_extend_parser_error_msg(e, text)
|
|
raise
|
|
|
|
|
|
def xml_parse(source):
|
|
"""
|
|
xml.etree.ElementTree.parse() wrapper that extends error message in ParseError
|
|
exceptions with a snippet of the broken XML.
|
|
"""
|
|
if isinstance(source, str):
|
|
# source is a file name
|
|
with open(source, "rb") as f:
|
|
data = f.read()
|
|
else:
|
|
# source is an IO object
|
|
data = source.read()
|
|
|
|
if isinstance(data, bytes):
|
|
f = io.BytesIO(data)
|
|
else:
|
|
f = io.StringIO(data)
|
|
|
|
try:
|
|
return ET.parse(f)
|
|
except ET.ParseError as e:
|
|
_extend_parser_error_msg(e, data)
|
|
raise
|