# Copyright (C) 2018 SUSE Linux. All rights reserved.
# This program is free software; it may be used, copied, modified
# and distributed under the terms of the GNU General Public Licence,
# either version 2, or (at your option) any later version.
import builtins
import html
from .. import oscerr


def decode_list(ilist):
    """Decode the elements of a list if needed.

    Elements that are not str (typically bytes) are passed through
    decode_it(); str elements are kept as they are.
    """
    dlist = []
    for elem in ilist:
        if not isinstance(elem, str):
            dlist.append(decode_it(elem))
        else:
            dlist.append(elem)
    return dlist
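
# A hedged usage sketch for decode_list (illustrative comment only, so nothing
# runs on import): bytes elements are decoded, str elements pass through.
#
#     >>> decode_list([b'foo', 'bar', b'caf\xe9'])
#     ['foo', 'bar', 'café']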


def decode_it(obj):
    """Decode the given object unless it is a str.

    If the given object is a str or has no decode method, the object itself
    is returned. Otherwise, try to decode the object using utf-8. If this
    fails due to a UnicodeDecodeError, fall back to latin-1.

    No encoding detection (for instance via the chardet module) is done:
    detection can be very slow for large bytes instances (see #669 and #746)
    and cannot pick the "correct" encoding anyway, because a bytes instance
    can be valid in several encodings at once. utf-8 is tried first simply
    because it is the most common encoding these days.
    """
    if isinstance(obj, str) or not hasattr(obj, 'decode'):
        return obj
    try:
        return obj.decode('utf-8')
    except UnicodeDecodeError:
        return obj.decode('latin-1')
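
# Hedged examples of decode_it's fallback behaviour (illustrative comments
# only, nothing is executed on import):
#
#     >>> decode_it('already a str')   # str: returned unchanged
#     'already a str'
#     >>> decode_it(b'caf\xc3\xa9')    # valid utf-8
#     'café'
#     >>> decode_it(b'caf\xe9')        # invalid utf-8, falls back to latin-1
#     'café'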


def raw_input(*args):
    """Wrap builtins.input() and translate EOF (ctrl-d) into UserAbort."""
    func = builtins.input

    try:
        return func(*args)
    except EOFError:
        # interpret ctrl-d as user abort
        raise oscerr.UserAbort()
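
# Hedged usage sketch (the prompt text below is made up for illustration):
#
#     answer = raw_input('Really delete? [y/N] ')
#
# behaves like builtins.input(), except that ctrl-d raises oscerr.UserAbort
# instead of EOFError.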


def _html_escape(data):
    """Escape &, < and > in data; quote characters are kept (quote=False)."""
    return html.escape(data, quote=False)
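
# Hedged example (illustrative comment): only &, < and > are escaped,
# quote characters are preserved because of quote=False.
#
#     >>> _html_escape('<status code="ok">')
#     '&lt;status code="ok"&gt;'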


def format_table(rows, headers):
    """Format a list of tuples into an equal-width table with headers."""
    # widest cell per column, starting from the header widths
    maxlens = [len(h) for h in headers]
    for r in rows:
        for i, c in enumerate(r):
            maxlens[i] = max(maxlens[i], len(c))
    # build a left-aligned row template such as '{0:<12} {1:<7} {2:<10}'
    tpltpl = []
    for i, m in enumerate(maxlens):
        tpltpl.append('{%s:<%s}' % (i, m))
    templ = ' '.join(tpltpl) + '\n'

    out = templ.format(*headers)
    out += templ.format(*['-' * m for m in maxlens])
    for r in rows:
        out += templ.format(*r)
    return out
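
# Hedged usage sketch for format_table (illustrative values; note that every
# cell must already be a str, because column widths are computed with len()):
#
#     >>> print(format_table([('osc', '1.0.0'), ('obs', '2.10')],
#     ...                    ['name', 'version']))
#     name version
#     ---- -------
#     osc  1.0.0
#     obs  2.10
#
# (trailing column padding is trimmed here for readability)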