1
0
mirror of https://github.com/openSUSE/osc.git synced 2025-01-13 17:16:23 +01:00

Fix printing utf-8 characters to stdout

When utf-8 encoded text is transferred over the network and read in chunks,
it frequently happens that a character gets split between 2 chunks.
Decoding such chunks fails on invalid multibyte sequence.

The solution is to forward the bytes to stdout's buffer
and make the utf-8 decoding "someone else's problem".
This commit is contained in:
Daniel Mach 2023-06-16 08:45:51 +02:00
parent 03016a6f7b
commit c3954ff4fd
2 changed files with 12 additions and 17 deletions

View File

@ -1683,10 +1683,10 @@ class Osc(cmdln.Cmdln):
url = makeurl(apiurl, url_path, query)
f = http_POST(url)
while True:
buf = f.read(16384)
if not buf:
data = f.read(16384)
if not data:
break
sys.stdout.write(decode_it(buf))
sys.stdout.buffer.write(data)
elif opts.delete:
print("Delete token")
@ -1713,7 +1713,7 @@ class Osc(cmdln.Cmdln):
# just list token
url = makeurl(apiurl, url_path)
for data in streamfile(url, http_GET):
sys.stdout.write(decode_it(data))
sys.stdout.buffer.write(data)
@cmdln.option('-a', '--attribute', metavar='ATTRIBUTE',
help='affect only a given attribute')
@ -1985,7 +1985,7 @@ class Osc(cmdln.Cmdln):
d = '<attributes><attribute namespace=\'%s\' name=\'%s\' >%s</attribute></attributes>' % (aname[0], aname[1], values)
url = makeurl(apiurl, attributepath)
for data in streamfile(url, http_POST, data=d):
sys.stdout.write(decode_it(data))
sys.stdout.buffer.write(data)
# upload file
if opts.file:
@ -2052,7 +2052,7 @@ class Osc(cmdln.Cmdln):
attributepath.append(opts.attribute)
u = makeurl(apiurl, attributepath)
for data in streamfile(u, http_DELETE):
sys.stdout.write(decode_it(data))
sys.stdout.buffer.write(data)
else:
raise oscerr.WrongOptions('The --delete switch is only for pattern metadata or attributes.')
@ -6382,8 +6382,9 @@ Please submit there instead, or use --nodevelproject to force direct submission.
data = decode_it(data)
while len(data):
if opts.strip_time or conf.config['buildlog_strip_time']:
# FIXME: this is not working when the time is split between 2 chunks
data = buildlog_strip_time(data)
sys.stdout.write(decode_it(data))
sys.stdout.buffer.write(data)
data = f.read(BUFSIZE)
f.close()
@ -9561,10 +9562,10 @@ Please submit there instead, or use --nodevelproject to force direct submission.
raise
while True:
buf = f.read(16384)
if not buf:
data = f.read(16384)
if not data:
break
sys.stdout.write(decode_it(buf))
sys.stdout.buffer.write(data)
@cmdln.option('-m', '--message',
help='add MESSAGE to changes (do not open an editor)')

View File

@ -6925,13 +6925,7 @@ def print_buildlog(
def print_data(data, strip_time=False):
if strip_time:
data = buildlog_strip_time(data)
# hmm calling decode_it is a bit problematic because data might begin
# or end with an, for instance, incomplete utf-8 sequence
sys.stdout.write(decode_it(data.translate(all_bytes, remove_bytes)))
# to protect us against control characters
all_bytes = bytes.maketrans(b'', b'')
remove_bytes = all_bytes[:8] + all_bytes[14:32] # accept tabs and newlines
sys.stdout.buffer.write(data)
query = {'nostream': '1', 'start': '%s' % offset}
if last: