From c3954ff4fdd9db75420779bc47e8193dd7c718ad Mon Sep 17 00:00:00 2001 From: Daniel Mach Date: Fri, 16 Jun 2023 08:45:51 +0200 Subject: [PATCH] Fix printing utf-8 characters to stdout When utf-8 encoded text is transferred over network and read in chunks, it frequently happens that a character gets split between 2 chunks. Decoding such chunks fails on invalid multibyte sequence. The solution is to forward the bytes to stdout's buffer and make the utf-8 decoding "someone else's problem". --- osc/commandline.py | 21 +++++++++++---------- osc/core.py | 8 +------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/osc/commandline.py b/osc/commandline.py index cc5ca69d..c0310590 100644 --- a/osc/commandline.py +++ b/osc/commandline.py @@ -1683,10 +1683,10 @@ class Osc(cmdln.Cmdln): url = makeurl(apiurl, url_path, query) f = http_POST(url) while True: - buf = f.read(16384) - if not buf: + data = f.read(16384) + if not data: break - sys.stdout.write(decode_it(buf)) + sys.stdout.buffer.write(data) elif opts.delete: print("Delete token") @@ -1713,7 +1713,7 @@ class Osc(cmdln.Cmdln): # just list token url = makeurl(apiurl, url_path) for data in streamfile(url, http_GET): - sys.stdout.write(decode_it(data)) + sys.stdout.buffer.write(data) @cmdln.option('-a', '--attribute', metavar='ATTRIBUTE', help='affect only a given attribute') @@ -1985,7 +1985,7 @@ class Osc(cmdln.Cmdln): d = '%s' % (aname[0], aname[1], values) url = makeurl(apiurl, attributepath) for data in streamfile(url, http_POST, data=d): - sys.stdout.write(decode_it(data)) + sys.stdout.buffer.write(data) # upload file if opts.file: @@ -2052,7 +2052,7 @@ class Osc(cmdln.Cmdln): attributepath.append(opts.attribute) u = makeurl(apiurl, attributepath) for data in streamfile(u, http_DELETE): - sys.stdout.write(decode_it(data)) + sys.stdout.buffer.write(data) else: raise oscerr.WrongOptions('The --delete switch is only for pattern metadata or attributes.') @@ -6382,8 +6382,9 @@ Please submit there instead, 
or use --nodevelproject to force direct submission. data = decode_it(data) while len(data): if opts.strip_time or conf.config['buildlog_strip_time']: + # FIXME: this is not working when the time is split between 2 chunks data = buildlog_strip_time(data) - sys.stdout.write(decode_it(data)) + sys.stdout.buffer.write(data) data = f.read(BUFSIZE) f.close() @@ -9561,10 +9562,10 @@ Please submit there instead, or use --nodevelproject to force direct submission. raise while True: - buf = f.read(16384) - if not buf: + data = f.read(16384) + if not data: break - sys.stdout.write(decode_it(buf)) + sys.stdout.buffer.write(data) @cmdln.option('-m', '--message', help='add MESSAGE to changes (do not open an editor)') diff --git a/osc/core.py b/osc/core.py index 248adeb2..dd2f00de 100644 --- a/osc/core.py +++ b/osc/core.py @@ -6925,13 +6925,7 @@ def print_buildlog( def print_data(data, strip_time=False): if strip_time: data = buildlog_strip_time(data) - # hmm calling decode_it is a bit problematic because data might begin - # or end with an, for instance, incomplete utf-8 sequence - sys.stdout.write(decode_it(data.translate(all_bytes, remove_bytes))) - - # to protect us against control characters - all_bytes = bytes.maketrans(b'', b'') - remove_bytes = all_bytes[:8] + all_bytes[14:32] # accept tabs and newlines + sys.stdout.buffer.write(data) query = {'nostream': '1', 'start': '%s' % offset} if last: