From c3954ff4fdd9db75420779bc47e8193dd7c718ad Mon Sep 17 00:00:00 2001
From: Daniel Mach <daniel.mach@suse.com>
Date: Fri, 16 Jun 2023 08:45:51 +0200
Subject: [PATCH] Fix printing utf-8 characters to stdout

When utf-8 encoded text is transferred over network and read in chunks,
it frequently happens that a character gets split between 2 chunks.
Decoding such chunks fails on an invalid multibyte sequence.

The solution is to forward the bytes to stdout's buffer
and make the utf-8 decoding "someone else's problem".
---
 osc/commandline.py | 21 +++++++++++----------
 osc/core.py        |  8 +-------
 2 files changed, 12 insertions(+), 17 deletions(-)
diff --git a/osc/commandline.py b/osc/commandline.py
index cc5ca69d..c0310590 100644
--- a/osc/commandline.py
+++ b/osc/commandline.py
@@ -1683,10 +1683,10 @@ class Osc(cmdln.Cmdln):
             url = makeurl(apiurl, url_path, query)
             f = http_POST(url)
             while True:
-                buf = f.read(16384)
-                if not buf:
+                data = f.read(16384)
+                if not data:
                     break
-                sys.stdout.write(decode_it(buf))
+                sys.stdout.buffer.write(data)
 
         elif opts.delete:
             print("Delete token")
@@ -1713,7 +1713,7 @@ class Osc(cmdln.Cmdln):
             # just list token
             url = makeurl(apiurl, url_path)
             for data in streamfile(url, http_GET):
-                sys.stdout.write(decode_it(data))
+                sys.stdout.buffer.write(data)
 
     @cmdln.option('-a', '--attribute', metavar='ATTRIBUTE',
                         help='affect only a given attribute')
@@ -1985,7 +1985,7 @@ class Osc(cmdln.Cmdln):
             d = '<attributes><attribute namespace=\'%s\' name=\'%s\' >%s</attribute></attributes>' % (aname[0], aname[1], values)
             url = makeurl(apiurl, attributepath)
             for data in streamfile(url, http_POST, data=d):
-                sys.stdout.write(decode_it(data))
+                sys.stdout.buffer.write(data)
 
         # upload file
         if opts.file:
@@ -2052,7 +2052,7 @@ class Osc(cmdln.Cmdln):
                 attributepath.append(opts.attribute)
                 u = makeurl(apiurl, attributepath)
                 for data in streamfile(u, http_DELETE):
-                    sys.stdout.write(decode_it(data))
+                    sys.stdout.buffer.write(data)
             else:
                 raise oscerr.WrongOptions('The --delete switch is only for pattern metadata or attributes.')
 
@@ -6382,8 +6382,9 @@ Please submit there instead, or use --nodevelproject to force direct submission.
         data = decode_it(data)
         while len(data):
             if opts.strip_time or conf.config['buildlog_strip_time']:
+                # FIXME: this is not working when the time is split between 2 chunks
                 data = buildlog_strip_time(data)
-            sys.stdout.write(decode_it(data))
+            sys.stdout.buffer.write(data)
             data = f.read(BUFSIZE)
         f.close()
 
@@ -9561,10 +9562,10 @@ Please submit there instead, or use --nodevelproject to force direct submission.
                             raise
 
         while True:
-            buf = f.read(16384)
-            if not buf:
+            data = f.read(16384)
+            if not data:
                 break
-            sys.stdout.write(decode_it(buf))
+            sys.stdout.buffer.write(data)
 
     @cmdln.option('-m', '--message',
                   help='add MESSAGE to changes (do not open an editor)')
diff --git a/osc/core.py b/osc/core.py
index 248adeb2..dd2f00de 100644
--- a/osc/core.py
+++ b/osc/core.py
@@ -6925,13 +6925,7 @@ def print_buildlog(
     def print_data(data, strip_time=False):
         if strip_time:
             data = buildlog_strip_time(data)
-        # hmm calling decode_it is a bit problematic because data might begin
-        # or end with an, for instance, incomplete utf-8 sequence
-        sys.stdout.write(decode_it(data.translate(all_bytes, remove_bytes)))
-
-    # to protect us against control characters
-    all_bytes = bytes.maketrans(b'', b'')
-    remove_bytes = all_bytes[:8] + all_bytes[14:32]  # accept tabs and newlines
+        sys.stdout.buffer.write(data)
 
     query = {'nostream': '1', 'start': '%s' % offset}
     if last: