glib-mkenums: best effort attempt on non-utf8 encoded files.

Some source files aren't valid UTF-8; they contain, for example,
ISO-8859-1 accented characters in authors' names.
Replace invalid data with a replacement '?' character and print a
warning to keep things working.
Based on a patch from Christoph Reiter in
https://bugzilla.gnome.org/show_bug.cgi?id=785113#c20
This commit is contained in:
Patrick Welche 2017-10-23 13:59:58 +01:00 committed by Philip Withnall
parent 6a597f93f6
commit b6b74402d6

View File

@ -26,14 +26,6 @@ the GNU General Public License which can be found in the
GLib source package. Sources, examples and contact
information are available at http://www.gtk.org'''
# Python 2 defaults to ASCII in case stdout is redirected; the check below
# detects that situation by sys.stdout.encoding being None.
# Wrapping stdout in a writer for the locale's preferred encoding should make
# this match Python 3, which uses the locale encoding.
if sys.stdout.encoding is None:
output_stream = codecs.getwriter(
locale.getpreferredencoding())(sys.stdout)
else:
# stdout already reports an encoding, so it is used unchanged.
output_stream = sys.stdout
# pylint: disable=too-few-public-methods
class Color:
'''ANSI Terminal colors'''
@ -81,6 +73,29 @@ def write_output(output):
global output_stream
print(output, file=output_stream)
# Python 2 defaults to ASCII in case stdout is redirected; the check below
# detects that situation by sys.stdout.encoding being None.
# Wrapping stdout in a writer for the locale's preferred encoding should make
# this match Python 3, which uses the locale encoding.
if sys.stdout.encoding is None:
output_stream = codecs.getwriter(
locale.getpreferredencoding())(sys.stdout)
else:
# stdout already reports an encoding, so it is used unchanged.
output_stream = sys.stdout
# Some source files aren't UTF-8 and the old perl version didn't care.
# Replace invalid data with a replacement character to keep things working.
# https://bugzilla.gnome.org/show_bug.cgi?id=785113#c20
def replace_and_warn(err):
    '''Codec error handler: warn about undecodable data and substitute '?'.

    err is the UnicodeError instance passed in by the codec machinery; its
    reason, start, end and object attributes are used to build the warning.

    Returns a (replacement, resume_position) tuple: a single '?' stands in
    for the offending data and processing resumes at err.end.
    '''
    # Up to 7 characters of context either side of the offending data.
    # The lower bound is clamped to 0: a negative index would count from the
    # end of the buffer, giving an empty or unrelated slice whenever the bad
    # data lies within the first 7 positions of the input.
    context_start = max(err.start - 7, 0)
    print_warning('UnicodeWarning: {} at {} ({})'.format(
        err.reason, err.start,
        err.object[context_start:err.end + 7]))
    return ('?', err.end)


# Make the handler selectable by name, e.g. io.open(..., errors='replace_and_warn').
codecs.register_error('replace_and_warn', replace_and_warn)
# glib-mkenums.py
# Information about the current enumeration
flags = None # Is enumeration a bitmask?
@ -157,7 +172,8 @@ def parse_entries(file, file_name):
m = re.match(r'\#include\s*<([^>]*)>', line)
if m:
newfilename = os.path.join("..", m.group(1))
newfile = io.open(newfilename, encoding="utf-8")
newfile = io.open(newfilename, encoding="utf-8",
errors="replace_and_warn")
if not parse_entries(newfile, newfilename):
return False
@ -253,7 +269,7 @@ def read_template_file(file):
}
in_ = 'junk'
ifile = io.open(file, encoding="utf-8")
ifile = io.open(file, encoding="utf-8", errors="replace_and_warn")
for line in ifile:
m = re.match(r'\/\*\*\*\s+(BEGIN|END)\s+([\w-]+)\s+\*\*\*\/', line)
if m:
@ -413,7 +429,8 @@ def process_file(curfilename):
firstenum = True
try:
curfile = io.open(curfilename, encoding="utf-8")
curfile = io.open(curfilename, encoding="utf-8",
errors="replace_and_warn")
except IOError as e:
if e.errno == errno.ENOENT:
print_warning('No file "{}" found.'.format(curfilename))