From 2a20ad737e4682b9f304b6c3ba6116f4cc195541 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bosdonnat?= Date: Wed, 14 Feb 2018 10:21:42 +0100 Subject: [PATCH 3/3] inspector: rpm summary and description may not be utf-8 The application inspection code assumes the data in the RPM database are encoded in UTF-8. However this is not always the case. As a basic workaround, try to parse the string to UTF-8 and if that fails, try converting it from latin-1. --- inspector/expected-fedora.img.xml | 4 ++++ lib/inspect-apps.c | 30 +++++++++++++++++++++++---- test-data/phony-guests/fedora-packages.db.txt | 4 ++-- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/inspector/expected-fedora.img.xml b/inspector/expected-fedora.img.xml index df6060a73..c29f9770e 100644 --- a/inspector/expected-fedora.img.xml +++ b/inspector/expected-fedora.img.xml @@ -34,12 +34,16 @@ 1.0 1.fc14 x86_64 + summary with ö + description with ö test2 2.0 2.fc14 x86_64 + summary with ö + description with ö test3 diff --git a/lib/inspect-apps.c b/lib/inspect-apps.c index f0cf16b38..fdea85188 100644 --- a/lib/inspect-apps.c +++ b/lib/inspect-apps.c @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef HAVE_ENDIAN_H #include @@ -43,6 +44,7 @@ #include "guestfs.h" #include "guestfs-internal.h" #include "guestfs-internal-actions.h" +#include "guestfs-utils.h" #include "structs-cleanups.h" #ifdef DB_DUMP @@ -251,7 +253,7 @@ get_rpm_header_tag (guestfs_h *g, const unsigned char *header_start, /* This function parses the RPM header structure to pull out various * tag strings (version, release, arch, etc.). For more detail on the * header format, see: - * http://www.rpm.org/max-rpm/s1-rpm-file-format-rpm-file-format.html#S2-RPM-FILE-FORMAT-HEADER + * http://rpm.org/devel_doc/file_format.html#24-header-format */ /* The minimum header size that makes sense here is 24 bytes. Four @@ -301,6 +303,20 @@ struct read_package_data { struct guestfs_application2_list *apps; }; +static char * +to_utf8 (guestfs_h *g, char *input) +{ + char *out = NULL; + + out = guestfs_int_string_to_utf8 (input, "UTF-8"); + if (!out) { + out = guestfs_int_string_to_utf8 (input, "ISO-8859-1"); + perrorf (g, "Not an UTF-8 or latin-1 string: '%s'", input); + } + + return out; +} + static int read_package (guestfs_h *g, const unsigned char *key, size_t keylen, @@ -311,7 +327,7 @@ read_package (guestfs_h *g, struct rpm_name nkey, *entry; CLEANUP_FREE char *version = NULL, *release = NULL, *epoch_str = NULL, *arch = NULL, *url = NULL, *summary = NULL, - *description = NULL; + *description = NULL, *summary_raw = NULL, *description_raw = NULL; int32_t epoch; /* This function reads one (key, value) pair from the Packages @@ -342,8 +358,14 @@ read_package (guestfs_h *g, epoch_str = get_rpm_header_tag (g, value, valuelen, RPMTAG_EPOCH, 'i'); arch = get_rpm_header_tag (g, value, valuelen, RPMTAG_ARCH, 's'); url = get_rpm_header_tag (g, value, valuelen, RPMTAG_URL, 's'); - summary = get_rpm_header_tag (g, value, valuelen, RPMTAG_SUMMARY, 's'); - description = get_rpm_header_tag (g, value, valuelen, RPMTAG_DESCRIPTION, 's'); + summary_raw = get_rpm_header_tag (g, value, valuelen, RPMTAG_SUMMARY, 's'); + description_raw = get_rpm_header_tag (g, value, valuelen, RPMTAG_DESCRIPTION, 's'); + + /* Try (not too hard) to get UTF-8 */ + if (summary_raw) + summary = to_utf8 (g, summary_raw); + if (description_raw) + description = to_utf8 (g, description_raw); /* The epoch is stored as big-endian integer. */ if (epoch_str) diff --git a/test-data/phony-guests/fedora-packages.db.txt b/test-data/phony-guests/fedora-packages.db.txt index f16a5aa76..927d6eb5f 100644 --- a/test-data/phony-guests/fedora-packages.db.txt +++ b/test-data/phony-guests/fedora-packages.db.txt @@ -5,9 +5,9 @@ h_nelem=3 db_pagesize=4096 HEADER=END \01\00\00\00 - \00\00\00\03\00\00\00\11\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\001.0\001.fc14\00x86_64\00 + \00\00\00\05\00\00\00\33\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\00\00\00\03\ec\00\00\00\00\00\00\00\12\00\00\00\00\00\00\03\ed\00\00\00\00\00\00\00\21\00\00\00\001.0\001.fc14\00x86_64\00summary with \f6\00description with \f6\00 \02\00\00\00 - \00\00\00\03\00\00\00\11\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\002.0\002.fc14\00x86_64\00 + \00\00\00\05\00\00\00\35\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\00\00\00\03\ec\00\00\00\00\00\00\00\12\00\00\00\00\00\00\03\ed\00\00\00\00\00\00\00\22\00\00\00\002.0\002.fc14\00x86_64\00summary with \c3\b6\00description with \c3\b6\00 \03\00\00\00 \00\00\00\03\00\00\00\11\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\003.0\003.fc14\00x86_64\00 DATA=END -- 2.16.1