From 2a20ad737e4682b9f304b6c3ba6116f4cc195541 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bosdonnat?= Date: Wed, 14 Feb 2018 10:21:42 +0100 Subject: [PATCH 3/3] inspector: rpm summary and description may not be utf-8 The application inspection code assumes the data in the RPM database are encoded in UTF-8. However this is not always the case. As a basic workaround, try to parse the string to UTF-8 and if that fails, try converting it from latin-1. --- inspector/expected-fedora.img.xml | 4 ++++ lib/inspect-apps.c | 30 +++++++++++++++++++++++---- test-data/phony-guests/fedora-packages.db.txt | 4 ++-- 3 files changed, 32 insertions(+), 6 deletions(-) Index: libguestfs-1.42.0/inspector/expected-fedora.img.xml =================================================================== --- libguestfs-1.42.0.orig/inspector/expected-fedora.img.xml +++ libguestfs-1.42.0/inspector/expected-fedora.img.xml @@ -34,12 +34,16 @@ 1.0 1.fc14 x86_64 + summary with ö + description with ö test2 2.0 2.fc14 x86_64 + summary with ö + description with ö test3 Index: libguestfs-1.42.0/lib/inspect-apps.c =================================================================== --- libguestfs-1.42.0.orig/lib/inspect-apps.c +++ libguestfs-1.42.0/lib/inspect-apps.c @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef HAVE_ENDIAN_H #include @@ -43,6 +44,7 @@ #include "guestfs.h" #include "guestfs-internal.h" #include "guestfs-internal-actions.h" +#include "guestfs-utils.h" #include "structs-cleanups.h" /* Some limits on what the inspection code will read, for safety. */ @@ -266,7 +268,7 @@ get_rpm_header_tag (guestfs_h *g, const /* This function parses the RPM header structure to pull out various * tag strings (version, release, arch, etc.). 
For more detail on the * header format, see: - * http://www.rpm.org/max-rpm/s1-rpm-file-format-rpm-file-format.html#S2-RPM-FILE-FORMAT-HEADER + * http://rpm.org/devel_doc/file_format.html#24-header-format */ /* The minimum header size that makes sense here is 24 bytes. Four @@ -316,6 +318,27 @@ struct read_package_data { struct guestfs_application2_list *apps; }; +/* Convert INPUT to UTF-8 and return the result, or NULL on failure. + * INPUT is first tried as UTF-8; if that fails it is converted from + * ISO-8859-1 (latin-1). An error is reported through perrorf only + * when both attempts fail. read_package stores the result in a + * CLEANUP_FREE variable, so the caller owns the returned string. + */ +static char * +to_utf8 (guestfs_h *g, char *input) +{ + char *out = NULL; + + out = guestfs_int_string_to_utf8 (input, "UTF-8"); + if (!out) { + out = guestfs_int_string_to_utf8 (input, "ISO-8859-1"); + if (!out) + perrorf (g, "Not an UTF-8 or latin-1 string: '%s'", input); + } + + return out; +} + static int read_package (guestfs_h *g, const unsigned char *key, size_t keylen, @@ -326,7 +349,7 @@ read_package (guestfs_h *g, struct rpm_name nkey, *entry; CLEANUP_FREE char *version = NULL, *release = NULL, *epoch_str = NULL, *arch = NULL, *url = NULL, *summary = NULL, - *description = NULL; + *description = NULL, *summary_raw = NULL, *description_raw = NULL; int32_t epoch; /* This function reads one (key, value) pair from the Packages @@ -357,8 +380,14 @@ read_package (guestfs_h *g, epoch_str = get_rpm_header_tag (g, value, valuelen, RPMTAG_EPOCH, 'i'); arch = get_rpm_header_tag (g, value, valuelen, RPMTAG_ARCH, 's'); url = get_rpm_header_tag (g, value, valuelen, RPMTAG_URL, 's'); - summary = get_rpm_header_tag (g, value, valuelen, RPMTAG_SUMMARY, 's'); - description = get_rpm_header_tag (g, value, valuelen, RPMTAG_DESCRIPTION, 's'); + summary_raw = get_rpm_header_tag (g, value, valuelen, RPMTAG_SUMMARY, 's'); + description_raw = get_rpm_header_tag (g, value, valuelen, RPMTAG_DESCRIPTION, 's'); + + /* Try (not too hard) to get UTF-8 */ + if (summary_raw) + summary = to_utf8 (g, summary_raw); + if (description_raw) + description = to_utf8 (g, description_raw); /* The epoch is stored as big-endian integer.
*/ if (epoch_str) Index: libguestfs-1.42.0/test-data/phony-guests/fedora-packages.db.txt =================================================================== --- libguestfs-1.42.0.orig/test-data/phony-guests/fedora-packages.db.txt +++ libguestfs-1.42.0/test-data/phony-guests/fedora-packages.db.txt @@ -5,9 +5,9 @@ h_nelem=3 db_pagesize=4096 HEADER=END \01\00\00\00 - \00\00\00\03\00\00\00\11\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\001.0\001.fc14\00x86_64\00 + \00\00\00\05\00\00\00\33\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\00\00\00\03\ec\00\00\00\00\00\00\00\12\00\00\00\00\00\00\03\ed\00\00\00\00\00\00\00\21\00\00\00\001.0\001.fc14\00x86_64\00summary with \f6\00description with \f6\00 \02\00\00\00 - \00\00\00\03\00\00\00\11\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\002.0\002.fc14\00x86_64\00 + \00\00\00\05\00\00\00\35\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\00\00\00\03\ec\00\00\00\00\00\00\00\12\00\00\00\00\00\00\03\ed\00\00\00\00\00\00\00\22\00\00\00\002.0\002.fc14\00x86_64\00summary with \c3\b6\00description with \c3\b6\00 \03\00\00\00 \00\00\00\03\00\00\00\11\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\003.0\003.fc14\00x86_64\00 DATA=END