133 lines
5.7 KiB
Diff
133 lines
5.7 KiB
Diff
|
From 2a20ad737e4682b9f304b6c3ba6116f4cc195541 Mon Sep 17 00:00:00 2001
|
||
|
From: =?UTF-8?q?C=C3=A9dric=20Bosdonnat?= <cbosdonnat@suse.com>
|
||
|
Date: Wed, 14 Feb 2018 10:21:42 +0100
|
||
|
Subject: [PATCH 3/3] inspector: rpm summary and description may not be utf-8
|
||
|
|
||
|
The application inspection code assumes the data in the RPM database
|
||
|
are encoded in UTF-8. However this is not always the case.
|
||
|
|
||
|
As a basic workaround, try to parse the string as UTF-8 and if that
|
||
|
fails, try converting it from latin-1.
|
||
|
---
|
||
|
inspector/expected-fedora.img.xml | 4 ++++
|
||
|
lib/inspect-apps.c | 30 +++++++++++++++++++++++----
|
||
|
test-data/phony-guests/fedora-packages.db.txt | 4 ++--
|
||
|
3 files changed, 32 insertions(+), 6 deletions(-)
|
||
|
|
||
|
diff --git a/inspector/expected-fedora.img.xml b/inspector/expected-fedora.img.xml
|
||
|
index df6060a73..c29f9770e 100644
|
||
|
--- a/inspector/expected-fedora.img.xml
|
||
|
+++ b/inspector/expected-fedora.img.xml
|
||
|
@@ -34,12 +34,16 @@
|
||
|
<version>1.0</version>
|
||
|
<release>1.fc14</release>
|
||
|
<arch>x86_64</arch>
|
||
|
+ <summary>summary with ö</summary>
|
||
|
+ <description>description with ö</description>
|
||
|
</application>
|
||
|
<application>
|
||
|
<name>test2</name>
|
||
|
<version>2.0</version>
|
||
|
<release>2.fc14</release>
|
||
|
<arch>x86_64</arch>
|
||
|
+ <summary>summary with ö</summary>
|
||
|
+ <description>description with ö</description>
|
||
|
</application>
|
||
|
<application>
|
||
|
<name>test3</name>
|
||
|
diff --git a/lib/inspect-apps.c b/lib/inspect-apps.c
|
||
|
index f0cf16b38..fdea85188 100644
|
||
|
--- a/lib/inspect-apps.c
|
||
|
+++ b/lib/inspect-apps.c
|
||
|
@@ -22,6 +22,7 @@
|
||
|
#include <stdlib.h>
|
||
|
#include <unistd.h>
|
||
|
#include <string.h>
|
||
|
+#include <iconv.h>
|
||
|
|
||
|
#ifdef HAVE_ENDIAN_H
|
||
|
#include <endian.h>
|
||
|
@@ -43,6 +44,7 @@
|
||
|
#include "guestfs.h"
|
||
|
#include "guestfs-internal.h"
|
||
|
#include "guestfs-internal-actions.h"
|
||
|
+#include "guestfs-utils.h"
|
||
|
#include "structs-cleanups.h"
|
||
|
|
||
|
#ifdef DB_DUMP
|
||
|
@@ -251,7 +253,7 @@ get_rpm_header_tag (guestfs_h *g, const unsigned char *header_start,
|
||
|
/* This function parses the RPM header structure to pull out various
|
||
|
* tag strings (version, release, arch, etc.). For more detail on the
|
||
|
* header format, see:
|
||
|
- * http://www.rpm.org/max-rpm/s1-rpm-file-format-rpm-file-format.html#S2-RPM-FILE-FORMAT-HEADER
|
||
|
+ * http://rpm.org/devel_doc/file_format.html#24-header-format
|
||
|
*/
|
||
|
|
||
|
/* The minimum header size that makes sense here is 24 bytes. Four
|
||
|
@@ -301,6 +303,20 @@ struct read_package_data {
|
||
|
struct guestfs_application2_list *apps;
|
||
|
};
|
||
|
|
||
|
+/* Return INPUT converted to UTF-8 (tried as UTF-8, then latin-1), or NULL. */
|
||
|
+static char *
|
||
|
+to_utf8 (guestfs_h *g, char *input)
|
||
|
+{
|
||
|
+  char *out = guestfs_int_string_to_utf8 (input, "UTF-8");
|
||
|
+
|
||
|
+  if (!out)  /* UTF-8 attempt failed: fall back to latin-1 */
|
||
|
+    out = guestfs_int_string_to_utf8 (input, "ISO-8859-1");
|
||
|
+  if (!out)  /* report an error only when both attempts failed */
|
||
|
+    perrorf (g, "Not an UTF-8 or latin-1 string: '%s'", input);
|
||
|
+
|
||
|
+  return out;
|
||
|
+}
|
||
|
+
|
||
|
static int
|
||
|
read_package (guestfs_h *g,
|
||
|
const unsigned char *key, size_t keylen,
|
||
|
@@ -311,7 +327,7 @@ read_package (guestfs_h *g,
|
||
|
struct rpm_name nkey, *entry;
|
||
|
CLEANUP_FREE char *version = NULL, *release = NULL,
|
||
|
*epoch_str = NULL, *arch = NULL, *url = NULL, *summary = NULL,
|
||
|
- *description = NULL;
|
||
|
+ *description = NULL, *summary_raw = NULL, *description_raw = NULL;
|
||
|
int32_t epoch;
|
||
|
|
||
|
/* This function reads one (key, value) pair from the Packages
|
||
|
@@ -342,8 +358,14 @@ read_package (guestfs_h *g,
|
||
|
epoch_str = get_rpm_header_tag (g, value, valuelen, RPMTAG_EPOCH, 'i');
|
||
|
arch = get_rpm_header_tag (g, value, valuelen, RPMTAG_ARCH, 's');
|
||
|
url = get_rpm_header_tag (g, value, valuelen, RPMTAG_URL, 's');
|
||
|
- summary = get_rpm_header_tag (g, value, valuelen, RPMTAG_SUMMARY, 's');
|
||
|
- description = get_rpm_header_tag (g, value, valuelen, RPMTAG_DESCRIPTION, 's');
|
||
|
+ summary_raw = get_rpm_header_tag (g, value, valuelen, RPMTAG_SUMMARY, 's');
|
||
|
+ description_raw = get_rpm_header_tag (g, value, valuelen, RPMTAG_DESCRIPTION, 's');
|
||
|
+
|
||
|
+ /* Try (not too hard) to get UTF-8 */
|
||
|
+ if (summary_raw)
|
||
|
+ summary = to_utf8 (g, summary_raw);
|
||
|
+ if (description_raw)
|
||
|
+ description = to_utf8 (g, description_raw);
|
||
|
|
||
|
/* The epoch is stored as big-endian integer. */
|
||
|
if (epoch_str)
|
||
|
diff --git a/test-data/phony-guests/fedora-packages.db.txt b/test-data/phony-guests/fedora-packages.db.txt
|
||
|
index f16a5aa76..927d6eb5f 100644
|
||
|
--- a/test-data/phony-guests/fedora-packages.db.txt
|
||
|
+++ b/test-data/phony-guests/fedora-packages.db.txt
|
||
|
@@ -5,9 +5,9 @@ h_nelem=3
|
||
|
db_pagesize=4096
|
||
|
HEADER=END
|
||
|
\01\00\00\00
|
||
|
- \00\00\00\03\00\00\00\11\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\001.0\001.fc14\00x86_64\00
|
||
|
+ \00\00\00\05\00\00\00\33\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\00\00\00\03\ec\00\00\00\00\00\00\00\12\00\00\00\00\00\00\03\ed\00\00\00\00\00\00\00\21\00\00\00\001.0\001.fc14\00x86_64\00summary with \f6\00description with \f6\00
|
||
|
\02\00\00\00
|
||
|
- \00\00\00\03\00\00\00\11\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\002.0\002.fc14\00x86_64\00
|
||
|
+ \00\00\00\05\00\00\00\35\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\00\00\00\03\ec\00\00\00\00\00\00\00\12\00\00\00\00\00\00\03\ed\00\00\00\00\00\00\00\22\00\00\00\002.0\002.fc14\00x86_64\00summary with \c3\b6\00description with \c3\b6\00
|
||
|
\03\00\00\00
|
||
|
\00\00\00\03\00\00\00\11\00\00\03\e9\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\ea\00\00\00\00\00\00\00\04\00\00\00\00\00\00\03\fe\00\00\00\00\00\00\00\0b\00\00\00\003.0\003.fc14\00x86_64\00
|
||
|
DATA=END
|
||
|
--
|
||
|
2.16.1
|
||
|
|