diff --git a/02f69e6243d6c96f78da45fb710a265e5aee2fb5.tar.gz b/02f69e6243d6c96f78da45fb710a265e5aee2fb5.tar.gz deleted file mode 100644 index eb686ee..0000000 --- a/02f69e6243d6c96f78da45fb710a265e5aee2fb5.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b700901f8af180fb0e7d06a5d1b5491dad8a2ae88e141212464c9b7e62a95ecf -size 79888 diff --git a/54b41e87156bc823d5938749d71c4c57adc75b1b.patch b/54b41e87156bc823d5938749d71c4c57adc75b1b.patch new file mode 100644 index 0000000..0ab943c --- /dev/null +++ b/54b41e87156bc823d5938749d71c4c57adc75b1b.patch @@ -0,0 +1,156 @@ +From 54b41e87156bc823d5938749d71c4c57adc75b1b Mon Sep 17 00:00:00 2001 +From: Kevin Boone +Date: Thu, 30 Jun 2022 15:41:59 +0100 +Subject: [PATCH] Fixed handling of URL-encoded spine hrefs + +--- + README.md | 1 + + src/epub2txt.c | 3 ++- + src/util.c | 44 +++++++++++++++++++++++++++++++++++++++++++- + src/util.h | 6 +++++- + src/xhtml.c | 5 +++++ + 5 files changed, 56 insertions(+), 3 deletions(-) + +diff --git a/README.md b/README.md +index 4fbcafc..546242f 100644 +--- a/README.md ++++ b/README.md +@@ -244,6 +244,7 @@ covered. + + Date | Change + -----|------- ++?, Jun 2022 | Fixed handling of URL-encoded spine href's + 2.06, Jun 2022 | Fixed bug in invoking unzip + 2.05, Apr 2022 | Fixed bug with empty metadata tags + 2.04, Apr 2022 | Improved handling of UTF-8 BOMs +diff --git a/src/epub2txt.c b/src/epub2txt.c +index 7e9f4a1..72e0504 100644 +--- a/src/epub2txt.c ++++ b/src/epub2txt.c +@@ -312,7 +312,8 @@ List *epub2txt_get_items (const char *opf, char **error) + char *val2 = r3->attributes[p].value; + if (strcmp (name2, "href") == 0) + { +- list_append (ret, strdup (val2)); ++ char *decoded_val2 = decode_url (val2); ++ list_append (ret, decoded_val2); + } + } + } +diff --git a/src/util.c b/src/util.c +index 853343c..16e7431 100644 +--- a/src/util.c ++++ b/src/util.c +@@ -1,12 +1,14 @@ + /*============================================================================ + epub2txt v2 + util.c +- Copyright (c)2022 Marco Bonelli, GPL v3.0 ++ Copyright (c)2022 Marco Bonelli, Kevin Boone, GPL v3.0 + ============================================================================*/ + + #include + #include + #include ++#include ++#include + #include + #include + #include "util.h" +@@ -16,6 +18,7 @@ + run_command + Run an helper command through fork + execvp, wait for it to finish and return + its status. Log execvp errors, and abort execution if abort_on_error is TRUE. ++(Marco Bonelli) + *==========================================================================*/ + int run_command (const char *const argv[], BOOL abort_on_error) + { +@@ -39,3 +42,42 @@ int run_command (const char *const argv[], BOOL abort_on_error) + waitpid (pid, &status, 0); + return status; + } ++ ++/*========================================================================== ++ Decode %xx in URL-type strings. The caller must free the resulting ++ string, which will be no longer than the input. ++ (Kevin Boone) ++*==========================================================================*/ ++char *decode_url (const char *url) ++ { ++ char *ret = malloc (strlen (url) + 2); ++ ++ int len = 0; ++ for (; *url; len++) ++ { ++ if (*url == '%' && url[1] && url[2] && ++ isxdigit(url[1]) && isxdigit(url[2])) ++ { ++ char url1 = url[1]; ++ char url2 = url[2]; ++ url1 -= url1 <= '9' ? '0' : (url1 <= 'F' ? 'A' : 'a')-10; ++ url2 -= url2 <= '9' ? '0' : (url2 <= 'F' ? 'A' : 'a')-10; ++ ret[len] = 16 * url1 + url2; ++ url += 3; ++ continue; ++ } ++ else if (*url == '+') ++ { ++ /* I have not tested this piece of the function, because I have not ++ seen any instances of '+' (meaning space) in a spine href */ ++ url += 1; ++ ret[len] = ' '; ++ } ++ ret[len] = *url++; ++ } ++ ret[len] = '\0'; ++ ++ return ret; ++ } ++ ++ +diff --git a/src/util.h b/src/util.h +index 2685a02..6b0c197 100644 +--- a/src/util.h ++++ b/src/util.h +@@ -1,7 +1,7 @@ + /*============================================================================ + epub2txt v2 + util.h +- Copyright (c)2022 Marco Bonelli, GPL v3.0 ++ Copyright (c)2022 Marco Bonelli, Kevin Boone GPL v3.0 + ============================================================================*/ + + #pragma once +@@ -9,3 +9,7 @@ + #include "defs.h" + + int run_command (const char *const argv[], BOOL abort_on_error); ++ ++/** Decode %xx in URL-type strings. The caller must free the resulting ++ string, which will be no longer than the input. */ ++char *decode_url (const char *url); +diff --git a/src/xhtml.c b/src/xhtml.c +index 1338882..fbfceae 100644 +--- a/src/xhtml.c ++++ b/src/xhtml.c +@@ -530,6 +530,8 @@ WString *xhtml_transform_char (uint32_t c, BOOL to_ascii) + ============================================================================*/ + WString *xhtml_translate_entity (const WString *entity) + { ++ /* Program flow in this function is very ugly, and prone to memory ++ leaks when modified. The whole thing needs to be rewritten */ + char out[20]; + IN + char *in = wstring_to_utf8 (entity); +@@ -569,8 +571,11 @@ WString *xhtml_translate_entity (const WString *entity) + WString *ret = wstring_create_empty(); + wstring_append_c (ret, (uint32_t)v); + OUT ++ free (s); ++ free (in); + return ret; + } ++ free (s); + } + else + { diff --git a/ac4e73fa79202ccc106b36c06c20e36c37345a58.tar.gz b/ac4e73fa79202ccc106b36c06c20e36c37345a58.tar.gz new file mode 100644 index 0000000..e7d25e8 --- /dev/null +++ b/ac4e73fa79202ccc106b36c06c20e36c37345a58.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b067c613c4cb4e3069a06cd90fa8b80c7ec9e187c296c66337daf5b14a87c259 +size 80852 diff --git a/epub2txt2.changes b/epub2txt2.changes index ae7fd38..b82b1c2 100644 --- a/epub2txt2.changes +++ b/epub2txt2.changes @@ -1,3 +1,12 @@ +------------------------------------------------------------------- +Thu Jul 28 03:10:37 UTC 2022 - Soc Virnyl Estela + +- Update to version 2.06: + * No changelog available + +- Added 54b41e87156bc823d5938749d71c4c57adc75b1b.patch: + * Fixed handling of URL-encoded spine hrefs + ------------------------------------------------------------------- Sun May 1 08:20:56 UTC 2022 - Soc Virnyl Estela diff --git a/epub2txt2.spec b/epub2txt2.spec index d5696f2..18ecea0 100644 --- a/epub2txt2.spec +++ b/epub2txt2.spec @@ -16,16 +16,17 @@ # -%global _commit_hash "02f69e6243d6c96f78da45fb710a265e5aee2fb5" +%global _commit_hash ac4e73fa79202ccc106b36c06c20e36c37345a58 Name: epub2txt2 -Version: 2.04 +Version: 2.06 Release: 0 Summary: Simple command-line utility for extracting text from EPUB documents License: GPL-3.0-only Group: Productivity/Text/Utilities URL: https://github.com/kevinboone/epub2txt2 -Source0: https://github.com/kevinboone/epub2txt2/archive/02f69e6243d6c96f78da45fb710a265e5aee2fb5.tar.gz +Source0: https://github.com/kevinboone/epub2txt2/archive/%{_commit_hash}.tar.gz +Patch0: 54b41e87156bc823d5938749d71c4c57adc75b1b.patch BuildRequires: gcc BuildRequires: make Requires: unzip @@ -34,7 +35,7 @@ Requires: unzip Simple command-line utility for extracting text from EPUB documents %prep -%setup -n %{name}-%{_commit_hash} +%autosetup -n %{name}-%{_commit_hash} -p1 %build %make_build