forked from pool/epub2txt2
- Update to version 2.06:
* No changelog available
- Added 54b41e87156bc823d5938749d71c4c57adc75b1b.patch:
* Fixed handling of URL-encoded spine hrefs
OBS-URL: https://build.opensuse.org/package/show/utilities/epub2txt2?expand=0&rev=18
This commit is contained in:
parent
a88506250f
commit
cf923efd6e
@ -1,3 +0,0 @@
|
|||||||
version https://git-lfs.github.com/spec/v1
|
|
||||||
oid sha256:b700901f8af180fb0e7d06a5d1b5491dad8a2ae88e141212464c9b7e62a95ecf
|
|
||||||
size 79888
|
|
156
54b41e87156bc823d5938749d71c4c57adc75b1b.patch
Normal file
156
54b41e87156bc823d5938749d71c4c57adc75b1b.patch
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
From 54b41e87156bc823d5938749d71c4c57adc75b1b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Kevin Boone <kevin@railwayterrace.com>
|
||||||
|
Date: Thu, 30 Jun 2022 15:41:59 +0100
|
||||||
|
Subject: [PATCH] Fixed handling of URL-encoded spine hrefs
|
||||||
|
|
||||||
|
---
|
||||||
|
README.md | 1 +
|
||||||
|
src/epub2txt.c | 3 ++-
|
||||||
|
src/util.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
|
||||||
|
src/util.h | 6 +++++-
|
||||||
|
src/xhtml.c | 5 +++++
|
||||||
|
5 files changed, 56 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/README.md b/README.md
|
||||||
|
index 4fbcafc..546242f 100644
|
||||||
|
--- a/README.md
|
||||||
|
+++ b/README.md
|
||||||
|
@@ -244,6 +244,7 @@ covered.
|
||||||
|
|
||||||
|
Date | Change
|
||||||
|
-----|-------
|
||||||
|
+?, Jun 2022 | Fixed handling of URL-encoded spine href's
|
||||||
|
2.06, Jun 2022 | Fixed bug in invoking unzip
|
||||||
|
2.05, Apr 2022 | Fixed bug with empty metadata tags
|
||||||
|
2.04, Apr 2022 | Improved handling of UTF-8 BOMs
|
||||||
|
diff --git a/src/epub2txt.c b/src/epub2txt.c
|
||||||
|
index 7e9f4a1..72e0504 100644
|
||||||
|
--- a/src/epub2txt.c
|
||||||
|
+++ b/src/epub2txt.c
|
||||||
|
@@ -312,7 +312,8 @@ List *epub2txt_get_items (const char *opf, char **error)
|
||||||
|
char *val2 = r3->attributes[p].value;
|
||||||
|
if (strcmp (name2, "href") == 0)
|
||||||
|
{
|
||||||
|
- list_append (ret, strdup (val2));
|
||||||
|
+ char *decoded_val2 = decode_url (val2);
|
||||||
|
+ list_append (ret, decoded_val2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
diff --git a/src/util.c b/src/util.c
|
||||||
|
index 853343c..16e7431 100644
|
||||||
|
--- a/src/util.c
|
||||||
|
+++ b/src/util.c
|
||||||
|
@@ -1,12 +1,14 @@
|
||||||
|
/*============================================================================
|
||||||
|
epub2txt v2
|
||||||
|
util.c
|
||||||
|
- Copyright (c)2022 Marco Bonelli, GPL v3.0
|
||||||
|
+ Copyright (c)2022 Marco Bonelli, Kevin Boone, GPL v3.0
|
||||||
|
============================================================================*/
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
+#include <stdlib.h>
|
||||||
|
+#include <ctype.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include "util.h"
|
||||||
|
@@ -16,6 +18,7 @@
|
||||||
|
run_command
|
||||||
|
Run an helper command through fork + execvp, wait for it to finish and return
|
||||||
|
its status. Log execvp errors, and abort execution if abort_on_error is TRUE.
|
||||||
|
+(Marco Bonelli)
|
||||||
|
*==========================================================================*/
|
||||||
|
int run_command (const char *const argv[], BOOL abort_on_error)
|
||||||
|
{
|
||||||
|
@@ -39,3 +42,42 @@ int run_command (const char *const argv[], BOOL abort_on_error)
|
||||||
|
waitpid (pid, &status, 0);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+/*==========================================================================
|
||||||
|
+ Decode %xx in URL-type strings. The caller must free the resulting
|
||||||
|
+ string, which will be no longer than the input.
|
||||||
|
+ (Kevin Boone)
|
||||||
|
+*==========================================================================*/
|
||||||
|
+char *decode_url (const char *url)
|
||||||
|
+ {
|
||||||
|
+ char *ret = malloc (strlen (url) + 2);
|
||||||
|
+
|
||||||
|
+ int len = 0;
|
||||||
|
+ for (; *url; len++)
|
||||||
|
+ {
|
||||||
|
+ if (*url == '%' && url[1] && url[2] &&
|
||||||
|
+ isxdigit(url[1]) && isxdigit(url[2]))
|
||||||
|
+ {
|
||||||
|
+ char url1 = url[1];
|
||||||
|
+ char url2 = url[2];
|
||||||
|
+ url1 -= url1 <= '9' ? '0' : (url1 <= 'F' ? 'A' : 'a')-10;
|
||||||
|
+ url2 -= url2 <= '9' ? '0' : (url2 <= 'F' ? 'A' : 'a')-10;
|
||||||
|
+ ret[len] = 16 * url1 + url2;
|
||||||
|
+ url += 3;
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ else if (*url == '+')
|
||||||
|
+ {
|
||||||
|
+ /* I have not tested this piece of the function, because I have not
|
||||||
|
+ seen any instances of '+' (meaning space) in a spine href */
|
||||||
|
+ url += 1;
|
||||||
|
+ ret[len] = ' ';
|
||||||
|
+ }
|
||||||
|
+ ret[len] = *url++;
|
||||||
|
+ }
|
||||||
|
+ ret[len] = '\0';
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+
|
||||||
|
diff --git a/src/util.h b/src/util.h
|
||||||
|
index 2685a02..6b0c197 100644
|
||||||
|
--- a/src/util.h
|
||||||
|
+++ b/src/util.h
|
||||||
|
@@ -1,7 +1,7 @@
|
||||||
|
/*============================================================================
|
||||||
|
epub2txt v2
|
||||||
|
util.h
|
||||||
|
- Copyright (c)2022 Marco Bonelli, GPL v3.0
|
||||||
|
+ Copyright (c)2022 Marco Bonelli, Kevin Boone GPL v3.0
|
||||||
|
============================================================================*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
@@ -9,3 +9,7 @@
|
||||||
|
#include "defs.h"
|
||||||
|
|
||||||
|
int run_command (const char *const argv[], BOOL abort_on_error);
|
||||||
|
+
|
||||||
|
+/** Decode %xx in URL-type strings. The caller must free the resulting
|
||||||
|
+ string, which will be no longer than the input. */
|
||||||
|
+char *decode_url (const char *url);
|
||||||
|
diff --git a/src/xhtml.c b/src/xhtml.c
|
||||||
|
index 1338882..fbfceae 100644
|
||||||
|
--- a/src/xhtml.c
|
||||||
|
+++ b/src/xhtml.c
|
||||||
|
@@ -530,6 +530,8 @@ WString *xhtml_transform_char (uint32_t c, BOOL to_ascii)
|
||||||
|
============================================================================*/
|
||||||
|
WString *xhtml_translate_entity (const WString *entity)
|
||||||
|
{
|
||||||
|
+ /* Program flow in this function is very ugly, and prone to memory
|
||||||
|
+ leaks when modified. The whole thing needs to be rewritten */
|
||||||
|
char out[20];
|
||||||
|
IN
|
||||||
|
char *in = wstring_to_utf8 (entity);
|
||||||
|
@@ -569,8 +571,11 @@ WString *xhtml_translate_entity (const WString *entity)
|
||||||
|
WString *ret = wstring_create_empty();
|
||||||
|
wstring_append_c (ret, (uint32_t)v);
|
||||||
|
OUT
|
||||||
|
+ free (s);
|
||||||
|
+ free (in);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
+ free (s);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
3
ac4e73fa79202ccc106b36c06c20e36c37345a58.tar.gz
Normal file
3
ac4e73fa79202ccc106b36c06c20e36c37345a58.tar.gz
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:b067c613c4cb4e3069a06cd90fa8b80c7ec9e187c296c66337daf5b14a87c259
|
||||||
|
size 80852
|
@ -1,3 +1,12 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Jul 28 03:10:37 UTC 2022 - Soc Virnyl Estela <socvirnyl.estela@gmail.com>
|
||||||
|
|
||||||
|
- Update to version 2.06:
|
||||||
|
* No changelog available
|
||||||
|
|
||||||
|
- Added 54b41e87156bc823d5938749d71c4c57adc75b1b.patch:
|
||||||
|
* Fixed handling of URL-encoded spine hrefs
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Sun May 1 08:20:56 UTC 2022 - Soc Virnyl Estela <socvirnyl.estela@gmail.com>
|
Sun May 1 08:20:56 UTC 2022 - Soc Virnyl Estela <socvirnyl.estela@gmail.com>
|
||||||
|
|
||||||
|
@ -16,16 +16,17 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
%global _commit_hash "02f69e6243d6c96f78da45fb710a265e5aee2fb5"
|
%global _commit_hash "ac4e73fa79202ccc106b36c06c20e36c37345a58"
|
||||||
|
|
||||||
Name: epub2txt2
|
Name: epub2txt2
|
||||||
Version: 2.04
|
Version: 2.06
|
||||||
Release: 0
|
Release: 0
|
||||||
Summary: Simple command-line utility for extracting text from EPUB documents
|
Summary: Simple command-line utility for extracting text from EPUB documents
|
||||||
License: GPL-3.0-only
|
License: GPL-3.0-only
|
||||||
Group: Productivity/Text/Utilities
|
Group: Productivity/Text/Utilities
|
||||||
URL: https://github.com/kevinboone/epub2txt2
|
URL: https://github.com/kevinboone/epub2txt2
|
||||||
Source0: https://github.com/kevinboone/epub2txt2/archive/02f69e6243d6c96f78da45fb710a265e5aee2fb5.tar.gz
|
Source0: https://github.com/kevinboone/epub2txt2/archive/02f69e6243d6c96f78da45fb710a265e5aee2fb5.tar.gz
|
||||||
|
Patch0: 54b41e87156bc823d5938749d71c4c57adc75b1b.patch
|
||||||
BuildRequires: gcc
|
BuildRequires: gcc
|
||||||
BuildRequires: make
|
BuildRequires: make
|
||||||
Requires: unzip
|
Requires: unzip
|
||||||
@ -34,7 +35,7 @@ Requires: unzip
|
|||||||
Simple command-line utility for extracting text from EPUB documents
|
Simple command-line utility for extracting text from EPUB documents
|
||||||
|
|
||||||
%prep
|
%prep
|
||||||
%setup -n %{name}-%{_commit_hash}
|
%autosetup -n %{name}-%{_commit_hash}
|
||||||
|
|
||||||
%build
|
%build
|
||||||
%make_build
|
%make_build
|
||||||
|
Loading…
Reference in New Issue
Block a user