forked from pool/perl-HTML-Parser
- update to 3.64
* Convert files to UTF-8 * Don't allow decode_entities() to generate illegal Unicode chars * Copyright 2009 * Remove redundant (repeated) test * Make parse_file() method use 3-arg open [RT#49434] * Take more care to prepare the char range for encode_entities [RT#50170] * decode_entities confused by trailing incomplete entity * HTTP::Header doc typo fix. * Do not bother tracking style or script, they're ignored. * Bring HTML 5 head elements up to date with WD-html5-20090423. * Improve HeadParser performance. * Doc patch: Make it clearer what the return value from ->parse is - remove last patch (fixed in upstream) OBS-URL: https://build.opensuse.org/package/show/devel:languages:perl/perl-HTML-Parser?expand=0&rev=11
This commit is contained in:
parent
09dbfcc02a
commit
db168c5f48
3
HTML-Parser-3.64.tar.bz2
Normal file
3
HTML-Parser-3.64.tar.bz2
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:61f40a3c1a2fb646c8e54377d58e1bbae231c42fd7b90242b23747016bac5410
|
||||
size 75156
|
@ -1,81 +0,0 @@
|
||||
commit b9aae1e43eb2c8e989510187cff0ba3e996f9a4c
|
||||
Author: Gisle Aas <gisle@aas.no>
|
||||
Date: Thu Oct 22 21:45:54 2009 +0200
|
||||
|
||||
decode_entities confused by trailing incomplete entity
|
||||
|
||||
Mark Martinec reported crashes when running SpamAssassin, given a
|
||||
particular HTML junk mail to parse. The problem was caused by
|
||||
HTML::Parser's decode_entities function confusing itself when it
|
||||
encountered strings with incomplete entities at the end of the string.
|
||||
|
||||
diff --git a/t/entities.t b/t/entities.t
|
||||
index 7f6a29a..e96501c 100644
|
||||
--- a/t/entities.t
|
||||
+++ b/t/entities.t
|
||||
@@ -1,6 +1,6 @@
|
||||
use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);
|
||||
|
||||
-use Test::More tests => 12;
|
||||
+use Test::More tests => 13;
|
||||
|
||||
$a = "Våre norske tegn bør æres";
|
||||
|
||||
@@ -71,6 +71,8 @@ is(decode_entities("abc&def&ghi&abc;&def;"), "abc&def&ghi&abc;&def;");
|
||||
is(decode_entities("&apos;"), "'");
|
||||
is(encode_entities("'", "'"), "&#39;");
|
||||
|
||||
+is(decode_entities("Attention Homeοωnөrs...1ѕt Tімe Eνөг"),
|
||||
+ "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}");
|
||||
|
||||
__END__
|
||||
# Quoted from rfc1866.txt
|
||||
diff --git a/util.c b/util.c
|
||||
index 28fec78..6f56a2b 100644
|
||||
--- a/util.c
|
||||
+++ b/util.c
|
||||
@@ -94,14 +94,14 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
|
||||
ent_start = s;
|
||||
repl = 0;
|
||||
|
||||
- if (*s == '#') {
|
||||
+ if (s < end && *s == '#') {
|
||||
UV num = 0;
|
||||
UV prev = 0;
|
||||
int ok = 0;
|
||||
s++;
|
||||
- if (*s == 'x' || *s == 'X') {
|
||||
+ if (s < end && (*s == 'x' || *s == 'X')) {
|
||||
s++;
|
||||
- while (*s) {
|
||||
+ while (s < end) {
|
||||
char *tmp = strchr(PL_hexdigit, *s);
|
||||
if (!tmp)
|
||||
break;
|
||||
@@ -117,7 +117,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
|
||||
}
|
||||
}
|
||||
else {
|
||||
- while (isDIGIT(*s)) {
|
||||
+ while (s < end && isDIGIT(*s)) {
|
||||
num = num * 10 + (*s - '0');
|
||||
if (prev && num < prev) {
|
||||
/* overflow */
|
||||
@@ -180,7 +180,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
|
||||
}
|
||||
else {
|
||||
char *ent_name = s;
|
||||
- while (isALNUM(*s))
|
||||
+ while (s < end && isALNUM(*s))
|
||||
s++;
|
||||
if (ent_name != s && entity2char) {
|
||||
SV** svp;
|
||||
@@ -216,7 +216,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
|
||||
|
||||
if (repl) {
|
||||
char *repl_allocated = 0;
|
||||
- if (*s == ';')
|
||||
+ if (s < end && *s == ';')
|
||||
s++;
|
||||
t--; /* '&' already copied, undo it */
|
||||
|
@ -1,3 +1,21 @@
|
||||
-------------------------------------------------------------------
|
||||
Wed Jan 13 17:40:06 CET 2010 - anicka@suse.cz
|
||||
|
||||
- update to 3.64
|
||||
* Convert files to UTF-8
|
||||
* Don't allow decode_entities() to generate illegal Unicode chars
|
||||
* Copyright 2009
|
||||
* Remove redundant (repeated) test
|
||||
* Make parse_file() method use 3-arg open [RT#49434]
|
||||
* Take more care to prepare the char range for encode_entities [RT#50170]
|
||||
* decode_entities confused by trailing incomplete entity
|
||||
* HTTP::Header doc typo fix.
|
||||
* Do not bother tracking style or script, they're ignored.
|
||||
* Bring HTML 5 head elements up to date with WD-html5-20090423.
|
||||
* Improve HeadParser performance.
|
||||
* Doc patch: Make it clearer what the return value from ->parse is
|
||||
- remove last patch (fixed in upstream)
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Sun Jan 10 15:43:32 CET 2010 - jengelh@medozas.de
|
||||
|
||||
|
@ -21,13 +21,12 @@
|
||||
Name: perl-HTML-Parser
|
||||
%define cpan_name %( echo %{name} | %{__sed} -e 's,perl-,,' )
|
||||
Summary: HTML parser class
|
||||
Version: 3.61
|
||||
Version: 3.64
|
||||
Release: 3
|
||||
License: Artistic License ..
|
||||
Group: Development/Libraries/Perl
|
||||
Url: http://search.cpan.org/dist/HTML-Parser
|
||||
Source: %{cpan_name}-%{version}.tar.bz2
|
||||
Patch: %{name}-%{version}-entity.diff
|
||||
BuildRoot: %{_tmppath}/%{name}-%{version}-build
|
||||
BuildRequires: perl
|
||||
BuildRequires: perl-macros
|
||||
@ -67,7 +66,6 @@ If event driven parsing does not feel right for your application, you might
|
||||
© 1999-2000 Michael A. Chase. All rights reserved.
|
||||
%prep
|
||||
%setup -q -n %{cpan_name}-%{version}
|
||||
%patch -p1
|
||||
|
||||
%build
|
||||
CFLAGS="$RPM_OPT_FLAGS" perl Makefile.PL
|
||||
|
Loading…
x
Reference in New Issue
Block a user