forked from pool/perl-HTML-Parser
Copy from home:jengelh:branches:devel:languages:perl/perl-HTML-Parser via accept of submit request 28783 revision 3. Request was accepted with message: Reviewed ok OBS-URL: https://build.opensuse.org/request/show/28783 OBS-URL: https://build.opensuse.org/package/show/devel:languages:perl/perl-HTML-Parser?expand=0&rev=8
82 lines
2.4 KiB
Diff
82 lines
2.4 KiB
Diff
commit b9aae1e43eb2c8e989510187cff0ba3e996f9a4c
|
|
Author: Gisle Aas <gisle@aas.no>
|
|
Date: Thu Oct 22 21:45:54 2009 +0200
|
|
|
|
decode_entities confused by trailing incomplete entity
|
|
|
|
Mark Martinec reported crashes when running SpamAssassin, given a
|
|
particular HTML junk mail to parse. The problem was caused by
|
|
HTML::Parser's decode_entities function confusing itself when it
|
|
encountered strings with incomplete entities at the end of the string.
|
|
|
|
diff --git a/t/entities.t b/t/entities.t
|
|
index 7f6a29a..e96501c 100644
|
|
--- a/t/entities.t
|
|
+++ b/t/entities.t
|
|
@@ -1,6 +1,6 @@
|
|
use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);
|
|
|
|
-use Test::More tests => 12;
|
|
+use Test::More tests => 13;
|
|
|
|
$a = "Våre norske tegn bør æres";
|
|
|
|
@@ -71,6 +71,8 @@ is(decode_entities("abc&def&ghi&abc;&def;"), "abc&def&ghi&abc;&def;");
|
|
is(decode_entities("&apos;"), "'");
|
|
is(encode_entities("'", "'"), "&#39;");
|
|
|
|
+is(decode_entities("Attention Home&#959&#969n&#1257rs...1&#1109t T&#1110&#1084e E&#957&#1257&#1075"),
|
|
+ "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}");
|
|
|
|
__END__
|
|
# Quoted from rfc1866.txt
|
|
diff --git a/util.c b/util.c
|
|
index 28fec78..6f56a2b 100644
|
|
--- a/util.c
|
|
+++ b/util.c
|
|
@@ -94,14 +94,14 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
|
|
ent_start = s;
|
|
repl = 0;
|
|
|
|
- if (*s == '#') {
|
|
+ if (s < end && *s == '#') {
|
|
UV num = 0;
|
|
UV prev = 0;
|
|
int ok = 0;
|
|
s++;
|
|
- if (*s == 'x' || *s == 'X') {
|
|
+ if (s < end && (*s == 'x' || *s == 'X')) {
|
|
s++;
|
|
- while (*s) {
|
|
+ while (s < end) {
|
|
char *tmp = strchr(PL_hexdigit, *s);
|
|
if (!tmp)
|
|
break;
|
|
@@ -117,7 +117,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
|
|
}
|
|
}
|
|
else {
|
|
- while (isDIGIT(*s)) {
|
|
+ while (s < end && isDIGIT(*s)) {
|
|
num = num * 10 + (*s - '0');
|
|
if (prev && num < prev) {
|
|
/* overflow */
|
|
@@ -180,7 +180,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
|
|
}
|
|
else {
|
|
char *ent_name = s;
|
|
- while (isALNUM(*s))
|
|
+ while (s < end && isALNUM(*s))
|
|
s++;
|
|
if (ent_name != s && entity2char) {
|
|
SV** svp;
|
|
@@ -216,7 +216,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
|
|
|
|
if (repl) {
|
|
char *repl_allocated = 0;
|
|
- if (*s == ';')
|
|
+ if (s < end && *s == ';')
|
|
s++;
|
|
t--; /* '&' already copied, undo it */
|
|
|