diff --git a/perl-XML-SAX-0.96-utf8.diff b/perl-XML-SAX-0.96-utf8.diff new file mode 100644 index 0000000..af46e20 --- /dev/null +++ b/perl-XML-SAX-0.96-utf8.diff @@ -0,0 +1,57 @@ +--- SAX/PurePerl/Reader/Stream.pm ++++ SAX/PurePerl/Reader/Stream.pm +@@ -70,6 +70,54 @@ + my $self = shift; + my ($encoding) = @_; + # warn("set encoding to: $encoding\n"); ++ ++ # make sure that the buffer used to detect the encoding ++ # does not end in the middle of a utf8 sequence ++ if ($encoding eq 'UTF-8' && ++ !$self->[EOF] && ++ !utf8::is_utf8($self->[BUFFER]) && # make sure we do not do it twice ++ length($self->[BUFFER]) > 5) { ++ ++ my $x = reverse(substr($self->[BUFFER], -5)); ++ my $y = 0; ++ ++ # skip all the bytes at the end of buffer ++ # starting with bits 10 (continuation bytes of utf8 sequence) ++ while ($x ne "" && (ord($x) & 0xc0) == 0x80) { ++ $y--; ++ $x = substr($x, 1); ++ } ++ ++ # if $x is ascii character, do nothing ++ # otherwise we must take a look how many ++ # continuation bytes we need ++ if ((ord($x) & 0xc0) == 0xc0) { ++ $x = ord($x); ++ if (($x & 0xe0) == 0xc0) { # the sequence contains one more byte ++ $y++; ++ } elsif (($x & 0xf0) == 0xe0) { # ...2 bytes ++ $y += 2; ++ } elsif (($x & 0xf8) == 0xf0) { # ...3 bytes ++ $y += 3; ++ } elsif (($x & 0xfc) == 0xf8) { # ...4 bytes ++ $y += 4; ++ } elsif (($x & 0xfe) == 0xfc) { # ...5 bytes ++ $y += 5; ++ } ++ ++ # read the last sequence in the buffer completely, if needed ++ if ($y > 0) { ++ my $buf; ++ my $bytesread = read($self->[FH], $buf, $y); ++ if ($bytesread) { ++ $self->[BUFFER] .= $buf; ++ } elsif (defined($bytesread)) { ++ $self->[EOF]++; ++ } ++ } ++ } ++ } ++ + XML::SAX::PurePerl::Reader::switch_encoding_stream($self->[FH], $encoding); + XML::SAX::PurePerl::Reader::switch_encoding_string($self->[BUFFER], $encoding); + $self->[ENCODING] = $encoding; diff --git a/perl-XML-SAX.changes b/perl-XML-SAX.changes index 434c198..1881fc5 100644 --- a/perl-XML-SAX.changes +++ b/perl-XML-SAX.changes @@ 
-1,3 +1,13 @@ +------------------------------------------------------------------- +Thu Jan 15 22:50:00 CET 2009 - anicka@suse.cz + +- add patch fixing utf8 parsing (bnc#459794) + +------------------------------------------------------------------- +Thu Jan 15 13:35:37 CET 2009 - anicka@suse.cz + +- enable testsuite (bnc#466010) + ------------------------------------------------------------------- Wed Sep 10 17:03:21 CEST 2008 - anicka@suse.cz diff --git a/perl-XML-SAX.spec b/perl-XML-SAX.spec index 8f59b96..7b1514a 100644 --- a/perl-XML-SAX.spec +++ b/perl-XML-SAX.spec @@ -1,7 +1,7 @@ # # spec file for package perl-XML-SAX (Version 0.96) # -# Copyright (c) 2008 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2009 SUSE LINUX Products GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -21,7 +21,7 @@ Name: perl-XML-SAX BuildRequires: libxml2-devel perl-XML-NamespaceSupport Version: 0.96 -Release: 1 +Release: 4 Requires: perl = %{perl_version} Requires: perl-XML-NamespaceSupport AutoReqProv: on @@ -30,6 +30,7 @@ License: Artistic License Url: http://www.cpan.org/modules/by-module/XML/ Summary: XML::SAX Perl Module Source: XML-SAX-%{version}.tar.bz2 +Patch: %{name}-%{version}-utf8.diff BuildRoot: %{_tmppath}/%{name}-%{version}-build %description @@ -44,14 +45,16 @@ JAXP specification (SAX part), only without the javaness. 
%prep %setup -q -n XML-SAX-%{version} +%patch %build perl Makefile.PL make -#make test + +%check +make test %install -rm -rf $RPM_BUILD_ROOT make DESTDIR=$RPM_BUILD_ROOT install_vendor cat << EOF > $RPM_BUILD_ROOT/%{perl_vendorlib}/XML/SAX/ParserDetails.ini [XML::SAX::PurePerl] @@ -60,7 +63,7 @@ EOF %perl_process_packlist %clean -#rm -rf $RPM_BUILD_ROOT +rm -rf $RPM_BUILD_ROOT %files %defattr(-,root,root) @@ -72,6 +75,10 @@ EOF /var/adm/perl-modules/%{name} %changelog +* Thu Jan 15 2009 anicka@suse.cz +- add patch fixing utf8 parsing (bnc#459794) +* Thu Jan 15 2009 anicka@suse.cz +- enable testsuite (bnc#466010) * Wed Sep 10 2008 anicka@suse.cz - update to 0.96 * Fix handling of numeric character entities in attribute