8
0
forked from pool/perl-XML-SAX
OBS User unknown
2009-01-16 00:52:09 +00:00
committed by Git OBS Bridge
parent cc2faa1956
commit 0bfbcbe1e0
3 changed files with 79 additions and 5 deletions

View File

@@ -0,0 +1,57 @@
--- SAX/PurePerl/Reader/Stream.pm
+++ SAX/PurePerl/Reader/Stream.pm
@@ -70,6 +70,54 @@
my $self = shift;
my ($encoding) = @_;
# warn("set encoding to: $encoding\n");
+
+ # make sure that the buffer used to detect the encoding
+ # does not end in the middle of a utf8 sequence
+ if ($encoding eq 'UTF-8' &&
+ !$self->[EOF] &&
+ !utf8::is_utf8($self->[BUFFER]) && # make sure we do not do it twice
+ length($self->[BUFFER]) > 5) {
+
+ my $x = reverse(substr($self->[BUFFER], -5));
+ my $y = 0;
+
+ # skip all the bytes at the end of the buffer
+ # starting with bits 10 (continuation bytes of utf8 sequence)
+ while ($x ne "" && (ord($x) & 0xc0) == 0x80) {
+ $y--;
+ $x = substr($x, 1);
+ }
+
+ # if $x is an ASCII character, do nothing;
+ # otherwise we must check how many
+ # continuation bytes we need
+ if ((ord($x) & 0xc0) == 0xc0) {
+ $x = ord($x);
+ if (($x & 0xe0) == 0xc0) { # the sequence contains one more byte
+ $y++;
+ } elsif (($x & 0xf0) == 0xe0) { # ...2 bytes
+ $y += 2;
+ } elsif (($x & 0xf8) == 0xf0) { # ...3 bytes
+ $y += 3;
+ } elsif (($x & 0xfc) == 0xf8) { # ...4 bytes
+ $y += 4;
+ } elsif (($x & 0xfe) == 0xfc) { # ...5 bytes
+ $y += 5;
+ }
+
+ # read the last sequence in the buffer completely, if needed
+ if ($y > 0) {
+ my $buf;
+ my $bytesread = read($self->[FH], $buf, $y);
+ if ($bytesread) {
+ $self->[BUFFER] .= $buf;
+ } elsif (defined($bytesread)) {
+ $self->[EOF]++;
+ }
+ }
+ }
+ }
+
XML::SAX::PurePerl::Reader::switch_encoding_stream($self->[FH], $encoding);
XML::SAX::PurePerl::Reader::switch_encoding_string($self->[BUFFER], $encoding);
$self->[ENCODING] = $encoding;

View File

@@ -1,3 +1,13 @@
-------------------------------------------------------------------
Thu Jan 15 22:50:00 CET 2009 - anicka@suse.cz
- add patch fixing utf8 parsing (bnc#459794)
-------------------------------------------------------------------
Thu Jan 15 13:35:37 CET 2009 - anicka@suse.cz
- enable testsuite (bnc#466010)
-------------------------------------------------------------------
Wed Sep 10 17:03:21 CEST 2008 - anicka@suse.cz

View File

@@ -1,7 +1,7 @@
#
# spec file for package perl-XML-SAX (Version 0.96)
#
# Copyright (c) 2008 SUSE LINUX Products GmbH, Nuernberg, Germany.
# Copyright (c) 2009 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -21,7 +21,7 @@
Name: perl-XML-SAX
BuildRequires: libxml2-devel perl-XML-NamespaceSupport
Version: 0.96
Release: 1
Release: 4
Requires: perl = %{perl_version}
Requires: perl-XML-NamespaceSupport
AutoReqProv: on
@@ -30,6 +30,7 @@ License: Artistic License
Url: http://www.cpan.org/modules/by-module/XML/
Summary: XML::SAX Perl Module
Source: XML-SAX-%{version}.tar.bz2
Patch: %{name}-%{version}-utf8.diff
BuildRoot: %{_tmppath}/%{name}-%{version}-build
%description
@@ -44,14 +45,16 @@ JAXP specification (SAX part), only without the javaness.
%prep
%setup -q -n XML-SAX-%{version}
%patch
%build
perl Makefile.PL
make
#make test
%check
make test
%install
rm -rf $RPM_BUILD_ROOT
make DESTDIR=$RPM_BUILD_ROOT install_vendor
cat << EOF > $RPM_BUILD_ROOT/%{perl_vendorlib}/XML/SAX/ParserDetails.ini
[XML::SAX::PurePerl]
@@ -60,7 +63,7 @@ EOF
%perl_process_packlist
%clean
#rm -rf $RPM_BUILD_ROOT
rm -rf $RPM_BUILD_ROOT
%files
%defattr(-,root,root)
@@ -72,6 +75,10 @@ EOF
/var/adm/perl-modules/%{name}
%changelog
* Thu Jan 15 2009 anicka@suse.cz
- add patch fixing utf8 parsing (bnc#459794)
* Thu Jan 15 2009 anicka@suse.cz
- enable testsuite (bnc#466010)
* Wed Sep 10 2008 anicka@suse.cz
- update to 0.96
* Fix handling of numeric character entities in attribute