texlive/biber-decode.diff

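# perl-5.20.0 ships an Encode::decode_utf8 that no longer
# short-circuits when its input is already a decoded character
# string (i.e. Encode::is_utf8 is true for it). Instead, it dies
# with a 'Cannot decode string with wide characters' error
# message. This patch therefore calls decode_utf8 only when
# Encode::is_utf8 reports that the value is not yet decoded.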
---
 lib/Biber/Input/file/bibtex.pm |    5 +++--
 lib/Biber/Output/bibtex.pm     |    3 ++-
 lib/Biber/Utils.pm             |    4 ++--
 3 files changed, 7 insertions(+), 5 deletions(-)

--- lib/Biber/Utils.pm
+++ lib/Biber/Utils.pm	2014-07-15 16:17:58.000000000 +0000
@@ -147,7 +147,7 @@ sub locate_biber_file {
       chomp $found;
       $found =~ s/\cM\z//xms; # kpsewhich in cygwin sometimes returns ^M at the end
       # filename can be UTF-8 and run3() isn't clever with UTF-8
-      return decode_utf8($found);
+      return Encode::is_utf8($found) ? $found : decode_utf8($found);
     }
     else {
       $logger->debug("Could not find '$filename' via kpsewhich");
@@ -935,7 +935,7 @@ sub parse_date {
 =cut
 
 sub biber_decode_utf8 {
-  return NFD(decode_utf8(shift));# Unicode NFD boundary
+  return NFD(Encode::is_utf8($_[0]) ? $_[0] : decode_utf8($_[0]));# Unicode NFD boundary
 }
 
 =head2 out
--- lib/Biber/Input/file/bibtex.pm
+++ lib/Biber/Input/file/bibtex.pm	2014-07-15 16:15:57.000000000 +0000
@@ -605,7 +605,8 @@ sub _literal {
 # URI fields
 sub _uri {
   my ($bibentry, $entry, $f) = @_;
-  my $value = NFC(decode_utf8($entry->get($f)));# Unicode NFC boundary (before hex encoding)
+  my $value = $entry->get($f);
+  $value = NFC(Encode::is_utf8($value) ? $value : decode_utf8($value)); # Unicode NFC boundary (before hex encoding)
   my ($field, $form, $lang) = $f =~ m/$fl_re/xms;
 
   # If there are some escapes in the URI, unescape them
@@ -613,7 +614,7 @@ sub _uri {
     $value =~ s/\\%/%/g; # just in case someone BibTeX escaped the "%"
     # This is what uri_unescape() does but it's faster
     $value =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
-    $value = NFC(decode_utf8($value));# Unicode NFC boundary (before hex encoding)
+    $value = NFC(Encode::is_utf8($value) ? $value : decode_utf8($value)); # Unicode NFC boundary (before hex encoding)
   }
 
   $value = URI->new($value)->as_string;
--- lib/Biber/Output/bibtex.pm
+++ lib/Biber/Output/bibtex.pm	2015-03-26 11:32:20.345519079 +0000
@@ -100,7 +100,8 @@ sub set_output_entry {
       next if first {lc($f) eq $_}  ('xdata', 'crossref');
     }
 
-    my $value = decode_utf8($be->get_rawfield($f));
+    my $value = $be->get_rawfield($f);
+    $value = decode_utf8($value) unless Encode::is_utf8($value);
     $acc .= ' ' x Biber::Config->getoption('output_indent');
     $acc .= $casing->($f);
     $acc .= ' ' x ($max_field_len - Unicode::GCString->new($f)->length) if Biber::Config->getoption('output_align');
Accepting request 241115 from home:mlschroe:branches:Publishing:TeXLive Add biber-decode.diff OBS-URL: https://build.opensuse.org/request/show/241115 OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=176 2014-07-15 19:20:39 +02:00			`# perl-5.20.0 contains an Encode::decode_utf8 version that no`
			`# longer short-circuits if the input is already utf8. Instead,`
			`# it dies with a 'Cannot decode string with wide characters'`
			`# error message.`
. OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=184 2015-03-30 13:39:58 +02:00			`---`
			`lib/Biber/Input/file/bibtex.pm \| 5 +++--`
			`lib/Biber/Output/bibtex.pm \| 3 ++-`
			`lib/Biber/Utils.pm \| 4 ++--`
			`3 files changed, 7 insertions(+), 5 deletions(-)`

			`--- lib/Biber/Utils.pm`
			`+++ lib/Biber/Utils.pm 2014-07-15 16:17:58.000000000 +0000`
			`@@ -147,7 +147,7 @@ sub locate_biber_file {`
			`chomp $found;`
			`$found =~ s/\cM\z//xms; # kpsewhich in cygwin sometimes returns ^M at the end`
			`# filename can be UTF-8 and run3() isn't clever with UTF-8`
			`- return decode_utf8($found);`
			`+ return Encode::is_utf8($found) ? $found : decode_utf8($found);`
			`}`
			`else {`
			`$logger->debug("Could not find '$filename' via kpsewhich");`
			`@@ -935,7 +935,7 @@ sub parse_date {`
			`=cut`

			`sub biber_decode_utf8 {`
			`- return NFD(decode_utf8(shift));# Unicode NFD boundary`
			`+ return NFD(Encode::is_utf8($_[0]) ? $_[0] : decode_utf8($_[0]));# Unicode NFD boundary`
			`}`

			`=head2 out`
			`--- lib/Biber/Input/file/bibtex.pm`
			`+++ lib/Biber/Input/file/bibtex.pm 2014-07-15 16:15:57.000000000 +0000`
			`@@ -605,7 +605,8 @@ sub _literal {`
Accepting request 241115 from home:mlschroe:branches:Publishing:TeXLive Add biber-decode.diff OBS-URL: https://build.opensuse.org/request/show/241115 OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=176 2014-07-15 19:20:39 +02:00			`# URI fields`
			`sub _uri {`
			`my ($bibentry, $entry, $f) = @_;`
			`- my $value = NFC(decode_utf8($entry->get($f)));# Unicode NFC boundary (before hex encoding)`
			`+ my $value = $entry->get($f);`
			`+ $value = NFC(Encode::is_utf8($value) ? $value : decode_utf8($value)); # Unicode NFC boundary (before hex encoding)`
			`my ($field, $form, $lang) = $f =~ m/$fl_re/xms;`

			`# If there are some escapes in the URI, unescape them`
. OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=184 2015-03-30 13:39:58 +02:00			`@@ -613,7 +614,7 @@ sub _uri {`
Accepting request 241115 from home:mlschroe:branches:Publishing:TeXLive Add biber-decode.diff OBS-URL: https://build.opensuse.org/request/show/241115 OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=176 2014-07-15 19:20:39 +02:00			`$value =~ s/\\%/%/g; # just in case someone BibTeX escaped the "%"`
			`# This is what uri_unescape() does but it's faster`
			`$value =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;`
			`- $value = NFC(decode_utf8($value));# Unicode NFC boundary (before hex encoding)`
			`+ $value = NFC(Encode::is_utf8($value) ? $value : decode_utf8($value)); # Unicode NFC boundary (before hex encoding)`
			`}`

			`$value = URI->new($value)->as_string;`
. OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=184 2015-03-30 13:39:58 +02:00			`--- lib/Biber/Output/bibtex.pm`
			`+++ lib/Biber/Output/bibtex.pm 2015-03-26 11:32:20.345519079 +0000`
Accepting request 241115 from home:mlschroe:branches:Publishing:TeXLive Add biber-decode.diff OBS-URL: https://build.opensuse.org/request/show/241115 OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=176 2014-07-15 19:20:39 +02:00			`@@ -100,7 +100,8 @@ sub set_output_entry {`
			`next if first {lc($f) eq $_} ('xdata', 'crossref');`
			`}`
. OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=184 2015-03-30 13:39:58 +02:00
Accepting request 241115 from home:mlschroe:branches:Publishing:TeXLive Add biber-decode.diff OBS-URL: https://build.opensuse.org/request/show/241115 OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=176 2014-07-15 19:20:39 +02:00			`- my $value = decode_utf8($be->get_rawfield($f));`
			`+ my $value = $be->get_rawfield($f);`
			`+ $value = decode_utf8($value) unless Encode::is_utf8($value);`
. OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=184 2015-03-30 13:39:58 +02:00			`$acc .= ' ' x Biber::Config->getoption('output_indent');`
Accepting request 241115 from home:mlschroe:branches:Publishing:TeXLive Add biber-decode.diff OBS-URL: https://build.opensuse.org/request/show/241115 OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=176 2014-07-15 19:20:39 +02:00			`$acc .= $casing->($f);`
. OBS-URL: https://build.opensuse.org/package/show/Publishing:TeXLive/texlive?expand=0&rev=184 2015-03-30 13:39:58 +02:00			`$acc .= ' ' x ($max_field_len - Unicode::GCString->new($f)->length) if Biber::Config->getoption('output_align');`