# perl-5.20.0 ships an Encode::decode_utf8 version that no # longer short-circuits if the input is already flagged as UTF-8. Instead, # it dies with a 'Cannot decode string with wide characters' # error message. Each decode_utf8() call changed below is therefore # guarded with Encode::is_utf8(). --- ./lib/Biber/Input/file/bibtex.pm.orig 2014-07-15 16:11:39.039227206 +0000 +++ ./lib/Biber/Input/file/bibtex.pm 2014-07-15 16:15:57.321972318 +0000 @@ -592,7 +592,8 @@ sub _literal { # URI fields sub _uri { my ($bibentry, $entry, $f) = @_; - my $value = NFC(decode_utf8($entry->get($f)));# Unicode NFC boundary (before hex encoding) + my $value = $entry->get($f); + $value = NFC(Encode::is_utf8($value) ? $value : decode_utf8($value)); # Unicode NFC boundary (before hex encoding) my ($field, $form, $lang) = $f =~ m/$fl_re/xms; # If there are some escapes in the URI, unescape them @@ -600,7 +601,7 @@ sub _uri { $value =~ s/\\%/%/g; # just in case someone BibTeX escaped the "%" # This is what uri_unescape() does but it's faster $value =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; - $value = NFC(decode_utf8($value));# Unicode NFC boundary (before hex encoding) + $value = NFC(Encode::is_utf8($value) ? 
$value : decode_utf8($value)); # Unicode NFC boundary (before hex encoding) } $value = URI->new($value)->as_string; --- ./lib/Biber/Output/bibtex.pm.orig 2014-07-15 16:14:07.925503890 +0000 +++ ./lib/Biber/Output/bibtex.pm 2014-07-15 16:18:47.922143172 +0000 @@ -100,7 +100,8 @@ sub set_output_entry { next if first {lc($f) eq $_} ('xdata', 'crossref'); } # Save post-mapping data for tool mode - my $value = decode_utf8($be->get_rawfield($f)); + my $value = $be->get_rawfield($f); + $value = decode_utf8($value) unless Encode::is_utf8($value); $acc .= ' ' x Biber::Config->getoption('tool_indent'); $acc .= $casing->($f); $acc .= ' ' x ($max_field_len - Unicode::GCString->new($f)->length) if Biber::Config->getoption('tool_align'); --- ./lib/Biber/Utils.pm.orig 2014-07-15 16:12:11.829067919 +0000 +++ ./lib/Biber/Utils.pm 2014-07-15 16:17:57.874386675 +0000 @@ -147,7 +147,7 @@ sub locate_biber_file { chomp $found; $found =~ s/\cM\z//xms; # kpsewhich in cygwin sometimes returns ^M at the end # filename can be UTF-8 and run3() isn't clever with UTF-8 - return decode_utf8($found); + return Encode::is_utf8($found) ? $found : decode_utf8($found); } else { $logger->debug("Could not find '$filename' via kpsewhich"); @@ -935,7 +935,7 @@ sub parse_date { =cut sub biber_decode_utf8 { - return NFD(decode_utf8(shift));# Unicode NFD boundary + return NFD(Encode::is_utf8($_[0]) ? $_[0] : decode_utf8($_[0]));# Unicode NFD boundary } =head2 out