Accepting request 29905 from Base:System
Copy from Base:System/kbd based on submit request 29905 from user coolo OBS-URL: https://build.opensuse.org/request/show/29905 OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/kbd?expand=0&rev=32
This commit is contained in:
commit
953a39ad3c
316
guess_encoding.pl
Normal file
316
guess_encoding.pl
Normal file
@ -0,0 +1,316 @@
|
||||
#! /usr/bin/perl -w
|
||||
#
|
||||
# encoding.pl -- guess charset encoding
|
||||
#
|
||||
# (C) 2007, jw@suse.de, Novell Inc.
|
||||
# Distribute under GPLv2
|
||||
#
|
||||
# 2006-12-05, jw, V0.1 -- only framework.
|
||||
# 2007-01-23, jw, V0.2 -- utf8 and latin1 for ttys.
|
||||
# 2007-02-08, jw, V0.3 -- utf8 and latin1 for files.
|
||||
|
||||
use Data::Dumper;
|
||||
use POSIX;
|
||||
use IO::Handle;
|
||||
|
||||
my $version = '0.3';
|
||||
my $verbose = 0;
|
||||
my $stdin = 0;
|
||||
|
||||
while (defined (my $arg = shift))
|
||||
{
|
||||
if ($arg !~ m{^-.}) { unshift @ARGV, $arg; last }
|
||||
elsif ($arg =~ m{^(-h|--help|-\?)}) { exit usage(); }
|
||||
elsif ($arg =~ m{^--?v}) { $verbose++; }
|
||||
elsif ($arg =~ m{^--?q}) { $verbose = 0; }
|
||||
else { exit usage("unknown option $arg"); }
|
||||
}
|
||||
|
||||
if (!@ARGV and -t STDIN and -t STDERR)
|
||||
{
|
||||
my $r = probe_tty();
|
||||
print "$r\n";
|
||||
exit 0;
|
||||
}
|
||||
|
||||
for my $file (@ARGV)
|
||||
{
|
||||
my $fd;
|
||||
open $fd, ($file eq '-') ? '<&=STDIN' : "<$file" or die "open($file) failed: $!";
|
||||
probe_file($fd, $file);
|
||||
close $fd;
|
||||
}
|
||||
|
||||
exit 0;
|
||||
########################################################################
|
||||
|
||||
sub usage
|
||||
{
|
||||
my ($msg) = @_;
|
||||
print STDERR qq{$0 V$version usage:
|
||||
|
||||
encoding [options] [file]
|
||||
|
||||
valid options are:
|
||||
-h Print this online help
|
||||
-v Be more verbose. Default $verbose
|
||||
-q Be quiet
|
||||
- Read from stdin.
|
||||
|
||||
Without any parameters, the terminal (if any) is probed,
|
||||
using stdin and stderr.
|
||||
|
||||
Files are searched for characters outside the ascii range.
|
||||
Those characters are tested for their likeliness in
|
||||
various encodings.
|
||||
Thus an illegal mix of encodings can be detected.
|
||||
|
||||
If not verbose, only one single word is printed to stdout:
|
||||
The name of the most likely encoding.
|
||||
|
||||
};
|
||||
|
||||
print STDERR "\nERROR: $msg\n" if $msg;
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub sysread_tout
|
||||
{
|
||||
my ($FILE, $len, $tout) = @_;
|
||||
my $r = '';
|
||||
while ($len > 0)
|
||||
{
|
||||
my $rout;
|
||||
my $rin = '';
|
||||
vec($rin,fileno($FILE), 1) = 1;
|
||||
my ($n, $t) = select($rout = $rin, undef, undef, $tout);
|
||||
$tout = $t if defined $t;
|
||||
last unless $n;
|
||||
my $buf = '';
|
||||
last if sysread($FILE, $buf, 1) <= 0;
|
||||
$r .= $buf;
|
||||
$len--;
|
||||
}
|
||||
return $r;
|
||||
}
|
||||
|
||||
sub tty_raw
|
||||
{
|
||||
my ($FILE) = @_;
|
||||
|
||||
my $t = POSIX::Termios->new;
|
||||
my $o = POSIX::Termios->new;
|
||||
$t->getattr(fileno $FILE);
|
||||
$o->getattr(fileno $FILE);
|
||||
|
||||
$t->setlflag(0); # -echo, -icanon
|
||||
$t->setcc(POSIX::VMIN, 1);
|
||||
$t->setcc(POSIX::VTIME, 0);
|
||||
tty_set($FILE, $t);
|
||||
return $o;
|
||||
}
|
||||
|
||||
sub tty_set
|
||||
{
|
||||
my ($FILE, $t) = @_;
|
||||
$t->setattr(fileno $FILE, POSIX::TCSANOW) or die "TCSANOW failed: $!\n";
|
||||
}
|
||||
|
||||
sub get_cursor_pos
|
||||
{
|
||||
my ($hint) = @_;
|
||||
# 1 may be an ansi term?
|
||||
# testing device status report 6, as seen in vttest.
|
||||
my $t = tty_raw(STDIN);
|
||||
|
||||
while (length(sysread_tout(STDIN, 1, 0.1))) { }
|
||||
|
||||
syswrite(STDOUT, "\33[6n", 4);
|
||||
my $r = sysread_tout(STDIN, 10, 0.1);
|
||||
tty_set(STDIN, $t);
|
||||
return { x => $2 - 1, y => $1 - 1, hint => 'DC6' } if $r =~ m{^\33\[(\d+);(\d+)R};
|
||||
return undef;
|
||||
}
|
||||
|
||||
sub probe_tty
|
||||
{
|
||||
#
|
||||
# we can use STDIN and STDERR.
|
||||
# 0) first, see, if the terminal can report cursor positions.
|
||||
syswrite(STDOUT, "\r", 1);
|
||||
my $o = get_cursor_pos();
|
||||
print ", x=$o->{x}\n" if $verbose > 1;
|
||||
|
||||
# - if not, abort.
|
||||
die "get_cursor_pos failed.\n" unless defined $o;
|
||||
|
||||
# - if it can, store the current position
|
||||
if ($o->{x} != 0)
|
||||
{
|
||||
warn "strace (or other) output interferes or\n" if $o->{x} >= 20;
|
||||
die "carriage return does not work.\n";
|
||||
}
|
||||
|
||||
# 1) write a single byte ascii character, 'X' and check,
|
||||
# if it advances by one.
|
||||
syswrite(STDOUT, "\rX", 2);
|
||||
my $p = get_cursor_pos($o->{hint});
|
||||
print ", x=$p->{x}\n" if $verbose;
|
||||
|
||||
|
||||
# - If not, it is probably in microsoft-multibyte encoding,
|
||||
# and requires '\0' prefixing. check this, report and abort.
|
||||
die "multi-byte mode" if $p->{x} != 1;
|
||||
|
||||
# 2)Then try non-ascii characters, e.g. a-umlaut.
|
||||
# 2a) send its latin1 code, and see what happens,
|
||||
syswrite(STDOUT, "\r1\34434", 5); # 1, a-umlaut-latin1, 3, 4
|
||||
$p = get_cursor_pos($o->{hint});
|
||||
print ", x=$p->{x}\n" if $verbose;
|
||||
die "no report" unless defined $p;
|
||||
|
||||
# - no advance indicates that the terminal is not in latin1 mode
|
||||
# or a lousy font is used.
|
||||
# - advance by 2 indicates a defect in the tty-emulator.
|
||||
die "latin1 a-umlaut caused confusion." if $p->{x} > 4 or $p->{x} < 2;
|
||||
|
||||
# in utf8, our \344 consumes another char, thus the '3' is not printed.
|
||||
# we don't know what the font does then.
|
||||
my $maybe = 'utf8' if $p->{x} == 2 or $p->{x} == 3;
|
||||
$maybe = 'latin1' if $p->{x} == 4;
|
||||
print "maybe $maybe\n" if $verbose;
|
||||
# - advance by 1 says nothing, may be latin1.
|
||||
# 2b) send its utf8 code.
|
||||
|
||||
syswrite(STDOUT, "\r1\303\24434", 6); # 1, a-umlaut-utf8, 3, 4
|
||||
$p = get_cursor_pos($o->{hint});
|
||||
print ", x=$p->{x}\n" if $verbose;
|
||||
|
||||
die "no report" unless defined $p;
|
||||
# - no advance indicates that a lousy font is used.
|
||||
# - advance by one indicates that the terminal is in utf8 mode.
|
||||
# - advance by two indicates that the terminal is in latin1 mode.
|
||||
|
||||
syswrite(STDOUT, "\r \r", 8) unless $verbose; # clear scratch area
|
||||
|
||||
if ($p->{x} == 4)
|
||||
{
|
||||
return 'utf8' if $maybe eq 'utf8';
|
||||
return 'possibly utf8';
|
||||
}
|
||||
|
||||
return 'latin1' if $maybe eq 'latin1';
|
||||
return 'possibly latin1';
|
||||
}
|
||||
|
||||
##
|
||||
## if utf8_valid is positive, then it can only be utf-8.
|
||||
## (if also utf8_invalid and/or latin1_typ are positive, then it is a mixture)
|
||||
## if only utf8_invalid or latin1_typ are positive, then it is latin1.
|
||||
## if all 3 are zero, it is plain ascii.
|
||||
##
|
||||
## FIXME: should take an optional length parameter to limit runtime.
|
||||
##
|
||||
sub probe_file
|
||||
{
|
||||
my ($fd, $name) = @_;
|
||||
print "probing $name\n" if $verbose;
|
||||
|
||||
my %typ_latin = map { $_ => 1 } qw(169 171 174 176 177 178 179 181
|
||||
185 187 191 192 193 194 195 196 197 199 200 201 202 203 204 205 206 207 208 209
|
||||
210 211 212 213 214 215 216 217 218 219 220
|
||||
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
|
||||
246 249 250 251 252 253 189 164);
|
||||
|
||||
|
||||
# when running incremental, $fd is probably not seekable.
|
||||
# so we need to buffer characters to be re-read after a lookahead.
|
||||
|
||||
# http://de.wikipedia.org/wiki/UTF-8#Kodierung
|
||||
|
||||
my $utf8_valid = 0; # parser happy.
|
||||
my $utf8_invalid = 0; # something wrong.
|
||||
my $latin1_typ = 0; # valid chars in 128..255 range followed by a ascii byte
|
||||
my $ascii = 0; # char in 10..127 range
|
||||
my $utf8_size = 0; # how many bytes belong to this utf-8 char.
|
||||
my $utf8_len = 0; # how many more bytes belong to this utf-8 char.
|
||||
my $utf8_start = 0; # ord of utf_8 start char.
|
||||
|
||||
while (defined(my $c = getc($fd)))
|
||||
{
|
||||
my $v = ord($c);
|
||||
if ($utf8_len)
|
||||
{
|
||||
if (($v & 0xc0) == 0x80) # 10xx xxxx
|
||||
{
|
||||
# printf "0 %02x\n", $v;
|
||||
unless (--$utf8_len)
|
||||
{
|
||||
$utf8_valid++;
|
||||
$utf8_size = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
# printf "0x %02x %02x '$c' $utf8_size-$utf8_len\n", $utf8_start, $v;
|
||||
if (($utf8_size - $utf8_len) == 1 and $typ_latin{$utf8_start})
|
||||
{
|
||||
if ($v > 7 && $v < 128)
|
||||
{
|
||||
$latin1_typ++;
|
||||
$ascii++;
|
||||
}
|
||||
elsif ($typ_latin{$v})
|
||||
{
|
||||
$latin1_typ += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
$utf8_invalid++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
$utf8_invalid++;
|
||||
}
|
||||
$utf8_len = $utf8_size = $utf8_start = 0;
|
||||
}
|
||||
}
|
||||
elsif ($v > 7 && $v < 128)
|
||||
{
|
||||
$ascii++;
|
||||
next;
|
||||
}
|
||||
elsif (($v & 0xe0) == 0xc0) # 110x xxxx
|
||||
{
|
||||
$utf8_start = $v;
|
||||
$utf8_size = 2;
|
||||
$utf8_len = 1;
|
||||
# printf "1 %02x\n", $v;
|
||||
}
|
||||
elsif (($v & 0xf0) == 0xe0) # 1110 xxxx
|
||||
{
|
||||
$utf8_start = $v;
|
||||
$utf8_size = 3;
|
||||
$utf8_len = 2;
|
||||
# printf "2 %02x\n", $v;
|
||||
}
|
||||
elsif (($v & 0xf8) == 0xf0) # 1111 0xxx
|
||||
{
|
||||
$utf8_start = $v;
|
||||
$utf8_size = 4;
|
||||
$utf8_len = 3;
|
||||
# printf "3 %02x\n", $v;
|
||||
}
|
||||
elsif ($typ_latin{$v})
|
||||
{
|
||||
$latin1_typ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
$utf8_invalid++;
|
||||
# printf "x %02x\n", $v;
|
||||
}
|
||||
}
|
||||
print "$name: utf8_valid=$utf8_valid utf8_invalid=$utf8_invalid latin1_typ=$latin1_typ ascii=$ascii\n";
|
||||
}
|
@ -1,3 +1,10 @@
|
||||
-------------------------------------------------------------------
|
||||
Sun Jan 10 01:45:25 UTC 2010 - jw@novell.com
|
||||
|
||||
- added guess_encoding, an old script of mine, that tests
|
||||
if a tty is in utf8 mode or not.
|
||||
(Also helpful with text files in unknown encoding.)
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Mon Nov 2 13:48:13 CET 2009 - mmarek@suse.cz
|
||||
|
||||
|
9
kbd.spec
9
kbd.spec
@ -1,7 +1,7 @@
|
||||
#
|
||||
# spec file for package kbd (Version 1.14.1)
|
||||
#
|
||||
# Copyright (c) 2009 SUSE LINUX Products GmbH, Nuernberg, Germany.
|
||||
# Copyright (c) 2010 SUSE LINUX Products GmbH, Nuernberg, Germany.
|
||||
#
|
||||
# All modifications and additions to the file contributed by third parties
|
||||
# remain the property of their copyright owners, unless otherwise agreed
|
||||
@ -20,11 +20,11 @@
|
||||
|
||||
Name: kbd
|
||||
Url: ftp://ftp.altlinux.org/pub/people/legion/kbd/
|
||||
License: GPL v2 or later
|
||||
License: GPLv2+
|
||||
Group: System/Console
|
||||
AutoReqProv: on
|
||||
Version: 1.14.1
|
||||
Release: 20
|
||||
Release: 21
|
||||
Summary: Keyboard and Font Utilities
|
||||
Source: kbd-%{version}.tar.bz2
|
||||
Source1: kbd_fonts.tar.bz2
|
||||
@ -38,6 +38,7 @@ Source9: sysconfig.keyboard
|
||||
Source10: testutf8
|
||||
Source11: fbtest.c
|
||||
Source12: fbtest.8
|
||||
Source13: guess_encoding.pl
|
||||
Source42: convert-kbd-mac.sed
|
||||
Source43: repack_kbd.sh
|
||||
Patch1: kbd-1.14.1-2d01989f.patch
|
||||
@ -229,6 +230,7 @@ rm -f $RPM_BUILD_ROOT/%{_mandir}/man8/setkeycodes.8*
|
||||
install -m 755 %_sourcedir/testutf8 $RPM_BUILD_ROOT/bin/
|
||||
install -m 755 fbtest $RPM_BUILD_ROOT/sbin/
|
||||
install -m 644 %SOURCE12 $RPM_BUILD_ROOT/%{_mandir}/man8/
|
||||
install -m 755 %SOURCE13 $RPM_BUILD_ROOT/bin/guess_encoding
|
||||
%find_lang %{name}
|
||||
|
||||
%post
|
||||
@ -281,6 +283,7 @@ install -m 644 %SOURCE12 $RPM_BUILD_ROOT/%{_mandir}/man8/
|
||||
/bin/unicode_stop
|
||||
/bin/kbdrate
|
||||
/bin/testutf8
|
||||
/bin/guess_encoding
|
||||
%doc %{_mandir}/man1/*
|
||||
%doc %{_mandir}/man5/keymaps.5.gz
|
||||
%ifnarch sparc m68k
|
||||
|
Loading…
Reference in New Issue
Block a user