77 lines
2.6 KiB
Diff
77 lines
2.6 KiB
Diff
![]() |
--- Digest-Nilsimsa-0.06/Nilsimsa.pm.orig 2002-06-13 06:15:10.000000000 +0200
|
||
|
+++ Digest-Nilsimsa-0.06/Nilsimsa.pm 2010-06-22 02:07:21.000000000 +0200
|
||
|
@@ -37,6 +37,41 @@ wrapper around nilsimsa implementation i
|
||
|
Pass in any text, any size, and get back a digest string composed 64
|
||
|
hex chars.
|
||
|
|
||
|
+=item $nilsimsa->hex_distance($n1, $n2);
|
||
|
+
|
||
|
+Compare two nilsimsa digests n1 and n2 given as hex chars.
|
||
|
+The return value is a number in the range -128 .. 128, where any value over 24
|
||
|
+(which is 3 sigma) is believed to indicate that the two texts are not generated
|
||
|
+independently.
|
||
|
+
|
||
|
+=cut
|
||
|
+# 773e2df0a02a319ec34a0b71d54029111da90838cbc20ecd3d2d4e18c25a3025
|
||
|
+# 47182cf0802a11dec24a3b75d5042d310ca90838c9d20ecc3d610e98560a3645
|
||
|
+# The nilsimsa of these two codes is 92 on a scale of -128 to +128. That means that
|
||
|
+# 36 bits are different and 220 bits the same. Any nilsimsa over 24 (which is 3
|
||
|
+# sigma) indicates that the two messages are probably not independently
|
||
|
+# generated.
|
||
|
+
|
||
|
+sub hex_distance
|
||
|
+{
|
||
|
+ shift @_ if ref $_[0];
|
||
|
+
|
||
|
+ # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||
|
+ my @bit_diff = ( 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
|
||
|
+
|
||
|
+ my @n1 = split //, $_[0];
|
||
|
+ my @n2 = split //, $_[1];
|
||
|
+ die "length differs", if scalar(@n1) != scalar (@n2);
|
||
|
+ my $val = scalar(@n1) * 4;
|
||
|
+ for my $i (0..$#n1)
|
||
|
+ {
|
||
|
+ next if $n1[$i] eq $n2[$i];
|
||
|
+ my ($v1,$v2) = (hex($n1[$i]), hex($n2[$i]));
|
||
|
+ $val -= $bit_diff[$v1 ^ $v2];
|
||
|
+ }
|
||
|
+ return $val - scalar(@n1)*2;
|
||
|
+}
|
||
|
+
|
||
|
=back
|
||
|
|
||
|
=head1 SEE ALSO
|
||
|
--- Digest-Nilsimsa-0.06/t/01-hex_distance.t.orig 2010-06-22 01:33:37.000000000 +0200
|
||
|
+++ Digest-Nilsimsa-0.06/t/01-hex_distance.t 2010-06-22 02:07:51.000000000 +0200
|
||
|
@@ -0,0 +1,19 @@
|
||
|
+#!perl
|
||
|
+use strict;
|
||
|
+use warnings;
|
||
|
+use Test::More tests => 2;
|
||
|
+
|
||
|
+use Digest::Nilsimsa;
|
||
|
+
|
||
|
+my @n = qw(773e2df0a02a319ec34a0b71d54029111da90838cbc20ecd3d2d4e18c25a3025
|
||
|
+ 47182cf0802a11dec24a3b75d5042d310ca90838c9d20ecc3d610e98560a3645);
|
||
|
+
|
||
|
+# from http://ixazon.dynip.com/~cmeclax/nilsimsa.html:
|
||
|
+# The nilsimsa of these two codes is 92 on a scale of -128 to +128. That means
|
||
|
+# that 36 bits are different and 220 bits the same. Any nilsimsa over 24 (which
|
||
|
+# is 3 sigma) indicates that the two messages are probably not independently
|
||
|
+# generated.
|
||
|
+
|
||
|
+cmp_ok(Digest::Nilsimsa::hex_distance(@n), '==', 92, 'hex_distance class method');
|
||
|
+cmp_ok(Digest::Nilsimsa::hex_distance({}, @n), '==', 92, 'hex_distance obj method');
|
||
|
+
|
||
|
--- Digest-Nilsimsa-0.06/MANIFEST.orig 2002-05-21 00:07:21.000000000 +0200
|
||
|
+++ Digest-Nilsimsa-0.06/MANIFEST 2010-06-22 02:09:12.000000000 +0200
|
||
|
@@ -7,6 +7,7 @@ README
|
||
|
_nilsimsa.c
|
||
|
nilsimsa.h
|
||
|
t/00_load.t
|
||
|
+t/01-hex_distance.t
|
||
|
t/a.txt
|
||
|
t/b.txt
|
||
|
t/d.txt
|