commit 6a4cd76235e2b86c48865d377fecd70f383fcee9cbfca26c42fcaabdadb02179 Author: Stephan Kulow Date: Thu Jan 14 07:00:25 2016 +0000 initial package OBS-URL: https://build.opensuse.org/package/show/devel:languages:perl/perl-Algorithm-KMeans?expand=0&rev=1 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57affb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/Algorithm-KMeans-2.05.tar.gz b/Algorithm-KMeans-2.05.tar.gz new file mode 100644 index 0000000..895fb2e --- /dev/null +++ b/Algorithm-KMeans-2.05.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d07298d499b9b4e2cd93f1bbd9289814ab461e69ecc35c796e495fe9b5a9a8 +size 57703 diff --git a/perl-Algorithm-KMeans.changes b/perl-Algorithm-KMeans.changes new file mode 100644 index 0000000..93e55d5 --- /dev/null +++ b/perl-Algorithm-KMeans.changes @@ -0,0 +1,6 @@ 
+------------------------------------------------------------------- +Thu Jan 14 07:00:22 UTC 2016 - coolo@suse.com + +- initial package 2.05 + * created by cpanspec 1.78.08 + diff --git a/perl-Algorithm-KMeans.spec b/perl-Algorithm-KMeans.spec new file mode 100644 index 0000000..4958145 --- /dev/null +++ b/perl-Algorithm-KMeans.spec @@ -0,0 +1,119 @@ +# +# spec file for package perl-Algorithm-KMeans +# +# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany. +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. 
+ +# Please submit bugfixes or comments via http://bugs.opensuse.org/ +# + + +Name: perl-Algorithm-KMeans +Version: 2.05 +Release: 0 +%define cpan_name Algorithm-KMeans +Summary: For Clustering Multidimensional Data +License: GPL-1.0+ or Artistic-1.0 +Group: Development/Libraries/Perl +Url: http://search.cpan.org/dist/Algorithm-KMeans/ +Source0: http://www.cpan.org/authors/id/A/AV/AVIKAK/%{cpan_name}-%{version}.tar.gz +BuildArch: noarch +BuildRoot: %{_tmppath}/%{name}-%{version}-build +BuildRequires: perl +BuildRequires: perl-macros +BuildRequires: perl(Graphics::GnuplotIF) >= 1.6 +BuildRequires: perl(Math::GSL) >= 0.32 +BuildRequires: perl(Math::Random) >= 0.71 +Requires: perl(Graphics::GnuplotIF) >= 1.6 +Requires: perl(Math::GSL) >= 0.32 +Requires: perl(Math::Random) >= 0.71 +%{perl_requires} + +%description +Clustering with K-Means takes place iteratively and involves two steps: 1) +assignment of data samples to clusters on the basis of how far the data +samples are from the cluster centers; and 2) Recalculation of the cluster +centers (and cluster covariances if you are using the Mahalanobis distance +metric for clustering). + +Obviously, before the two-step approach can proceed, we need to initialize +the cluster centers. How this initialization is carried out is +important. The module gives you two very different ways for carrying out +this initialization. One option, called the 'smart' option, consists of +subjecting the data to principal components analysis to discover the +direction of maximum variance in the data space. The data points are then +projected on to this direction and a histogram constructed from the +projections. Centers of the smoothed histogram are used to seed the +clustering operation. The other option is to choose the cluster centers +purely randomly. You get the first option if you set 'cluster_seeding' to +'smart' in the constructor, and you get the second option if you set it to +'random'. 
+ +How to specify the number of clusters, 'K', is one of the most vexing +issues in any approach to clustering. In some cases, we can set 'K' on the +basis of prior knowledge. But, more often than not, no such prior knowledge +is available. When the programmer does not explicitly specify a value for +'K', the approach taken in the current implementation is to try all +possible values between 2 and some largest possible value that makes +statistical sense. We then choose that value for 'K' which yields the best +value for the QoC (Quality of Clustering) metric. It is generally believed +that the largest value for 'K' should not exceed 'sqrt(N/2)' where 'N' is +the number of data samples to be clustered. + +What to use for the QoC metric is obviously a critical issue unto itself. +In the current implementation, the value of QoC is the ratio of the average +radius of the clusters and the average distance between the cluster +centers. + +Every iterative algorithm requires a stopping criterion. The criterion +implemented here is that we stop iterations when there is no re-assignment +of the data points during the assignment step. + +Ordinarily, the output produced by a K-Means clusterer will correspond to a +local minimum for the QoC values, as opposed to a global minimum. The +current implementation protects against that when the module constructor is +called with the 'random' option for 'cluster_seeding' by trying different +randomly selected initial cluster centers and then selecting the one that +gives the best overall QoC value. + +A K-Means clusterer will generally produce good results if the overlap +between the clusters is minimal and if each cluster exhibits variability +that is uniform in all directions. 
When the data variability is different +along the different directions in the data space, the results you obtain +with a K-Means clusterer may be improved by first normalizing the data +appropriately, as can be done in this module when you set the +'do_variance_normalization' option in the constructor. However, as pointed +out elsewhere in this documentation, such normalization may actually +decrease the performance of the clusterer if the overall data variability +along any dimension is more a result of separation between the means than a +consequence of intra-cluster variability. + +%prep +%setup -q -n %{cpan_name}-%{version} +find . -type f ! -name \*.pl -print0 | xargs -0 chmod 644 + +%build +%{__perl} Makefile.PL INSTALLDIRS=vendor +%{__make} %{?_smp_mflags} + +%check +%{__make} test + +%install +%perl_make_install +%perl_process_packlist +%perl_gen_filelist + +%files -f %{name}.files +%defattr(-,root,root,755) +%doc examples README + +%changelog