- initial package 1.00
* created by cpanspec 1.78.03 OBS-URL: https://build.opensuse.org/package/show/devel:languages:perl/perl-Parallel-Iterator?expand=0&rev=1
This commit is contained in:
23
.gitattributes
vendored
Normal file
23
.gitattributes
vendored
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
## Default LFS
|
||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bsp filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gem filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.jar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lzma filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.obscpio filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.oxt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.png filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rpm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tbz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tbz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ttf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.txz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.whl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
.osc
|
3
Parallel-Iterator-1.00.tar.bz2
Normal file
3
Parallel-Iterator-1.00.tar.bz2
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:b7553fc1d24d88a1446ba61e49b61bb3b88366c1021d7fb52181095ae4bfbbc4
|
||||||
|
size 14074
|
6
perl-Parallel-Iterator.changes
Normal file
6
perl-Parallel-Iterator.changes
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Jan 17 15:57:27 UTC 2011 - coolo@novell.com
|
||||||
|
|
||||||
|
- initial package 1.00
|
||||||
|
* created by cpanspec 1.78.03
|
||||||
|
|
244
perl-Parallel-Iterator.spec
Normal file
244
perl-Parallel-Iterator.spec
Normal file
@@ -0,0 +1,244 @@
|
|||||||
|
#
|
||||||
|
# spec file for package perl-Parallel-Iterator (Version 1.00)
|
||||||
|
#
|
||||||
|
# Copyright (c) 2010 SUSE LINUX Products GmbH, Nuernberg, Germany.
|
||||||
|
#
|
||||||
|
# All modifications and additions to the file contributed by third parties
|
||||||
|
# remain the property of their copyright owners, unless otherwise agreed
|
||||||
|
# upon. The license for this file, and modifications and additions to the
|
||||||
|
# file, is the same license as for the pristine package itself (unless the
|
||||||
|
# license for the pristine package is not an Open Source License, in which
|
||||||
|
# case the license is the MIT License). An "Open Source License" is a
|
||||||
|
# license that conforms to the Open Source Definition (Version 1.9)
|
||||||
|
# published by the Open Source Initiative.
|
||||||
|
|
||||||
|
# Please submit bugfixes or comments via http://bugs.opensuse.org/
|
||||||
|
#
|
||||||
|
|
||||||
|
Name: perl-Parallel-Iterator
|
||||||
|
Version: 1.00
|
||||||
|
Release: 1
|
||||||
|
License: GPL+ or Artistic
|
||||||
|
%define cpan_name Parallel-Iterator
|
||||||
|
Summary: Simple parallel execution
|
||||||
|
Url: http://search.cpan.org/dist/Parallel-Iterator/
|
||||||
|
Group: Development/Libraries/Perl
|
||||||
|
#Source: http://www.cpan.org/authors/id/A/AN/ANDYA/Parallel-Iterator-%{version}.tar.gz
|
||||||
|
Source: %{cpan_name}-%{version}.tar.bz2
|
||||||
|
BuildRequires: perl(Config)
|
||||||
|
BuildRequires: perl(IO::Handle)
|
||||||
|
BuildRequires: perl(IO::Select)
|
||||||
|
BuildRequires: perl
|
||||||
|
BuildRequires: perl-macros
|
||||||
|
BuildRequires: perl(Module::Build)
|
||||||
|
Requires: perl(Config)
|
||||||
|
Requires: perl(IO::Handle)
|
||||||
|
Requires: perl(IO::Select)
|
||||||
|
BuildRoot: %{_tmppath}/%{name}-%{version}-build
|
||||||
|
BuildArch: noarch
|
||||||
|
%{perl_requires}
|
||||||
|
|
||||||
|
%description
|
||||||
|
The 'map' function applies a user supplied transformation function to each
|
||||||
|
element in a list, returning a new list containing the transformed
|
||||||
|
elements.
|
||||||
|
|
||||||
|
This module provides a 'parallel map'. Multiple worker processes are forked
|
||||||
|
so that many instances of the transformation function may be executed
|
||||||
|
simultaneously.
|
||||||
|
|
||||||
|
For time consuming operations, particularly operations that spend most of
|
||||||
|
their time waiting for I/O, this is a big performance win. It also provides
|
||||||
|
a simple idiom to make effective use of multi CPU systems.
|
||||||
|
|
||||||
|
There is, however, a considerable overhead associated with forking, so the
|
||||||
|
example in the synopsis (doubling a list of numbers) is _not_ a sensible
|
||||||
|
use of this module.
|
||||||
|
|
||||||
|
Example
|
||||||
|
Imagine you have an array of URLs to fetch:
|
||||||
|
|
||||||
|
my @urls = qw(
|
||||||
|
http://google.com/
|
||||||
|
http://hexten.net/
|
||||||
|
http://search.cpan.org/
|
||||||
|
... and lots more ...
|
||||||
|
);
|
||||||
|
|
||||||
|
Write a function that retrieves a URL and returns its contents or undef
|
||||||
|
if it can't be fetched:
|
||||||
|
|
||||||
|
sub fetch {
|
||||||
|
my $url = shift;
|
||||||
|
my $resp = $ua->get($url);
|
||||||
|
return unless $resp->is_success;
|
||||||
|
return $resp->content;
|
||||||
|
};
|
||||||
|
|
||||||
|
Now write a function to synthesize a special kind of iterator:
|
||||||
|
|
||||||
|
sub list_iter {
|
||||||
|
my @ar = @_;
|
||||||
|
my $pos = 0;
|
||||||
|
return sub {
|
||||||
|
return if $pos >= @ar;
|
||||||
|
my @r = ( $pos, $ar[$pos] ); # Note: returns ( index, value )
|
||||||
|
$pos++;
|
||||||
|
return @r;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
The returned iterator will return each element of the array in turn and
|
||||||
|
then undef. Actually it returns both the index _and_ the value of each
|
||||||
|
element in the array. Because multiple instances of the transformation
|
||||||
|
function execute in parallel the results won't necessarily come back in
|
||||||
|
order. The array index will later allow us to put completed items in
|
||||||
|
the correct place in an output array.
|
||||||
|
|
||||||
|
Get an iterator for the list of URLs:
|
||||||
|
|
||||||
|
my $url_iter = list_iter( @urls );
|
||||||
|
|
||||||
|
Then wrap it in another iterator which will return the transformed
|
||||||
|
results:
|
||||||
|
|
||||||
|
my $page_iter = iterate( \&fetch, $url_iter );
|
||||||
|
|
||||||
|
Finally loop over the returned iterator storing results:
|
||||||
|
|
||||||
|
my @out = ( );
|
||||||
|
while ( my ( $index, $value ) = $page_iter->() ) {
|
||||||
|
$out[$index] = $value;
|
||||||
|
}
|
||||||
|
|
||||||
|
Behind the scenes your program forked into ten (by default) instances
|
||||||
|
of itself and executed the page requests in parallel.
|
||||||
|
|
||||||
|
Simpler interfaces
|
||||||
|
Having to construct an iterator is a pain so 'iterate' is smart enough
|
||||||
|
to do that for you. Instead of passing an iterator just pass a
|
||||||
|
reference to the array:
|
||||||
|
|
||||||
|
my $page_iter = iterate( \&fetch, \@urls );
|
||||||
|
|
||||||
|
If you pass a hash reference the iterator you get back will return key,
|
||||||
|
value pairs:
|
||||||
|
|
||||||
|
my $some_iter = iterate( \&fetch, \%some_hash );
|
||||||
|
|
||||||
|
If the returned iterator is inconvenient you can get back a hash or
|
||||||
|
array instead:
|
||||||
|
|
||||||
|
my @done = iterate_as_array( \&fetch, @urls );
|
||||||
|
|
||||||
|
my %done = iterate_as_hash( \&worker, %jobs );
|
||||||
|
|
||||||
|
How It Works
|
||||||
|
The current process is forked once for each worker. Each forked child
|
||||||
|
is connected to the parent by a pair of pipes. The child's STDIN,
|
||||||
|
STDOUT and STDERR are unaffected.
|
||||||
|
|
||||||
|
Input values are serialised (using Storable) and passed to the workers.
|
||||||
|
Completed work items are serialised and returned.
|
||||||
|
|
||||||
|
Caveats
|
||||||
|
Parallel::Iterator is designed to be simple to use - but the underlying
|
||||||
|
forking of the main process can cause mystifying problems unless you
|
||||||
|
have an understanding of what is going on behind the scenes.
|
||||||
|
|
||||||
|
Worker execution environment
|
||||||
|
All code apart from the worker subroutine executes in the parent
|
||||||
|
process as normal. The worker executes in a forked instance of the
|
||||||
|
parent process. That means that things like this won't work as
|
||||||
|
expected:
|
||||||
|
|
||||||
|
my %tally = ();
|
||||||
|
my @r = iterate_as_array( sub {
|
||||||
|
my ($id, $name) = @_;
|
||||||
|
$tally{$name}++; # might not do what you think it does
|
||||||
|
return reverse $name;
|
||||||
|
}, @names );
|
||||||
|
|
||||||
|
# Now print out the tally...
|
||||||
|
while ( my ( $name, $count ) = each %tally ) {
|
||||||
|
printf("%5d : %s\n", $count, $name);
|
||||||
|
}
|
||||||
|
|
||||||
|
Because the worker is a closure it can see the '%tally' hash from
|
||||||
|
its enclosing scope; but because it's running in a forked clone of
|
||||||
|
the parent process it modifies its own copy of '%tally' rather than
|
||||||
|
the copy for the parent process.
|
||||||
|
|
||||||
|
That means that after the job terminates the '%tally' in the parent
|
||||||
|
process will be empty.
|
||||||
|
|
||||||
|
In general you should avoid side effects in your worker
|
||||||
|
subroutines.
|
||||||
|
|
||||||
|
Serialization
|
||||||
|
Values are serialised using the Storable manpage to pass to the
|
||||||
|
worker subroutine and results from the worker are again serialised
|
||||||
|
before being passed back. Be careful what your values refer to:
|
||||||
|
everything has to be serialised. If there's an indirect way to
|
||||||
|
reach a large object graph Storable will find it and performance
|
||||||
|
will suffer.
|
||||||
|
|
||||||
|
To find out how large your serialised values are serialise one of
|
||||||
|
them and check its size:
|
||||||
|
|
||||||
|
use Storable qw( freeze );
|
||||||
|
my $serialized = freeze $some_obj;
|
||||||
|
print length($serialized), " bytes\n";
|
||||||
|
|
||||||
|
In your tests you may wish to guard against the possibility of a
|
||||||
|
change to the structure of your values resulting in a sudden
|
||||||
|
increase in serialized size:
|
||||||
|
|
||||||
|
ok length(freeze $some_obj) < 1000, "Object too bulky?";
|
||||||
|
|
||||||
|
See the documentation for the Storable manpage for other caveats.
|
||||||
|
|
||||||
|
Performance
|
||||||
|
Process forking is expensive. Only use Parallel::Iterator in cases
|
||||||
|
where:
|
||||||
|
|
||||||
|
* the worker waits for I/O
|
||||||
|
|
||||||
|
The case of fetching web pages is a good example of this.
|
||||||
|
Fetching a page with LWP::UserAgent may take as long as a few
|
||||||
|
seconds but probably consumes only a few milliseconds of
|
||||||
|
processor time. Running many requests in parallel is a huge win -
|
||||||
|
but be kind to the server you're talking to: don't launch a lot
|
||||||
|
of parallel requests unless it's your server or you know it can
|
||||||
|
handle the load.
|
||||||
|
|
||||||
|
* the worker is CPU intensive and you have multiple cores / CPUs
|
||||||
|
|
||||||
|
If the worker is doing an expensive calculation you can
|
||||||
|
parallelise that across multiple CPU cores. Benchmark first
|
||||||
|
though. There's a considerable overhead associated with
|
||||||
|
Parallel::Iterator; unless your calculations are time consuming
|
||||||
|
that overhead will dwarf whatever time they take.
|
||||||
|
|
||||||
|
%prep
|
||||||
|
%setup -q -n %{cpan_name}-%{version}
|
||||||
|
|
||||||
|
%build
|
||||||
|
%{__perl} Build.PL installdirs=vendor
|
||||||
|
./Build build flags=%{?_smp_mflags}
|
||||||
|
|
||||||
|
%check
|
||||||
|
./Build test
|
||||||
|
|
||||||
|
%install
|
||||||
|
./Build install destdir=%{buildroot} create_packlist=0
|
||||||
|
%perl_gen_filelist
|
||||||
|
|
||||||
|
%clean
|
||||||
|
%{__rm} -rf %{buildroot}
|
||||||
|
|
||||||
|
%files -f %{name}.files
|
||||||
|
%defattr(644,root,root,755)
|
||||||
|
%doc Changes README
|
||||||
|
|
||||||
|
%changelog
|
Reference in New Issue
Block a user