Accepting request 990968 from devel:languages:perl:autoupdate
- updated to 1.001
  see /usr/share/doc/packages/perl-Parallel-Iterator/Changes

OBS-URL: https://build.opensuse.org/request/show/990968
OBS-URL: https://build.opensuse.org/package/show/devel:languages:perl/perl-Parallel-Iterator?expand=0&rev=5
committed by Git OBS Bridge
parent aa3f4a8555
commit ae27b4e131
Parallel-Iterator-1.00.tar.gz (deleted)
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e8495095cf5746a14e154037b11b0d911da2a32283b77291abb37bf6311345f4
-size 15227

Parallel-Iterator-1.001.tar.gz (new file)
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72a6191a4fb1fee102b0e7ccc161ec574b0e6b00368286b01999c0f660f21d6e
+size 20961
cpanspec.yml (new file)
@@ -0,0 +1,35 @@
+---
+#description_paragraphs: 3
+#description: |-
+#  override description from CPAN
+#summary: override summary from CPAN
+#no_testing: broken upstream
+#sources:
+# - source1
+# - source2
+#patches:
+# foo.patch: -p1
+# bar.patch:
+# baz.patch: PATCH-FIX-OPENSUSE
+#preamble: |-
+# BuildRequires:  gcc-c++
+#post_prep: |-
+# hunspell=`pkg-config --libs hunspell | sed -e 's,-l,,; s, *,,g'`
+# sed -i -e "s,hunspell-X,$hunspell," t/00-prereq.t Makefile.PL
+#post_build: |-
+# rm unused.files
+#post_install: |-
+# sed on %{name}.files
+#license: SUSE-NonFree
+#skip_noarch: 1
+#custom_build: |-
+#./Build build flags=%{?_smp_mflags} --myflag
+#custom_test: |-
+#startserver && make test
+#ignore_requires: Bizarre::Module
+#skip_doc: regexp_to_skip_for_doc.*
+#add_doc: files to add to docs
+#misc: |-
+#anything else to be added to spec file
+#follows directly after %files section, so it can contain new blocks or also
+#changes to %files section
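Every key in this template ships commented out; a package opts in by uncommenting what it needs. This package uses none of the overrides, but as a purely hypothetical illustration, a module that needed one patch and an extra build dependency might use:

    ---
    patches:
      # hypothetical patch name, applied with -p1
      fix-build.patch: -p1
    preamble: |-
      BuildRequires:  gcc-c++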
perl-Parallel-Iterator.changes
@@ -1,3 +1,9 @@
+-------------------------------------------------------------------
+Sat Jul 16 03:08:02 UTC 2022 - Tina Müller <timueller+perl@suse.de>
+
+- updated to 1.001
+  see /usr/share/doc/packages/perl-Parallel-Iterator/Changes
+
 -------------------------------------------------------------------
 Mon Jan 17 15:57:27 UTC 2011 - coolo@novell.com
 
perl-Parallel-Iterator.spec
@@ -1,7 +1,7 @@
 #
 # spec file for package perl-Parallel-Iterator
 #
-# Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany.
+# Copyright (c) 2022 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -12,39 +12,25 @@
 # license that conforms to the Open Source Definition (Version 1.9)
 # published by the Open Source Initiative.
 
-# Please submit bugfixes or comments via http://bugs.opensuse.org/
+# Please submit bugfixes or comments via https://bugs.opensuse.org/
 #
 
 
-Name:           perl-Parallel-Iterator
-Version:        1.00
-Release:        1
-License:        GPL+ or Artistic
 %define cpan_name Parallel-Iterator
+Name:           perl-Parallel-Iterator
+Version:        1.001
+Release:        0
+License:        Artistic-1.0 OR GPL-1.0-or-later
 Summary:        Simple parallel execution
-Url:            http://search.cpan.org/dist/Parallel-Iterator/
-Group:          Development/Libraries/Perl
-#Source:        http://www.cpan.org/authors/id/A/AN/ANDYA/Parallel-Iterator-%{version}.tar.gz
-Source:         %{cpan_name}-%{version}.tar.gz
-BuildRequires:  perl(Config)
-BuildRequires:  perl(IO::Handle)
-BuildRequires:  perl(IO::Select)
+URL:            https://metacpan.org/release/%{cpan_name}
+Source0:        https://cpan.metacpan.org/authors/id/A/AR/ARISTOTLE/%{cpan_name}-%{version}.tar.gz
+Source1:        cpanspec.yml
+BuildArch:      noarch
 BuildRequires:  perl
 BuildRequires:  perl-macros
-BuildRequires:  perl(Module::Build)
-Requires:       perl(Config)
-Requires:       perl(IO::Handle)
-Requires:       perl(IO::Select)
-BuildRoot:      %{_tmppath}/%{name}-%{version}-build
-BuildArch:      noarch
 %{perl_requires}
 
 %description
 The 'map' function applies a user supplied transformation function to each
 element in a list, returning a new list containing the transformed
 elements.
 
 This module provides a 'parallel map'. Multiple worker processes are forked
 so that many instances of the transformation function may be executed
 simultaneously.
@@ -57,190 +43,23 @@ There is, however, a considerable overhead associated with forking, so the
 example in the synopsis (doubling a list of numbers) is _not_ a sensible
 use of this module.
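An editorial aside: the synopsis referred to here does not appear in this diff. Based purely on the interface shown later in this description (iterate takes a worker and an array reference, the worker receives an index and a value, and ten workers are forked by default), the doubling example is essentially:

    use Parallel::Iterator qw( iterate );

    my @nums = ( 1 .. 100 );

    # Fork workers to double each number -- far more overhead than the job is worth.
    my $iter = iterate( sub {
        my ( $id, $num ) = @_;
        return $num * 2;
    }, \@nums );

    my @doubled = ();
    while ( my ( $index, $value ) = $iter->() ) {
        $doubled[$index] = $value;
    }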
-
-Example
-    Imagine you have an array of URLs to fetch:
-
-        my @urls = qw(
-            http://google.com/
-            http://hexten.net/
-            http://search.cpan.org/
-            ... and lots more ...
-        );
-
-    Write a function that retrieves a URL and returns its contents or undef
-    if it can't be fetched:
-
-        sub fetch {
-            my $url = shift;
-            my $resp = $ua->get($url);
-            return unless $resp->is_success;
-            return $resp->content;
-        };
-
-    Now write a function to synthesize a special kind of iterator:
-
-        sub list_iter {
-            my @ar  = @_;
-            my $pos = 0;
-            return sub {
-                return if $pos >= @ar;
-                my @r = ( $pos, $ar[$pos] );    # Note: returns ( index, value )
-                $pos++;
-                return @r;
-            };
-        }
-
-    The returned iterator will return each element of the array in turn and
-    then undef. Actually it returns both the index _and_ the value of each
-    element in the array. Because multiple instances of the transformation
-    function execute in parallel the results won't necessarily come back in
-    order. The array index will later allow us to put completed items in
-    the correct place in an output array.
-
-    Get an iterator for the list of URLs:
-
-        my $url_iter = list_iter( @urls );
-
-    Then wrap it in another iterator which will return the transformed
-    results:
-
-        my $page_iter = iterate( \&fetch, $url_iter );
-
-    Finally loop over the returned iterator storing results:
-
-        my @out = ( );
-        while ( my ( $index, $value ) = $page_iter->() ) {
-            $out[$index] = $value;
-        }
-
-    Behind the scenes your program forked into ten (by default) instances
-    of itself and executed the page requests in parallel.
-
-Simpler interfaces
-    Having to construct an iterator is a pain so 'iterate' is smart enough
-    to do that for you. Instead of passing an iterator just pass a
-    reference to the array:
-
-        my $page_iter = iterate( \&fetch, \@urls );
-
-    If you pass a hash reference the iterator you get back will return key,
-    value pairs:
-
-        my $some_iter = iterate( \&fetch, \%some_hash );
-
-    If the returned iterator is inconvenient you can get back a hash or
-    array instead:
-
-        my @done = iterate_as_array( \&fetch, @urls );
-
-        my %done = iterate_as_hash( \&worker, %jobs );
-
-How It Works
-    The current process is forked once for each worker. Each forked child
-    is connected to the parent by a pair of pipes. The child's STDIN,
-    STDOUT and STDERR are unaffected.
-
-    Input values are serialised (using Storable) and passed to the workers.
-    Completed work items are serialised and returned.
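An editorial sketch of the mechanism just described, for a single worker and a single job: the parent forks a child connected by two pipes and round-trips one Storable-serialised work item. The real module manages many workers at once (note the perl(IO::Select) dependency in the old spec), and its actual wire protocol is not shown in this diff.

    use strict;
    use warnings;
    use Storable qw( freeze thaw );

    # One pipe in each direction, as the module sets up per worker.
    pipe( my $from_child, my $to_parent ) or die "pipe: $!";
    pipe( my $from_parent, my $to_child ) or die "pipe: $!";

    my $pid = fork;
    die "fork: $!" unless defined $pid;

    if ( $pid == 0 ) {    # child: the worker side
        close $from_child;
        close $to_child;
        # Read one serialised job, transform it, send the result back serialised.
        my $job    = thaw( do { local $/; <$from_parent> } );
        my $result = { id => $job->{id}, value => $job->{value} * 2 };
        print {$to_parent} freeze($result);
        close $to_parent;
        exit 0;
    }

    # parent: serialise a work item, then collect the completed item
    close $from_parent;
    close $to_parent;
    print {$to_child} freeze( { id => 0, value => 21 } );
    close $to_child;    # EOF tells the child there is no more work
    my $result = thaw( do { local $/; <$from_child> } );
    waitpid $pid, 0;
    print "item $result->{id} => $result->{value}\n";    # item 0 => 42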
-
-Caveats
-    Parallel::Iterator is designed to be simple to use - but the underlying
-    forking of the main process can cause mystifying problems unless you
-    have an understanding of what is going on behind the scenes.
-
-    Worker execution environment
-        All code apart from the worker subroutine executes in the parent
-        process as normal. The worker executes in a forked instance of the
-        parent process. That means that things like this won't work as
-        expected:
-
-            my %tally = ();
-            my @r = iterate_as_array( sub {
-                my ($id, $name) = @_;
-                $tally{$name}++;    # might not do what you think it does
-                return reverse $name;
-            }, @names );
-
-            # Now print out the tally...
-            while ( my ( $name, $count ) = each %tally ) {
-                printf("%5d : %s\n", $count, $name);
-            }
-
-        Because the worker is a closure it can see the '%tally' hash from
-        its enclosing scope; but because it's running in a forked clone of
-        the parent process it modifies its own copy of '%tally' rather than
-        the copy for the parent process.
-
-        That means that after the job terminates the '%tally' in the parent
-        process will be empty.
-
-        In general you should avoid side effects in your worker
-        subroutines.
-
-    Serialization
-        Values are serialised using the Storable manpage to pass to the
-        worker subroutine and results from the worker are again serialised
-        before being passed back. Be careful what your values refer to:
-        everything has to be serialised. If there's an indirect way to
-        reach a large object graph Storable will find it and performance
-        will suffer.
-
-        To find out how large your serialised values are serialise one of
-        them and check its size:
-
-            use Storable qw( freeze );
-            my $serialized = freeze $some_obj;
-            print length($serialized), " bytes\n";
-
-        In your tests you may wish to guard against the possibility of a
-        change to the structure of your values resulting in a sudden
-        increase in serialized size:
-
-            ok length(freeze $some_obj) < 1000, "Object too bulky?";
-
-        See the documentation for the Storable manpage for other caveats.
-
-    Performance
-        Process forking is expensive. Only use Parallel::Iterator in cases
-        where:
-
-        * the worker waits for I/O
-
-          The case of fetching web pages is a good example of this.
-          Fetching a page with LWP::UserAgent may take as long as a few
-          seconds but probably consumes only a few milliseconds of
-          processor time. Running many requests in parallel is a huge win -
-          but be kind to the server you're talking to: don't launch a lot
-          of parallel requests unless it's your server or you know it can
-          handle the load.
-
-        * the worker is CPU intensive and you have multiple cores / CPUs
-
-          If the worker is doing an expensive calculation you can
-          parallelise that across multiple CPU cores. Benchmark first
-          though. There's a considerable overhead associated with
-          Parallel::Iterator; unless your calculations are time consuming
-          that overhead will dwarf whatever time they take.
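To make "Benchmark first" concrete, an editorial sketch comparing a plain serial map against the parallel version on deliberately trivial work (the names and counts are arbitrary); here the forking and Storable overhead should make the parallel case far slower:

    use strict;
    use warnings;
    use Benchmark qw( timethese );
    use Parallel::Iterator qw( iterate_as_array );

    my @nums = ( 1 .. 1000 );

    # For work this cheap the serial map should win comfortably;
    # the parallel version pays for forking and serialisation round trips.
    timethese( 10, {
        serial   => sub { my @r = map { $_ * 2 } @nums; },
        parallel => sub {
            # worker receives ( index, value ), as described above
            my @r = iterate_as_array( sub { $_[1] * 2 }, \@nums );
        },
    } );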
 
 %prep
-%setup -q -n %{cpan_name}-%{version}
+%autosetup -n %{cpan_name}-%{version}
 
 %build
-%{__perl} Build.PL installdirs=vendor
-./Build build flags=%{?_smp_mflags}
+perl Makefile.PL INSTALLDIRS=vendor
+%make_build
 
 %check
-./Build test
+make test
 
 %install
-./Build install destdir=%{buildroot} create_packlist=0
+%perl_make_install
 %perl_process_packlist
 %perl_gen_filelist
 
-%clean
-%{__rm} -rf %{buildroot}
-
 %files -f %{name}.files
-%defattr(644,root,root,755)
-%doc Changes README
+%doc Changes examples README
+%license LICENSE
 
 %changelog