From 68ebbd07d34fe9d7dd52974a5e6a285aaac12176aa3c2a520120a6c4a0d00581 Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Fri, 11 Nov 2011 11:18:40 +0000 Subject: [PATCH 1/5] OBS-URL: https://build.opensuse.org/package/show/devel:languages:perl/perl-HTML-TableExtract?expand=0&rev=22 --- HTML-TableExtract-2.11.tar.gz | 3 +++ perl-HTML-TableExtract.spec | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 HTML-TableExtract-2.11.tar.gz diff --git a/HTML-TableExtract-2.11.tar.gz b/HTML-TableExtract-2.11.tar.gz new file mode 100644 index 0000000..50d4b01 --- /dev/null +++ b/HTML-TableExtract-2.11.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1861d55a2aa1728ef56ea2d08d630b9a008456f1106994e4e49e76f56e4955ee +size 27123 diff --git a/perl-HTML-TableExtract.spec b/perl-HTML-TableExtract.spec index 5da40a1..d683fa9 100644 --- a/perl-HTML-TableExtract.spec +++ b/perl-HTML-TableExtract.spec @@ -1,7 +1,7 @@ # -# spec file for package perl-HTML-TableExtract (Version 2.10) +# spec file for package perl-HTML-TableExtract # -# Copyright (c) 2010 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -15,9 +15,11 @@ # Please submit bugfixes or comments via http://bugs.opensuse.org/ # + + Name: perl-HTML-TableExtract Version: 2.10 -Release: 82 +Release: 86 License: GPL+ or Artistic %define cpan_name HTML-TableExtract Summary: For extracting the content contained in tables within an HTML document From 3d4e564a243f61dbabc15ef346af9b8e0d993dfa36019b5686d3cdccb150b5a8 Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Tue, 20 Dec 2011 09:14:22 +0000 Subject: [PATCH 2/5] - update to 2.11 - added parsing context, override for eof() and parse() for memory clear on new docs or post-eof() - fixed some long standing test warnings OBS-URL: https://build.opensuse.org/package/show/devel:languages:perl/perl-HTML-TableExtract?expand=0&rev=23 --- HTML-TableExtract-2.10.tar.bz2 | 3 --- perl-HTML-TableExtract.changes | 8 ++++++++ perl-HTML-TableExtract.spec | 12 +++++------- 3 files changed, 13 insertions(+), 10 deletions(-) delete mode 100644 HTML-TableExtract-2.10.tar.bz2 diff --git a/HTML-TableExtract-2.10.tar.bz2 b/HTML-TableExtract-2.10.tar.bz2 deleted file mode 100644 index c2fbfd0..0000000 --- a/HTML-TableExtract-2.10.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:26485211a78da74df7df5b1ebfd34cb017437179cf14937265b23959e1356d56 -size 23675 diff --git a/perl-HTML-TableExtract.changes b/perl-HTML-TableExtract.changes index 69bcc45..af0ee4a 100644 --- a/perl-HTML-TableExtract.changes +++ b/perl-HTML-TableExtract.changes @@ -1,3 +1,11 @@ +------------------------------------------------------------------- +Tue Dec 20 09:13:30 UTC 2011 - coolo@suse.com + +- update to 2.11 + - added parsing context, override for eof() and parse() for + memory clear on new docs or post-eof() + - fixed some long standing test warnings + ------------------------------------------------------------------- Fri Feb 25 17:51:03 UTC 2011 - chris@computersalat.de diff --git a/perl-HTML-TableExtract.spec b/perl-HTML-TableExtract.spec index d683fa9..1e1cfbe 100644 --- a/perl-HTML-TableExtract.spec +++ b/perl-HTML-TableExtract.spec @@ -16,17 +16,15 @@ # - Name: perl-HTML-TableExtract -Version: 2.10 -Release: 86 -License: GPL+ or Artistic +Version: 2.11 +Release: 0 %define cpan_name HTML-TableExtract Summary: For extracting the content contained in tables within an HTML document -Url: http://search.cpan.org/dist/HTML-TableExtract/ +License: GPL-1.0+ or Artistic-1.0 Group: Development/Libraries/Perl -#Source: http://www.cpan.org/authors/id/M/MS/MSISK/HTML-TableExtract-2.10.tar.gz -Source: %{cpan_name}-%{version}.tar.bz2 +Url: http://search.cpan.org/dist/HTML-TableExtract/ +Source: http://www.cpan.org/authors/id/M/MS/MSISK/HTML-TableExtract-%{version}.tar.gz Patch0: %{cpan_name}-2.10-HTML.patch BuildArch: noarch BuildRoot: %{_tmppath}/%{name}-%{version}-build From 80be1dc906869be99055903c4d9e0a16c3dffb2c248f84d256cb33d2e6c5a294 Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Tue, 20 Dec 2011 13:40:27 +0000 Subject: [PATCH 3/5] - updated to 2.11 OBS-URL: https://build.opensuse.org/package/show/devel:languages:perl/perl-HTML-TableExtract?expand=0&rev=24 --- perl-HTML-TableExtract.changes | 5 ++++ perl-HTML-TableExtract.spec | 51 ++++++---------------------------- 2 files changed, 13 insertions(+), 43 deletions(-) diff --git a/perl-HTML-TableExtract.changes b/perl-HTML-TableExtract.changes index af0ee4a..86b5a53 100644 --- a/perl-HTML-TableExtract.changes +++ b/perl-HTML-TableExtract.changes @@ -1,3 +1,8 @@ +------------------------------------------------------------------- +Tue Dec 20 13:38:24 UTC 2011 - coolo@suse.com + +- updated to 2.11 + ------------------------------------------------------------------- Tue Dec 20 09:13:30 UTC 2011 - coolo@suse.com diff --git a/perl-HTML-TableExtract.spec b/perl-HTML-TableExtract.spec index 1e1cfbe..35c5408 100644 --- a/perl-HTML-TableExtract.spec +++ b/perl-HTML-TableExtract.spec @@ -20,18 +20,21 @@ Name: perl-HTML-TableExtract Version: 2.11 Release: 0 %define cpan_name HTML-TableExtract -Summary: For extracting the content contained in tables within an HTML document +Summary: Perl module for extracting the content contained in tables within an HTM[cut] License: GPL-1.0+ or Artistic-1.0 Group: Development/Libraries/Perl Url: http://search.cpan.org/dist/HTML-TableExtract/ -Source: http://www.cpan.org/authors/id/M/MS/MSISK/HTML-TableExtract-%{version}.tar.gz -Patch0: %{cpan_name}-2.10-HTML.patch +Source: http://www.cpan.org/authors/id/M/MS/MSISK/%{cpan_name}-%{version}.tar.gz +Patch0: HTML-TableExtract-2.10-HTML.patch BuildArch: noarch BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: perl BuildRequires: perl-macros BuildRequires: perl(HTML::ElementTable) >= 1.16 BuildRequires: perl(HTML::Parser) +#BuildRequires: perl(HTML::Entities) +#BuildRequires: perl(HTML::TableExtract) +#BuildRequires: perl(testload) Requires: perl(HTML::ElementTable) >= 1.16 Requires: perl(HTML::Parser) %{perl_requires} @@ -94,45 +97,10 @@ When extracting only text from tables, the text is decoded with HTML::Entities by default; this can be disabled by setting the _decode_ parameter to 0. -Extraction Modes - The default mode of extraction for HTML::TableExtract is raw text or - HTML. In this mode, embedded tables are completely decoupled from one - another. In this case, HTML::TableExtract is a subclass of - HTML::Parser: - - use HTML::TableExtract; - - Alternativevly, tables can be extracted as HTML::ElementTable - structures, which are in turn embedded in an HTML::Element tree - representing the entire HTML document. Embedded tables are not - decoupled from one another since this tree structure must be - manitained. In this case, HTML::TableExtract is a subclass of - HTML::TreeBuilder (itself a subclass of HTML:::Parser): - - use HTML::TableExtract qw(tree); - - In either case, the basic interface for HTML::TableExtract and the - resulting table objects remains the same -- all that changes is what - you can do with the resulting data. - - HTML::TableExtract is a subclass of HTML::Parser, and as such inherits - all of its basic methods such as 'parse()' and 'parse_file()'. During - scans, 'start()', 'end()', and 'text()' are utilized. Feel free to - override them, but if you do not eventually invoke them in the SUPER - class with some content, results are not guaranteed. - -Advice - The main point of this module was to provide a flexible method of - extracting tabular information from HTML documents without relying to - heavily on the document layout. For that reason, I suggest using - _Headers_ whenever possible -- that way, you are anchoring your - extraction on what the document is trying to communicate rather than - some feature of the HTML comprising the document (other than the fact - that the data is contained in a table). - %prep %setup -q -n %{cpan_name}-%{version} %patch0 -p1 +find . -type f -print0 | xargs -0 chmod 644 %build %{__perl} Makefile.PL INSTALLDIRS=vendor @@ -146,11 +114,8 @@ Advice %perl_process_packlist %perl_gen_filelist -%clean -%{__rm} -rf %{buildroot} - %files -f %{name}.files -%defattr(644,root,root,755) +%defattr(-,root,root,755) %doc Changes README %changelog From fa01585bf57991da0d0b5afdeacf3dae230ca665fe3d0c89ca2d8420d2a580ec Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Tue, 20 Dec 2011 13:41:25 +0000 Subject: [PATCH 4/5] OBS-URL: https://build.opensuse.org/package/show/devel:languages:perl/perl-HTML-TableExtract?expand=0&rev=25 --- perl-HTML-TableExtract.changes | 5 ----- 1 file changed, 5 deletions(-) diff --git a/perl-HTML-TableExtract.changes b/perl-HTML-TableExtract.changes index 86b5a53..af0ee4a 100644 --- a/perl-HTML-TableExtract.changes +++ b/perl-HTML-TableExtract.changes @@ -1,8 +1,3 @@ -------------------------------------------------------------------- -Tue Dec 20 13:38:24 UTC 2011 - coolo@suse.com - -- updated to 2.11 - ------------------------------------------------------------------- Tue Dec 20 09:13:30 UTC 2011 - coolo@suse.com From a3f4ed2b3624c0d82c2e8eefee691b50996de46ab5ad0f82d9cfdad07c7a7588 Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Wed, 21 Dec 2011 14:24:39 +0000 Subject: [PATCH 5/5] without patch it works better OBS-URL: https://build.opensuse.org/package/show/devel:languages:perl/perl-HTML-TableExtract?expand=0&rev=26 --- HTML-TableExtract-2.10-HTML.patch | 6 ------ perl-HTML-TableExtract.spec | 2 -- 2 files changed, 8 deletions(-) delete mode 100644 HTML-TableExtract-2.10-HTML.patch diff --git a/HTML-TableExtract-2.10-HTML.patch b/HTML-TableExtract-2.10-HTML.patch deleted file mode 100644 index e29f486..0000000 --- a/HTML-TableExtract-2.10-HTML.patch +++ /dev/null @@ -1,6 +0,0 @@ -diff -ruN HTML-TableExtract-2.10-orig/t/gnarly.html HTML-TableExtract-2.10/t/gnarly.html ---- HTML-TableExtract-2.10-orig/t/gnarly.html 2006-05-01 23:22:47.000000000 +0200 -+++ HTML-TableExtract-2.10/t/gnarly.html 2011-02-25 18:41:08.000000000 +0100 -@@ -1 +1 @@ --gnarly table
(0,0) [1,4](0,1) [2,4]
(1,0) [2,1](1,1) [1,1](1,2) [1,2]
(2,0) [2,4](2,1) [2,2](2,2) [1,1]
(3,0) [1,1](3,1) [1,1]
(4,0) [3,2](4,1) [1,1](4,2) [3,1](4,3) [4,4]
(5,0) [1,1]
(6,0) [1,1]
(7,0) [1,4]
-+gnarly table
(0,0) [1,4](0,1) [2,4]
(1,0) [2,1](1,1) [1,1](1,2) [1,2]
(2,0) [2,4](2,1) [2,2](2,2) [1,1]
(3,0) [1,1](3,1) [1,1]
(4,0) [3,2](4,1) [1,1](4,2) [3,1](4,3) [4,4]
(5,0) [1,1]
(6,0) [1,1]
(7,0) [1,4]
diff --git a/perl-HTML-TableExtract.spec b/perl-HTML-TableExtract.spec index 35c5408..cbd3d69 100644 --- a/perl-HTML-TableExtract.spec +++ b/perl-HTML-TableExtract.spec @@ -25,7 +25,6 @@ License: GPL-1.0+ or Artistic-1.0 Group: Development/Libraries/Perl Url: http://search.cpan.org/dist/HTML-TableExtract/ Source: http://www.cpan.org/authors/id/M/MS/MSISK/%{cpan_name}-%{version}.tar.gz -Patch0: HTML-TableExtract-2.10-HTML.patch BuildArch: noarch BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: perl @@ -99,7 +98,6 @@ parameter to 0. %prep %setup -q -n %{cpan_name}-%{version} -%patch0 -p1 find . -type f -print0 | xargs -0 chmod 644 %build