diff --git a/HTML-TableExtract-2.10-HTML.patch b/HTML-TableExtract-2.10-HTML.patch deleted file mode 100644 index e29f486..0000000 --- a/HTML-TableExtract-2.10-HTML.patch +++ /dev/null @@ -1,6 +0,0 @@ -diff -ruN HTML-TableExtract-2.10-orig/t/gnarly.html HTML-TableExtract-2.10/t/gnarly.html ---- HTML-TableExtract-2.10-orig/t/gnarly.html 2006-05-01 23:22:47.000000000 +0200 -+++ HTML-TableExtract-2.10/t/gnarly.html 2011-02-25 18:41:08.000000000 +0100 -@@ -1 +1 @@ --gnarly table
(0,0) [1,4](0,1) [2,4]
(1,0) [2,1](1,1) [1,1](1,2) [1,2]
(2,0) [2,4](2,1) [2,2](2,2) [1,1]
(3,0) [1,1](3,1) [1,1]
(4,0) [3,2](4,1) [1,1](4,2) [3,1](4,3) [4,4]
(5,0) [1,1]
(6,0) [1,1]
(7,0) [1,4]
-+gnarly table
(0,0) [1,4](0,1) [2,4]
(1,0) [2,1](1,1) [1,1](1,2) [1,2]
(2,0) [2,4](2,1) [2,2](2,2) [1,1]
(3,0) [1,1](3,1) [1,1]
(4,0) [3,2](4,1) [1,1](4,2) [3,1](4,3) [4,4]
(5,0) [1,1]
(6,0) [1,1]
(7,0) [1,4]
diff --git a/HTML-TableExtract-2.10.tar.bz2 b/HTML-TableExtract-2.10.tar.bz2 deleted file mode 100644 index c2fbfd0..0000000 --- a/HTML-TableExtract-2.10.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:26485211a78da74df7df5b1ebfd34cb017437179cf14937265b23959e1356d56 -size 23675 diff --git a/HTML-TableExtract-2.11.tar.gz b/HTML-TableExtract-2.11.tar.gz new file mode 100644 index 0000000..50d4b01 --- /dev/null +++ b/HTML-TableExtract-2.11.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1861d55a2aa1728ef56ea2d08d630b9a008456f1106994e4e49e76f56e4955ee +size 27123 diff --git a/perl-HTML-TableExtract.changes b/perl-HTML-TableExtract.changes index 69bcc45..af0ee4a 100644 --- a/perl-HTML-TableExtract.changes +++ b/perl-HTML-TableExtract.changes @@ -1,3 +1,11 @@ +------------------------------------------------------------------- +Tue Dec 20 09:13:30 UTC 2011 - coolo@suse.com + +- update to 2.11 + - added parsing context, override for eof() and parse() for + memory clear on new docs or post-eof() + - fixed some long standing test warnings + ------------------------------------------------------------------- Fri Feb 25 17:51:03 UTC 2011 - chris@computersalat.de diff --git a/perl-HTML-TableExtract.spec b/perl-HTML-TableExtract.spec index d683fa9..cbd3d69 100644 --- a/perl-HTML-TableExtract.spec +++ b/perl-HTML-TableExtract.spec @@ -16,24 +16,24 @@ # - Name: perl-HTML-TableExtract -Version: 2.10 -Release: 86 -License: GPL+ or Artistic +Version: 2.11 +Release: 0 %define cpan_name HTML-TableExtract -Summary: For extracting the content contained in tables within an HTML document -Url: http://search.cpan.org/dist/HTML-TableExtract/ +Summary: Perl module for extracting the content contained in tables within an HTM[cut] +License: GPL-1.0+ or Artistic-1.0 Group: Development/Libraries/Perl -#Source: http://www.cpan.org/authors/id/M/MS/MSISK/HTML-TableExtract-2.10.tar.gz -Source: %{cpan_name}-%{version}.tar.bz2 -Patch0: %{cpan_name}-2.10-HTML.patch +Url: http://search.cpan.org/dist/HTML-TableExtract/ +Source: http://www.cpan.org/authors/id/M/MS/MSISK/%{cpan_name}-%{version}.tar.gz BuildArch: noarch BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: perl BuildRequires: perl-macros BuildRequires: perl(HTML::ElementTable) >= 1.16 BuildRequires: perl(HTML::Parser) +#BuildRequires: perl(HTML::Entities) +#BuildRequires: perl(HTML::TableExtract) +#BuildRequires: perl(testload) Requires: perl(HTML::ElementTable) >= 1.16 Requires: perl(HTML::Parser) %{perl_requires} @@ -96,45 +96,9 @@ When extracting only text from tables, the text is decoded with HTML::Entities by default; this can be disabled by setting the _decode_ parameter to 0. -Extraction Modes - The default mode of extraction for HTML::TableExtract is raw text or - HTML. In this mode, embedded tables are completely decoupled from one - another. In this case, HTML::TableExtract is a subclass of - HTML::Parser: - - use HTML::TableExtract; - - Alternativevly, tables can be extracted as HTML::ElementTable - structures, which are in turn embedded in an HTML::Element tree - representing the entire HTML document. Embedded tables are not - decoupled from one another since this tree structure must be - manitained. In this case, HTML::TableExtract is a subclass of - HTML::TreeBuilder (itself a subclass of HTML:::Parser): - - use HTML::TableExtract qw(tree); - - In either case, the basic interface for HTML::TableExtract and the - resulting table objects remains the same -- all that changes is what - you can do with the resulting data. - - HTML::TableExtract is a subclass of HTML::Parser, and as such inherits - all of its basic methods such as 'parse()' and 'parse_file()'. During - scans, 'start()', 'end()', and 'text()' are utilized. Feel free to - override them, but if you do not eventually invoke them in the SUPER - class with some content, results are not guaranteed. - -Advice - The main point of this module was to provide a flexible method of - extracting tabular information from HTML documents without relying to - heavily on the document layout. For that reason, I suggest using - _Headers_ whenever possible -- that way, you are anchoring your - extraction on what the document is trying to communicate rather than - some feature of the HTML comprising the document (other than the fact - that the data is contained in a table). - %prep %setup -q -n %{cpan_name}-%{version} -%patch0 -p1 +find . -type f -print0 | xargs -0 chmod 644 %build %{__perl} Makefile.PL INSTALLDIRS=vendor @@ -148,11 +112,8 @@ Advice %perl_process_packlist %perl_gen_filelist -%clean -%{__rm} -rf %{buildroot} - %files -f %{name}.files -%defattr(644,root,root,755) +%defattr(-,root,root,755) %doc Changes README %changelog