cpanspec, noarch pkg

OBS-URL: https://build.opensuse.org/package/show/devel:languages:perl/perl-HTML-TableExtract?expand=0&rev=20
2011-02-25 17:52:50 +00:00
parent f8356d3ca3
commit 08f522976f
3 changed files with 130 additions and 32 deletions
--- a/HTML-TableExtract-2.10-HTML.patch
+++ b/HTML-TableExtract-2.10-HTML.patch
@@ -0,0 +1,6 @@
 diff -ruN HTML-TableExtract-2.10-orig/t/gnarly.html HTML-TableExtract-2.10/t/gnarly.html
 --- HTML-TableExtract-2.10-orig/t/gnarly.html	2006-05-01 23:22:47.000000000 +0200
 +++ HTML-TableExtract-2.10/t/gnarly.html	2011-02-25 18:41:08.000000000 +0100
@@ -1 +1 @@
 -<html><head><title>gnarly table</title></head><body><table border=1><tr><td colspan=4 rowspan=1>(0,0) [1,4]</td><td colspan=4 rowspan=2>(0,1) [2,4]</td></tr><tr><td colspan=1 rowspan=2>(1,0) [2,1]</td><td colspan=1 rowspan=1>(1,1) [1,1]</td><td colspan=2 rowspan=1>(1,2) [1,2]</td></tr><tr><td colspan=4 rowspan=2>(2,0) [2,4]</td><td colspan=2 rowspan=2>(2,1) [2,2]</td><td colspan=1 rowspan=1>(2,2) [1,1]</td></tr><tr><td colspan=1 rowspan=1>(3,0) [1,1]</td><td colspan=1 rowspan=1>(3,1) [1,1]</td></tr><tr><td colspan=2 rowspan=3>(4,0) [3,2]</td><td colspan=1 rowspan=1>(4,1) [1,1]</td><td colspan=1 rowspan=3>(4,2) [3,1]</td><td colspan=4 rowspan=4>(4,3) [4,4]</td></tr><tr><td colspan=1 rowspan=1>(5,0) [1,1]</td></tr><tr><td colspan=1 rowspan=1>(6,0) [1,1]</td></tr><tr><td colspan=4 rowspan=1>(7,0) [1,4]</td></tr></table></body></html>
 +<html><head><title>gnarly table</title></head><body><table border="1"><tr><td colspan="4" rowspan="1">(0,0) [1,4]</td><td colspan="4" rowspan="2">(0,1) [2,4]</td></tr><tr><td colspan="1" rowspan="2">(1,0) [2,1]</td><td colspan="1" rowspan="1">(1,1) [1,1]</td><td colspan="2" rowspan="1">(1,2) [1,2]</td></tr><tr><td colspan="4" rowspan="2">(2,0) [2,4]</td><td colspan="2" rowspan="2">(2,1) [2,2]</td><td colspan="1" rowspan="1">(2,2) [1,1]</td></tr><tr><td colspan="1" rowspan="1">(3,0) [1,1]</td><td colspan="1" rowspan="1">(3,1) [1,1]</td></tr><tr><td colspan="2" rowspan="3">(4,0) [3,2]</td><td colspan="1" rowspan="1">(4,1) [1,1]</td><td colspan="1" rowspan="3">(4,2) [3,1]</td><td colspan="4" rowspan="4">(4,3) [4,4]</td></tr><tr><td colspan="1" rowspan="1">(5,0) [1,1]</td></tr><tr><td colspan="1" rowspan="1">(6,0) [1,1]</td></tr><tr><td colspan="4" rowspan="1">(7,0) [1,4]</td></tr></table></body></html>
--- a/perl-HTML-TableExtract.changes
+++ b/perl-HTML-TableExtract.changes
@@ -1,3 +1,11 @@
 -------------------------------------------------------------------
 Fri Feb 25 17:51:03 UTC 2011 - chris@computersalat.de
 - recreated by cpanspec 1.78.03
  o fix deps
 - add HTML patch
 - noarch pkg
 -------------------------------------------------------------------
 Wed Dec  1 13:27:30 UTC 2010 - coolo@novell.com
--- a/perl-HTML-TableExtract.spec
+++ b/perl-HTML-TableExtract.spec
@@ -15,58 +15,142 @@
 # Please submit bugfixes or comments via http://bugs.opensuse.org/
 #
 # norootforbuild
 Name:           perl-HTML-TableExtract
 Url:            http://cpan.org/modules/by-module/HTML/
 License:        Public Domain, Freeware
 Group:          Development/Libraries/Perl
 AutoReqProv:    on
 Requires:       perl-HTML-Parser
 BuildRequires:  perl-HTML-Parser
 BuildRequires:  perl-macros
 # Needed only for tests:
 BuildRequires:  perl-HTML-Tree perl-Test-Pod-Coverage
 Summary:        Simplifies extraction of information within tables in HTML documents
 Version:        2.10
-Release:        81
+Release:        82
-Source:         HTML-TableExtract-%{version}.tar.bz2
+License:        GPL+ or Artistic
 %define cpan_name HTML-TableExtract
 Summary:        For extracting the content contained in tables within an HTML document
 Url:            http://search.cpan.org/dist/HTML-TableExtract/
 Group:          Development/Libraries/Perl
 #Source:         http://www.cpan.org/authors/id/M/MS/MSISK/HTML-TableExtract-2.10.tar.gz
 Source:         %{cpan_name}-%{version}.tar.bz2
 Patch0:         %{cpan_name}-2.10-HTML.patch
 BuildArch:      noarch
 BuildRoot:      %{_tmppath}/%{name}-%{version}-build
 BuildRequires:  perl
 BuildRequires:  perl-macros
 BuildRequires:  perl(HTML::ElementTable) >= 1.16
 BuildRequires:  perl(HTML::Parser)
 Requires:       perl(HTML::ElementTable) >= 1.16
 Requires:       perl(HTML::Parser)
 %{perl_requires}
 %description
-HTML::TableExtract is a module that simplifies the extraction of
+HTML::TableExtract is a subclass of HTML::Parser that serves to extract the
-information contained in tables within HTML documents.
+information from tables of interest contained within an HTML document. The
 information from each extracted table is stored in table objects. Tables
 can be extracted as text, HTML, or HTML::ElementTable structures (for
 in-place editing or manipulation).
-Tables of note may be specified using Headers, Depth, Count,
+There are currently four constraints available to specify which tables you
-Attributes, or some combination of the three. See the module
+would like to extract from a document: _Headers_, _Depth_, _Count_, and
-documentation for details.
+_Attributes_.
 _Headers_, the most flexible and adaptive of the techniques, involves
 specifying text in an array that you expect to appear above the data in the
 tables of interest. Once all headers have been located in a row of that
 table, all further cells beneath the columns that matched your headers are
 extracted. All other columns are ignored: think of it as vertical slices
 through a table. In addition, TableExtract automatically rearranges each
 row in the same order as the headers you provided. If you would like to
 disable this, set _automap_ to 0 during object creation, and instead rely
 on the column_map() method to find out the order in which the headers were
 found. Furthermore, TableExtract will automatically compensate for cell
 span issues so that columns are really the same columns as you would
 visually see in a browser. This behavior can be disabled by setting the
 _gridmap_ parameter to 0. HTML is stripped from the entire textual content
 of a cell before header matches are attempted -- unless the _keep_html_
 parameter was enabled.
 _Depth_ and _Count_ are more specific ways to specify tables in relation to
 one another. _Depth_ represents how deeply a table resides in other tables.
 The depth of a top-level table in the document is 0. A table within a
 top-level table has a depth of 1, and so on. Each depth can be thought of
 as a layer; tables sharing the same depth are on the same layer. Within
 each of these layers, _Count_ represents the order in which a table was
 seen at that depth, starting with 0. Providing both a _depth_ and a _count_
 will uniquely specify a table within a document.
 _Attributes_ match based on the attributes of the html <table> tag, for
 example, border widths or background color.
 Each of the _Headers_, _Depth_, _Count_, and _Attributes_ specifications
 are cumulative in their effect on the overall extraction. For instance, if
 you specify only a _Depth_, then you get all tables at that depth (note
 that these could very well reside in separate higher-level tables
 throughout the document since depth extends across tables). If you specify
 only a _Count_, then the tables at that _Count_ from all depths are
 returned (i.e., the _n_th occurrence of a table at each depth). If you only
 specify _Headers_, then you get all tables in the document containing those
 column headers. If you have specified multiple constraints of _Headers_,
 _Depth_, _Count_, and _Attributes_, then each constraint has veto power
 over whether a particular table is extracted.
 If no _Headers_, _Depth_, _Count_, or _Attributes_ are specified, then all
 tables match.
 When extracting only text from tables, the text is decoded with
 HTML::Entities by default; this can be disabled by setting the _decode_
 parameter to 0.
 Extraction Modes
    The default mode of extraction for HTML::TableExtract is raw text or
    HTML. In this mode, embedded tables are completely decoupled from one
    another. In this case, HTML::TableExtract is a subclass of
    HTML::Parser:
      use HTML::TableExtract;
    Alternatively, tables can be extracted as HTML::ElementTable
    structures, which are in turn embedded in an HTML::Element tree
    representing the entire HTML document. Embedded tables are not
    decoupled from one another since this tree structure must be
    maintained. In this case, HTML::TableExtract is a subclass of
    HTML::TreeBuilder (itself a subclass of HTML::Parser):
      use HTML::TableExtract qw(tree);
    In either case, the basic interface for HTML::TableExtract and the
    resulting table objects remains the same -- all that changes is what
    you can do with the resulting data.
    HTML::TableExtract is a subclass of HTML::Parser, and as such inherits
    all of its basic methods such as 'parse()' and 'parse_file()'. During
    scans, 'start()', 'end()', and 'text()' are utilized. Feel free to
    override them, but if you do not eventually invoke them in the SUPER
    class with some content, results are not guaranteed.
 Advice
    The main point of this module was to provide a flexible method of
    extracting tabular information from HTML documents without relying too
    heavily on the document layout. For that reason, I suggest using
    _Headers_ whenever possible -- that way, you are anchoring your
    extraction on what the document is trying to communicate rather than
    some feature of the HTML comprising the document (other than the fact
    that the data is contained in a table).
 %prep
-%setup -q -n HTML-TableExtract-%{version}
+%setup -q -n %{cpan_name}-%{version}
 %patch0 -p1
 %build
-perl Makefile.PL
+%{__perl} Makefile.PL INSTALLDIRS=vendor
-make %{?_smp_mflags}
+%{__make} %{?_smp_mflags}
 %check
-make test
+%{__make} test
 %install
-make DESTDIR=$RPM_BUILD_ROOT install_vendor
+%perl_make_install
 %perl_process_packlist
 %perl_gen_filelist
 %clean
-[ "$RPM_BUILD_ROOT" != "/" ] && [ -d $RPM_BUILD_ROOT ] && rm -rf $RPM_BUILD_ROOT
+%{__rm} -rf %{buildroot}
-%files
+%files -f %{name}.files
-%defattr(-,root,root)
+%defattr(644,root,root,755)
-%doc Changes MANIFEST README
+%doc Changes README
 %doc %{_mandir}/man?/*
 %{perl_vendorlib}/HTML
 %{perl_vendorarch}/auto/HTML-TableExtract
 %changelog