forked from pool/perl-HTML-TableExtract
		
	Accepting request 97856 from devel:languages:perl
- update to 2.11
    - added parsing context, override for eof() and parse() for
      memory clear on new docs or post-eof()
    - fixed some long standing test warnings
OBS-URL: https://build.opensuse.org/request/show/97856
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/perl-HTML-TableExtract?expand=0&rev=15
			
			
This commit is contained in:
		| @@ -1,6 +0,0 @@ | ||||
| diff -ruN HTML-TableExtract-2.10-orig/t/gnarly.html HTML-TableExtract-2.10/t/gnarly.html | ||||
| --- HTML-TableExtract-2.10-orig/t/gnarly.html	2006-05-01 23:22:47.000000000 +0200 | ||||
| +++ HTML-TableExtract-2.10/t/gnarly.html	2011-02-25 18:41:08.000000000 +0100 | ||||
| @@ -1 +1 @@ | ||||
| -<html><head><title>gnarly table</title></head><body><table border=1><tr><td colspan=4 rowspan=1>(0,0) [1,4]</td><td colspan=4 rowspan=2>(0,1) [2,4]</td></tr><tr><td colspan=1 rowspan=2>(1,0) [2,1]</td><td colspan=1 rowspan=1>(1,1) [1,1]</td><td colspan=2 rowspan=1>(1,2) [1,2]</td></tr><tr><td colspan=4 rowspan=2>(2,0) [2,4]</td><td colspan=2 rowspan=2>(2,1) [2,2]</td><td colspan=1 rowspan=1>(2,2) [1,1]</td></tr><tr><td colspan=1 rowspan=1>(3,0) [1,1]</td><td colspan=1 rowspan=1>(3,1) [1,1]</td></tr><tr><td colspan=2 rowspan=3>(4,0) [3,2]</td><td colspan=1 rowspan=1>(4,1) [1,1]</td><td colspan=1 rowspan=3>(4,2) [3,1]</td><td colspan=4 rowspan=4>(4,3) [4,4]</td></tr><tr><td colspan=1 rowspan=1>(5,0) [1,1]</td></tr><tr><td colspan=1 rowspan=1>(6,0) [1,1]</td></tr><tr><td colspan=4 rowspan=1>(7,0) [1,4]</td></tr></table></body></html> | ||||
| +<html><head><title>gnarly table</title></head><body><table border="1"><tr><td colspan="4" rowspan="1">(0,0) [1,4]</td><td colspan="4" rowspan="2">(0,1) [2,4]</td></tr><tr><td colspan="1" rowspan="2">(1,0) [2,1]</td><td colspan="1" rowspan="1">(1,1) [1,1]</td><td colspan="2" rowspan="1">(1,2) [1,2]</td></tr><tr><td colspan="4" rowspan="2">(2,0) [2,4]</td><td colspan="2" rowspan="2">(2,1) [2,2]</td><td colspan="1" rowspan="1">(2,2) [1,1]</td></tr><tr><td colspan="1" rowspan="1">(3,0) [1,1]</td><td colspan="1" rowspan="1">(3,1) [1,1]</td></tr><tr><td colspan="2" rowspan="3">(4,0) [3,2]</td><td colspan="1" rowspan="1">(4,1) [1,1]</td><td colspan="1" rowspan="3">(4,2) [3,1]</td><td colspan="4" rowspan="4">(4,3) [4,4]</td></tr><tr><td colspan="1" rowspan="1">(5,0) [1,1]</td></tr><tr><td colspan="1" rowspan="1">(6,0) [1,1]</td></tr><tr><td colspan="4" rowspan="1">(7,0) [1,4]</td></tr></table></body></html> | ||||
| @@ -1,3 +0,0 @@ | ||||
| version https://git-lfs.github.com/spec/v1 | ||||
| oid sha256:26485211a78da74df7df5b1ebfd34cb017437179cf14937265b23959e1356d56 | ||||
| size 23675 | ||||
							
								
								
									
										3
									
								
								HTML-TableExtract-2.11.tar.gz
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								HTML-TableExtract-2.11.tar.gz
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | ||||
| version https://git-lfs.github.com/spec/v1 | ||||
| oid sha256:1861d55a2aa1728ef56ea2d08d630b9a008456f1106994e4e49e76f56e4955ee | ||||
| size 27123 | ||||
| @@ -1,3 +1,11 @@ | ||||
| ------------------------------------------------------------------- | ||||
| Tue Dec 20 09:13:30 UTC 2011 - coolo@suse.com | ||||
|  | ||||
| - update to 2.11 | ||||
|     - added parsing context, override for eof() and parse() for | ||||
|       memory clear on new docs or post-eof() | ||||
|     - fixed some long standing test warnings | ||||
|  | ||||
| ------------------------------------------------------------------- | ||||
| Fri Feb 25 17:51:03 UTC 2011 - chris@computersalat.de | ||||
|  | ||||
|   | ||||
| @@ -16,24 +16,24 @@ | ||||
| # | ||||
|  | ||||
|  | ||||
|  | ||||
| Name:           perl-HTML-TableExtract | ||||
| Version:        2.10 | ||||
| Release:        86 | ||||
| License:        GPL+ or Artistic | ||||
| Version:        2.11 | ||||
| Release:        0 | ||||
| %define cpan_name HTML-TableExtract | ||||
| Summary:        For extracting the content contained in tables within an HTML document | ||||
| Url:            http://search.cpan.org/dist/HTML-TableExtract/ | ||||
| Summary:        Perl module for extracting the content contained in tables within an HTM[cut] | ||||
| License:        GPL-1.0+ or Artistic-1.0 | ||||
| Group:          Development/Libraries/Perl | ||||
| #Source:         http://www.cpan.org/authors/id/M/MS/MSISK/HTML-TableExtract-2.10.tar.gz | ||||
| Source:         %{cpan_name}-%{version}.tar.bz2 | ||||
| Patch0:         %{cpan_name}-2.10-HTML.patch | ||||
| Url:            http://search.cpan.org/dist/HTML-TableExtract/ | ||||
| Source:         http://www.cpan.org/authors/id/M/MS/MSISK/%{cpan_name}-%{version}.tar.gz | ||||
| BuildArch:      noarch | ||||
| BuildRoot:      %{_tmppath}/%{name}-%{version}-build | ||||
| BuildRequires:  perl | ||||
| BuildRequires:  perl-macros | ||||
| BuildRequires:  perl(HTML::ElementTable) >= 1.16 | ||||
| BuildRequires:  perl(HTML::Parser) | ||||
| #BuildRequires: perl(HTML::Entities) | ||||
| #BuildRequires: perl(HTML::TableExtract) | ||||
| #BuildRequires: perl(testload) | ||||
| Requires:       perl(HTML::ElementTable) >= 1.16 | ||||
| Requires:       perl(HTML::Parser) | ||||
| %{perl_requires} | ||||
| @@ -96,45 +96,9 @@ When extracting only text from tables, the text is decoded with | ||||
| HTML::Entities by default; this can be disabled by setting the _decode_ | ||||
| parameter to 0. | ||||
|  | ||||
| Extraction Modes | ||||
|     The default mode of extraction for HTML::TableExtract is raw text or | ||||
|     HTML. In this mode, embedded tables are completely decoupled from one | ||||
|     another. In this case, HTML::TableExtract is a subclass of | ||||
|     HTML::Parser: | ||||
|  | ||||
|       use HTML::TableExtract; | ||||
|  | ||||
|     Alternatively, tables can be extracted as HTML::ElementTable | ||||
|     structures, which are in turn embedded in an HTML::Element tree | ||||
|     representing the entire HTML document. Embedded tables are not | ||||
|     decoupled from one another since this tree structure must be | ||||
|     maintained. In this case, HTML::TableExtract is a subclass of | ||||
|     HTML::TreeBuilder (itself a subclass of HTML::Parser): | ||||
|  | ||||
|       use HTML::TableExtract qw(tree); | ||||
|  | ||||
|     In either case, the basic interface for HTML::TableExtract and the | ||||
|     resulting table objects remains the same -- all that changes is what | ||||
|     you can do with the resulting data. | ||||
|  | ||||
|     HTML::TableExtract is a subclass of HTML::Parser, and as such inherits | ||||
|     all of its basic methods such as 'parse()' and 'parse_file()'. During | ||||
|     scans, 'start()', 'end()', and 'text()' are utilized. Feel free to | ||||
|     override them, but if you do not eventually invoke them in the SUPER | ||||
|     class with some content, results are not guaranteed. | ||||
|  | ||||
| Advice | ||||
|     The main point of this module was to provide a flexible method of | ||||
|     extracting tabular information from HTML documents without relying too | ||||
|     heavily on the document layout. For that reason, I suggest using | ||||
|     _Headers_ whenever possible -- that way, you are anchoring your | ||||
|     extraction on what the document is trying to communicate rather than | ||||
|     some feature of the HTML comprising the document (other than the fact | ||||
|     that the data is contained in a table). | ||||
|  | ||||
| %prep | ||||
| %setup -q -n %{cpan_name}-%{version} | ||||
| %patch0 -p1 | ||||
| find . -type f -print0 | xargs -0 chmod 644 | ||||
|  | ||||
| %build | ||||
| %{__perl} Makefile.PL INSTALLDIRS=vendor | ||||
| @@ -148,11 +112,8 @@ Advice | ||||
| %perl_process_packlist | ||||
| %perl_gen_filelist | ||||
|  | ||||
| %clean | ||||
| %{__rm} -rf %{buildroot} | ||||
|  | ||||
| %files -f %{name}.files | ||||
| %defattr(644,root,root,755) | ||||
| %defattr(-,root,root,755) | ||||
| %doc Changes README | ||||
|  | ||||
| %changelog | ||||
|   | ||||
		Reference in New Issue
	
	Block a user