77 lines
1.6 KiB
Perl
77 lines
1.6 KiB
Perl
#! /usr/bin/perl -w -t
|
|
#
|
|
# a small test script, that
|
|
# takes your favourite web page and turns it into
|
|
# a parsed perl data structure.
|
|
|
|
use strict;
|
|
use HTML::TagParser;
|
|
use Data::Dumper;
|
|
$Data::Dumper::Sortkeys = 1;
|
|
|
|
# Main script: read the HTML file named by the first command-line argument,
# collect every <form> (together with its <input> and <textarea> children)
# and every <a> element that carries an href attribute, then dump the
# collected structure to STDOUT.

my $file = shift @ARGV;
die "Usage: $0 FILE.html\n" unless defined $file;

# Slurp the whole file; fail loudly instead of silently parsing nothing.
my $text = do
{
    open my $in, '<', $file or die "Cannot open '$file': $!\n";
    local $/;
    <$in>;
};

my $dom = HTML::TagParser->new($text);
my $r   = {};

for my $form ($dom->getElementsByTagName('form'))
{
    # Restrict the search for fields to the subtree of this form.
    my $subtree = $form->subTree;
    my $fields  = {};

    # Inputs get their attributes inlined (container flag 0) ...
    for my $input ($subtree->getElementsByTagName('input'))
    {
        store_element($fields, $input, 'input', 0);
    }

    # ... textareas keep them in a nested 'attr' hash (container flag 1).
    for my $textarea ($subtree->getElementsByTagName('textarea'))
    {
        store_element($fields, $textarea, 'textarea', 1);
    }

    # The form record absorbs the collected fields as extra keys.
    store_element($r, $form, 'form', $fields);
}

for my $anchor ($dom->getElementsByTagName('a'))
{
    my $attr = $anchor->attributes;

    # Anchors without an href are not links; skip them.
    next unless defined $attr->{href};
    store_element($r, $anchor, 'href', 0);
}

# Emit the result on STDOUT and finish successfully.  (Using die here would
# send the dump to STDERR, exit non-zero and never reach the exit below.)
print Dumper $r;
exit 0;
|
|
|
|
##############################################
|
|
|
|
# store_element($r, $e, $tn, $container)
#
# Build a record for element $e, append it to the list $r->{$tn} and index
# the same record under $r->{"${tn}_by_id"}, $r->{"${tn}_by_name"} and
# $r->{"${tn}_by_type"} for each of those attributes that is defined.
#
#   $r         - hash ref collecting the results; modified in place.
#   $e         - element object providing attributes(), tagName() and
#                innerText().
#   $tn        - key for the record list and prefix of the index keys.
#   $container - false:    attributes are copied inline into the record
#                          (an attribute named 'tagname' or 'text' would
#                          overwrite the record's own field);
#                true:     attributes are kept as a nested hash under 'attr';
#                hash ref: as for true, and its key/value pairs are copied
#                          into the record as well.
#
# The record always has 'tagname'; 'text' is present only when the element's
# inner text is non-empty.  Returns nothing.
sub store_element
{
    my ($r, $e, $tn, $container) = @_;

    # Fall back to an empty hash in case the element reports no attributes.
    my $attr = $e->attributes || {};

    # Drop the '/' pseudo-attribute (presumably left over from self-closing
    # tags such as <input ... />).  NOTE(review): this edits the hash handed
    # out by attributes(); confirm whether that is the element's own copy.
    delete $attr->{'/'};

    my $inp = { tagname => $e->tagName };

    # Keep any non-empty text, including the string "0".
    my $text = $e->innerText();
    $inp->{text} = $text if defined $text && length $text;

    if ($container)
    {
        $inp->{attr} = $attr;

        # A hash-ref container contributes its entries to the record.
        @{$inp}{ keys %$container } = values %$container if ref $container;
    }
    else
    {
        ## inline attr.
        @{$inp}{ keys %$attr } = values %$attr;
    }

    push @{ $r->{$tn} }, $inp;

    # qw() must be parenthesised here: using it as the loop's parentheses is
    # a syntax error on Perl 5.18 and later.
    for my $key (qw(id name type))
    {
        next unless defined $attr->{$key};
        push @{ $r->{ $tn . '_by_' . $key }{ $attr->{$key} } }, $inp;
    }

    return;
}
|
|
|