#!/usr/bin/perl
# When the script is started without any command-line arguments, replace the
# process with perldoc showing this script's own POD. This runs at compile
# time, before the modules below are loaded.
BEGIN {
    unless (@ARGV) {
        exec 'perldoc', $0;
    }
}
use strict;
use Getopt::Long;
use FEAR::API -base;
use YAML;
use Data::Dumper;
use Carp;
# Parsed command-line options, keyed by each option's primary (first) name.
my %opt;

# Not referenced anywhere in the visible code; kept because code supplied
# through --eval is compiled in this lexical scope and could refer to it.
my @init_links;

# Parse the command line. Whatever is not an option stays in @ARGV and is
# treated as the source (URL or file name) by the main loop.
# Getopt::Long reports each invalid option on STDERR and returns false;
# stop in that case instead of running with a half-understood command line.
GetOptions(
    \%opt,
    'dump|d=s',
    'dump-all|a',
    'download|o',
    'source|s',
    'file|f',
    'content-type|y=s',
    'xpath|p=s',
    'verbose|v',
    'method|m=s',
    'template|t=s',
    'eval|execute|e=s',    # --execute is the long name shown in the POD
    'pre=s@',
    'post=s@',
    'dumper|r=s',
    'debug|D',
) or die "Invalid command-line options; run 'perldoc $0' for usage.\n";
# With --debug, echo the parsed options and the remaining arguments before
# doing any work. The Data::Dumper settings are localised to this block.
if ( $opt{debug} ) {
    local $Data::Dumper::Terse  = 1;
    local $Data::Dumper::Indent = 1;

    my $separator = $/;
    print $separator;
    print "Option(s):\n", Dumper( \%opt ), $separator;
    print "URL(s):\n", join( ' ', @ARGV ), $separator x 2;
}
_alias dumper => 'Dumper';

# Serialise the arguments with YAML::Dump when the --dumper option is exactly
# 'YAML', and with Data::Dumper::Dumper for any other (or no) value.
sub Dumper {
    return YAML::Dump(@_) if $opt{dumper} eq 'YAML';
    return Data::Dumper::Dumper(@_);
}
# Print the links, images and/or forms reported by the current user agent.
#
# $action - 'links', 'images' or 'forms' to fill in one section, or 'all'
#           for every section. The singular spellings 'link', 'image' and
#           'form' are accepted as well.
#
# All three section headings are printed on every call; only the selected
# section(s) receive content. With --verbose the objects are serialised
# through dumper(); otherwise a compact form is printed.
sub dump_info {
    my $action = shift;
    my $all    = $action eq 'all';

    print "\n[ Links ]\n";
    if ( $all or $action =~ /\Alinks?\z/ ) {
        if ( $opt{verbose} ) {
            print dumper( wua->links );
        }
        else {
            # Element 0 of each link, one per line (presumably the URL of a
            # WWW::Mechanize-style link object -- confirm).
            print map { $_->[0], $/ } wua->links;
        }
    }

    print "\n[ Images ]\n";
    if ( $all or $action =~ /\Aimages?\z/ ) {
        if ( $opt{verbose} ) {
            print dumper( wua->images );
        }
        else {
            # Iterate the images here; this branch used to walk the link
            # list, so the Images section repeated the link URLs.
            print map { $_->url, $/ } wua->images;
        }
    }

    print "\n[ Forms ]\n";
    if ( $all or $action =~ /\Aforms?\z/ ) {
        if ( $opt{verbose} ) {
            print dumper( wua->forms );
        }
        else {
            foreach my $f ( wua->forms ) {
                print $f->dump;
            }
        }
    }
}
# Main control
#
# Only the first argument left after option parsing is processed. The loop
# body still runs exactly once, with an undefined $source, when nothing is
# left (for example when only --eval code is given).
foreach my $source ( $ARGV[0] ) {

    # Load the document: from a local file, forcing the content type
    # (text/html unless --content-type is given), or by fetching it.
    if ( $opt{file} ) {
        file($source);
        force_content_type( $opt{'content-type'} || 'text/html' );
    }
    else {
        fetch($source);
    }

    if ( $opt{method} ) {
        extmethod( $opt{method} );
    }

    # --dump-all takes precedence over --dump.
    if ( $opt{'dump-all'} ) {
        dump_info('all');
    }
    elsif ( $opt{dump} ) {
        dump_info( $opt{dump} );
    }

    if ( $opt{xpath} ) {
        # HTML content is converted to XHTML before the XPath is applied.
        my $content_type = wua->ct;
        if ( $content_type =~ /html/ ) {
            doc->html_to_xhtml;
        }
        doc->xpath( $opt{xpath} );
    }

    if ( $opt{source} ) {
        print doc->as_string;
    }

    if ( $opt{download} ) {
        # Never overwrite an earlier download: use fear.output if it is
        # free, otherwise the first unused fear.output.N (N = 1, 2, ...).
        my $outputfile = 'fear.output';
        if ( -e $outputfile ) {
            my $count = 1;
            $count++ while -e "fear.output.$count";
            $outputfile = "fear.output.$count";
        }
        print "Saving content as [ $outputfile ] ... \n";
        save_as($outputfile);
    }

    # Apply each --pre value in the order given. Each call gets one value;
    # passing the whole array reference every time was a bug.
    if ( exists $opt{pre} and ref $opt{pre} ) {
        foreach my $p ( @{ $opt{pre} } ) {
            preproc($p);
        }
    }

    if ( $opt{template} ) {
        # The --template value is evaluated as Perl code to build the
        # template. NOTE(review): a failed evaluation is not reported here
        # and hands undef to template().
        template( eval $opt{template} );
        extract;

        # Parenthesised so that print cannot take the bareword `dumper`
        # for a filehandle name; the alias is only installed at run time.
        print dumper( extresult );
    }

    # Apply each --post value in the order given, one value per call.
    if ( exists $opt{post} and ref $opt{post} ) {
        foreach my $p ( @{ $opt{post} } ) {
            postproc($p);
        }
    }

    # Run user-supplied Perl code last and report any failure from it.
    if ( $opt{eval} ) {
        eval $opt{eval};
        croak $@ if $@;
    }
}
__END__
=pod
=head1 CLI access to FEAR::API
=head2 Dump information from content
fear -d 'images' google.com # images, links, forms
--dump 'images'
=head2 Dump ALL information from content
fear -a google.com
-d 'all'
--dump-all
=head2 Save content to fear.output
fear -o google.com
--download
=head2 Load content from filesystem
fear -f 'some.html.file'
=head2 Xpath content
fear -p '/html/body/*' google.com
--xpath
=head2 Print out content
fear -s google.com
--source
=head2 Specify extraction method
fear -m Regexp::GlobalBind
=head2 Specify template
fear -t '[% text %]'
=head2 Choose dumping module (YAML or Data::Dumper)
fear -r YAML
=head2 Verbose output
fear -v
=head2 Input perl code
fear -e "fetch('google.com')"
--eval
=head2 Force content type
fear -y 'text/html'
--content-type
=head1 MIX TOGETHER
fear google.com -m Regexp::GlobalBind -p '/html/head' -t 'qr"<meta (?#<text>.+?) />"' -r YAML
Fetch google's homepage
Set extraction method to Regexp::GlobalBind
Use XPATH to select /html/head subtree
Apply template
Dump results using YAML
=head1 COPYRIGHT
Copyright (C) 2006 by Yung-chung Lin (a.k.a. xern)
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself
=cut