Sending output to Spreadsheet



(Reposted in new thread...)

Hi, New to Perl, using ActiveState 5.8, Win XP

I'm not sure if this is the best ng...should misc be my first
port of call?

I am trying to adapt Brent Hughes' original rget-links.pl code so that it collects the discovered web links into a spreadsheet for later use.
BTW, Any errors are due to me, not Brent!

The code runs OK and prints out the web links found into the command window.

I have looked at the Spreadsheet::SimpleExcel module, but I cannot work out the syntax to get the accumulated links into the my_List.xls file.

Any suggestions will be appreciated!
Cheers, Peter

#!/usr/bin/perl

use warnings;
use strict;

package RGetLinks;

use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;
use Getopt::Long;
use Spreadsheet::SimpleExcel;

# Turn on autoflush for the current output handle so each visited URL is
# shown as soon as it is printed.
$| = 1;

# global data for this program
my $depth;    # current recursion level, maintained by rgetlinkshelper()
my %files;    # keys are the URLs visited so far, set by rgetlinkshelper()

# command line options
my $opt_depth = 4;

# retrieve command line options
my $options = GetOptions ("depth=i" => \$opt_depth); # numeric

# abort if the options are improperly formatted
# (GetOptions returns false when it could not parse the command line)
if(!$options){ usage(); }

# Start URL: the first remaining argument if one was given, otherwise the
# built-in placeholder.
my $url = @ARGV ? shift @ARGV : 'http://somesite/';

# program enters actual processing at this point
rgetlinks($url,$opt_depth);

# Build the worksheet contents from the visited URLs: a single header cell
# and one single-cell row per URL. Sorting makes the row order repeatable.
my @header = ('URL');
my @data   = map { [ $_ ] } sort keys %files;

# create a new instance of Excel
my $excel = Spreadsheet::SimpleExcel->new();
# add worksheet
$excel->add_worksheet('Sheet1',{-headers => \@header, -data => \@data});
# print result into a file and handle error
$excel->output_to_file('c:/Documents and Settings/my_List.xls')
    or die $excel->errstr();

# Subroutines

# Entry point for a crawl.
#   $url      - address to start from (a trailing newline is removed)
#   $maxdepth - how many levels of links may be followed
# Resets the file-level crawl state ($depth, %files) and then hands over
# to rgetlinkshelper() for the actual recursive descent.
sub rgetlinks {
    my ($url, $maxdepth) = @_;
    chomp $url;

    # every crawl starts from a clean slate
    %files = ();
    $depth = 0;

    return rgetlinkshelper($url, $maxdepth);
}

# Recursive worker behind rgetlinks().
#   $url      - page to visit
#   $maxdepth - recursion stops once $depth has reached this value
# Records $url in %files, prints it indented by the current depth, then
# visits every link that getlinks() reports for it.
sub rgetlinkshelper {
    my ($url, $maxdepth) = @_;

    # nothing to do when we are too deep or have been here before
    return if $depth >= $maxdepth;
    return if defined $files{$url};

    # remember the page and drop down a level
    $files{$url} = 1;
    $depth++;

    # show our current location, one space of indent per level
    print ' ' x $depth, $url, "\n";

    # copy the links first: getlinks() reuses a shared buffer that the
    # nested calls below overwrite
    my @found = getlinks($url);
    for my $link (@found) {
        rgetlinkshelper($link, $maxdepth);
    }

    # pop back up a level
    $depth--;
}

# Link buffer shared by getlinks() and callback(): callback() appends to
# it while a page is being parsed, getlinks() empties and returns it.
my @links = ();

# Return the links found at $url as absolute URLs.
# A HEAD request is made first; the page itself is only fetched and parsed
# when the reported content type matches /text|html/. Otherwise the
# returned list is empty.
sub getlinks {
    my ($url) = @_;

    my $agent = LWP::UserAgent->new;

    # Build the parser up front. The base URL is not known yet (it might
    # differ from $url), so the links are made absolute after the fetch.
    @links = ();
    my $extractor = HTML::LinkExtor->new(\&callback);

    # Ask for the headers only, to learn what kind of document this is
    my $head_response = $agent->request(HTTP::Request->new(HEAD => $url));
    my $type = $head_response->header('content-type');

    # Anything that is not text or html is not parsed for links
    return @links unless defined $type && $type =~ /text|html/;

    # Fetch the document, feeding each chunk to the parser as it arrives
    my $response = $agent->request(
        HTTP::Request->new(GET => $url),
        sub { $extractor->parse($_[0]) },
    );

    # Expand all URLs to absolute ones
    my $base = $response->base;
    @links = map { url($_, $base)->abs } @links;

    return @links;
}

# Handler passed to HTML::LinkExtor->new by getlinks().
# Receives a tag name followed by that tag's attribute name/value pairs;
# for <a ...> tags every attribute value is appended to @links, all other
# tags are ignored.
sub callback {
    my ($tag, %attr) = @_;

    return unless $tag eq 'a';
    return push @links, values %attr;
}

# Print the command line help to standard output and terminate the
# program. Takes no arguments and never returns.
sub usage {
    # Reduce $0 to the bare program name by removing everything up to and
    # including the last path separator, whether it is "/" or "\".
    (my $progname = $0) =~ s{.*[\\/]}{};

    # show instructions
    print "\nUsage:\n\t\t",
        $progname, " [args] target-url > output-file\n\n",
        "Example:\n\t\t",
        $progname, " --depth=4 http://www.perl.org\n\n";

    # The default shown here must match the initial value of $opt_depth.
    print "Options\n", "=======\n",
        "--depth\t\t",
        "The maximum depth of links to traverse (default = 4)\n";

    exit();
}
.