#!/usr/bin/perl -w
#
# CGI script: reads a list of hosts (one per line) from a file below
# $filesdir, fetches "http://<line>" for each of them and prints one record
# per page in the form
#
#   <:url:>...<:title:>...<:content-type:>...<:baseurl:>...
#   <:num-images:>N<:links:>link link ...<:konec:>
#
# The name of the list file is taken from the CGI parameter "file".

use strict;
use warnings;

require "cgi-lib.pl";
use HTML::LinkExtor;
use LWP::Simple;
use LWP;
use LWP::UserAgent;
use URI::URL;

# Package globals shared between the main program and the subs below:
#   %input     - CGI parameters, filled in by cgi-lib's ReadParse()
#   $base_url  - URL that analyse() works on (input), response base (output)
#   $long_date - formatted timestamp produced by getdatetime()
our (%input, $base_url, $long_date);
our $filesdir = "files";

print "Content-type: text/html\n\n";
# NOTE(review): the whitespace-only prints in this script look like they once
# carried HTML markup; they are kept as found -- confirm against the deployed
# copy of the script.
print "\n";
print "Rezultat analize";
print "\n\n";

# Get parameter and then retrieve the pages
##########################################
if (ReadParse(*input)) {
    if (!defined $input{'file'}) {
        print "Ni podanih parametrov! Vpišite datoteko z URLji.";
    }
    elsif ($input{'file'} eq "") {
        print "Niste vpisali datoteke z URLji!";
    }
    else {
        my $file    = $input{'file'};
        my $targets = _read_targets($file);

        if (!$targets) {
            # The file name comes straight from the request, so escape it
            # before echoing it into the HTML response.
            print "Napaka! Na strežniku ne najdem vpisane datoteke ",
                _escape_html($file), ".";
        }
        else {
            getdatetime();
            print "Začetek analize: $long_date\n\n\n\n";
            print "\n\n";

            for my $target (@$targets) {
                $base_url = "http://$target";
                analyse();
            }

            print "\n";
            getdatetime();
            print "Konec analize: $long_date\n";
        }
    }
}

###################
# Read the list file
###################
# Returns a reference to the list of non-empty lines of "$filesdir/$file"
# with their line endings removed, or nothing when the name is rejected or
# the file cannot be opened.
sub _read_targets {
    my ($file) = @_;

    # Refuse names that could leave $filesdir.
    return if $file =~ /\0/;
    return if grep { $_ eq '..' } split m{[/\\]}, $file;

    # Three-argument open: the name is never interpreted as a mode or a pipe.
    open(my $fh, '<', "$filesdir/$file") or return;

    my @targets;
    while (my $line = <$fh>) {
        $line =~ s/[\r\n]+\z//;    # accepts both CRLF and LF line endings
        next if $line eq '';
        push @targets, $line;
    }
    close($fh);

    return \@targets;
}

###################
# Escape text for HTML
###################
# Returns a copy of $text with &, <, > and " replaced by HTML entities.
sub _escape_html {
    my ($text) = @_;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;

    return $text;
}

###################
# Analyse page!
###################
# Fetches the page named by the global $base_url and prints one record for
# it: URL, title, content type, base URL, number of <img> tags and the
# sorted, de-duplicated list of link targets found by HTML::LinkExtor.
# Prints "URL ... ne obstaja!" when the request does not succeed.
# On success the global $base_url is replaced by the base URL of the response.
sub analyse {
    my $url = $base_url;

    my $browser = LWP::UserAgent->new();
    $browser->agent("MatejKovacicGregaPetric/InternetResearchProject");

    my $webdoc = $browser->request(HTTP::Request->new(GET => $url));
    if (!$webdoc->is_success) {
        print "URL $url ne obstaja!";
        return;
    }

    # Work on the body that was already downloaded instead of fetching the
    # page a second time.
    my $html = $webdoc->decoded_content;
    $html = $webdoc->content unless defined $html;

    # Count image tags regardless of the case used in the markup.
    my $num_img = () = $html =~ m{<\s*img\b}gi;

    # Both accessors are context sensitive; content_type in list context
    # would also return the parameters (e.g. the charset), which print would
    # then glue onto the MIME type.
    my $title = $webdoc->title;
    $title = '' unless defined $title;
    my $content_type = $webdoc->content_type;

    $base_url = $webdoc->base;

    print "<:url:>$url<:title:>", $title,
        "<:content-type:>", $content_type,
        "<:baseurl:>", $base_url,
        "<:num-images:>$num_img<:links:>";

    my $parser = HTML::LinkExtor->new(undef, $base_url);
    $parser->parse($html);
    $parser->eof;

    # Each link is [ tag, attribute => url, ... ]; collect every URL once.
    # %seen is local to this call so links of earlier pages do not leak
    # into this page's record.
    my %seen;
    for my $link ($parser->links) {
        my ($tag, %url_for) = @$link;
        $seen{$_} = 1 for values %url_for;
    }

    print join(' ', sort keys %seen), "<:konec:>\n";
    return;
}

###################
# Get date and time
###################
# Stores the current local time, formatted as
# "DD. <Slovenian month name>, YYYY ob HH:MM:SS", in the global $long_date
# and returns it.
sub getdatetime {
    my @month_names = (
        "januar", "februar", "marec",     "april",   "maj",      "junij",
        "julij",  "avgust",  "september", "oktober", "november", "december",
    );

    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);

    $long_date = sprintf("%02d. %s, %4d ob %02d:%02d:%02d",
        $mday, $month_names[$mon], $year + 1900, $hour, $min, $sec);

    return $long_date;
}