#!/usr/bin/perl
use strict;
use warnings;

## solis.pl, a script to mirror NSO/SOLIS  quicklook photospheric vector magnetic fields 
## and create a simple HTML display.
#
# Changed $spacedirpath to wherever and create a dir ./solis/ within. Then just run the script
# with no arguments. The output HTML path and name is in $SOLISmovieHTML and defaults to solis.html
# within $spacedirpath. I have it set up as a cronjob to run once per day. Since it is set by
# by default to use the quicklook page, the filling factor images are ignored.

use LWP::UserAgent;
use URI::URL;
use HTML::LinkExtor;
#use Image::Magick;

my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Opera/9.80 (X11\; Linux x86_64\; U\; en) Presto/2.9.168 Version/11.50");
my @ns_headers = (
   'Accept' => 'image/gif, image/x-xbitmap, image/jpeg, 
        image/pjpeg, image/png, */*',
   'Accept-Charset' => 'iso-8859-1,*,utf-8',
   'Accept-Language' => 'en-US',
);

my $spacedirpath = 'home/superkuh/www/spaceweather';
my $solisdir = 'solis';
my $useragent = "--user-agent=\"Opera/9.80 (X11\; Linux x86_64\; U\; en) Presto/2.9.168 Version/11.50\"";
my $SOLISmovieHTML = "/$spacedirpath/solis.html";

# window.open('http://solis.nso.edu/vsm/betadatapub/QLARBetaView.php?fname=/2011/09/svsm_v01_11302_201109291850.jpg&width=600', 'WIN', 'width=1206, height=1000, toolbar=no, menubar=yes, status=no, scrollbars=yes')

my @activeregions;
# Verified
# @activeregions = httpextract('http://solis.nso.edu/vsm/betadatapub/BetaPage2.php?type=6302v', 'window\.open\(\'(.+)\', \'WIN\',');
# Quicklook
@activeregions = httpextract('http://solis.nso.edu/vsm/betadatapub/QLBetaPage.php', 'window\.open\(\'(.+)\', \'WIN\',');


print scalar(@activeregions) . " active regions found.\n";
print "$_\n" foreach @activeregions;
print "\n\n";

my %regions;
foreach my $url (@activeregions) {
	my @images;
	# <th align="left"> VSM Active Region NOAA AR 11302 Date 9/29/2011 </th>
	# <th align=\"left\"> VSM Active Region NOAA AR (\d+) Date \d{1,2}/\d{1,2}/\d{4} <\/th>
	my ($AR) = httpextract("$url",'<th align=\"left\"> VSM Active Region NOAA AR (\d+) Date \d{1,2}/\d{1,2}/\d{4} <\/th>');
	print "AR $AR\n";
	@images = httpextractimglinks($url);
	#$regions{$AR} = \@images;

	$regions{$AR} = { 
		'images' => \@images,
		'url' => $url
	}
}

print "Mirroring and Generating SOLIS AR Pages... \n";
open (SOLISHTML, ">$SOLISmovieHTML") or die "can't make $SOLISmovieHTML.\n$!";
print SOLISHTML "<H3>Photosphere Vector Magnetic Field by <a href=\"http://solis.nso.edu\">NSO/SOLIS</a>, <a href=\"http://solis.nso.edu/vsm_fulldisk3.html\">SOLIS Active Regions</a>\n";

foreach my $region (keys %regions) {
	if (-d "/$spacedirpath/$solisdir/$region") {
		print "dir for $region exists.\n";
	} else {
		mkdir "/$spacedirpath/$solisdir/$region";
		print "Created dir /$spacedirpath/$solisdir/$region/\n";
	}

	my @existingimages;
	my %imagecheck;
	my $previmagedate;
	@existingimages = </$spacedirpath/$solisdir/$region/*.jpg>;
	%imagecheck = map { makekeyfor($_) => 1 } @existingimages;
	if ($existingimages[0]) {
		$existingimages[0] =~ /_(\d{12})\.jpg$/;
		$previmagedate = $1;
	} else {
		$previmagedate = "";
	}

	#print "\n\nTESTING: @{$regions{$region}}[1]\n\n";
	#print "\n\nTESTING: image1: @{$regions{$region}->{'images'}}[1] , url: $regions{$region}{'url'}\n\n";

	@{$regions{$region}->{'images'}}[1] =~ m#_(\d{12})\.jpg$#;
	my $imagedate = $1;

	my $datadir = "/$spacedirpath/$solisdir/$region";
	if ($previmagedate ne $imagedate) {
		if ($previmagedate) {
			%imagecheck = ();
			`rm $datadir/*`;
			print "\n\nDeleting old files in $datadir!\n\n";
		} else {

		}
	}
	print "duplicate images: " . scalar(keys %imagecheck) . "\n";

	print SOLISHTML "<h3>SOLIS AR $region</h3>\n";
	my $imagecount = 1;
	foreach my $imgurl (@{$regions{$region}->{'images'}}) {
		next if $imgurl =~ /logo_med.jpg/;
		next if $imgurl =~ /_v10_/;
		print "$imgurl \n";
		$imgurl =~ m#/\d\d/(.+)$#;
		my $imagename = $1;
		#$imagename =~ /_(\d{12})\.jpg$/;
		#my $imagedate = $1;
	

		#$ua->default_header('Referer' => "$imgurl");
		print SOLISHTML "<a href=\"" . $regions{$region}{'url'} . "\">";
		print SOLISHTML "<img src=\"$solisdir/$region/$imagename\" /></a>";
		if (0 == $imagecount % 2) {
			print SOLISHTML "<br />\n";
		}

		if ($imagecheck{$imagename}) {
			print "exists, skipping $imagename.\n";	
		} else {
 			my $response = $ua->get($imgurl, @ns_headers);
			my $result = $response->status_line;
			my $content = $response->content();
			print ("$imgurl - $result\n");
			if ($result =~ /200/) {
				open (FILE, ">/$spacedirpath/$solisdir/$region/$imagename") or warn "Could not open output!: $!";
				binmode (FILE);
				print FILE $content;
				close FILE;
				print system "convert -strip -scale 50% -quality 75 /$spacedirpath/$solisdir/$region/$imagename /$spacedirpath/$solisdir/$region/$imagename";
			} else {
				warn "could not download file $imgurl, not including in html.\n";
			}
		}
	}
}
close SOLISHTML;

sub httpextract {
	my ($url, $regex) = @_;
	my @urls;
	print "Gathering URLs...";
	my $response = $ua->get("$url", @ns_headers);
	if ($response->is_success) {
		my $html = $response->decoded_content;
		my @htmluh = split(/\n/, $html);
		foreach my $line (@htmluh) {
			#  window\.open\('(.+)', 'WIN',
			if ($line =~ m#$regex#) {
				my $match = $1;			
				push(@urls, $match);
			}
		}	
	}
	return @urls;
}


sub httpextractimglinks {
	my ($url) = @_;
	print "Extracting image URLs... $url\n";
	my @images;
 	my $p = HTML::LinkExtor->new(sub {my($tag, %attr) = @_;return if $tag ne 'img';push(@images, values %attr);});
	#my $p = HTML::LinkExtor->new(\&callback);
	my $res = $ua->request(HTTP::Request->new(GET => $url),
                      	sub {$p->parse($_[0])});
	my $base = $res->base;
	@images = map { $_ = url($_, $base)->abs; } @images;
	#print join("\n", @images), "\n";
	#print "\# " . scalar(@images) . " images in \@images\n\n";
	my @uhwtf = @images;
	return @uhwtf;
}

sub makekeyfor {
	my $filepath = shift;
	# /home/superkuh/www/spaceweather/solis/11305/svsm_v02_11305_201109291850.jpg
	$filepath =~ m#\d{5}/(.+\.jpg)$#;
	my $name = $1;
	#print "keyname: $name\n";
	return $name;
}
