#!/usr/bin/perl
#
# Interactive helper for measuring the "xkcd effect": for each comic it shows
# the transcription (or the image), asks for search phrases, looks the phrases
# up on Google Trends around the comic's publication date and appends the
# results the user accepts to xkcdeffect.csv.
#
# Usage (as far as this file shows): the first argument may be -r to walk the
# comics newest-first; the length of the second argument minus one sets the
# verbosity (e.g. "-vv" gives verbosity 2).
use LWP::Simple;
use WWW::Mechanize;
use HTTP::Cookies;
use Term::ReadKey;
use POSIX qw(strftime mktime);

# ---- Configuration -------------------------------------------------------
# URL templates are sprintf formats, which is why literal percent signs in
# $transcriptionurl are doubled.
my $interneturl      = "http://www.google.com";
my $internet         = "Google";
my $transcriptionurl = "http://www.ohnorobot.com/transcribe.pl?comicid=apKHvCCc66NMg&url=http:%%2F%%2Fxkcd.com%%2F%d%%2F";
my $baseurl          = "http://www.xkcd.com";
my $comicurl         = "$baseurl/%d/";
my $imageviewer      = "gnome-open";
my $urlviewer        = "xdg-open";
my $tempfile         = "temp";      # base name for the downloaded image; '' keeps the original name
my $archiveurl       = "archive";
my $trendsurl        = "http://www.google.com/trends?q=%s&date=%s&geo=all";
my $hottrendsurl     = "http://www.google.com/trends/hottrends?q=%s&date=%s";
my $csvurl           = "http://www.google.com/trends/viz?q=%s&date=%s&geo=all&graph=all_csv&scale=%d";
my @scalenums        = (0, 1);      # values substituted for scale= in $csvurl

# Verbosity: length of the second argument minus one; -1 when it is absent.
$numvs = length(defined $ARGV[1] ? $ARGV[1] : '') - 1;

print "Intializing...\n" if $numvs > 0;

#Make sure our urls are correct
print "Checking internet connection...\n" if $numvs > 0;
unless (head($interneturl)) {
    print "WARNING: Either you're not connected to the internet, or $internet is down. Probably the former. Proceeding...\n";
}

print "Checking OhNoRobot...\n" if $numvs > 0;
unless (head(sprintf($transcriptionurl, 1))) {
    die "OhNoRobot config is incorrect! Correct \$transcriptionurl template.";
}

print "Checking xkcd...\n" if $numvs > 0;
unless (head(sprintf($comicurl, 1))) {
    die "xkcd config is incorrect! Correct \$comicurl or \$baseurl template.";
}

# The archive page is the only source of comic dates and titles; a failure
# here is survivable, so it only warns.
print "Downloading archive page...\n" if $numvs > 0;
unless ($archivepage = get("$baseurl/$archiveurl")) {
    print "WARNING: Couldn't get archive page! Dates and titles will be unavailable.\n";
}

$trendsbot = WWW::Mechanize->new();
$cookiejar = HTTP::Cookies->new();

#Try to load cookies...
if (-e 'cookies.dat') {
    #Load the cookie jar and init it into trendsbot
    $cookiejar->load('cookies.dat');
    $trendsbot->cookie_jar($cookiejar);
}
else {
    #Just load the empty cookie jar
    $trendsbot->cookie_jar($cookiejar);

    #Login if no cookies
    print "Google login?";
    $login = <STDIN>;
    $login = '' unless defined $login;
    chomp($login);

    print "Google password?";
    ReadMode('noecho');             # do not echo the password while typing
    $pass = <STDIN>;
    ReadMode('restore');            # always put the terminal back
    $pass = '' unless defined $pass;
    chomp($pass);
    print "\n";

    #Log in to Google Trends
    $trendsbot->get('http://www.google.com/accounts/ServiceLogin?service=trends');
    $trendsbot->form_id('gaia_loginform');
    $trendsbot->field('Email',  $login);
    $trendsbot->field('Passwd', $pass);
    $trendsbot->click();

    #Save cookies for next time
    $cookiejar->save('cookies.dat');
}

#Grab the latest comic number
$latestcomic = get($baseurl);
if (defined $latestcomic
    && $latestcomic =~ /Permanent link to this comic: http:\/\/xkcd\.com\/(\d+)\//) {
    $latestnum = $1;
}
else {
    # Without it the bracketed defaults below are empty.
    print "WARNING: Couldn't determine the latest comic number! Enter the range explicitly.\n";
}

# Print a prompt, read one line from STDIN and return it without its line
# ending. An empty (or otherwise false) answer, or end of input, yields
# $default instead.
my $read_answer = sub {
    my ($prompt, $default) = @_;
    print $prompt;
    my $answer = <STDIN>;
    $answer = '' unless defined $answer;
    chomp($answer);
    return $answer ? $answer : $default;
};

# Range of comics to walk. -r as the first argument walks newest-first.
my ($start, $end);
if (defined $ARGV[0] && $ARGV[0] eq '-r') {
    $start = $read_answer->("Starting comic [$latestnum]?", $latestnum);
    $end   = $read_answer->("Ending comic [1]?", 1);
    $inc   = -1;
}
else {
    $start = $read_answer->("Starting comic [1]?", 1);
    $end   = $read_answer->("Ending comic [$latestnum]?", $latestnum);
    $inc   = 1;
}

#Initialize the log file if it doesn't exist
unless (-e 'xkcdeffect.csv') {
    # One header column per (day offset, scale) pair, in the same order the
    # index values are collected: offsets -2..2, each with every scale.
    my $pubstr = '';
    for my $offset (-2 .. 2) {
        for my $scale (@scalenums) {
            $pubstr .= sprintf("Pubdate%+d (scale=%d),", $offset, $scale);
        }
    }

    open(my $log, '>', 'xkcdeffect.csv')
        or die "Couldn't open the log file! Something is very wrong.\n";
    print {$log} "Comic Number,Comic Date,Comic Title,Trends URL,Keyphrase," . $pubstr . "Hotness Rating\n";
    close($log)
        or die "Couldn't write the log file header: $!\n";
}

print "$start to $end\n" if $numvs > 0;

#Everything looks fine, plow ahead...
# ---- Helpers used by the main loop --------------------------------------

# Print a prompt, read one line from STDIN and return it without its line
# ending. An empty (or otherwise false) answer, or end of input, yields
# $default instead.
my $ask = sub {
    my ($prompt, $default) = @_;
    print $prompt;
    my $answer = <STDIN>;
    $answer = '' unless defined $answer;
    chomp($answer);
    return $answer ? $answer : $default;
};

# Download the Trends CSV for one month ("YYYY-M") at every configured scale.
# Returns a list indexed by scale number.
my $fetch_month_csvs = sub {
    my ($urlphrase, $date) = @_;
    my @csv;
    foreach my $scale (@scalenums) {
        my $url = sprintf($csvurl, $urlphrase, $date, $scale);
        print "Getting data from " . $url . "...\n" if $numvs > 0;
        $trendsbot->get($url);
        $csv[$scale] = $trendsbot->content();
    }
    return @csv;
};

# Double any embedded double quotes so a value can sit inside "..." in CSV.
my $csv_escape = sub {
    my ($value) = @_;
    $value = '' unless defined $value;
    $value =~ s/"/""/g;
    return $value;
};

# Leave plain values untouched (keeps existing log rows' format); quote only
# values that would otherwise break the row.
my $csv_quote_if_needed = sub {
    my ($value) = @_;
    $value = '' unless defined $value;
    return $value unless $value =~ /[",\r\n]/;
    return '"' . $csv_escape->($value) . '"';
};

# ---- Main loop: one iteration per comic ---------------------------------
# The condition is direction-aware so a range entered "backwards" ends the
# loop immediately instead of counting away from $end forever.
for (my $comicnum = $start; ($end - $comicnum) * $inc >= 0; $comicnum += $inc) {
    my $transpage     = sprintf($transcriptionurl, $comicnum);
    my $transcription = get($transpage);
    $transcription = '' unless defined $transcription;

    # Look the comic up on the archive page for its date and title.
    # NOTE(review): the markup part of this pattern was lost in the source and
    # is reconstructed as <a href="/N/" title="Y-M-D">Title</a> -- confirm
    # against the live archive page.
    my ($comicdate, $comictitle) = ('', '');
    if (defined $archivepage
        && $archivepage =~ m{<a href="/$comicnum/" title="([^"]+)">([^<]+)</a>}) {
        ($comicdate, $comictitle) = ($1, $2);
    }

    # Date parts are scoped to this comic so a comic without a date can never
    # reuse the previous comic's values.
    my ($year, $month, $day, $gtyear);
    my $havedate = 0;
    if ($comicdate =~ /(\d+)-(\d+)-(\d+)/) {
        ($year, $month, $day) = ($1, $2, $3);
        $gtyear   = $year - 1900;               # strftime wants years since 1900
        $month    = sprintf("%02d", $month);    #Zero-pad the month
        $havedate = 1;
    }

    print "-" x 20 . "\n";
    my $weekday = $havedate ? strftime('%a', 0, 0, 0, $day, $month - 1, $gtyear) : '';
    print "Comic #$comicnum: $comictitle (" . $weekday . " $comicdate)\n";

    # Extract the transcription, either from the page text or from the edit
    # <textarea>. Which source matched is remembered in a flag because the
    # substitutions below reset the capture variables.
    # NOTE(review): the tags in both patterns were lost in the source; the
    # first is reconstructed as <br>s following the sentence -- confirm.
    my ($text, $from_textarea);
    if ($transcription =~ m{Here's the transcription for this comic!(?:\s*<(?i:br)\s*/?>)*\s*(.*?)</p>}s) {
        $text          = $1;
        $from_textarea = 0;
    }
    elsif ($transcription =~ m{<textarea[^>]*>([^<]+)</textarea>}) {
        $text          = $1;
        $from_textarea = 1;
    }

    my $doimage;
    if (defined $text) {
        $text =~ s/\[\[.*?\]\]//gs;                     #Eliminate descriptions
        unless ($from_textarea) {
            $text =~ s/[\r\n\t]//g;                     #Eliminate HTML Whitespace
            $text =~ s{(?:<(?i:br)\s*/?>)+}{\n}g;       #Insert newlines for <br>s
        }
        print "$text\n";
        $doimage = $ask->("Do you want to view the comic image [y/N/q]?", 'N');
    }
    else {
        $doimage = $ask->(
            "Could not download text" . ($numvs > 0 ? " (tried $transpage)" : '')
                . "!\nDo you want to view the comic image [Y/n/q]?",
            'Y'
        );
    }
    print "\n";

    last if uc($doimage) eq 'Q';

    # Optionally download the comic image and hand it to the image viewer.
    my $viewerr = 0;
    if (uc(substr($doimage, 0, 1)) eq 'Y') {
        my $comicpage = get(sprintf($comicurl, $comicnum));
        # NOTE(review): this pattern was lost in the source; reconstructed as
        # an <img> tag carrying src, title and alt in that order -- confirm.
        if (defined $comicpage
            && $comicpage =~ m{<img\s+src="([^"]+)"\s+title="([^"]*)"\s+alt="[^"]*"}) {
            my ($imgurl, $imgtitle) = ($1, $2);
            if ($imgurl =~ m{/([^/]*)\.(\w+)$}) {
                my $filename = $tempfile eq '' ? "$1.$2" : "$tempfile.$2";
                print "Fetching $imgurl to $filename...\n" if $numvs > 1;
                print "Alt text: $imgtitle\n";
                if (is_success(getstore($imgurl, $filename))) {
                    system($imageviewer, $filename);
                }
                else {
                    print "Gack! Couldn't download the image!\n";
                    $viewerr = 1;
                }
            }
            else {
                print "Gack! Couldn't parse the image url!\n";
                $viewerr = 1;
            }
        }
        else {
            print "Gack! Couldn't parse the xkcd page!\n";
            $viewerr = 1;
        }
    }

    # Fall back to the browser when the image could not be shown.
    if ($viewerr) {
        my $browse = $ask->("Open xckd page in browser? [Y/n]?", 'Y');
        if (uc(substr($browse, 0, 1)) eq 'Y') {
            my $pageurl = sprintf($comicurl, $comicnum);
            system($urlviewer, $pageurl);
            print "Opening " . $pageurl . "\n";
        }
    }

    # Every Trends lookup is relative to the publication date.
    unless ($havedate) {
        print "No publication date for comic #$comicnum, skipping trend lookup...\n";
        next;
    }

    # Ask for search phrases until an empty answer is given.
    while (1) {
        my $phrase = $ask->("Search phrase?", '');
        last unless $phrase;

        (my $urlphrase = $phrase) =~ s/ /\+/g;

        # Month selectors for the Trends URLs. Day 0 of a month normalises to
        # the last day of the month before it, which is how the previous and
        # next months are reached.
        my $urldate  = "$year-$month";
        my $prevdate = strftime("%Y-%m", 0, 0, 0, 0, $month - 1, $gtyear);
        my $nextdate = strftime("%Y-%m", 0, 0, 0, 0, $month + 1, $gtyear);
        s/-0/-/ for ($urldate, $prevdate, $nextdate);   # Trends wants unpadded months

        my $trendspage = sprintf($trendsurl, $urlphrase, $urldate);

        print "\n" . "#" x 20 . "\n" if $numvs > 0;

        # The five-day window may spill into a neighbouring month; fetch that
        # month's CSVs only when it does.
        my (@prevcsv, @nextcsv);
        if (strftime("%m", 0, 0, 0, $day - 2, $month - 1, $gtyear) != $month) {
            print "NOTE: Downloading previous month\n" if $numvs > 0;
            @prevcsv = $fetch_month_csvs->($urlphrase, $prevdate);
        }
        my @curcsv = $fetch_month_csvs->($urlphrase, $urldate);
        if (strftime("%m", 0, 0, 0, $day + 2, $month - 1, $gtyear) != $month) {
            print "NOTE: Downloading next month\n" if $numvs > 0;
            @nextcsv = $fetch_month_csvs->($urlphrase, $nextdate);
        }

        #Grab the numbers!
        # @indexlist ends up with one value per (day offset, scale) pair:
        # offsets -2..2 in order, each followed by every scale.
        my @indexlist;
        my $curmonth = "$year.$month";
        for my $offset (-2 .. 2) {
            #Determine the needed month's CSV
            my $monthval = strftime("%Y.%m", 0, 0, 0, $day + $offset, $month - 1, $gtyear);
            print "$monthval/$year.$month" if $numvs > 1;

            # Zero-padded "YYYY.MM" strings order correctly as strings.
            my @csvlist;
            if ($monthval eq $curmonth) {
                @csvlist = @curcsv;
            }
            elsif ($monthval lt $curmonth) {
                @csvlist = @prevcsv;
            }
            else {
                @csvlist = @nextcsv;
            }

            my $gtdate = strftime("%b %e %Y", 0, 0, 0, $day + $offset, $month - 1, $gtyear);
            $gtdate =~ s/\s+/ /g;   #Get rid of the extra space padding from strftime

            foreach my $csv (@csvlist) {
                if (defined $csv && $csv =~ /\Q$gtdate\E, ([\d\.]+)/) {
                    my $index = $1;
                    push(@indexlist, $index);
                    print $csv if $numvs > 2;
                    print "Index on $gtdate: $index\n" if $numvs > 1;
                }
                else {
                    push(@indexlist, 0);
                    print "Error grabbing index for $gtdate.\n" if $numvs > 1;
                }
            }
        }

        print "#" x 20 . "\n\n" if $numvs > 0;

        # Check whether the phrase was a "hot trend" on the publication date.
        # NOTE(review): the opening <font tag was lost in the source and is
        # reconstructed here -- confirm.
        my $hoturl    = sprintf($hottrendsurl, $urlphrase, $comicdate);
        my $hottrends = get($hoturl);
        my $washot    = 'not';
        if (defined $hottrends
            && $hottrends =~ m{Hotness: <font[^>]+>([^<]+)</font>}) {
            $washot = $1;
            print "!" x 20 . "\n";
            print "This term was $washot! $hoturl\n";
            print "!" x 20 . "\n";
        }

        print "Index for surrounding 5 days: "
            . join(' ', @indexlist[0 .. 3]) . ' >'
            . join('/', @indexlist[4, 5]) . '< '
            . join(' ', @indexlist[6 .. 9])
            . " ($trendspage)\n";

        # Nothing worth saving when every index is zero.
        unless (grep { $_ > 0 } @indexlist) {
            print "No data avaliable, skipping...\n";
            next;
        }

        #Take a guess as to whether these are good results or not
        # "Good" means the publication day beats the day before at both scales.
        my $probgood = ($indexlist[2] < $indexlist[4] && $indexlist[3] < $indexlist[5]) ? 1 : 0;

        my $dosave = $ask->(
            "Save " . ($probgood ? '[Y/n]' : '[y/N]') . "?",
            ($probgood ? 'Y' : 'N')
        );

        if (uc(substr($dosave, 0, 1)) eq 'Y') {
            my $writeline = sprintf(
                "%d,%s,\"%s\",\"%s\",%s,%s,%s\n",
                $comicnum,
                $comicdate,
                $csv_escape->($comictitle),
                $csv_escape->($trendspage),
                $csv_quote_if_needed->($phrase),
                join(',', @indexlist),
                $csv_quote_if_needed->($washot)
            );

            open(my $log, '>>', 'xkcdeffect.csv')
                or die "Couldn't open the log file for appending: $!\n";
            print {$log} $writeline;
            close($log)
                or die "Couldn't write to the log file: $!\n";
            print "Added to log file." if $numvs > 1;
        }
    }
}