#!/usr/bin/perl -Tw --
#
# Publish the U.S. House of Representatives floor summary as an RSS 2.0 feed.
#
# Outline:
#   * Ideally the timestamp of the last update would be checked against the
#     HTTP headers first, but the HEAD method doesn't work on their server,
#     so the floor summary is fetched with GET on every run.
#   * Print a MIME header if running as CGI.
#   * Parse the summary and transform it to RSS 2.0.
#   * Print the RSS to standard output.
#   * If not running as CGI, publish the RSS to a web server via DAV.
#
# There is no way to tell which bill any past summaries refer to, only
# the current one, sorry!
#
# Requesting the script with "source" in the query string prints the
# script's own source as text/plain instead of the feed.

use 5.6.0;
use strict;
use warnings;
use CGI::Carp qw(fatalsToBrowser warningsToBrowser);
use LWP::UserAgent;
use HTTP::Request;
use POSIX 'strftime';
use XML::DOM;
use XML::RSS;
use HTTP::DAV;
use MD5;

# NOTE(review): the markup inside the two patterns below was lost from the
# copy of the script this was restored from (the HTML tags had been stripped
# out of the regexes).  They are tolerant reconstructions: "a line that
# starts with a tag and contains a link" for the bill title, and "a line
# that starts with one or more tags followed by a clock time" for an entry.
# Confirm both against the live page markup.
my $TITLE_RE = qr{^\s*<[^>]+>.*<A\b[^>]*>(.*?)</A>}i;
my $TIME_RE  = qr{^\s*(?:<[^>]+>\s*)+(\d+:\d+\s*[AP]\.\s*M\.)};

# NOTE(review): the original path of the DAV password file was lost as well.
# This is a placeholder; restore the real location ("user:password" on the
# first line) before running outside of CGI.
my $DAV_PASSWD_FILE = '/path/to/webdav/passwd';

# ----------------------------------------------------------------------
# "View source" mode
# ----------------------------------------------------------------------
if (defined($ENV{'QUERY_STRING'}) && $ENV{'QUERY_STRING'} =~ /source/) {
    print "Content-type: text/plain\r\n\r\n";

    # Only the last path component of SCRIPT_NAME is used, so the file is
    # looked up relative to the current working directory.
    my $fname = defined($ENV{'SCRIPT_NAME'}) ? $ENV{'SCRIPT_NAME'} : '';
    $fname =~ s/.*\/(.*)$/$1/;

    # Three-argument open: the file name can never be read as a mode.
    open my $srcfh, '<', $fname
        or die "Couldn't open $fname: $!";
    while (my $src_line = <$srcfh>) {
        print $src_line;
    }
    close $srcfh;
    exit 0;
}

# ----------------------------------------------------------------------
# MIME header (CGI only)
# ----------------------------------------------------------------------
if (defined($ENV{'SERVER_SOFTWARE'})) {
    # The feed is RSS 2.0, not RDF-based RSS 1.0.
    print "Content-type: application/rss+xml\r\n\r\n";
}

# ----------------------------------------------------------------------
# Fetch the floor summary
# ----------------------------------------------------------------------
my $url = qq{http://clerk.house.gov/floorsummary/floor.html};
my $lwp = LWP::UserAgent->new;
my $req = HTTP::Request->new(GET => $url);
my $res = $lwp->request($req);
if (!$res->is_success) {
    die "Couldnt fetch $url: " . $res->status_line;
}

# ----------------------------------------------------------------------
# Parse it
#
# $state: 0 = nothing seen yet, 5 = inside the bill title,
#         1 = inside the description of the entry stamped $time.
# ----------------------------------------------------------------------
my @items = ();
my $state = 0;
my $title = '';
my $time  = '';
my $desc  = '';

# split /^/ yields the content one line at a time, line endings included.
for my $line (split /^/, $res->content) {
    if ($title eq '' && $line =~ $TITLE_RE) {
        # First link found: the bill currently on the floor.
        $title .= $1;
        $state = 5;
    }
    elsif ($line =~ $TIME_RE) {
        my $stamp = $1;
        if ($state == 1) {
            # End of previous item
            push @items, { 'time' => $time, 'desc' => $desc };
            $desc = '';
        }
        $time  = $stamp;
        $state = 1;
    }
    elsif ($state == 1) {
        # Continuation of the current entry: flatten line breaks and drop
        # markup before appending to the description.
        $line =~ s/[\r\n]+/ /g;
        $line =~ s/<.*?>//g;
        $desc .= $line;
    }
    # Lines following the title (state 5) are deliberately ignored: it is
    # not clear whether they should all go in the title.
}

# The loop only flushes an item when the *next* time stamp shows up, so the
# final entry on the page has to be flushed here or it is silently lost.
# NOTE(review): its description runs to the end of the page, so it may pick
# up trailing page text - confirm against the live page.
if ($state == 1) {
    push @items, { 'time' => $time, 'desc' => $desc };
}

# ----------------------------------------------------------------------
# Create RSS file
# ----------------------------------------------------------------------
my $rss = XML::RSS->new(version => '2.0');

# %Y (calendar year), not %G (ISO-8601 week-based year, which differs from
# the calendar year for a few days around New Year).
my $datefmt = "%a, %d %b %Y %H:%M:%S GMT";
my @gmtime  = gmtime(time);
my $date    = strftime($datefmt, @gmtime);

$rss->channel(
    title          => 'U.S. House of Representatives Floor Summary',
    link           => $url,
    language       => 'en',
    description    => 'U.S. House of Representatives Floor Summary',
    rating         => '(PICS-1.1 "http://www.classify.org/safesurf/" 1 r (SS~~000 1))',
    pubDate        => $date,
    lastBuildDate  => $date,
    managingEditor => 'info.clerkweb@mail.house.gov',
    webMaster      => 'www@icequake.net',
    # Advise readers to update no more frequently than once per minute
    ttl            => '1',
    syn            => {
        # Advise readers that updates are available 60 times an hour
        updatePeriod    => "hourly",
        updateFrequency => "60",
        updateBase      => "1901-01-01T00:00+00:00",
    },
);

# Everything after sec/min/hour: mday, mon, year, wday, yday, isdst.  Each
# item reuses today's (GMT) date with its own clock time.
# NOTE(review): the clock times are taken from the page as-is and emitted
# with a "GMT" label; presumably the page uses local (U.S. Eastern) time -
# confirm, and convert if exact pubDates matter.
my @today = @gmtime[3 .. $#gmtime];

my $first = 1;
foreach my $item (@items) {
    my $stamp = $item->{'time'};

    # Convert the 12-hour clock time to 24-hour.
    my ($hour, $minute) = $stamp =~ /(\d+):(\d+)/;
    if ($stamp =~ /P/ && $hour != 12) { $hour += 12; }
    if ($stamp =~ /A/ && $hour == 12) { $hour = 0; }
    my $item_date = strftime($datefmt, 0, $minute, $hour, @today);

    # Short form of the description: everything up to the first period
    # that follows a word of at least four characters.
    my $short = $item->{'desc'};
    $short =~ s/(.*?\w{4}\.\s+).*/$1/;

    # Only the first (current) item can be attributed to the bill title.
    my $item_title = ($first && $title ne '') ? $title . ": " . $short
                                              : $short;

    # NOTE(review): the separator between time and title was damaged in
    # the restored copy; " - " is assumed.
    $rss->add_item(
        title       => $stamp . " - " . $item_title,
        link        => $url,
        description => $item->{'desc'},
        pubDate     => $item_date,
        guid        => $stamp . $short,
    );
    $first = 0;

    # Limit RSS items
    #pop(@{$rss->{'items'}}) if (@{$rss->{'items'}} == 15);
}

my $xml = $rss->as_string;
print STDOUT $xml;

# ----------------------------------------------------------------------
# Publish via DAV (command-line runs only)
# ----------------------------------------------------------------------
if (!defined($ENV{'SERVER_SOFTWARE'})) {
    my $d        = HTTP::DAV->new;
    my $wdurl    = "http://home.icequake.net/~nemesis/webdav/";
    # $wdurl already ends in a slash; do not add another one.
    my $feed_dir = $wdurl . "house-rss";

    # I store my DAV password in plaintext under the DAV directory
    open my $passwdfh, '<', $DAV_PASSWD_FILE
        or die "Couldn't open $DAV_PASSWD_FILE: $!\n";
    my $passwd = <$passwdfh>;
    close $passwdfh;
    die "$DAV_PASSWD_FILE is empty\n" unless defined $passwd;

    # Drop the line ending so it does not end up in the password, and split
    # on the first colon only so the password itself may contain colons.
    chomp $passwd;
    my ($user, $pass) = split /:/, $passwd, 2;
    die "$DAV_PASSWD_FILE must contain user:password\n"
        unless defined $pass;

    $d->credentials(
        -user  => "$user",
        -pass  => "$pass",
        -url   => $wdurl,
        -realm => "nemesis WebDAV Storage",
    );
    $d->open( -url => "$wdurl" )
        or die("Couldn't open $wdurl: " . $d->message . "\n");

    # Make a null lock on newdir
    $d->lock( -url => $feed_dir, -timeout => "30s" )
        or die "Couldn't obtain specified lock\n";

    # Make a new directory
    #$d->mkcol( -url => $feed_dir )
    #or die "Couldn't make dir at $wdurl\n";

    # Upload the feed to newdir.
    if ( $d->put( -local => \$xml, -url => $feed_dir . "/house.rss" ) ) {
        print "successfully uploaded file to $wdurl\n";
    }
    else {
        print "put failed: " . $d->message . "\n";
    }

    # Release the lock on the URL that was actually locked.
    $d->unlock( -url => $feed_dir );
}

warningsToBrowser(1);
exit 0;