#!/usr/bin/perl
#
# Walks the bash.org "browse" pages starting at a given page number, extracts
# each quote together with its rating, and appends one SQL INSERT statement
# per quote to bash.org.sql in the current directory.
#
# Usage: perl <this script> [start_page]
#   start_page  optional non-negative page number to begin at (default 0)

use strict;
use warnings;
use LWP::UserAgent;

# File the generated INSERT statements are appended to.
my $OUTPUT_FILE = 'bash.org.sql';

# Abort once this many page fetches have failed in a row, so a dead network
# or a site outage does not leave the script requesting pages forever.
my $MAX_CONSECUTIVE_ERRORS = 5;

# main loop
#
# Reads the optional start page from @ARGV and fetches pages in increasing
# order until the site answers with its "Invalid Page" marker. Pages that
# fail to download are reported on STDERR and skipped.
sub main {
    my ($start_page) = @ARGV;
    $start_page = 0 if !$start_page;

    # The value is interpolated into a URL and incremented, so insist on digits.
    die "Usage: $0 [start_page]\n" if $start_page !~ /\A[0-9]+\z/;

    print STDOUT "Starting with page $start_page\n";

    my $consecutive_errors = 0;
    for (my $page_no = $start_page; ; $page_no++) {
        print STDOUT "Getting page: $page_no\n";
        my $data = get_url("http://bash.org/?browse&p=$page_no");

        if (!$data) {
            print STDERR "Page $page_no error!\n";
            $consecutive_errors++;
            if ($consecutive_errors >= $MAX_CONSECUTIVE_ERRORS) {
                die "Giving up after $consecutive_errors consecutive failed pages\n";
            }
            next;
        }
        $consecutive_errors = 0;

        # Past the last page the site reports "Invalid Page"; that ends the run.
        if ($data =~ /Invalid Page/i) {
            print STDERR "Done\n";
            return;
        }

        process_page($data);
    }
}

# processes a page
#
# Scans the HTML in $page for (rating, quote text) pairs and hands each pair
# to write_quote() after turning the quote markup back into plain text.
sub process_page {
    my ($page) = @_;

    # Capture 1: the score in parentheses after the "+" vote link (may be
    #            negative).
    # Capture 2: everything inside the following class="qt" paragraph; only
    #            this part is allowed to span newlines.
    # The gap between the two is matched non-greedily so a rating is paired
    # with the nearest quote body rather than the last one on the same line.
    while ($page =~ m{class="qa">\+</a>\((-?[0-9]+)\).*?class="qt">((?s:.*?))</p>}g) {
        my ($rating, $quote) = ($1, $2);

        # Line breaks are carried by <br /> tags; drop the raw CRLFs first.
        $quote =~ s/\r\n//g;
        $quote =~ s{<br\s*/?>}{\n}gi;

        # Decode entities only after the real tags are gone, and decode &amp;
        # last so that text such as "&amp;lt;" ends up as "&lt;", not "<".
        $quote =~ s/&gt;/>/gi;
        $quote =~ s/&lt;/</gi;
        $quote =~ s/&quot;/"/gi;
        $quote =~ s/&amp;/&/gi;

        write_quote($rating, $quote);
    }
    return;
}

# writes a quote
#
# Appends a single INSERT statement for the quote to $OUTPUT_FILE. Single
# quotes and backslashes in the text are doubled so the value stays inside
# its SQL string literal. Dies if the file cannot be opened or flushed.
sub write_quote {
    my ($rating, $quote) = @_;

    $quote =~ s/'/''/g;
    $quote =~ s/\\/\\\\/g;

    open(my $out, '>>', $OUTPUT_FILE)
        or die "Cannot open $OUTPUT_FILE for appending: $!\n";
    print {$out} qq~
insert into kwote (submit_dt, content, rating, ip_address) values (
    '2006-05-05 00:00:00',
    '$quote',
    '$rating',
    '127.0.0.1'
);
~;
    # Buffered write errors only surface at close, so check it.
    close($out) or die "Cannot close $OUTPUT_FILE: $!\n";
    return;
}

# gets a web page
#
# Fetches $url with a 10 second timeout. Returns the response body on
# success; on failure reports the HTTP status line on STDERR and returns 0.
sub get_url {
    my ($url) = @_;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);

    my $response = $ua->get($url);
    return $response->content if $response->is_success;

    print STDERR "HTTP Error: " . $response->status_line . "\n";
    return 0;
}

# start the madness
main();