#!/usr/bin/perl
use Fcntl qw(:flock);
use IO::Socket;

#this is used to generate a delay between connections to each proxy
#the formula used is $BASEDELAY + (a random number between 0 and RANDDELAY)
#both values are in seconds (they end up in sleep(), see wait_rand)
$BASEDELAY = 30;
$RANDDELAY = 30;

#controls how often run_proxies randomly skips a proxy
#(25 means roughly one proxy in four is skipped)
$SKIPPERCENT = 25;


#both arguments are required
if(@ARGV < 2 || $ARGV[0] eq "" || $ARGV[1] eq ""){
    print "usage: lobotomy.sh <commandfile> <proxylist>\n";
    exit 1;
}

#"or" instead of "||": with "||" the die was attached to the file name
#(which is always true), so a failed open was never reported.
#The three-argument form keeps characters in the file name from being
#interpreted as an open mode.
open(COMMANDFILE, '<', $ARGV[0])
    or die "Cannot open command file $ARGV[0]: $!\n";

#only checks that the proxy list is readable before any child is forked;
#run_proxies opens it again for itself
open(PROXYLIST, '<', $ARGV[1])
    or die "Cannot open proxy list $ARGV[1]: $!\n";
close PROXYLIST;


#current command set in list form; the parsing loop below fills it and the
#forked children read it in run_commands()
@commandlist = ();

#parser state: 1 while inside a "+++" ... "---" section, 0 otherwise
$incommands = 0;

#log current time
$timestr = scalar(gmtime);
mylog("\n\nlobotomy.sh invoked at $timestr\n");

#this is the commandfile parsing loop
while(<COMMANDFILE>){

    #clean up input string, skip comments and empty lines
    next if /\A#/;
    s/\A\s*|\s*\Z|\r|\n//g;
    next if $_ eq "";

    if($_ eq "+++"){
        #start of a command set
        $incommands = 1;
    }elsif($_ eq "---"){
        #done reading this command list
        #fork off child process to run commands on proxies
        my $pid = fork();
        if(!defined $pid){
            #fork failed. Without this check undef compares equal to 0 and
            #the parent itself would run the child branch and exit.
            mylog("Cannot fork proxy handler: $!");
        }elsif($pid == 0){
            setupchild();
            run_proxies();
            exit 0;
        }

        #parent: reset list and go back to waiting for the next "+++"
        @commandlist = ();
        $incommands = 0;
    }elsif($incommands == 1){
        #a command line inside the current set
        push @commandlist, $_;
    }else{
        print "Invalid line in command file: $_\n";
    }
}


#wait for all our children to terminate (wait returns -1 once none are left)
1 while wait() != -1;

#log finishing time
$timestr = scalar(gmtime);
mylog("\nlobotomy.sh finished at $timestr\n");


##############################
#proxy handler from here down#
##############################


#this is a string containing the hostname of the current proxy
#(set by run_proxies, read by the command handlers)
our $proxy;

#this is a string containing the port of the current proxy
our $port;

#walk the proxy list ($ARGV[1]) and fork one child per proxy; each child runs
#the current command list against its proxy (run_commands) and exits.
#The parent sleeps (wait_rand) between proxies and finally waits for all
#of its children.
#takes no arguments; dies if the proxy list cannot be opened
sub run_proxies
{

    #"or" instead of "||" so that a failed open really reaches the die
    open(PROXYLIST, '<', $ARGV[1])
        or die "Cannot open proxy list $ARGV[1]: $!\n";

    while(<PROXYLIST>){

        #clean up the line first so that blank lines and indented comments
        #are skipped instead of being treated as a proxy
        s/\A\s*|\s*\Z|\r|\n//g;
        next if $_ eq "" || /\A#/;

        #randomly skip about $SKIPPERCENT percent of the proxies
        #(a plain comparison also copes with 0 and with values that do not
        #divide 100 evenly)
        next if rand(100) < $SKIPPERCENT;

        #fill $proxy/$port strings
        ($proxy,$port) = split /:/,$_;
        unless(defined $proxy && $proxy ne "" && defined $port && $port ne ""){
            mylog("PROXYLIST: Invalid line in proxy list: $_");
            next;
        }

        #fork off a command-execution child for each proxy
        #then allow delay between proxies
        my $pid = fork();
        if(!defined $pid){
            #fork failed; without this check the parent would take the
            #child branch (undef == 0) and exit
            mylog("Cannot fork command runner for $proxy:$port: $!");
        }elsif($pid == 0){
            setupchild2();
            run_commands();
            exit 0;
        }
        wait_rand();

    }

    close PROXYLIST;

    #wait for all command-execution children (wait returns -1 once none are left)
    1 while wait() != -1;

}



################################
#command handler from here down#
################################

#the command set is FETCH, FIND and LOG
#
#the fields of a command are separated by '::'
#(the separator is hard-coded in run_commands below)

#this is the current command, split into its '::'-separated fields
#(set by run_commands, read by command_log and command_find)
our @command;

#a buffer containing the text of the last-fetched page is stored here
our $pagebuffer;

#a buffer containing last 'found' text
our $foundbuffer;

#a buffer containing last-fetched url (ie referer)
our $refererbuffer = "";

#run every command of @commandlist, in order, against the current proxy
#takes no arguments and works entirely on the globals above
sub run_commands
{
    #step through each command...

    foreach my $commandstring (@commandlist){

        #parse command string and clean up commandname
        @command = split /::/,$commandstring;
        $command[0] =~ s/\s//g;

        #execute command
        if($command[0] eq "FETCH"){
            #the url is the second field; whitespace inside it is dropped
            my $url = defined $command[1] ? $command[1] : "";
            $url =~ s/\s//g;
            $command[1] = $url;

            #a FETCH without a url cannot be turned into a request
            if($url eq ""){
                mylog("FETCH: Missing URL in command: $commandstring");
                next;
            }

            my $header = generate_header($url);
            my $result = command_fetch($header);
            if($result != 1){
                mylog("FETCH: Error fetching page $url from $proxy:$port");
            }
        }elsif($command[0] eq "FIND"){
            command_find();
        }elsif($command[0] eq "LOG"){
            command_log();
        }else{
            mylog("COMMAND: Bad Command: $commandstring");
        }
    }
}


#LOG command: write the text from the second field of the current command
#to the log file, tagged with the current proxy
sub command_log()
{
    #copy the field and strip whitespace from both ends in one step
    (my $message = $command[1]) =~ s/\A\s*|\s*\Z//g;

    mylog("LOG: $message :: at $proxy:$port");
}

#FIND command: cut the text between two markers out of $pagebuffer, store it
#in $foundbuffer and then perform an optional action on it.
#  $command[1] - text that precedes the wanted part
#  $command[2] - text that follows the wanted part
#  $command[3] - action: FETCH, QUIT or LOG (anything else: no action)
#  $command[4], $command[5] - for FETCH: prefix and suffix put around the
#                             found text to build the url
#$foundbuffer is set to "" when either marker is missing
sub command_find
{

    #get expressions to scan for; they are used exactly as written in the
    #command (surrounding whitespace is not trimmed)
    my $expression1 = defined $command[1] ? $command[1] : "";
    my $expression2 = defined $command[2] ? $command[2] : "";
    my $page = defined $pagebuffer ? $pagebuffer : "";

    #the wanted text starts right after the first marker and ends right
    #before the next occurrence of the second marker
    $foundbuffer = "";
    my $start = index $page,$expression1;
    if($start >= 0){
        $start += length $expression1;
        my $end = index $page,$expression2,$start;
        if($end >= 0){
            $foundbuffer = substr $page,$start,$end-$start;
        }
    }

    my $action = defined $command[3] ? $command[3] : "";

    $action =~ s/\A\s*|\s*\Z//g;

    if($action eq "FETCH"){
        #build a url from prefix + found text + suffix and fetch it
        $foundbuffer =~ s/\s//g;
        my $fetchprefix = defined $command[4] ? $command[4] : "";
        my $fetchsuffix = defined $command[5] ? $command[5] : "";
        $fetchprefix =~ s/\A\s*|\s*\Z//g;
        $fetchsuffix =~ s/\A\s*|\s*\Z//g;
        my $url = "$fetchprefix$foundbuffer$fetchsuffix";
        mylog("Found and generated $url");
        my $header = generate_header($url);
        my $result = command_fetch($header);
        if($result != 1){
            mylog("FINDFETCH: Error fetching page $url from $proxy:$port");
        }
    }elsif($action eq "QUIT"){
        #NOTE(review): this exits whether or not anything was found - confirm intent
        exit(0);
    }elsif($action eq "LOG"){
        if($foundbuffer ne ""){
            mylog("FINDLOG: got $foundbuffer from $refererbuffer at $proxy:$port");
        }else{
            mylog("FINDLOG: could not FIND from $refererbuffer at $proxy:$port");
        }
    }

}


#run $code under an alarm of $seconds seconds
#returns 1 when $code finished in time without dying, 0 otherwise
sub _run_with_alarm
{
    my ($seconds, $code) = @_;

    my $finished = eval{
        local $SIG{ALRM} = sub{die "timeout\n"};

        alarm $seconds;
        $code->();
        alarm 0;
        1;
    };

    #$code may have died for a reason other than the alarm; cancel the alarm
    #so that it cannot go off later, after the handler above has been removed
    alarm 0;

    return $finished ? 1 : 0;
}

#fetch a page through the current proxy ($proxy:$port) and copy the complete
#response into $pagebuffer
#  $_[0] - the complete request text to send (see generate_header)
#returns 1 on success, 0 when connecting, sending or reading failed or timed out
#$pagebuffer is left untouched when the connection or the send fails
sub command_fetch
{
    my ($requestheader) = @_;

    #this is my standard timeout, in seconds
    my $timeout = 5;

    #this is the socket
    my $sock;

    #open connection to server, timeout if we have to
    #(the alarm backs up IO::Socket's own Timeout, which is one second longer)
    my $connected = _run_with_alarm($timeout, sub{
        $sock = IO::Socket::INET->new(PeerAddr=>$proxy,
                                      PeerPort=>$port,
                                      Timeout=>$timeout+1);
    });

    #new() returns undef instead of dying when the connection fails
    return 0 unless $connected && $sock;

    #try to write to server, timeout if it won't respond
    my $sent = _run_with_alarm($timeout*5, sub{
        print {$sock} $requestheader or die "write failed\n";
    });

    #try to read page from server, timeout if it won't respond
    #copy page into $pagebuffer
    my $received = 0;
    if($sent){
        $received = _run_with_alarm($timeout*5, sub{
            $pagebuffer = "";
            while(defined(my $line = <$sock>)){
                $pagebuffer .= $line;
            }
        });
    }

    $sock->close();
    return ($sent && $received) ? 1 : 0;
}

#build the complete HTTP/1.0 GET request for a url
#  $_[0] - the url to request
#returns the request text, terminated by an empty line
#side effect: the previous value of $refererbuffer is sent as Referer and
#$refererbuffer is then set to the url
sub generate_header
{
    my ($url) = @_;
    my $referer = defined $refererbuffer ? $refererbuffer : "";

    #create HTTP request header; HTTP lines end in CRLF, and the two empty
    #strings at the end produce the blank line that closes the header
    #NOTE(review): gzip is advertised here, but command_fetch stores the
    #response as received - confirm that compressed replies are acceptable
    my $header = join "\r\n",
        "GET $url HTTP/1.0",
        "Referer: $referer",
        "Connection: Keep-Alive",
        "User-Agent: Mozilla/4.6 [en] (Windows 95)",
        "Accept: image/gif, image/jpeg, */*",
        "Accept-Encoding: gzip",
        "Accept-Language: en",
        "Accept-Charset: iso-8859-1,*,utf-8",
        "",
        "";

    #update referer
    $refererbuffer = $url;

    return $header;
}

    

#append one line to lobotomy.log in the current directory
#  $_[0] - the text to log (a newline is added)
#The file is locked exclusively while writing because several processes
#log to it at the same time. Logging is best-effort: if the file cannot be
#opened a warning is printed and the message is dropped.
sub mylog
{
    my ($logstr) = @_;

    my $log;
    unless(open($log, '>>', 'lobotomy.log')){
        warn "Cannot open lobotomy.log: $!\n";
        return;
    }

    #LOCK_EX comes from Fcntl; as a bareword it would count as 0 and no lock
    #would be taken
    flock($log, LOCK_EX) or warn "Cannot lock lobotomy.log: $!\n";

    print {$log} "$logstr\n";

    #close flushes the line and releases the lock
    close($log);

}


#pause between two proxies: sleeps $BASEDELAY seconds plus a random extra
#amount below $RANDDELAY seconds
sub wait_rand()
{
    my $jitter = rand($RANDDELAY);
    sleep($jitter + $BASEDELAY);
}

#called for each command-executing child (the one that runs run_commands):
#drops this process's handle on the proxy list
#returns the result of close
sub setupchild2()
{
    return close(PROXYLIST);
}

#called for each proxy-handling child (the one that runs run_proxies):
#puts SIGCHLD handling back to the default and drops this process's handle
#on the command file
#returns the result of close
sub setupchild()
{
    $SIG{'CHLD'} = 'DEFAULT';
    return close(COMMANDFILE);
}


