#!/usr/bin/perl
#
# This program rebuilds the cdb database. If called from the web server it
# will add the remote IP address to the blocklist before doing the rebuild.
# This should only be pointed to from web pages that use meta tags to
# tell robots not to follow links off the page.
#
# Inputs:
#   $ENV{REMOTE_ADDR}  - when it looks like a dotted-quad IPv4 address the
#                        script treats the run as a web request, records the
#                        address under ip1/ and prints the "blocked" page.
#   $ENV{HTTP_REFERER} - stored as the contents of the ip1/<address> file.
#   ip1/               - one directory entry per blocked address or prefix
#                        (a.b.c.d, a.b.c or a.b).
# Output:
#   data1/data.cdb     - rebuilt via the temporary file data1/data.cdb.<pid>.
#
# The script dies (leaving the existing database untouched) when it cannot
# change directory, cannot read ip1/, or cannot write the new database.

use strict;
use warnings;

use CDB_File;

umask 022;
chdir '/var/www/html/robot'
    or die "Cannot chdir to /var/www/html/robot: $!\n";

# REMOTE_ADDR is absent when the script is run by hand; treat that as "no
# address" instead of matching against undef.
my $remote_addr = defined $ENV{REMOTE_ADDR} ? $ENV{REMOTE_ADDR} : '';

# \A...\z (not ^...$) so a trailing newline can never end up in the file name.
if ($remote_addr =~ m/\A\d+\.\d+\.\d+\.\d+\z/) {
    # Recording the address is best effort: the page is still served and the
    # database still rebuilt when the marker file cannot be written.
    if (open(my $marker, '>', "ip1/$remote_addr")) {
        # Save referer header in case some bozo links to this page. We will
        # want to know that and block access from any server that does this.
        # Later we will want to add a meta robot tag to keep valid robots who
        # are misdirected here from remembering the page which will make them
        # come back repeatedly without using a referer (sic) header which we
        # can block on.
        my $referer = defined $ENV{HTTP_REFERER} ? $ENV{HTTP_REFERER} : '';
        print {$marker} "$referer\n";
        close($marker)
            or warn "Cannot finish writing ip1/$remote_addr: $!\n";
    }
    else {
        warn "Cannot record ip1/$remote_addr: $!\n";
    }

    print "Content-type: text/html\n\n";
    my $title = "Your IP address ($remote_addr) has been blocked!";
    # NOTE(review): the header announces text/html but the body below carries
    # no markup; any original tags may have been lost -- confirm before
    # adding them back.
    print <<"EOF";
$title

$title

All future requests from your IP address ($remote_addr) will result in a 403 forbidden result.

Most likely this message is being read by a robot that does not pay attention to meta robot tags. Robots that ignore these tags are not permitted here. for more information see http://www.robotstxt.org/wc/exclusion.html .
EOF
}
else {
    print "Rebuilding the blocklist.\n";
}

# Use a format compatible with rbldns: each key is the four address octets
# followed by one octet holding the prefix length (32, 24 or 16), and the
# value is empty.
my %blocklist;

# Refuse to continue when the directory cannot be read; otherwise the
# database would be replaced by one that blocks nothing.
opendir(my $dir, 'ip1')
    or die "Cannot read directory ip1: $!\n";

# defined() keeps an entry literally named "0" from ending the scan early.
while (defined(my $entry = readdir($dir))) {
    # Accept a.b, a.b.c and a.b.c.d; everything else (".", "..", stray
    # files) is ignored.
    next unless $entry =~ m/\A(\d+)\.(\d+)(?:\.(\d+)(?:\.(\d+))?)?\z/;

    my @octets = grep { defined $_ } ($1, $2, $3, $4);

    # pack 'C' would silently wrap values above 255 and block the wrong
    # network, so skip such entries.
    if (grep { $_ > 255 } @octets) {
        warn "Ignoring ip1/$entry: octet out of range\n";
        next;
    }

    # Two, three or four octets given -> /16, /24 or /32.
    my $prefix_bits = 8 * scalar @octets;
    push @octets, 0 while @octets < 4;

    $blocklist{ pack('CCCCC', @octets, $prefix_bits) } = '';
}
closedir($dir);

# The empty key holds the packed address 127.0.0.10 followed by the text
# 'Spam block' (presumably the answer rbldns returns for listed addresses --
# confirm against the rbldns data format).
$blocklist{''} = pack('CCCC', 127, 0, 0, 10) . 'Spam block';

# create() writes to the temporary file and then renames it over the real
# database; it returns false on failure.
CDB_File::create(%blocklist, 'data1/data.cdb', "data1/data.cdb.$$")
    or die "Cannot rebuild data1/data.cdb: $!\n";