# 2012-05-21 19:08:04 +01:00
#!/usr/bin/python
# 2013-07-13 08:21:09 +01:00
""" Program to fetch a blocklist via HTTP """
# 2012-05-21 19:08:04 +01:00
import sys
import logging
class BlockList:
    """Read Sentor blocklist URLs and collect entries in a common data store.

    Entries are keyed by remote IP address; the first record seen for an
    address wins and later duplicates are ignored.
    """

    # Class-level fallback store; __init__ replaces it with a per-instance
    # dict so separate BlockList objects do not share entries (the original
    # mutable class attribute was shared by every instance).
    data = {}

    def __init__(self):
        """Read and parse configuration to a local object variable.

        Looks for "blocklist.cfg" in the same directory as the main script.
        Logs and re-raises IOError / configparser errors when the file is
        missing or malformed.
        """
        import configparser
        import os
        # Give every instance its own store instead of mutating the shared
        # class attribute.
        self.data = {}
        # Find the configuration file in the same directory as the main script.
        config_file = os.path.join(os.path.dirname(sys.argv[0]), "blocklist.cfg")
        try:
            self.config = configparser.ConfigParser()
            # "with" closes the handle even when parsing fails; the original
            # readfp(open(...)) leaked the open file object.
            with open(config_file) as handle:
                self.config.read_file(handle)
        except (IOError, configparser.MissingSectionHeaderError) as error:
            logging.error("Could not read configuration file %s: %s", config_file, error)
            raise

    def read(self, source):
        """Parse the blocklist from the provided url (source) using a CSV parser."""
        import csv
        try:
            # Parse the Sentor Assassin blocklist format (easiest to use a CSV parser)
            reader = csv.reader(self.cache(source))
            for line in reader:
                # Skip blank rows; log malformed rows instead of crashing on
                # an unpack ValueError (only csv.Error was caught before).
                if not line:
                    continue
                try:
                    (remote_ip, forwarded_ip, useragent, cookie) = line
                except ValueError:
                    logging.warning("Skipping malformed blocklist line: %r", line)
                    continue
                self.add(remote_ip, forwarded_ip, useragent, cookie)
        except csv.Error as error:
            logging.error("There was an error retrieving the blocklist. %s", error)

    def add(self, remote_ip, forwarded_ip, useragent, cookie):
        """Store the remote_ip, forwarded_ip, useragent and cookie in the in-memory dictionary.

        First entry for a given remote_ip wins; duplicates are logged at
        debug level and ignored.
        """
        if remote_ip not in self.data:
            self.data[remote_ip] = {
                "remote_ip": remote_ip,
                "forwarded_ip": forwarded_ip,
                "useragent": useragent,
                "cookie": cookie,
            }
        else:
            logging.debug("%s already exists in blacklist. Ignoring.", remote_ip)

    def cache(self, source):
        """Fetch source, cache the body on disk and return a readable file object.

        Falls back to the on-disk cache when the network fails, and to an
        in-memory buffer when the cache file cannot be written.  Raises
        IOError when neither the network nor the cache is available.
        """
        import io
        import os
        import urllib.error
        import urllib.parse
        import urllib.request
        # Build some 'handy' variables
        hostname = urllib.parse.urlparse(source)[1]
        cache_dir = self.config.get("cache", "directory")
        cache_path = os.path.join(cache_dir, "%s.cache" % hostname)
        # Create the caching directory; failure is non-fatal because we can
        # still serve transient data from memory.
        if not os.path.exists(cache_dir):
            try:
                os.makedirs(cache_dir)
            except OSError as error:
                logging.warning("Could not create the caching directory: %s Will attempt to run without a cache.", error)
        # Attempt to fetch the data and store it in a cache file
        try:
            response = urllib.request.urlopen(source)
            # urlopen returns bytes in Python 3: decode so the cache file and
            # the CSV reader both see text.
            raw_data = response.read().decode("utf-8", "replace")
            cache_file = open(cache_path, "w+")
            cache_file.write(raw_data)
        except urllib.error.URLError as error:
            # Network error (HTTPError is a URLError subclass). Warn and use
            # the cached content.
            logging.warning("Reverting to cache file. There was a problem contacting host %s: %s", hostname, error)
            try:
                cache_file = open(cache_path, "r")
            except IOError as error:
                logging.error("No cache file was available for %s: %s", hostname, error)
                raise
        except IOError as error:
            # Cache error, but network succeeded. Return the data from an
            # in-memory text buffer instead.
            logging.warning("Could not create cache file: %s. Returning transient data.", error)
            cache_file = io.StringIO()
            cache_file.write(raw_data)
        # Rewind the file so the caller reads from the beginning
        cache_file.seek(0)
        # Return the best available data
        return cache_file

    def dump(self):
        """Dump the local datastore out to stdout, one "ip: field" line per field."""
        for name, val in self.data.items():
            for address in val:
                print("%s: %s" % (name, address))

    def export(self):
        """Output a plaintext CSV blocklist to stdout."""
        for item in self.data.values():
            print("%s,%s,%s,%s" % (item["remote_ip"], item["forwarded_ip"], item["useragent"], item["cookie"]))