diff --git a/blocklist.py b/blocklist.py index ff56229..6832788 100644 --- a/blocklist.py +++ b/blocklist.py @@ -4,8 +4,9 @@ import sys import logging -class BlockList: - """Class to perform basic reading of Sentor blocklist URLs and add them to a common data store.""" +class BlockList(object): + """Class to perform basic reading of Sentor blocklist URLs and add them + to a common data store.""" data = {} def __init__(self): @@ -14,53 +15,80 @@ class BlockList: import os # Find the configuration file in the same directory at the main script. - config_file = os.path.join(os.path.dirname(sys.argv[0]), "blocklist.cfg") + config_file = os.path.join( + os.path.dirname(sys.argv[0]), + "blocklist.cfg" + ) try: self.config = ConfigParser.ConfigParser() self.config.readfp(open(config_file)) except (IOError, ConfigParser.MissingSectionHeaderError), error: - logging.error("Could not read configuration file %s: %s", config_file, error) + logging.error( + "Could not read configuration file %s: %s", + config_file, + error + ) raise def read(self, source): - """Parse the blocklist from the provided url (source) using a CSV parser.""" + """Parse the blocklist from the provided url (source) using a + CSV parser.""" import csv try: - # Parse the Sentor Assassin blocklist format (easist to use a CSV parser) - reader = csv.reader(self.cache(source)) + # Parse the Sentor Assassin blocklist format + # (easist to use a CSV parser) + reader = csv.reader(self.cache(source)) for line in reader: # Fetch the items from the input (remote_ip, forwarded_ip, useragent, cookie) = line self.add(remote_ip, forwarded_ip, useragent, cookie) except csv.Error, error: - logging.error("There was an error retrieving the blocklist. %s", error) + logging.error( + "There was an error retrieving the blocklist. %s", + error + ) def add(self, remote_ip, forwarded_ip, useragent, cookie): - """Method to store the remote_ip, forwarded_ip, useragent and cookie to the in-memory dictionary.""" + """Method to store the remote_ip, forwarded_ip, useragent and cookie + to the in-memory dictionary.""" # Store the various items if remote_ip not in self.data: - self.data[remote_ip] = { "remote_ip": remote_ip, "forwarded_ip": forwarded_ip, "useragent": useragent, "cookie": cookie } + self.data[remote_ip] = { + "remote_ip": remote_ip, + "forwarded_ip": forwarded_ip, + "useragent": useragent, + "cookie": cookie + } else: - logging.debug("%s already exists in blacklist. Ignoring.", remote_ip) + logging.debug( + "%s already exists in blacklist. Ignoring.", + remote_ip + ) def cache(self, source): - """Attempt to read from the source URL and store results in a cache file, otherwise use the contents of the cache. If the cache isn't usable but the data is still available, return the transient data.""" + """Attempt to read from the source URL and store results in a cache + file, otherwise use the contents of the cache. If the cache isn't + usable but the data is still available, return the transient data.""" import urllib2 import urlparse import os - - # Build some 'handy' variables + + # Build some 'handy' variables hostname = urlparse.urlparse(source)[1] cache_dir = self.config.get("cache", "directory") - cache_path = os.path.join(cache_dir, "%s.cache" % hostname ) + cache_path = os.path.join(cache_dir, "%s.cache" % hostname) # Create the caching directory if not os.path.exists(cache_dir): try: os.makedirs(cache_dir) except OSError, error: - logging.warning("Could not create the caching directory: %s Will attempt to run without a cache.", error) + logging.warning( + "Could not create the caching directory: %s." + + "Will attempt to run without a cache.", + error + ) # Attempt to fetch the data and store it in a cache file try: @@ -70,16 +98,30 @@ class BlockList: cache_file.write(raw_data) except (urllib2.URLError, urllib2.HTTPError), error: # Network error. Warn and use the cached content. - logging.warning("Reverting to cache file. There was a problem contacting host %s: %s", hostname, error) + logging.warning( + "Reverting to cache file. There was a problem contacting" + + "host %s: %s", + hostname, + error + ) try: cache_file = open(cache_path, "r") except IOError, error: - logging.error("No cache file was available for %s: %s", hostname, error) + logging.error( + "No cache file was available for %s: %s", + hostname, + error + ) raise except IOError, error: - # Cache error, but network succeeded. Use String IO to return the data. + # Cache error, but network succeeded. Use String IO + # to return the data. import StringIO - logging.warning("Could not create cache file: %s. Returning transient data.", error) + logging.warning( + "Could not create cache file: %s." + + "Returning transient data.", + error + ) cache_file = StringIO.StringIO() cache_file.write(raw_data) @@ -98,4 +140,9 @@ class BlockList: def export(self): """Output a plaintext blocklist to stdout.""" for item in self.data.values(): - print "%s,%s,%s,%s" % (item["remote_ip"], item["forwarded_ip"], item["useragent"], item["cookie"]) + print "%s,%s,%s,%s" % ( + item["remote_ip"], + item["forwarded_ip"], + item["useragent"], + item["cookie"] + )