#!/usr/bin/python

"""Program to fetch a blocklist via HTTP"""

import sys
import logging


class BlockList(object):

    """Class to perform basic reading of Sentor blocklist URLs and add them
    to a common data store."""
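
    # self.data is keyed by remote_ip; each entry holds the four recorded
    # fields ("remote_ip", "forwarded_ip", "useragent", "cookie") as stored
    # by add(). Being a class attribute, it is the common data store shared
    # by every BlockList instance.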
    data = {}

    def __init__(self):
        """Read and parse configuration to a local object variable."""
        import ConfigParser
        import os

        # Find the configuration file in the same directory as the main script.
        config_file = os.path.join(
            os.path.dirname(sys.argv[0]),
            "blocklist.cfg"
        )
        try:
            self.config = ConfigParser.ConfigParser()
            self.config.readfp(open(config_file))
        except (IOError, ConfigParser.MissingSectionHeaderError), error:
            logging.error(
                "Could not read configuration file %s: %s",
                config_file,
                error
            )
            raise

    def read(self, source):
        """Parse the blocklist from the provided url (source) using a
        CSV parser."""
        import csv

        try:
            # Parse the Sentor Assassin blocklist format
            # (easiest to use a CSV parser)
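            # Each line is expected to carry four comma-separated fields,
            # matching the unpacking below; an illustrative (made-up) line:
            #   192.0.2.10,198.51.100.7,Mozilla/5.0 (X11; Linux),sessionid=abc123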
            reader = csv.reader(self.cache(source))
            for line in reader:
                # Fetch the items from the input
                (remote_ip, forwarded_ip, useragent, cookie) = line
                self.add(remote_ip, forwarded_ip, useragent, cookie)
        except csv.Error, error:
            logging.error(
                "There was an error retrieving the blocklist. %s",
                error
            )

    def add(self, remote_ip, forwarded_ip, useragent, cookie):
        """Store the remote_ip, forwarded_ip, useragent and cookie in the
        in-memory dictionary."""
        # Store the various items
        if remote_ip not in self.data:
            self.data[remote_ip] = {
                "remote_ip": remote_ip,
                "forwarded_ip": forwarded_ip,
                "useragent": useragent,
                "cookie": cookie
            }
        else:
            logging.debug(
                "%s already exists in blocklist. Ignoring.",
                remote_ip
            )

    def cache(self, source):
        """Attempt to read from the source URL and store results in a cache
        file, otherwise use the contents of the cache. If the cache isn't
        usable but the data is still available, return the transient data."""
        import urllib2
        import urlparse
        import os

        # Build some 'handy' variables
        hostname = urlparse.urlparse(source)[1]
        cache_dir = self.config.get("cache", "directory")
        cache_path = os.path.join(cache_dir, "%s.cache" % hostname)
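        # e.g. (illustrative) a source of http://blocklist.example.com/feed
        # would be cached as <cache directory>/blocklist.example.com.cache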

        # Create the caching directory
        if not os.path.exists(cache_dir):
            try:
                os.makedirs(cache_dir)
            except OSError, error:
                logging.warning(
                    "Could not create the caching directory: %s. " +
                    "Will attempt to run without a cache.",
                    error
                )

        # Attempt to fetch the data and store it in a cache file
        try:
            input_list = urllib2.urlopen(source)
            raw_data = input_list.read()
            cache_file = open(cache_path, "w+")
            cache_file.write(raw_data)
        except (urllib2.URLError, urllib2.HTTPError), error:
            # Network error. Warn and use the cached content.
            logging.warning(
                "Reverting to cache file. There was a problem contacting " +
                "host %s: %s",
                hostname,
                error
            )
            try:
                cache_file = open(cache_path, "r")
            except IOError, error:
                logging.error(
                    "No cache file was available for %s: %s",
                    hostname,
                    error
                )
                raise
        except IOError, error:
            # Cache error, but network succeeded. Use StringIO
            # to return the data.
            import StringIO
            logging.warning(
                "Could not create cache file: %s. " +
                "Returning transient data.",
                error
            )
            cache_file = StringIO.StringIO()
            cache_file.write(raw_data)

        # Rewind the file
        cache_file.seek(0)

        # Return the best available data
        return cache_file

    def dump(self):
        """Dump the local datastore out to stdout."""
        for name, val in self.data.iteritems():
            for field, value in val.iteritems():
                print "%s: %s=%s" % (name, field, value)

    def export(self):
        """Output a plaintext blocklist to stdout."""
        for item in self.data.values():
            print "%s,%s,%s,%s" % (
                item["remote_ip"],
                item["forwarded_ip"],
                item["useragent"],
                item["cookie"]
            )
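

# Minimal usage sketch: how the class above might be driven from the command
# line. The feed URL and the logging setup are illustrative assumptions, not
# values taken from the original configuration.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    blocklist = BlockList()
    # Hypothetical Sentor feed URL, for illustration only.
    blocklist.read("http://blocklist.example.com/feed.csv")
    blocklist.export()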