commit b6aa85cf7a34a4a0e668128b794dda7a9fdd50cc Author: Scott Wallace Date: Mon May 21 19:08:04 2012 +0100 Initial code commit. diff --git a/README b/README new file mode 100644 index 0000000..e69de29 diff --git a/apache.py b/apache.py new file mode 100755 index 0000000..123409d --- /dev/null +++ b/apache.py @@ -0,0 +1,24 @@ +#!/usr/bin/python + +import blocklist +import sys + +class ApacheBlockList(blocklist.BlockList): + """New class that extends the main BlockList class for use with Apache.""" + def export(self): + """Exports the blocklist addresses with use within Apache.""" + print "\tOrder deny,allow" + for address in self.data.keys(): + print "\tDeny from %s/32" % address + print "\tAllow from all" + +if __name__ == "__main__": + blocklist = ApacheBlockList() + + blocklist.read("http://server1.example.com/default/blocks.txt") + blocklist.read("http://server2.example.com/default/blocks.txt") + + blocklist.export() + + sys.exit(0) + diff --git a/apache/AUTHORS b/apache/AUTHORS new file mode 100644 index 0000000..44802cd --- /dev/null +++ b/apache/AUTHORS @@ -0,0 +1,2 @@ +Scott Wallace + diff --git a/apache/Changelog b/apache/Changelog new file mode 100644 index 0000000..e69de29 diff --git a/apache/Makefile.am b/apache/Makefile.am new file mode 100644 index 0000000..cabea7c --- /dev/null +++ b/apache/Makefile.am @@ -0,0 +1,11 @@ +## This is the shared library to be built +lib_LTLIBRARIES = libmodblockinator.la + +## Define the source file for the module +libmodblockinator_la_SOURCES = mod_blockinator.c + +## Define that an include directory is required. +INCLUDES = -I@apache_dir@/include/httpd -I@apache_dir@/include/apr-1 + +## ACLOCAL recommendation +ACLOCAL_AMFLAGS = -I m4 diff --git a/apache/NEWS b/apache/NEWS new file mode 100644 index 0000000..e69de29 diff --git a/apache/README b/apache/README new file mode 100644 index 0000000..c928876 --- /dev/null +++ b/apache/README @@ -0,0 +1,39 @@ +Compilation +----------- +1. 
Ensure the SQLite development libraries (sqlite-devel) are installed. + +2. Either run the build_from_scratch.sh script or run the following commands: + 1. libtoolize + 2. aclocal + 3. autoconf + 4. automake -a + 5. ./configure --with-apache=/path/to/apache/prefix (e.g. /usr) + 6. make CFLAGS=-lsqlite3 + +Installation +------------ +1. Activate in Apache using: + a) (automatic) Using APXS: + apxs -i -a -n blockinator libmodblockinator.la + b) (manual) Add the following commands to the Apache configuration: + LoadModule blockinator_module modules/libmodblockinator.so + +2. Configure mod_blockinator by adding the following lines in the appropriate location(s): + 1. Define where Blockinator is installed and where the blocklist DB can be found: + + BlockinatorHome /path/to/blockinator + BlockinatorBlocklistDB /path/to/blocklist.db + + 2. Create a mod_rewrite rule to block requests, if matched: + RewriteCond %{HTTP:X-Block} 1 + RewriteRule . - [R=403,L] + +3. Create the SQLite DB: + 1. sqlite3 /path/to/blocklist.db + 2. Run the following SQL: + CREATE TABLE IF NOT EXISTS blocklist(remote_ip VARCHAR(15), forwarded_ip VARCHAR(15), useragent VARCHAR(256), cookie VARCHAR(1024), PRIMARY KEY(remote_ip)); + 3. (optional) Insert some test data: + Block requests from IP address 1.2.3.4 + e.g. INSERT INTO blocklist VALUES("1.2.3.4", "ANY", "ANY", "ANY"); + +4. Restart Apache diff --git a/apache/build_from_scratch.sh b/apache/build_from_scratch.sh new file mode 100755 index 0000000..151c0cf --- /dev/null +++ b/apache/build_from_scratch.sh @@ -0,0 +1,17 @@ +#!/bin/sh -e + +# Check to see if build environment is set up correctly +if [ ! 
-e Makefile ]; then + # Set up the build environment + libtoolize + aclocal + autoconf + automake -a + ./configure --with-apache=/usr +else + # Remove exising code + make clean +fi + +# Compile +make CFLAGS=-lsqlite3 diff --git a/apache/configure.in b/apache/configure.in new file mode 100644 index 0000000..beb9274 --- /dev/null +++ b/apache/configure.in @@ -0,0 +1,48 @@ +# Required initializer +AC_INIT +AC_CONFIG_MACRO_DIR([m4]) + +# Automake initialization +AM_INIT_AUTOMAKE(mod_blockinator, 1.1) + +# Add a test for a compiler. +AC_PROG_CC +AM_PROG_LIBTOOL + +# Define a macro that is used to parse a --with-apache parameter +# The macro is named "APACHE_DIR" +AC_DEFUN([APACHE_DIR],[ + + AC_ARG_WITH( + apache, + [ --with-apache[=DIR] Apache server directory], + , + [with_apache="no"] + ) + + AC_MSG_CHECKING(for Apache directory) + + if test "$with_apache" = "no"; then + AC_MSG_ERROR( You need to specify the apache directory using --with-apache) + else + # make sure that a well known include file exists + if test -e $with_apache/include/httpd/httpd.h; then + apache_dir=$with_apache + AC_MSG_RESULT(APACHE found!) + else + AC_MSG_ERROR( $with_apache not found. Check the value you specified with --with-apache) + fi + fi + +]) + +# Now call the APACHE_DIR macro that was just specified +APACHE_DIR + +# Save the location of apache into the "apache_dir" variable. +# The AC_SUBST macro causes the variable to be saved in config.status +AC_SUBST(apache_dir) + +# Write config.status and the Makefile +AC_OUTPUT(Makefile) + diff --git a/apache/mod_blockinator.c b/apache/mod_blockinator.c new file mode 100644 index 0000000..141ff1a --- /dev/null +++ b/apache/mod_blockinator.c @@ -0,0 +1,186 @@ +/* + * mod_blockinator for Apache v2.2 + * + * Author: Scott Wallace + * Date: March 2012 + * + * Written for European Directories to integrate traffic + * blocking based upon recommendations from a Sentor + * device. 
+ * + * The SQLite DB is populated with a Python script that + * can be found at, + * http://svn.eurodir.eu/svn/common/trunk/scripts/blockinator/trunk/ + * + */ + +#include "httpd.h" +#include "http_config.h" +#include "http_log.h" +#include +#include + +module AP_MODULE_DECLARE_DATA blockinator_module; + +/* + * Global variables for use everywhere + */ +sqlite3 *db; +typedef struct { + const char *basepath; + const char *db; +} blockinator_cfg_t; + +/* + * Function to build the configuration for the module + */ +static void *create_blockinator_config(apr_pool_t *p, server_rec *s) +{ + /* Allocate some memory using the proper Apache function */ + blockinator_cfg_t *mod_config = apr_pcalloc(p, sizeof(blockinator_cfg_t)); + + return mod_config; +} + +/* + * Function to set the configuration item, 'basepath' + */ +static const char *blockinator_set_config_basepath(cmd_parms *parms, void *mconfig, const char *arg) +{ + blockinator_cfg_t *cfg = ap_get_module_config(parms->server->module_config, &blockinator_module); + cfg->basepath = (char *)arg; + return NULL; +} + +/* + * Function to set the configuration item, 'db' + */ +static const char *blockinator_set_config_db(cmd_parms *parms, void *mconfig, const char *arg) +{ + blockinator_cfg_t *cfg = ap_get_module_config(parms->server->module_config, &blockinator_module); + cfg->db = (char *)arg; + return NULL; +} + +/* + * Main HTTP request handler + */ +static int mod_blockinator_method_handler(request_rec *r) +{ + const char *remote_ip, *forwarded_ip, *useragent, *cookie; + char *statement; + char *sqlite3_error; + sqlite3_stmt *sqlite3_statement; + + /* Capture the relevant information from the inbound request */ + remote_ip = r->connection->remote_ip; + forwarded_ip = apr_table_get(r->headers_in, "X-Forwarded-For"); + useragent = apr_table_get(r->headers_in, "User-Agent"); + cookie = apr_table_get(r->headers_in, "Cookie"); + + /* Build the SQL statement */ + statement = sqlite3_mprintf("SELECT * FROM blocklist WHERE 
remote_ip = '%q' AND (forwarded_ip = 'ANY' OR forwarded_ip = '%q') AND (useragent = 'ANY' OR useragent = '%q') AND (cookie = 'ANY' OR instr('%q', cookie))", remote_ip, forwarded_ip, useragent, cookie); + + /* Prepare the statement */ + if (sqlite3_prepare_v2(db, statement, BUFSIZ, &sqlite3_statement, NULL) != SQLITE_OK) { + /* SQLite error. Allow. */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "SQLite error (%s). Allow traffic from %s by default.", sqlite3_error, remote_ip); + sqlite3_free(sqlite3_error); + + return DECLINED; + } + + /* Check for any results. */ + if (sqlite3_step(sqlite3_statement) == SQLITE_ROW) { + /* SQLite results. Time to block. */ + ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, "Blocklist match. (Forwarded_IP: %s, User-Agent: %s, Cookie: %s)", forwarded_ip, useragent, cookie); + apr_table_set(r->headers_in, "X-Block", "1"); + } + + /* Tidy-up the SQLite way. */ + if (sqlite3_finalize(sqlite3_statement) != SQLITE_OK) { + ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, "SQLite error freeing the SQLite compile statement (%s). Possible memory leak.", sqlite3_error); + sqlite3_free(sqlite3_error); + } + + return DECLINED; +} + +/* + * Module initialiser + */ +static void mod_blockinator_init_handler(apr_pool_t *p, server_rec *s) +{ + char *sqlite3_error; + char sqlite3_instr_extension[BUFSIZ]; + + /* Read config from module */ + blockinator_cfg_t *cfg = ap_get_module_config(s->module_config, &blockinator_module); + + /* Build the full path to the SQLite instr() extension */ + sprintf(sqlite3_instr_extension, "%s/sqlite_instr/instr.sqlext", cfg->basepath); + + /* Open the SQLite DB */ + if (sqlite3_open_v2(cfg->db, &db, SQLITE_OPEN_READONLY, NULL)) { + /* Error. */ + ap_log_error(APLOG_MARK, APLOG_ERR, 0, s, "mod_blockinator: SQLite error (%s). 
Could not open database.", sqlite3_errmsg(db)); + } + + /* Load the EDSA SQLite extension for instr() */ + if ((sqlite3_enable_load_extension(db, 1) != SQLITE_OK) || + (sqlite3_load_extension(db, sqlite3_instr_extension, 0, &sqlite3_error) != SQLITE_OK) + ) { + /* FAIL */ + ap_log_error(APLOG_MARK, APLOG_ERR, 0, s, "mod_blockinator: SQLite error (%s). Failed to load the instr() extension.", sqlite3_error); + sqlite3_free(sqlite3_error); + } else { + /* SQLite module successfully loaded. */ + ap_log_error(APLOG_MARK, APLOG_INFO, 0, s, "mod_blockinator: SQLite loaded the instr() extension successfully."); + } +} + +/* + * Register hooks + */ +static void register_hooks(apr_pool_t *p) +{ + /* Register the module initialiser */ + ap_hook_child_init(mod_blockinator_init_handler, NULL, NULL, APR_HOOK_LAST); + + /* Register the module header parser in the post_read_request phase of Apache */ + ap_hook_post_read_request(mod_blockinator_method_handler, NULL, NULL, APR_HOOK_FIRST); +} + +/* + * Apache configuration directives + */ +static const command_rec mod_blockinator_directives[] = { + AP_INIT_TAKE1( + "BlockinatorHome", + blockinator_set_config_basepath, + NULL, + RSRC_CONF, + "BlockinatorHome (filepath). The base directory of Blockinator." + ), + AP_INIT_TAKE1( + "BlockinatorBlocklistDB", + blockinator_set_config_db, + NULL, + RSRC_CONF, + "BlockinatorBlocklistDB (filepath). The absolute path of Blockinator blocklist SQLite DB." 
class BlockList:
    """Class to perform basic reading of Sentor blocklist URLs and add them to a common data store."""

    # Class-level default kept so code reading BlockList.data still works.
    # __init__ gives every instance its own dictionary so that separate
    # blocklists do not silently share entries.
    data = {}

    def __init__(self):
        """Read and parse configuration to a local object variable.

        The file blocklist.cfg is looked up in the directory of the running
        script (sys.argv[0]). Any failure is logged and re-raised.
        """
        import ConfigParser
        import os

        self.data = {}

        # Find the configuration file in the same directory as the main script.
        config_file = os.path.join(os.path.dirname(sys.argv[0]), "blocklist.cfg")
        try:
            self.config = ConfigParser.ConfigParser()
            handle = open(config_file)
            try:
                self.config.readfp(handle)
            finally:
                handle.close()
        except Exception:
            # sys.exc_info() is used instead of binding the exception in the
            # except clause, whose syntax differs between Python versions.
            e = sys.exc_info()[1]
            logging.error("Could not read configuration file %s: %s" % (config_file, e))
            raise

    def read(self, source):
        """Parse the blocklist from the provided url (source) using a CSV parser.

        Each row must hold exactly four fields: remote_ip, forwarded_ip,
        useragent, cookie. Rows with any other number of fields (including
        blank lines) are skipped with a warning instead of aborting the
        whole import. Errors fetching the data are logged, not raised.
        """
        import csv

        try:
            handle = self.cache(source)
            try:
                # Parse the Sentor Assassin blocklist format (easiest to use a CSV parser)
                for line in csv.reader(handle):
                    if len(line) != 4:
                        logging.warning("Skipping malformed blocklist entry from %s: %r" % (source, line))
                        continue
                    (remote_ip, forwarded_ip, useragent, cookie) = line
                    self.add(remote_ip, forwarded_ip, useragent, cookie)
            finally:
                handle.close()
        except Exception:
            e = sys.exc_info()[1]
            logging.error("There was an error retrieving the blocklist. %s" % e)

    def add(self, remote_ip, forwarded_ip, useragent, cookie):
        """Store one blocklist entry keyed by remote_ip; the first entry for an address wins."""
        if remote_ip not in self.data:
            self.data[remote_ip] = { "remote_ip": remote_ip, "forwarded_ip": forwarded_ip, "useragent": useragent, "cookie": cookie }
        else:
            logging.debug("%s already exists in blacklist. Ignoring." % remote_ip)

    def cache(self, source):
        """Attempt to read from the source URL and store results in a cache file, otherwise use the contents of the cache. If the cache isn't usable but the data is still available, return the transient data.

        Returns a file-like object positioned at the start of the data; the
        caller is responsible for closing it. Raises IOError when the fetch
        fails and no cache file can be opened.
        """
        import urllib2
        import urlparse
        import os

        # Build some 'handy' variables
        hostname = urlparse.urlparse(source)[1]
        cache_dir = self.config.get("cache", "directory")
        cache_path = os.path.join(cache_dir, "%s.cache" % hostname)

        # Create the caching directory
        if not os.path.exists(cache_dir):
            try:
                os.makedirs(cache_dir)
            except OSError:
                logging.warning("Could not create the caching directory. Will attempt to run without a cache.")

        # Step 1: fetch. Any failure here means there is no fresh data, so
        # fall back to whatever the cache file holds.
        try:
            response = urllib2.urlopen(source)
            try:
                raw_data = response.read()
            finally:
                response.close()
        except Exception:
            e = sys.exc_info()[1]
            logging.warning("Reverting to cache file. There was a problem contacting host %s: %s" % (hostname, e))
            try:
                return open(cache_path, "r")
            except IOError:
                logging.error("No cache file was available for %s." % hostname)
                raise

        # Step 2: store. raw_data is guaranteed to be bound from here on, so
        # a cache failure can always hand back the transient copy.
        try:
            cache_file = open(cache_path, "w+")
            cache_file.write(raw_data)

            # Rewind the file
            cache_file.seek(0)
            return cache_file
        except Exception:
            e = sys.exc_info()[1]
            import StringIO
            logging.warning("Could not create cache file: %s. Returning transient data." % e)
            return StringIO.StringIO(raw_data)

    def dump(self):
        """Dump the local datastore out to stdout."""
        # NOTE(review): each value is a dict, so the inner loop yields the
        # field names ("remote_ip", ...) rather than their values. Output is
        # left unchanged here - confirm what format is actually wanted.
        for address, entry in self.data.items():
            for field in entry:
                sys.stdout.write("%s: %s\n" % (address, field))

    def export(self):
        """Output a plaintext blocklist to stdout.

        Rows are written with the csv module so that fields containing
        commas or quotes can be parsed back by read().
        """
        import csv

        writer = csv.writer(sys.stdout, lineterminator="\n")
        for key, item in self.data.items():
            writer.writerow([item["remote_ip"], item["forwarded_ip"], item["useragent"], item["cookie"]])
%s" % e) + +if __name__ == "__main__": + blocklist = SQLiteBlockList() + + blocklist.read("http://example.com/default/blocks.txt") + + blocklist.export() + + sys.exit(0) diff --git a/sqlite_instr/Makefile b/sqlite_instr/Makefile new file mode 100644 index 0000000..b512500 --- /dev/null +++ b/sqlite_instr/Makefile @@ -0,0 +1,6 @@ +CC=gcc + +all: instr.sqlext + +instr.sqlext: sqlite_instr.c + $(CC) -shared -fPIC -Isqlite3 -o instr.sqlext sqlite_instr.c diff --git a/sqlite_instr/instr.sqlext b/sqlite_instr/instr.sqlext new file mode 100755 index 0000000..53118c2 Binary files /dev/null and b/sqlite_instr/instr.sqlext differ diff --git a/sqlite_instr/sqlite_instr.c b/sqlite_instr/sqlite_instr.c new file mode 100644 index 0000000..6917d58 --- /dev/null +++ b/sqlite_instr/sqlite_instr.c @@ -0,0 +1,35 @@ +#include +#include + +SQLITE_EXTENSION_INIT1 + +/* +** The sqlite3_instr() SQL function returns the location of a substring match. An +** implementation of MySQL's instr() function. +*/ +void sqlite3_instr(sqlite3_context* pContext, int argc, sqlite3_value** argv) +{ + const char *str1 = (const char *) sqlite3_value_text(argv[0]); + const char *str2 = (const char *) sqlite3_value_text(argv[1]); + + char *p = strstr(str1, str2); + int nResult = 0; + + if(p != NULL) { + nResult = p - str1 + 1; + } + + sqlite3_result_int(pContext, nResult); +} + +/* SQLite invokes this routine once when it loads the extension. +** Create new functions, collating sequences, and virtual table +** modules here. This is usually the only exported symbol in +** the shared library. 
+*/ +int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi) +{ + SQLITE_EXTENSION_INIT2(pApi) + sqlite3_create_function(db, "instr", 2, SQLITE_ANY, 0, sqlite3_instr, 0, 0); + return 0; +} diff --git a/varnish.py b/varnish.py new file mode 100755 index 0000000..aa803d3 --- /dev/null +++ b/varnish.py @@ -0,0 +1,23 @@ +#!/usr/bin/python + +import blocklist +import sys + +class VarnishBlockList(blocklist.BlockList): + """New class to extend the main BlockList class for implementation with Varnish.""" + def export(self): + """Exports blocklist addresses for use within Varnish.""" + print "acl block_list {" + for address in self.data.keys(): + print "\t\"%s\";" % address + print "}" + +if __name__ == "__main__": + blocklist = VarnishBlockList() + + blocklist.read("http://server1.example.com/default/blocks.txt") + blocklist.read("http://server2.example.com/default/blocks.txt") + + blocklist.export() + + sys.exit(0) diff --git a/varnish/blockinator.vcl b/varnish/blockinator.vcl new file mode 100644 index 0000000..d445403 --- /dev/null +++ b/varnish/blockinator.vcl @@ -0,0 +1,73 @@ +C{ + #define BLOCKINATOR_HOME "/srv/app/blockinator" + #define BLOCKLIST_DB "/srv/tmp/blocklist.db" + + #include + #include + #include + #include + #include + + char *remote_ip, *forwarded_ip, *useragent, *cookie; + + sqlite3 *db; + + int sqlite3_init() + { + static int init = 0; + char *sqlite3_error; + + if (!init) { + /* Open the SQLite DB */ + if (sqlite3_open_v2(BLOCKLIST_DB, &db, SQLITE_OPEN_READONLY, NULL)) { + syslog(LOG_ERR, "SQLite error (%s). Could not open database.", sqlite3_errmsg(db)); + } + init = 1; + + /* Load the EDSA SQLite extension for instr() */ + if ((sqlite3_enable_load_extension(db, 1) != SQLITE_OK) || + (sqlite3_load_extension(db, BLOCKINATOR_HOME"/sqlite_instr/instr.sqlext", 0, &sqlite3_error) != SQLITE_OK) + ) { + syslog(LOG_ERR, "SQLite error (%s). 
/*
 * Return a newly allocated copy of input in which every occurrence of
 * search has been replaced by replace.
 *
 * The result is sized from the actual input, so arbitrarily long strings
 * (e.g. a large Cookie header) cannot overflow it.
 *
 *   input   - string to copy; NULL is treated as an empty string
 *   search  - substring to look for; NULL or "" means "replace nothing"
 *   replace - replacement text; NULL is treated as an empty string
 *
 * Returns a NUL-terminated string the caller must free(), or NULL when
 * memory could not be allocated.
 */
char *str_replace(char *input, char *search, char *replace)
{
    const char *source = (input != NULL) ? input : "";
    const char *substitute = (replace != NULL) ? replace : "";
    size_t search_len = (search != NULL) ? strlen(search) : 0;
    size_t substitute_len = strlen(substitute);
    size_t matches = 0;
    size_t output_len;
    size_t tail_len;
    const char *cursor;
    const char *match;
    char *output;
    char *dest;

    /* First pass: count matches. An empty search string would match forever. */
    if (search_len > 0) {
        for (cursor = source; (match = strstr(cursor, search)) != NULL; cursor = match + search_len) {
            matches++;
        }
    }

    /* Matches never overlap, so this subtraction cannot wrap. */
    output_len = strlen(source) - matches * search_len;

    /* Refuse sizes that would wrap around size_t (leaving room for the NUL). */
    if (substitute_len != 0 && matches > ((size_t)-1 - 1 - output_len) / substitute_len) {
        return NULL;
    }
    output_len += matches * substitute_len;

    output = malloc(output_len + 1);
    if (output == NULL) {
        return NULL;
    }

    /* Second pass: copy the text between matches, then the replacement. */
    dest = output;
    cursor = source;
    if (search_len > 0) {
        while ((match = strstr(cursor, search)) != NULL) {
            size_t prefix_len = (size_t)(match - cursor);

            memcpy(dest, cursor, prefix_len);
            dest += prefix_len;
            memcpy(dest, substitute, substitute_len);
            dest += substitute_len;
            cursor = match + search_len;
        }
    }

    /* Remainder after the last match, including the terminating NUL. */
    tail_len = strlen(cursor);
    memcpy(dest, cursor, tail_len + 1);

    return output;
}
*/ + syslog(LOG_WARNING, "SQLite error (%s). Allow traffic from %s by default.", sqlite3_error, remote_ip); + syslog(LOG_INFO, "SQLite statment: %s", statement); + sqlite3_free(sqlite3_error); + } + free(safecookie); +}C