Skip to content

Instantly share code, notes, and snippets.

@davidfischer
Created July 25, 2016 18:04
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save davidfischer/d9bcb69d1791c9fdf7a73c56ae9f0645 to your computer and use it in GitHub Desktop.
Save davidfischer/d9bcb69d1791c9fdf7a73c56ae9f0645 to your computer and use it in GitHub Desktop.
Scripts for importing IP geolocation data from MaxMind
"""
Process the MaxMind GeoLite2 IPv4 and IPv6 blocks CSVs
into a single CSV with network and broadcast addresses
calculated
STEPS
* Download the MaxMind GeoLite2 city databases:
http://dev.maxmind.com/geoip/geoip2/geolite2/
* Run this program to calculate network and broadcast
addresses for each IP network entry
"""
import csv
import sys
# Requires Python3 or the ipaddress module backport
import ipaddress
def process_blockfile(blockfile):
reader = csv.DictReader(blockfile)
for block in reader:
ip_network = ipaddress.ip_network(block['network'])
block['network_address'] = int(ip_network.network_address)
block['broadcast_address'] = int(ip_network.broadcast_address)
yield block
def main(ipv4blockfile, ipv6blockfile, outfile):
fields = [
'network',
'geoname_id',
'registered_country_geoname_id',
'represented_country_geoname_id',
'is_anonymous_proxy',
'is_satellite_provider',
'postal_code',
'latitude',
'longitude',
'accuracy_radius',
'network_address',
'broadcast_address',
]
writer = csv.DictWriter(outfile, fields)
writer.writeheader()
for blockfile in (ipv4blockfile, ipv6blockfile):
for block in process_blockfile(blockfile):
writer.writerow(block)
outfile.close()
return 0
if __name__ == '__main__':
import argparse
# Defaults
default_ipv4blockfile = 'GeoLite2-City-Blocks-IPv4.csv'
default_ipv6blockfile = 'GeoLite2-City-Blocks-IPv6.csv'
default_outfile = 'blocks.csv'
usage = 'Combine MaxMind IPv4 and IPv6 block files and compute network and broadcast addresses'
parser = argparse.ArgumentParser(description=usage)
parser.add_argument(
'--ipv4',
type=argparse.FileType('r'),
help='Default ipv4 block file [{}]'.format(default_ipv4blockfile),
)
parser.add_argument(
'--ipv6',
type=argparse.FileType('r'),
help='Default ipv6 block file [{}]'.format(default_ipv6blockfile),
)
parser.add_argument(
'-o',
'--outfile',
type=argparse.FileType('w', encoding='UTF-8'),
help='Default combined CSV [{}]'.format(default_outfile),
)
args = parser.parse_args()
# Setup default in and outfiles
ipv4blockfile = args.ipv4 or open(default_ipv4blockfile, 'r')
ipv6blockfile = args.ipv6 or open(default_ipv6blockfile, 'r')
outfile = args.outfile or open(default_outfile, 'w')
sys.exit(main(ipv4blockfile, ipv6blockfile, outfile))
-- DROP existing Tables
DROP TABLE IF EXISTS geoip_blocks;
DROP TABLE IF EXISTS geoip_location;
CREATE TABLE `geoip_location` (
`geoname_id` int(11) UNSIGNED NOT NULL AUTO_INCREMENT,
`locale_code` varchar(2) DEFAULT NULL,
`continent_code` varchar(2) DEFAULT NULL,
`continent_name` varchar(2) DEFAULT NULL,
`country_iso_code` varchar(2) DEFAULT NULL,
`country_name` varchar(45) DEFAULT NULL,
`subdivision_1_iso_code` tinyint DEFAULT NULL,
`subdivision_1_name` varchar(1000) DEFAULT NULL,
`subdivision_2_iso_code` tinyint DEFAULT NULL,
`subdivision_2_name` varchar(1000) DEFAULT NULL,
`city_name` varchar(255) DEFAULT NULL,
`metro_code` int(11) DEFAULT NULL,
`time_zone` VARCHAR(255) DEFAULT NULL,
PRIMARY KEY (`geoname_id`),
KEY `city_name` (`city_name`)
) ENGINE=InnoDB;
CREATE TABLE `geoip_blocks` (
`network_cidr` varchar(32) NOT NULL DEFAULT '',
`geoname_id` int(11) UNSIGNED DEFAULT NULL,
`registered_country_geoname_id` int(11) DEFAULT NULL,
`represented_country_geoname_id` int(11) DEFAULT NULL,
`is_anonymous_proxy` tinyint(1) DEFAULT '0',
`is_satellite_provider` tinyint(1) DEFAULT '0',
`postal_code` varchar(45) DEFAULT NULL,
`latitude` float DEFAULT NULL,
`longitude` float DEFAULT NULL,
`accuracy_radius` INT(10) unsigned DEFAULT NULL,
`network` NUMERIC(40, 0) unsigned DEFAULT NULL,
`broadcast` NUMERIC(40, 0) unsigned DEFAULT NULL,
PRIMARY KEY (`network_cidr`),
-- Unfortunately, there are inconsistencies in the data
-- and the FK won't work correctly
-- FOREIGN KEY (`geoname_id`)
-- REFERENCES geoip_location (`geoname_id`)
-- ON DELETE CASCADE,
KEY `idx_blocks_network` (`network`),
KEY `idx_blocks_broadcast` (`broadcast`)
) ENGINE=InnoDB;
#!/bin/bash
# Imports data from MaxMind's GeoIP database into MySQL
# and normalizes it for fast querying
# First, download the data from MaxMind:
# http://dev.maxmind.com/geoip/geoip2/geolite2/
# This is based on the article here:
# http://davidkane.net/installing-new-geoip-database-sql-database/
echo "Creating the tables"
mysql --user=root geoip < create.sql
echo "Creating and normalizing the blocks (this can take minutes)"
python3 create-blocks.py -o geoip_blocks.csv
echo "Importing the data"
cp GeoLite2-City-Locations-en.csv geoip_location.csv
mysqlimport -u root --ignore-lines=1 --fields-terminated-by=, --fields-optionally-enclosed-by='"' --local geoip geoip_location.csv
mysqlimport -u root --ignore-lines=1 --fields-terminated-by=, --fields-optionally-enclosed-by='"' --local geoip geoip_blocks.csv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment