Skip to content
Snippets Groups Projects
Commit 4c53717c authored by Alex Reisner
Browse files

Add MaxmindDatabase module.

Also split single rake task into several.
parent ec1dc13c
No related branches found
No related tags found
No related merge requests found
require 'csv'
require 'net/http'
module Geocoder
module MaxmindDatabase
extend self
# Download the archive for +package+ over HTTP and save it in +dir+.
#
# package - Symbol identifying the archive (resolved through archive_url
#           and archive_filename).
# dir     - Directory the archive is written to (default: "tmp").
#
# The response status is checked before the target file is created, so an
# error or redirect page is never saved under the archive's name.
#
# Raises whatever Net::HTTPResponse#value raises when the response is not 2xx.
def download(package, dir = "tmp")
  filepath = File.expand_path(File.join(dir, archive_filename(package)))
  uri = URI.parse(archive_url(package))
  Net::HTTP.start(uri.host, uri.port) do |http|
    http.request_get(uri.path) do |resp|
      # Raises unless the response is a 2xx success.
      resp.value
      # TODO: show progress
      # File.open (not Kernel#open) so the path is always treated as a file.
      File.open(filepath, 'wb') do |file|
        # Stream the body in segments instead of buffering the whole archive.
        resp.read_body { |segment| file.write(segment) }
      end
    end
  end
end
# Reload the database tables for +package+ from the CSV files found in +dir+.
#
# package - Symbol identifying the data set (passed to data_files).
# dir     - Directory that holds the extracted CSV files (default: "tmp").
#
# For every (CSV file, table) pair returned by data_files the table is
# emptied and then refilled in batches of 10000 rows. Rows whose first field
# starts with "Copyright" are skipped, and the first remaining row is used as
# the column list. Progress is printed to stdout.
def insert(package, dir = "tmp")
  # Forward dir: data_files otherwise falls back to its own "tmp" default.
  data_files(package, dir).each do |filepath, table|
    # delete from table
    print "Resetting table #{table}..."
    ActiveRecord::Base.connection.execute("DELETE FROM #{table}")
    puts "done"

    # insert into table
    start_time = Time.now
    print "Loading data for table #{table}"
    rows = []
    headers = nil
    CSV.foreach(filepath, encoding: "ISO-8859-1") do |line|
      # CSV yields [] for a blank line; it carries neither headers nor data.
      next if line.empty?
      # to_s: the first field is nil when the row starts with an empty column.
      next if line.first.to_s.start_with?("Copyright")

      if headers.nil?
        headers = line
        next
      end

      rows << line.to_a
      next unless rows.size == 10000

      insert_into_table(table, headers, rows)
      rows = []
      print "."
    end
    # flush the final, partially filled batch
    insert_into_table(table, headers, rows) if rows.size > 0
    puts "finished (#{Time.now - start_time} seconds)"
  end
end
# File name of the archive for +package+: everything after the last "/" of
# its URL path, or the whole path when it contains no "/".
def archive_filename(package)
  url_path = archive_url_path(package)
  # rpartition splits at the LAST "/"; the tail is the bare file name.
  url_path.rpartition('/').last
end
private # -------------------------------------------------------------
# Insert +rows+ into +table+ with a single multi-row INSERT statement.
#
# table   - Name of the target table.
# headers - Array of column names, in the same order as each row's values.
# rows    - Array of rows; every row is an Array of raw column values.
#
# Returns whatever the connection's execute call returns.
def insert_into_table(table, headers, rows)
  # One "(v1,v2,...)" tuple per row, each value passed through sql_escaped_value.
  tuples = rows.map do |row|
    escaped = row.map { |col| sql_escaped_value(col) }
    "(#{escaped.join(',')})"
  end
  column_list = headers.join(',')
  sql = "INSERT INTO #{table} (#{column_list}) VALUES #{tuples.join(',')}"
  ActiveRecord::Base.connection.execute(sql)
end
# Render a single CSV field as an SQL literal.
#
# A String that is exactly the decimal form of an integer (it round-trips
# through to_i.to_s unchanged) is returned as-is; anything else is handed to
# the connection's quote method.
def sql_escaped_value(value)
  return value if value.to_i.to_s == value

  ActiveRecord::Base.connection.quote(value)
end
# Map the extracted CSV files of +package+ to the tables they are loaded into.
#
# package - Symbol identifying the data set; only :geolite_city_csv is handled.
# dir     - Directory containing the extracted "GeoLiteCity_*" folders
#           (default: "tmp").
#
# Returns a Hash of { csv_path => table_name }, or nil for any other package.
# Raises RuntimeError when fewer than two matching CSV files are found.
def data_files(package, dir = "tmp")
  case package
  when :geolite_city_csv
    # Sort explicitly: glob order is not guaranteed, and both "the last two"
    # and the pairing with the table names below depend on the order.
    # use the last two in case multiple versions exist
    files = Dir.glob(File.join(dir, "GeoLiteCity_*/*.csv")).sort.last(2)
    if files.size < 2
      raise "Expected two GeoLiteCity CSV files under #{dir}, found #{files.size}"
    end
    Hash[files.zip(["maxmind_blocks", "maxmind_location"])]
  end
end
# Full download URL for +package+: the base URL followed by the package's
# relative archive path.
def archive_url(package)
  prefix = base_url
  prefix + archive_url_path(package)
end
# Archive path, relative to base_url, for +package+.
#
# Returns a String for the three known package symbols and nil otherwise.
def archive_url_path(package)
  case package
  when :geolite_country_csv then "GeoIPCountryCSV.zip"
  when :geolite_city_csv    then "GeoLiteCity_CSV/GeoLiteCity-latest.zip"
  when :geolite_asn_csv     then "asnum/GeoIPASNum2.zip"
  end
end
# URL prefix that every archive path is appended to.
def base_url
  'http://geolite.maxmind.com/download/geoip/database/'
end
end
end
require 'maxmind_database'
namespace :geocoder do
namespace :maxmind do
namespace :geolite_city do
# Rake task: empties the MaxMind tables via clear_tables_mysql, then loads
# maxmind_blocks and maxmind_location from fixed CSV paths under tmp/ via
# load_table_mysql. Depends on the :environment task.
# NOTE(review): no `end` closing this `do` is visible before the next `desc`
# line — presumably removed and added diff lines are interleaved here;
# confirm against the real rake file.
desc "Load MaxMind GeoLite City into SQL database"
task load_data: :environment do
clear_tables_mysql
load_table_mysql('maxmind_blocks', 'tmp/GeoLiteCity-Blocks.csv')
load_table_mysql('maxmind_location', 'tmp/GeoLiteCity-Location.csv')
# Umbrella task: has no body of its own, it only chains the three steps below
# as prerequisites.
desc "Download and load/refresh MaxMind GeoLite City data"
task :load => [:download, :extract, :insert]
# Fetches the GeoLite City CSV archive into the directory named by the DIR
# environment variable, or "tmp/" when DIR is not set.
desc "Download MaxMind GeoLite City data"
task :download do
  target_dir = ENV.fetch('DIR', "tmp/")
  Geocoder::MaxmindDatabase.download(:geolite_city_csv, target_dir)
  # TODO: confirm data was fetched properly
end
end
end
end
# Bulk-load the CSV file at +filepath+ into +table+ with MySQL's
# LOAD DATA LOCAL INFILE, skipping the first two lines of the file.
#
# table    - Name of the target table (interpolated into the SQL as-is).
# filepath - Path of the CSV file (interpolated into the SQL as-is).
#
# Returns whatever the connection's execute call returns.
#
# IMPORTANT: http://stackoverflow.com/questions/10737974/load-data-local-infile-gives-the-error-the-used-command-is-not-allowed-with-this
def load_table_mysql(table, filepath)
  # Clause order matters: MySQL's grammar expects FIELDS, then LINES, then
  # IGNORE n LINES.
  q = <<-END
    LOAD DATA LOCAL INFILE '#{filepath}'
    INTO TABLE #{table}
    FIELDS TERMINATED BY ','
    OPTIONALLY ENCLOSED BY '\"'
    LINES TERMINATED BY '\n'
    IGNORE 2 LINES;
  END
  ActiveRecord::Base.connection.execute(q)
end
# Unzips the downloaded GeoLite City archive in place. The directory comes
# from the DIR environment variable, or "tmp/" when DIR is not set.
# Raises RuntimeError when unzip reports a failure.
desc "Extract (unzip) MaxMind GeoLite City data"
task :extract do
  dir = ENV['DIR'] || "tmp/"
  filename = Geocoder::MaxmindDatabase.archive_filename(:geolite_city_csv)
  archive = File.join(dir, filename)
  # The argument-array form runs unzip without a shell, so a DIR containing
  # spaces or shell metacharacters is passed through literally. Output is read
  # and discarded; -o overwrites existing files without prompting.
  # TODO: make platform independent
  IO.popen(["unzip", "-o", archive, "-d", dir], &:read)
  status = $?
  # NOTE(review): Info-ZIP unzip uses exit status 1 for "warnings, but
  # processing completed" — tolerated here; verify for other unzip builds.
  unless status.success? || status.exitstatus == 1
    raise "unzip failed for #{archive} (#{status})"
  end
end
# Empties maxmind_blocks and maxmind_location by running each DELETE statement
# in the list through the ActiveRecord connection. The LOCK TABLES and
# UNLOCK TABLES entries are commented out, so they are never executed.
# NOTE(review): no `end` closing this method is visible before the next `desc`
# line — presumably removed and added diff lines are interleaved here;
# confirm against the real rake file.
def clear_tables_mysql
[
#"LOCK TABLES maxmind_blocks READ, maxmind_location READ",
"DELETE from maxmind_blocks",
"DELETE from maxmind_location",
#"UNLOCK TABLES"
].each{ |q| ActiveRecord::Base.connection.execute(q) }
# Reloads the GeoLite City tables from the extracted CSV files. Depends on
# the :environment task; the directory comes from the DIR environment
# variable, or "tmp/" when DIR is not set.
desc "Load/refresh MaxMind GeoLite City data"
task :insert => [:environment] do
  source_dir = ENV.fetch('DIR', "tmp/")
  Geocoder::MaxmindDatabase.insert(:geolite_city_csv, source_dir)
end
end
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment