# ---------------------------------------------------------------------------
# lib/maxmind_database.rb
# ---------------------------------------------------------------------------
require 'csv'
require 'net/http'

module Geocoder
  # Downloads MaxMind GeoLite CSV archives and loads their contents into SQL
  # tables through ActiveRecord::Base.connection.
  module MaxmindDatabase
    extend self

    # Path of each supported archive, relative to #base_url.
    ARCHIVE_URL_PATHS = {
      geolite_country_csv: "GeoIPCountryCSV.zip",
      geolite_city_csv: "GeoLiteCity_CSV/GeoLiteCity-latest.zip",
      geolite_asn_csv: "asnum/GeoIPASNum2.zip"
    }.freeze

    # Number of CSV rows sent to the database per INSERT statement.
    INSERT_BATCH_SIZE = 10_000

    private_constant :ARCHIVE_URL_PATHS, :INSERT_BATCH_SIZE

    # Fetches the archive for +package+ and writes it into +dir+.
    #
    # @param package [Symbol] one of the keys of ARCHIVE_URL_PATHS
    # @param dir [String] directory that receives the archive
    # @return [Net::HTTPResponse] the response whose body was written to disk
    # @raise [ArgumentError] if +package+ is unknown
    # @raise [Net::HTTPExceptions] (via Net::HTTPResponse#value) if the server
    #   does not answer with a 2xx status
    def download(package, dir = "tmp")
      filepath = File.expand_path(File.join(dir, archive_filename(package)))
      uri = URI.parse(archive_url(package))
      Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
        http.request_get(uri.request_uri) do |resp|
          # Fail before touching the disk so an error page is never saved as
          # the archive and an existing archive is not truncated.
          resp.value
          # File.open rather than Kernel#open: the latter treats a leading
          # "|" in the path as a command to run.
          File.open(filepath, 'wb') do |file|
            # TODO: show progress
            resp.read_body { |segment| file.write(segment) }
          end
        end
      end
    end

    # Empties and reloads the tables backing +package+ from the CSV files
    # previously extracted into +dir+.
    #
    # @param package [Symbol] currently only :geolite_city_csv is supported
    # @param dir [String] directory the archive was extracted into
    # @raise [ArgumentError] if +package+ has no table mapping
    # @raise [Errno::ENOENT] if the extracted CSV files cannot be found
    def insert(package, dir = "tmp")
      # +dir+ must be forwarded, otherwise data is always read from "tmp".
      data_files(package, dir).each do |filepath, table|
        print "Resetting table #{table}..."
        ActiveRecord::Base.connection.execute("DELETE FROM #{table}")
        puts "done"

        start_time = Time.now
        print "Loading data for table #{table}"
        load_csv_into_table(filepath, table)
        puts "finished (#{Time.now - start_time} seconds)"
      end
    end

    # File name (last path segment) of the archive for +package+.
    #
    # @param package [Symbol] one of the keys of ARCHIVE_URL_PATHS
    # @return [String]
    # @raise [ArgumentError] if +package+ is unknown
    def archive_filename(package)
      File.basename(archive_url_path(package))
    end

    private # -------------------------------------------------------------

    # Streams +filepath+ into +table+ in batches of INSERT_BATCH_SIZE rows.
    # The first line that is not a copyright notice supplies the column names.
    def load_csv_into_table(filepath, table)
      headers = nil
      rows = []
      CSV.foreach(filepath, encoding: "ISO-8859-1") do |line|
        # Blank lines arrive as [] and empty leading cells as nil; neither
        # may reach the string comparison un-coerced.
        next if line.empty? || line.first.to_s.start_with?("Copyright")

        if headers.nil?
          headers = line
          next
        end

        rows << line
        next if rows.size < INSERT_BATCH_SIZE

        insert_into_table(table, headers, rows)
        rows = []
        print "."
      end
      insert_into_table(table, headers, rows) unless rows.empty?
    end

    # Issues one multi-row INSERT for +rows+.
    # NOTE(review): table and header names are interpolated unquoted; they
    # come from this module and from the CSV header line respectively.
    def insert_into_table(table, headers, rows)
      value_strings = rows.map do |row|
        "(#{row.map { |col| sql_escaped_value(col) }.join(',')})"
      end
      q = "INSERT INTO #{table} (#{headers.join(',')}) " \
        "VALUES #{value_strings.join(',')}"
      ActiveRecord::Base.connection.execute(q)
    end

    # Canonical integer strings are passed through verbatim; everything else
    # (including nil) is quoted by the database connection.
    def sql_escaped_value(value)
      return value if value.to_i.to_s == value

      ActiveRecord::Base.connection.quote(value)
    end

    # Maps each extracted CSV path to the table it is loaded into.
    #
    # @return [Hash{String => String}] CSV path => table name
    # @raise [ArgumentError] if +package+ has no table mapping
    # @raise [Errno::ENOENT] if fewer CSV files than tables are found
    def data_files(package, dir = "tmp")
      case package
      when :geolite_city_csv
        tables = ["maxmind_blocks", "maxmind_location"]
        # Sort explicitly (glob order is not guaranteed on every Ruby) and
        # use the last two in case multiple versions exist.
        files = Dir.glob(File.join(dir, "GeoLiteCity_*/*.csv")).sort.last(tables.size)
        if files.size < tables.size
          raise Errno::ENOENT,
                "expected #{tables.size} GeoLiteCity CSV files under #{dir}"
        end
        Hash[files.zip(tables)]
      else
        raise ArgumentError,
              "loading is not supported for package: #{package.inspect}"
      end
    end

    def archive_url(package)
      base_url + archive_url_path(package)
    end

    def archive_url_path(package)
      ARCHIVE_URL_PATHS.fetch(package) do
        raise ArgumentError, "unknown MaxMind package: #{package.inspect}"
      end
    end

    def base_url
      "http://geolite.maxmind.com/download/geoip/database/"
    end
  end
end

# ---------------------------------------------------------------------------
# lib/tasks/maxmind.rake
# ---------------------------------------------------------------------------
# The tasks are only registered when the Rake DSL is loaded, so this listing
# can also be loaded as a plain Ruby library.
if defined?(Rake::DSL)
  require 'maxmind_database' unless defined?(Geocoder::MaxmindDatabase)

  namespace :geocoder do
    namespace :maxmind do
      namespace :geolite_city do
        # Working directory for the archive and the extracted CSV files.
        data_dir = -> { ENV['DIR'] || "tmp/" }

        desc "Download and load/refresh MaxMind GeoLite City data"
        task load: [:download, :extract, :insert]

        desc "Download MaxMind GeoLite City data"
        task :download do
          Geocoder::MaxmindDatabase.download(:geolite_city_csv, data_dir.call)
          # TODO: confirm data was fetched properly
        end

        desc "Extract (unzip) MaxMind GeoLite City data"
        task :extract do
          dir = data_dir.call
          filename = Geocoder::MaxmindDatabase.archive_filename(:geolite_city_csv)
          archive = File.join(dir, filename)
          # Argument-vector form: no shell is involved, so DIR may contain
          # spaces or shell metacharacters. -o overwrites without prompting.
          # TODO: make platform independent
          unless system("unzip", "-o", archive, "-d", dir)
            raise "unzip failed for #{archive}"
          end
        end

        desc "Load/refresh MaxMind GeoLite City data"
        task insert: [:environment] do
          Geocoder::MaxmindDatabase.insert(:geolite_city_csv, data_dir.call)
        end
      end
    end
  end
end