It's been waaay too long since I last committed. Eesh.

This commit is contained in:
Dan Buch
2012-03-07 00:11:05 -05:00
parent a5b139f6e7
commit ee337ba7ee
15 changed files with 323 additions and 117 deletions

View File

@@ -0,0 +1,61 @@
require 'base64'
require 'logger'
require 'uri'
require 'nokogiri'
require 'typhoeus'
# Fetches static satellite map images from the Google Static Maps API, one
# request per location name, and hands each result to a callback.
#
# Requests are queued on a Typhoeus::Hydra and executed when the queue is run.
class GoogleMapLocationFetcher
  attr_accessor :base_map_url, :log

  # Builds the base Static Maps URL (zoom 15, 512x512, satellite imagery) and
  # an INFO-level logger that writes to ../log/map-crawler.log, resolved
  # relative to this file's directory.
  def initialize
    @base_map_url = [
      'http://maps.googleapis.com/maps/api/staticmap',
      '?zoom=15',
      '&sensor=false',
      '&size=512x512',
      '&maptype=satellite'
    ].join('')
    @log = Logger.new(
      File.expand_path('../log/map-crawler.log', File.dirname(__FILE__))
    )
    @log.level = Logger::INFO
    @log.formatter = lambda do |severity, time, _progname, message|
      "#{time} - #{severity} - #{message}\n"
    end
  end

  # Default callback used by #fetch when no block is given: prints the
  # location name followed by the Base64-encoded image data to stdout.
  #
  # location - the location name the image was requested for.
  # image    - the raw image bytes (a String; may be empty).
  def self.mapdump_callback(location, image)
    puts "Map '#{location}':"
    puts Base64.encode64(image)
  end

  # Requests a map image for every entry in +locations+ and invokes the
  # callback once per location with (location, image_body).
  #
  # locations - an Enumerable of location names.
  # callback  - optional block taking (location, image); defaults to
  #             GoogleMapLocationFetcher.mapdump_callback.
  #
  # Returns whatever Typhoeus::Hydra#run returns.
  def fetch(locations, &callback)
    callback ||= self.class.method(:mapdump_callback)
    hydra = Typhoeus::Hydra.new(:initial_pool_size => 26)
    locations.each do |location|
      # encode_www_form_component escapes reserved characters such as '&',
      # '#' and '=' so a location name cannot break out of the `center`
      # query parameter. (URI.encode left those untouched and no longer
      # exists in Ruby 3.0+.)
      request = Typhoeus::Request.new(
        "#{@base_map_url}&center=#{URI.encode_www_form_component(location)}"
      )
      request.on_complete do |response|
        handle_response(response, location, &callback)
      end
      hydra.queue(request)
    end
    hydra.run
  end

  # Logs the completed request and forwards its result to the callback.
  #
  # The callback receives the response body when the request succeeded and
  # the content type looks like an image; otherwise it receives an empty
  # string so callers are still notified about every location.
  def handle_response(response, location, &callback)
    @log.info("Handling request at url #{response.effective_url}")
    if response.success? && response.headers_hash[:content_type] =~ /image\/.*/
      callback.call(location, response.body)
    else
      callback.call(location, '')
    end
  end
end

View File

@@ -1,49 +0,0 @@
require 'logger'
require 'nokogiri'
require 'typhoeus'
# Crawls one index page per letter (A through Z) and reports every city name
# found on those pages to a callback.
class MapCrawler
  attr_accessor :base_map_url, :log, :request_pool

  # base_map_url - URL template containing a {FIRST_LETTER} placeholder that
  #                #crawl replaces with each letter in turn.
  #
  # Also sets up an INFO-level logger writing to ../log/map-crawler.log,
  # resolved relative to this file's directory.
  def initialize(base_map_url)
    @base_map_url = base_map_url

    log_path = File.expand_path('../log/map-crawler.log', File.dirname(__FILE__))
    @log = Logger.new(log_path)
    @log.level = Logger::INFO
    @log.formatter = lambda { |severity, time, _prog, message|
      "#{time} - #{severity} - #{message}\n"
    }
  end

  # Queues one request per letter and runs the queue.
  #
  # city_name_callback - callable invoked with each stripped city name;
  #                      when omitted, names are printed with puts.
  #
  # Returns whatever Typhoeus::Hydra#run returns.
  def crawl(city_name_callback = nil)
    city_name_callback = lambda { |n| puts n } unless city_name_callback

    hydra = Typhoeus::Hydra.new(:initial_pool_size => 26)
    'A'.upto('Z') do |letter|
      page_url = @base_map_url.gsub(/\{FIRST_LETTER\}/, letter)
      letter_request = Typhoeus::Request.new(page_url)
      letter_request.on_complete { |response| handle_cities(response, city_name_callback) }
      hydra.queue(letter_request)
    end
    hydra.run
  end

  # Parses a completed response as HTML and calls the callback with the
  # stripped text of every `li a` element inside the fourth
  # `div.mw-content-ltr ul` list (index 3) on the page.
  def handle_cities(response, city_name_callback)
    @log.info("Handling cities at url #{response.effective_url}")

    document = Nokogiri::HTML(response.body)
    lists = document.css('div.mw-content-ltr ul')
    city_links = lists[3].css('li a')

    city_links.each do |link|
      @log.info("Found city: #{link.text}")
      city_name_callback.call(link.text.strip)
    end
  end
end

View File

@@ -1,6 +1,13 @@
# Rake tasks grouped under the `maps` namespace.
# NOTE(review): this span looks like a diff hunk rendered without +/- markers,
# so lines of the old `index` task and the new `seed` task appear interleaved
# and the do/end pairs do not balance as shown — confirm against the real
# Rakefile before editing.
namespace :maps do
desc 'Index the maps!'
task :index => :environment do
# Crawls using the base URL read from Setting.map(:base_url), handing each
# city name to Map.from_city_name.
MapCrawler.new(Setting.map(:base_url)).crawl(Map.method(:from_city_name))
desc 'Seed the maps!'
task :seed => :environment do
require 'app/models/map'
# db/capital-cities.csv, resolved two levels above this file's directory.
csv_filename = File.expand_path(
'../../db/capital-cities.csv', File.dirname(__FILE__)
)
# The block passed to Map.import prints the name of each map it receives.
Map.import(csv_filename) do |map|
puts "Seeded map '#{map.name}'"
end
end
end