parent
a5b139f6e7
commit
ee337ba7ee
@ -1,5 +1,25 @@
|
||||
require 'fastercsv'
|
||||
|
||||
|
||||
# ActiveRecord model representing a map, keyed by its city name.
class Map < ActiveRecord::Base
  # Finds or builds the Map named +city_name+ and persists it.
  # Raises (via save!) when the record fails validation.
  def self.from_city_name(city_name)
    find_or_initialize_by_name(city_name).save!
  end

  # Returns +count+ maps in database-random order.
  # NOTE(review): 'RANDOM()' is SQLite/PostgreSQL syntax — verify the adapter.
  def self.rand(count = 2)
    find(:all, :order => 'RANDOM()', :limit => count)
  end

  # Imports maps from the headered CSV at +csv_filename+ (city/country
  # columns), saving one Map per row and yielding each to the caller
  # when a block is given.
  # NOTE(review): Kernel#open shells out for filenames beginning with '|';
  # prefer File.open if the filename can ever be untrusted.
  def self.import(csv_filename)
    rows = FasterCSV.parse(open(csv_filename), :headers => true,
                           :header_converters => [:downcase, :symbol])
    rows.each do |row|
      record = find_or_initialize_by_name(
        "#{row[:city]}, #{row[:country]}"
      )
      record.save
      yield record if block_given?
    end
  end
end
|
||||
|
@ -0,0 +1,5 @@
|
||||
resque_web:
|
||||
port: 15678
|
||||
|
||||
redis:
|
||||
port: 16379
|
@ -1,8 +0,0 @@
|
||||
resque_web:
|
||||
port: 15678
|
||||
|
||||
redis:
|
||||
port: 16379
|
||||
|
||||
map:
|
||||
base_url: 'http://en.wikipedia.org/wiki/List_of_towns_and_cities_with_100,000_or_more_inhabitants/cityname:_{FIRST_LETTER}'
|
|
@ -0,0 +1,61 @@
|
||||
require 'base64'
|
||||
require 'logger'
|
||||
require 'uri'
|
||||
|
||||
require 'nokogiri'
|
||||
require 'typhoeus'
|
||||
|
||||
|
||||
# Fetches Google Static Maps satellite images for a list of locations,
# issuing the HTTP requests concurrently through Typhoeus::Hydra.
class GoogleMapLocationFetcher
  attr_accessor :base_map_url, :log

  def initialize
    # Static Maps endpoint; the per-location &center= parameter is
    # appended in #fetch.
    @base_map_url = [
      'http://maps.googleapis.com/maps/api/staticmap',
      '?zoom=15',
      '&sensor=false',
      '&size=512x512',
      '&maptype=satellite',
    ].join('')

    @log = Logger.new(
      File.expand_path('../log/map-crawler.log', File.dirname(__FILE__))
    )
    @log.level = Logger::INFO
    @log.formatter = lambda do |severity, time, prog, message|
      "#{time} - #{severity} - #{message}\n"
    end
  end

  # Default callback: prints the location followed by the
  # Base64-encoded image bytes to stdout.
  def self.mapdump_callback(location, image)
    puts "Map '#{location}':"
    puts Base64.encode64(image)
  end

  # Queues one request per entry in +locations+ and runs them
  # concurrently; each completed response is routed to +callback+
  # (or the default dumper) via #handle_response.
  def fetch(locations, &callback)
    callback ||= self.class.method(:mapdump_callback)
    hydra = Typhoeus::Hydra.new(:initial_pool_size => 26)

    locations.each do |location|
      request = Typhoeus::Request.new(
        # BUG FIX: the separator had been mangled to "¢er=" — the
        # HTML-entity decoding of "&center=". Restore the center
        # query parameter required by the Static Maps API.
        "#{@base_map_url}&center=#{URI.encode(location)}"
      )
      request.on_complete do |response|
        handle_response(response, location, &callback)
      end

      hydra.queue(request)
    end

    hydra.run
  end

  # Invokes +callback+ with the image body when the request succeeded
  # and returned an image/* content type; otherwise with ''.
  def handle_response(response, location, &callback)
    @log.info("Handling request at url #{response.effective_url}")
    if response.success? && response.headers_hash[:content_type] =~ /image\/.*/
      callback.call(location, response.body)
    else
      callback.call(location, '')
    end
  end
end
|
@ -1,49 +0,0 @@
|
||||
require 'logger'
|
||||
|
||||
require 'nokogiri'
|
||||
require 'typhoeus'
|
||||
|
||||
|
||||
# Crawls a per-letter city index page and reports each city name found.
class MapCrawler
  attr_accessor :base_map_url, :log, :request_pool

  # +base_map_url+ contains a {FIRST_LETTER} placeholder that is
  # substituted once per letter when crawling.
  def initialize(base_map_url)
    @base_map_url = base_map_url

    @log = Logger.new(
      File.expand_path('../log/map-crawler.log', File.dirname(__FILE__))
    )
    @log.level = Logger::INFO
    @log.formatter = lambda do |severity, time, prog, message|
      "#{time} - #{severity} - #{message}\n"
    end
  end

  # Fetches the index page for every letter A..Z concurrently and calls
  # +city_name_callback+ (defaults to printing the name) once per city.
  def crawl(city_name_callback = nil)
    city_name_callback ||= lambda { |n| puts n }

    hydra = Typhoeus::Hydra.new(:initial_pool_size => 26)

    'A'.upto('Z') do |letter|
      request = Typhoeus::Request.new(
        @base_map_url.gsub(/\{FIRST_LETTER\}/, letter)
      )
      request.on_complete do |response|
        handle_cities(response, city_name_callback)
      end

      hydra.queue(request)
    end

    hydra.run
  end

  # Parses the HTML response and invokes the callback with each
  # stripped city name from the fourth content list on the page.
  # NOTE(review): the [3] index is brittle against page-layout changes.
  def handle_cities(response, city_name_callback)
    @log.info("Handling cities at url #{response.effective_url}")
    document = Nokogiri::HTML(response.body)
    anchors = document.css('div.mw-content-ltr ul')[3].css('li a')
    anchors.each do |anchor|
      @log.info("Found city: #{anchor.text}")
      city_name_callback.call(anchor.text.strip)
    end
  end
end
|
@ -1,6 +1,13 @@
|
||||
# Rake tasks for building the Map table.
namespace :maps do
  desc 'Index the maps!'
  task :index => :environment do
    # Crawl the configured index pages, creating one Map per city found.
    MapCrawler.new(Setting.map(:base_url)).crawl(Map.method(:from_city_name))
  end
  # BUG FIX: the 'end' above was missing, so the :seed task was nested
  # inside the :index block and the file failed to parse.

  desc 'Seed the maps!'
  task :seed => :environment do
    require 'app/models/map'

    csv_filename = File.expand_path(
      '../../db/capital-cities.csv', File.dirname(__FILE__)
    )
    Map.import(csv_filename) do |map|
      puts "Seeded map '#{map.name}'"
    end
  end
end
|
||||
|
@ -1,19 +0,0 @@
|
||||
require 'spec_helper'
|
||||
|
||||
|
||||
# Integration coverage for MapCrawler against the configured index pages.
describe MapCrawler do
  let(:subject) { MapCrawler.new(Setting.map(:base_url)) }

  describe 'when crawling for actual maps', :integration => true do
    it 'should increment the map count for each map found' do
      found = 0
      counter = ->(_name) { found += 1 }

      # One index page per letter, so at least 26 cities are expected.
      expect { subject.crawl(counter) }.to change { found }.by_at_least(26)
    end
  end
end
|
Loading…
Reference in new issue