diff --git a/postgresql/tutorial/weather/Gemfile b/postgresql/tutorial/weather/Gemfile new file mode 100644 index 0000000..0b1bb15 --- /dev/null +++ b/postgresql/tutorial/weather/Gemfile @@ -0,0 +1,5 @@ +source :rubygems + +gem 'archive-tar-minitar' +gem 'progressbar' +gem 'typhoeus' diff --git a/postgresql/tutorial/weather/Gemfile.lock b/postgresql/tutorial/weather/Gemfile.lock new file mode 100644 index 0000000..deb5ea3 --- /dev/null +++ b/postgresql/tutorial/weather/Gemfile.lock @@ -0,0 +1,18 @@ +GEM + remote: http://rubygems.org/ + specs: + archive-tar-minitar (0.5.2) + ffi (1.1.0) + mime-types (1.19) + progressbar (0.9.0) + typhoeus (0.4.2) + ffi (~> 1.0) + mime-types (~> 1.18) + +PLATFORMS + ruby + +DEPENDENCIES + archive-tar-minitar + progressbar + typhoeus diff --git a/postgresql/tutorial/weather/import-data b/postgresql/tutorial/weather/import-data index d9709fa..a1db080 100755 --- a/postgresql/tutorial/weather/import-data +++ b/postgresql/tutorial/weather/import-data @@ -1,25 +1,52 @@ -#!/bin/bash +#!/usr/bin/env ruby -FTP_BASE='ftp://ftp.ncdc.noaa.gov/pub/data/gsod' +require 'fileutils' -pushd $(dirname $(readlink -f $0)) +require 'bundler/setup' +require 'archive/tar/minitar' +require 'progressbar' +require 'typhoeus' -pushd ./data -if [[ ! -f ish-history.csv ]] -then - curl -O $FTP_BASE/ish-history.csv -fi +FTP_BASE = 'ftp://ftp.ncdc.noaa.gov/pub/data/gsod' +YEARS = %w(1950 1960 1970 1980 1990 2000 2010) -for year in 1950 1960 1970 1980 1990 2000 2010 -do - tarname=gsod_$year.tar - if [[ ! 
# Entry point: downloads the GSOD station history and the per-year tarballs
# listed in YEARS into the sibling `data` directory, then unpacks every
# `.gz` member that is not already present there.
#
# Changes the process working directory to the data directory, so relative
# paths used below (and in download_here_maybe?) resolve against it.
def main
  # Reference the module directly rather than `include Archive::Tar`, which
  # at top level would mix it into Object for the whole process.
  minitar = Archive::Tar::Minitar
  dest_dir = File.expand_path('../data', __FILE__)
  Dir.chdir(dest_dir)

  download_here_maybe?("#{FTP_BASE}/ish-history.csv")

  YEARS.each do |year|
    tarname = "gsod_#{year}.tar"
    dest = File.join(dest_dir, tarname)
    download_here_maybe?("#{FTP_BASE}/#{year}/#{tarname}")

    # Block form so the tar input is closed once the listing has been read;
    # the non-block form hands back an open stream that was never closed.
    to_unpack = minitar.open(dest, 'r') do |tar|
      tar.map(&:full_name).select do |name|
        name.end_with?('.gz') && !File.exist?(name)
      end
    end
    next if to_unpack.empty?

    progress = ProgressBar.new(tarname, to_unpack.length)
    minitar.unpack(dest, dest_dir, to_unpack) do |action, _name, _stats|
      progress.inc(1) if action == :file_done
    end
    puts # move past the progress bar line
  end
end

# Fetches +url+ into the current directory unless a file with the same
# basename already exists there.
#
# The payload is written to "<basename>.part" in binary mode and renamed into
# place only after the write completes, so an interrupted or failed download
# never leaves a file that a later run would mistake for a finished one.
#
# @param url [String] remote location; its basename is the local file name
# @return [Boolean] true if the file was downloaded, false if it was already
#   present
# @raise [RuntimeError] if the response body is nil or empty
def download_here_maybe?(url)
  outfile = File.basename(url)
  return false if File.exist?(outfile)

  puts "Writing #{url} to #{outfile}"
  body = Typhoeus::Request.get(url).body
  raise "Download of #{url} failed: empty response" if body.nil? || body.empty?

  partial = "#{outfile}.part"
  begin
    # 'wb': the tarballs are binary; text mode can corrupt them on platforms
    # that translate line endings.
    File.open(partial, 'wb') { |f| f.write(body) }
    File.rename(partial, outfile)
  ensure
    # Only still present if the write or rename failed.
    File.delete(partial) if File.exist?(partial)
  end
  true
end

main if $PROGRAM_NAME == __FILE__