require 'net/http'

# Reads tab-separated airport records from ARGF (files named on the command
# line, or standard input), fetches the printable version of each record's
# English Wikipedia page, and prints one tab-separated line
# (code, iata, lat, lon, name) on standard output for every page that yields
# coordinates. Progress and problems are reported on standard error.
#
# Input fields, in order:
#   1. cccc  - four-character code checked against the page's "ICAO:" entry
#   2. iata  - copied to the output unchanged (presumably the IATA code)
#   3. ppath - page link of the form "/wiki/Title" or "/wiki/Title#fragment"
#   4. name  - copied to the output unchanged
# Any further fields are ignored.

# Proxy the connection is opened through. The script also carried a
# commented-out alternative that set both values to nil.
# NOTE(review): this is plain HTTP on port 80 - confirm the site still serves
# page content that way rather than redirecting to HTTPS.
PROXY_HOST = "localhost"
PROXY_PORT = 80

# Seconds to wait between two page requests.
REQUEST_INTERVAL = 1.4

# Shape of a usable page link; the capture is "Title" or "Title#fragment".
WIKI_PATH = %r{^/wiki/(\S+)}

# NOTE(review): the four patterns below are reproduced exactly as found. They
# look as though markup inside the literals was lost at some point:
#   * ICAO_CODE contains an empty alternation `(?:|)?` that matches nothing;
#   * BOX_START has no capture group although its branch compares the first
#     capture with the fragment, and it can only match a newline, which a
#     line produced by splitting on newlines never contains;
#   * BOX_END is empty, so it matches every line and the COORDINATES branch
#     is never reached.
# Restore the intended patterns from a pristine copy before relying on the
# output of this script.
ICAO_CODE = %r{>ICAO: (?:|)?(\S\S\S\S)}
BOX_START = %r{
}
BOX_END = %r{}
COORDINATES = %r{([^<]*)}

# What has been learned from one page so far:
#   in_box   - true while inside the box selected by the link's fragment
#   box_hit  - first coordinate text seen inside that box
#   page_hit - first coordinate text seen outside it
#   rejected - true once the page's code disagreed with the record's code
PageScan = Struct.new(:in_box, :box_hit, :page_hit, :rejected)

# Updates +scan+ from a single line of page text. Only the first pattern that
# matches the line is acted upon.
def scan_line(line, scan, cccc, fragment)
  case line
  when ICAO_CODE
    found = Regexp.last_match(1)
    if found != cccc
      STDERR.puts "Bad airport page (#{found} != #{cccc})"
      scan.rejected = true
    end
  when BOX_START
    scan.in_box = true if Regexp.last_match(1) == fragment
  when BOX_END
    scan.in_box = false
  when COORDINATES
    # Only the first hit of each kind is kept.
    if scan.in_box
      scan.box_hit ||= Regexp.last_match(1)
    else
      scan.page_hit ||= Regexp.last_match(1)
    end
  end
end

# Downloads the printable page for +page+ over +http+ and returns [lat, lon]
# as strings, or nil when the page was rejected or held no usable coordinates.
# A hit inside the fragment's box is preferred over a hit elsewhere.
def fetch_coordinates(http, page, fragment, cccc)
  path = "/w/index.php?title=#{page}&printable=yes"
  STDERR.puts path if $DEBUG

  scan = PageScan.new
  pending = ''
  http.get(path) do |chunk|
    # A negative limit keeps the trailing empty field, so a chunk that ends
    # exactly on a newline leaves an empty remainder instead of holding back
    # (and later gluing to the next chunk) its last complete line.
    pieces = (pending + chunk).split("\n", -1)
    pending = pieces.pop || ''
    pieces.each { |text| scan_line(text, scan, cccc, fragment) }
  end
  # The response may end without a final newline; scan what is left over.
  scan_line(pending, scan, cccc, fragment) unless pending.empty?

  return nil if scan.rejected

  lat, lon = (scan.box_hit || scan.page_hit).to_s.split(/;\s*/)
  lon ? [lat, lon] : nil
end

# The block form closes the connection even when an exception escapes.
Net::HTTP.start('en.wikipedia.org', 80, PROXY_HOST, PROXY_PORT) do |http|
  ARGF.each_line do |record|
    cccc, iata, ppath, name = record.chomp.split(/\t/)
    link = WIKI_PATH.match(ppath.to_s)
    next unless link

    page, fragment = link[1].split(/#/, 2)
    coords = fetch_coordinates(http, page, fragment, cccc)
    if coords
      lat, lon = coords
      STDERR.puts cccc
      puts [cccc, iata, lat, lon, name].join("\t")
    end

    # Pause after every request, including pages that were rejected or had no
    # coordinates, so consecutive fetches are always spaced out.
    sleep REQUEST_INTERVAL
  end
end
STDERR.puts "done"