#!/usr/bin/ruby

# Allow `require` to find files under /usr/local/etc. App#run requires
# 'pshbspool-cfg', which presumably lives there (confirm against deployment).
$LOAD_PATH.push '/usr/local/etc'

# Extensions to the core Time class used for HTTP date headers and for
# parsing the timestamp formats this script encounters.
#
# NOTE(review): Time.parse shares its name with the method added by the
# stdlib 'time' library; whichever definition is loaded last wins. Confirm
# that nothing loaded later requires 'time'.
class Time
  # Formats the receiver as an RFC 1123 HTTP-date in GMT, for example
  # "Sat, 02 Jan 2010 03:04:05 GMT".
  #
  # Uses getutc (a converted copy) rather than utc, so the receiver is not
  # switched to UTC as a side effect of being formatted.
  def rfc1123
    getutc.strftime('%a, %d %b %Y %H:%M:%S GMT')
  end

  # Lower-case three-letter month abbreviation => month number.
  MONTAB = {
    :jan => 1,  :feb => 2,  :mar => 3,  :apr => 4,
    :may => 5,  :jun => 6,  :jul => 7,  :aug => 8,
    :sep => 9,  :oct => 10, :nov => 11, :dec => 12
  }.freeze

  # Parses +str+ into a Time. Three shapes are recognised:
  #
  # * "02 Jan 2010 03:04:05 GMT"        (HTTP-date; a weekday prefix is ignored)
  # * "2010-01-02T03:04:05Z"            (fractional seconds allowed)
  # * "2010-01-02T03:04:05+09:00"       (numeric offset, colon optional)
  #
  # Best effort: any unrecognised or invalid input (including an unknown
  # month name) yields Time.now instead of raising.
  def Time.parse str
    begin
      case str
      when /(\d+)\s+(\w+)\s+(\d+)\s+(\d+):(\d+):(\d+)\s+GMT/ then
        m = $~
        # Unknown month names map to 0, which Time.gm rejects; the rescue
        # below then falls back to Time.now.
        mon = MONTAB[m[2].downcase.to_sym].to_i
        d, y, h, n, s = [m[1], m[3], m[4], m[5], m[6]].map { |v| v.to_i }
        Time.gm(y, mon, d, h, n, s)
      when /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+(?:\.\d+)?)Z/ then
        m = $~
        y, mon, d, h, n = (1..5).map { |i| m[i].to_i }
        Time.gm(y, mon, d, h, n, m[6].to_f)
      when /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+(?:\.\d+)?)([-+]\d\d):?(\d\d)/ then
        m = $~
        y, mon, d, h, n = (1..5).map { |i| m[i].to_i }
        # The sign belongs to the whole offset (so "-05:30" is -5h30m, and
        # "-00:30" is still negative), and the offset is expressed in
        # seconds because Time#- takes seconds.
        sign = m[7][0, 1] == '-' ? -1 : 1
        offset = sign * (m[7].to_i.abs * 3600 + m[8].to_i * 60)
        Time.gm(y, mon, d, h, n, m[6].to_f) - offset
      else raise "unknown datetime format #{str}"
      end
    rescue
      Time.now
    end
  end
end

module PSHBSpool
class App

  # Snapshots the CGI request environment into instance variables.
  # Missing string variables become ''; @clen is an Integer when
  # CONTENT_LENGTH is set and nil otherwise. The request body is not read
  # here (@reqbody starts as nil).
  def initialize
    @method, @qstr, @path, @addr, @ctype =
      %w[REQUEST_METHOD QUERY_STRING PATH_INFO REMOTE_ADDR CONTENT_TYPE].map { |name| ENV[name].to_s }
    length = ENV['CONTENT_LENGTH']
    @clen = length ? length.to_i : nil
    @reqbody = nil
  end

  # Checks a subscription verification against the configured TOPICS.
  # Returns true when some entry's :urlpat matches +topic+ and its :vtoken
  # matches +vtok+ (both compared with ===); raises Errno::EPERM otherwise.
  def verify_auth topic, vtok
    registered = TOPICS.any? { |entry| entry[:urlpat] === topic && entry[:vtoken] === vtok }
    return true if registered
    raise Errno::EPERM, "unregistered topic(#{topic}) & verify_token(#{vtok})"
  end

  # Checks that +topic+ matches the :urlpat of some configured TOPICS entry
  # (compared with ===). Returns true on a match, raises Errno::EPERM
  # otherwise.
  def post_auth topic
    known = TOPICS.any? { |entry| entry[:urlpat] === topic }
    return true if known
    raise Errno::EPERM, "unregistered topic(#{topic})"
  end

  def verify_save pa
    database {|db|
      sql = <<-ENDSQL 
      INSERT INTO verified
      (ip, mode, topic, chal, lsec, vtok, ins)
      VALUES(?, ?, ?, ?, ?, ?, NOW())
      ENDSQL
      st = db.prepare(sql)
      st.execute(@addr, pa['hub.mode'].to_s, pa['hub.topic'].to_s,
	pa['hub.challenge'].to_s, pa['hub.lease_seconds'].to_s,
	pa['hub.verify_token'].to_s)
      st.close
    }
  end

  # Handles a hub verification GET: decodes the query string, authorises
  # the topic/token pair, records the request and echoes hub.challenge.
  #
  # Parameters are split on '&' or ';'. Only "name=value" spans whose name
  # consists of word characters and dots are kept, and %XX escapes in the
  # value are decoded ('+' is left untouched).
  #
  # Returns the CGI response (headers plus challenge body) as a String.
  # Raises Errno::EPERM (via verify_auth) for unregistered topic/token.
  def verify
    pa = {}
    @qstr.split(/[&;]/).each do |pair|
      m = /^([.\w]+)=/.match(pair)
      next unless m
      pa[m[1]] = m.post_match.gsub(/%[\dA-Fa-f]{2}/) { |esc| [esc[1, 2]].pack('H2') }
    end
    verify_auth(pa['hub.topic'].to_s, pa['hub.verify_token'].to_s)
    verify_save(pa)
    chal = pa['hub.challenge'].to_s
    header = "Content-Type: text/plain\r\nContent-Length: #{chal.size}\r\n\r\n"
    header + chal
  end

  def post_store1(db)
    sql = <<-ENDSQL
    INSERT INTO posted(ip, ins, body)
    VALUES(?, NOW(), ?)
    ENDSQL
    st = db.prepare(sql)
    begin
      st.execute(@addr, @reqbody)
    ensure
      st.close
    end
    db.insert_id
  end

  # Parses @reqbody as an Atom feed and yields one Hash per <entry>.
  #
  # db::     not used by this method.
  # postid:: value stored under :postid in every yielded Hash.
  #
  # The feed's rel="self" link is taken as the topic and must pass
  # post_auth (Errno::EPERM otherwise; a feed without such a link is
  # checked with nil). Each yielded Hash has the keys :postid, :title,
  # :update, :uri, :link and :author; a field that is absent, or whose
  # lookup raises, is nil.
  def post_parse db, postid
    require 'rexml/document'
    doc = REXML::Document.new(@reqbody)
    ns = {'a'=>'http://www.w3.org/2005/Atom'}
    self_link = REXML::XPath.first(doc.root, '/a:feed/a:link[@rel="self"]/@href', ns)
    topic =
      begin
        self_link.value
      rescue
        nil
      end
    STDERR.puts "parsing atom feed #{topic} id #{postid}"
    post_auth topic
    # For each field the XPath expressions are tried in order; the first
    # one that finds a node wins.
    entfields = {
      :title  => ['a:title/text()'],
      :update => ['a:updated/text()'],
      :uri    => ['a:id/text()'],
      :link   => ['a:link[@rel="alternate"]/@href', 'a:link/@href'],
      :author => ['a:author/a:name/text()'],
    }
    REXML::XPath.each(doc.root, '/a:feed/a:entry', ns) do |entry|
      record = {:postid => postid}
      entfields.each do |field, xpaths|
        record[field] =
          begin
            node = nil
            xpaths.each do |xp|
              node = REXML::XPath.first(entry, xp, ns)
              break if node
            end
            node ? node.value : node
          rescue
            nil
          end
      end
      yield record
    end
  end

  # Fetches the document an entry links to and records the outcome in +ent+.
  #
  # ent:: entry Hash; :link is read, :body and :wgetst are written.
  #
  # Does nothing when the entry has no :link. The link must pass post_auth
  # (Errno::EPERM otherwise). On a 2xx response :body is the response body,
  # otherwise it is ''; :wgetst is always "<code> <message>".
  #
  # The request target is url.request_uri, so the link's query string is
  # sent along and an empty path is requested as "/".
  # Network errors are not rescued here and propagate to the caller.
  # NOTE(review): the connection is opened without TLS options, so https
  # links are not fetched over TLS - confirm whether they need support.
  def post_webget ent
    require 'net/http'
    require 'uri'
    return unless ent[:link]
    post_auth ent[:link]
    url = URI.parse(ent[:link])
    res = Net::HTTP.start(url.host, url.port) { |http| http.get(url.request_uri) }
    ent[:body] = /^2/ === res.code ? res.body : ''
    ent[:wgetst] = "#{res.code} #{res.message}"
  end

  def post_store2 db, ent
    r = nil
    sql = <<-ENDSQL
    INSERT INTO msgs(postid, ins, uri, title, upd, author, link, body, wgetst)
    VALUES(?, NOW(), ?, ?, ?, ?, ?, ?, ?)
    ENDSQL
    st = db.prepare(sql)
    begin
      st.execute(*ent.values_at(:postid, :uri, :title, :update, :author, :link,
        :body, :wgetst).map{|s| s.to_s})
      r = true
    rescue Mysql::Error => e
      raise e unless /^Duplicate entry/ === e.message
      r = false
    ensure
      st.close
    end
    r
  end

  # Opens a MySQL connection using the :srv, :usr, :pwd and :db entries of
  # STORAGE, yields it to the block and closes it afterwards, even when the
  # block raises. Returns the block's value.
  def database
    credentials = [:srv, :usr, :pwd, :db].map { |key| STORAGE[key] }
    conn = Mysql.connect(*credentials)
    begin
      yield conn
    ensure
      conn.close
    end
  end

  # Handles a content-distribution POST: reads the request body from STDIN
  # (@clen bytes), stores the raw post, then fetches and stores every entry
  # parsed from it.
  #
  # Returns a text/plain CGI response "ok <n> ng <m>", where ok counts
  # entries for which post_store2 returned true and ng those for which it
  # returned false. The same summary is written to STDERR.
  def post
    STDERR.puts "post 1 #{@clen.inspect}" if $DEBUG
    @reqbody = STDIN.read(@clen)
    STDERR.puts "post 2" if $DEBUG
    counts = {true => 0, false => 0}
    database do |db|
      postid = post_store1(db)
      post_parse(db, postid) do |ent|
        post_webget(ent)
        outcome = post_store2(db, ent)
        counts[outcome] += 1
      end
    end
    summary = "ok #{counts[true]} ng #{counts[false]}"
    STDERR.puts summary
    "Content-Type: text/plain\r\n\r\n#{summary}\r\n"
  end

  # Builds this script's own URL from SERVER_NAME (default 'localhost'),
  # SERVER_PORT (default '80') and SCRIPT_NAME. The port is always included.
  #
  # When PRMS[:urlhook] is set it is called with the URL string. Its return
  # value is ignored, so a hook can only influence the result by modifying
  # the string in place (presumably the intended contract - confirm against
  # the configuration file).
  def myname
    server = ENV['SERVER_NAME'] || 'localhost'
    port_no = ENV['SERVER_PORT'] || '80'
    url = "http://#{server}:#{port_no}#{ENV['SCRIPT_NAME']}"
    hook = PRMS[:urlhook]
    hook.call(url) if hook
    url
  end

  # Renders the status Atom feed listing the 300 most recent rows of `msgs`
  # (highest postid first).
  #
  # Conditional requests: when If-Modified-Since is present and that time
  # plus 60 seconds is still in the future, Errno::EAGAIN is raised with the
  # feed URL as message. Time.parse yields the current time for
  # unparseable input, so an unparseable header also takes this path.
  #
  # Returns the CGI response (Expires, Last-Modified, Content-Type headers
  # and the XML document) as a String. Last-Modified and the feed's
  # <updated> element carry the newest arrival time among the listed rows,
  # or 1900-01-01T00:00:00Z when there are none.
  #
  # The statement handle is closed in an ensure clause, and the base URL is
  # computed once instead of once per row.
  def path_atom
    now = (tnow = Time.now).rfc1123
    if hims = ENV['HTTP_IF_MODIFIED_SINCE'] then
      STDERR.puts "HIMS #{hims.inspect}" if $DEBUG
      t = Time.parse(hims) + 60
      STDERR.puts "CMP t=#{t} tnow=#{tnow}" if $DEBUG
      raise Errno::EAGAIN, "#{myname}/atom.xml" if t > tnow
      STDERR.puts "CMP PASSTHRU" if $DEBUG
    end
    require 'rexml/document'
    base = myname
    url = "#{base}/atom.xml"
    # <updated> initially holds a placeholder; it is overwritten below once
    # the newest arrival time is known.
    template = <<ENDXML
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns='http://www.w3.org/2005/Atom'>
<title>PubSubHubbub stream status</title>
<updated>#{now}</updated>
<id>#{url}</id>
<description>THIS ATOM FEED IS PROVIDED "AS IS" WITHOUT NO GUARANTEE.</description>
<link rel="self" href="#{url}"/>
</feed>
ENDXML
    insmax = '1900-01-01T00:00:00Z'
    d = REXML::Document.new(template)
    root = d.root
    database {|db|
      sql = <<-ENDSQL
      SELECT ins, uri, title, upd, author, link, wgetst, length(body), postid
      FROM msgs
      ORDER BY postid DESC
      LIMIT 300
      ENDSQL
      st = db.prepare(sql)
      begin
        st.execute
        while row = st.fetch
          entry_url = "#{base}/entry/#{row[1]}"
          ent = root.add_element('entry')
          ent.add_element('id').add_text(row[1])
          ent.add_element('title').add_text(row[2])
          ent.add_element('updated').add_text(row[3])
          ent.add_element('author').add_element('name').add_text(row[4])
          ent.add_element('link', 'rel'=>'alternate', 'type'=>'application/xml', 'href'=>entry_url)
          # Arrival time rewritten from "date time" to "dateTtimeZ".
          arrtime = row[0].to_s.sub(/ /, 'T').sub(/$/, 'Z')
          smry = %(<a href="#{entry_url}">post=#{row[8]} arrived=#{arrtime} status=#{row[6]} size=#{row[7]} author=#{row[4]}</a>\n)
          ent.add_element('summary', 'type'=>'html').add_text(smry)
          insmax = arrtime if arrtime > insmax
        end
      ensure
        st.close
      end
    }
    # '/*/*[2]' is the second child element of <feed>, i.e. <updated>.
    REXML::XPath.first(d.root, '/*/*[2]').text = insmax
    insmax = Time.parse(insmax).rfc1123
    xpr = (Time.now.utc + 60).rfc1123
    "Expires: #{xpr}\r\nLast-Modified: #{insmax}\r\nContent-Type: application/atom+xml\r\n\r\n#{d.to_s}"
  end

  # Renders a plain-text listing ("<arrival>\t<uri>\n" per row) of the
  # `msgs` rows whose `ins` lies BETWEEN +day1+ AND +day2+, oldest first.
  #
  # day1, day2:: date strings as captured from the request path
  #              (YYYY-MM-DD); passed to the query as bound parameters.
  #
  # Conditional requests: when If-Modified-Since is present and that time
  # plus 60 seconds is still in the future, Errno::EAGAIN is raised. Its
  # message is the URL of this listing, which the 304 handler emits as
  # Content-Location.
  #
  # Returns the CGI response (Expires, Last-Modified, Content-Type headers
  # and the listing) as a String. Last-Modified is the newest arrival time
  # in the listing, or 1900-01-01T00:00:00Z when it is empty.
  def path_list day1, day2
    tnow = Time.now
    if hims = ENV['HTTP_IF_MODIFIED_SINCE'] then
      STDERR.puts "HIMS #{hims.inspect}" if $DEBUG
      t = Time.parse(hims) + 60
      STDERR.puts "CMP t=#{t} tnow=#{tnow}" if $DEBUG
      raise Errno::EAGAIN, "#{myname}/list#{day1}_#{day2}.txt" if t > tnow
      STDERR.puts "CMP PASSTHRU" if $DEBUG
    end
    insmax = '1900-01-01T00:00:00Z'
    d = []
    database {|db|
      sql = <<-ENDSQL
      SELECT ins, uri
      FROM msgs
      WHERE ins BETWEEN ? AND ?
      ORDER BY ins ASC
      ENDSQL
      st = db.prepare(sql)
      begin
        st.execute(day1, day2)
        while row = st.fetch
          # Arrival time rewritten from "date time" to "dateTtimeZ".
          arrtime = row[0].to_s.sub(/ /, 'T').sub(/$/, 'Z')
          d.push "#{arrtime}\t#{row[1]}\n"
          insmax = arrtime if arrtime > insmax
        end
      ensure
        # Release the statement handle even when the query fails.
        st.close
      end
    }
    insmax = Time.parse(insmax).rfc1123
    xpr = (Time.now.utc + 60).rfc1123
    "Expires: #{xpr}\r\nLast-Modified: #{insmax}\r\nContent-Type: text/plain\r\n\r\n#{d.join}"
  end

  # Serves the stored body of the message whose uri column equals +uri+.
  #
  # Any request carrying If-Modified-Since is answered via Errno::EAGAIN
  # without consulting the database. Raises Errno::EPERM when no row with a
  # body is found.
  #
  # Returns the CGI response as a String: Expires two days ahead,
  # Last-Modified taken from the row's upd column, Content-Type
  # application/xml, followed by the body.
  def path_entry uri
    raise Errno::EAGAIN, "#{myname}/entry/#{uri}" if ENV['HTTP_IF_MODIFIED_SINCE']
    row = nil
    database do |db|
      stmt = db.prepare("SELECT upd, body FROM msgs WHERE uri = ?")
      begin
        stmt.execute(uri)
        row = stmt.fetch
      ensure
        stmt.close
      end
    end
    upd, body = row
    raise Errno::EPERM, "uri #{uri} not found" unless body
    modified = Time.parse(upd).rfc1123
    expires = (Time.now.utc + 86400 * 2).rfc1123
    headers = "Expires:#{expires}\r\nLast-Modified: #{modified}\r\nContent-Type: application/xml\r\n\r\n"
    headers + body.to_s
  end

  # Routes a GET request by @path:
  #
  # * a path starting with /atom.xml    -> path_atom
  # * /listYYYY-MM-DD_YYYY-MM-DD.txt    -> path_list with the two dates
  # * a path starting with /entry/      -> path_entry with the remainder
  # * anything else                     -> 302 redirect to the atom feed
  #
  # Returns the CGI response String produced by the selected handler.
  def path
    case @path
    when %r{^/atom.xml} then path_atom
    when %r{^/list(\d\d\d\d-\d\d-\d\d)_(\d\d\d\d-\d\d-\d\d)\.txt$}
      first_day = Regexp.last_match(1)
      last_day = Regexp.last_match(2)
      path_list(first_day, last_day)
    when %r{^/entry/}
      # Everything after the "/entry/" prefix is the entry uri.
      path_entry(Regexp.last_match.post_match)
    else
      feed = "#{myname}/atom.xml"
      "Status: 302 Found\r\nLocation: #{feed}\r\n\r\n#{feed}"
    end
  end

  # Dispatches a GET request: a non-empty query string is treated as a hub
  # verification (verify); otherwise the request is routed by path.
  def getmethod
    @qstr.empty? ? path : verify
  end

  # Entry point: loads the configuration and the MySQL binding, dispatches
  # on the HTTP method and prints one CGI response to standard output.
  #
  # Error mapping:
  # * Errno::EAGAIN -> 304 Not Modified; the exception message (minus the
  #   errno text prefix) becomes Content-Location
  # * Errno::EPERM  -> 404 with the exception message as body
  # * anything else -> 500 Internal Server Error with message and backtrace
  #
  # The catch-all covers StandardError and ScriptError. ScriptError is
  # included so a LoadError from the requires below still yields a response,
  # while SystemExit and signals are no longer swallowed.
  # NOTE(review): the 500 body exposes the backtrace to the client -
  # confirm this is acceptable for the deployment.
  def run
    require 'pshbspool-cfg'
    require 'mysql'
    resp = case @method
    when 'GET' then getmethod
    when 'POST' then post
    else raise "unknown http method #@method"
    end
    resp = ["Date: #{Time.now.rfc1123}\r\n", resp].join
    print resp
  rescue Errno::EAGAIN => e
    puts <<EOF
Status: 304 Not Modified\r
Date: #{Time.now.rfc1123}\r
Content-Location: #{e.message.sub(/.* - /, '')}\r
\r
EOF
  rescue Errno::EPERM => e
    puts <<EOF
Status: 404 File Not Found\r
Date: #{Time.now.rfc1123}\r
Content-Type: text/plain; charset=utf8\r
\r
#{e.message}\r
EOF
  rescue StandardError, ScriptError => e
    puts <<EOF
Status: 500 Internal Server Error\r
Content-Type: text/plain; charset=utf8\r
\r
#{e.message} (#{e.class})
#{e.backtrace.join("\n")}
EOF
  end
  
end
end

# Script entry point: build the app from the CGI environment and emit one response.
PSHBSpool::App.new.run
