#!/usr/bin/env ruby

require 'rubygems'

require 'pp'
require 'time'

require 'tweetstream'
require 'solr'

# Running totals: number of tweets indexed so far, and the skip count most
# recently reported by a rate-limit notice.
tweet_count = 0
skip_count = 0

# Keywords passed as the :track parameter of the streaming filter.
# Frozen so the shared constant cannot be mutated by accident.
STOPWORDS = %w{the}.freeze

  pp STOPWORDS if ENV['DEBUG']=="true"

  # NOTE(review): the Twitter credentials and the Solr endpoint used to be
  # hard-coded. They can now be overridden through the environment; the
  # literals below remain only as backward-compatible defaults and should be
  # removed from source control.
  client = TweetStream::Client.new(ENV.fetch('TWITTER_USER', 'rickyrobinson'),
                                   ENV.fetch('TWITTER_PASSWORD', 'woodstock'))
  solr = Solr::Connection.new(ENV.fetch('SOLR_URL', 'http://broadway.citemine.com:8983/solr'),
                              :autocommit => :on)

  pp "Connected to Solr" if ENV['DEBUG']=="true"

  # Report stream errors on stderr instead of silently discarding them.
  client.on_error do |message|
    warn "TweetStream error: #{message}"
  end

  # Handle a status-deletion notice by removing the tweet from the Solr index.
  # The previous implementation called Tweet.delete_all, but no Tweet model is
  # defined or required by this script, so every notice raised NameError.
  # NOTE(review): assumes documents are keyed by the tweet's status id (the
  # indexing code stores it in the :id field) — confirm against the Solr schema.
  client.on_delete do |status_id, _user_id|
    solr.delete(status_id)
    puts "Deleted tweet."
  end

  # Rate-limit notice: print the reported number of skipped tweets and
  # remember it in skip_count.
  client.on_limit do |dropped|
    $stdout.puts("Skipped #{dropped} tweets due to rate limiting.")
    skip_count = dropped
  end

  # Stream every status matching STOPWORDS and add it to the Solr index.
  #
  # For each status a document is built with the author (:fromID/:fromName),
  # the optional reply target (:toID/:toName), retweet information
  # (:isRT, :originID/:originName), a UTC XML-schema :timestamp and the :text.
  # A failure while handling one status is printed and the stream continues.
  client.filter(:track => STOPWORDS) do |status|
    debug = ENV['DEBUG'] == "true"

    begin
      doc = {
        :id       => status.id,
        :fromID   => status.user.id,
        :fromName => status.user.screen_name
      }

      # Reply fields are only stored when the status is actually a reply.
      reply_user_id = status.in_reply_to_user_id
      doc[:toID] = reply_user_id unless reply_user_id.nil?

      reply_screen_name = status.in_reply_to_screen_name
      doc[:toName] = reply_screen_name unless reply_screen_name.nil?

      # NOTE(review): the string-key lookup below assumes the status object
      # resolves 'retweeted_status' as a String key — confirm for the
      # installed tweetstream version.
      if status.has_key? 'retweeted_status'
        # New-style retweet: the original author is embedded in the payload.
        doc[:isRT] = true
        doc[:originID] = status.retweeted_status.user.id
        doc[:originName] = status.retweeted_status.user.screen_name
      elsif match = status.text.match(/^RT @(\w+)/)
        # Old-style retweet: only the original author's name is recoverable
        # from the "RT @name" prefix.
        doc[:originName] = match[1]
        doc[:isRT] = true
      else
        doc[:isRT] = false
      end

      doc[:timestamp] = Time.parse(status.created_at).utc.xmlschema
      doc[:text] = status.text

      pp doc if debug

      solr.add(doc)

      tweet_count += 1

      pp "Count: #{tweet_count}" if debug
    rescue StandardError => e
      # Rescue StandardError rather than Exception so that Interrupt (Ctrl-C),
      # SystemExit and other process-level exceptions still propagate and the
      # script can be stopped; ordinary per-tweet failures are just reported.
      pp e
    end

  end

