#!/usr/bin/env ruby

require 'rubygems'
require 'solr'

# Copies every document from the source Solr instance to the target instance,
# page by page, adding a derived boolean :isRT (retweet) field to each one.

# Fields copied unconditionally (a missing value is copied as nil).
LEADING_FIELDS = ['id', 'fromName', 'fromID'].freeze
# Fields copied only when the source hit carries a value for them.
OPTIONAL_FIELDS = ['toName', 'toID', 'originName', 'originID'].freeze
# Fields copied unconditionally, after the optional ones.
TRAILING_FIELDS = ['timestamp', 'text'].freeze

# Matches an old-style retweet prefix such as "RT @user" or "RT: @user".
# NOTE(review): `^` matches at the start of any line, not just the start of
# the text, so a later line beginning with "RT @user" also counts. Kept as-is
# to preserve existing classification; confirm whether \A was intended.
RETWEET_PATTERN = /^ *RT *:? @ *(\w+)/

# Number of documents requested per query page.
ROWS = 100000

# Decides whether a document is a retweet.
#
# origin_id - value of the originID field (any non-nil/non-false value marks
#             a new-style retweet)
# text      - the document text; may be nil
#
# Returns true or false.
def retweet?(origin_id, text)
  return true if origin_id

  # to_s guards against hits without a text field, which would otherwise
  # raise NoMethodError on nil.
  !(text.to_s =~ RETWEET_PATTERN).nil?
end

# Builds the hash to index in the target instance from one source hit.
#
# hit - a single query hit, read with string field names
#
# Returns a Hash keyed by symbol field names, including :isRT.
def build_doc(hit)
  doc = {}
  LEADING_FIELDS.each { |field| doc[field.to_sym] = hit[field] }
  OPTIONAL_FIELDS.each { |field| doc[field.to_sym] = hit[field] if hit[field] }
  TRAILING_FIELDS.each { |field| doc[field.to_sym] = hit[field] }
  doc[:isRT] = retweet?(doc[:originID], doc[:text])
  doc
end

# connect to both solr instances
source_conn = Solr::Connection.new('http://localhost:8983/solr', :autocommit => :off)
target_conn = Solr::Connection.new('http://braccetto-lin.atp.nicta.com.au:8983/solr', :autocommit => :off)

count = 0

loop do
  # fetch the next page of all documents ('*:*'), starting after those already copied
  response = source_conn.query('*:*', :rows => ROWS, :start => count, :field_list => ['*'])
  total_hits = response.total_hits

  # add each document of this page to the new solr instance
  response.each { |hit| target_conn.add(build_doc(hit)) }

  # autocommit is off, so commit explicitly once per page
  target_conn.commit

  fetched = response.hits.length
  count += fetched
  puts "Progress: #{count}/#{total_hits}"

  break if count >= total_hits

  # An empty page while documents are still reported as outstanding would
  # never advance `count`; stop instead of looping forever.
  abort "No documents returned at offset #{count} (#{total_hits} expected); aborting." if fetched.zero?
end

