Commit 5809ea1c by jimabramson

reimplement db:reindex_search using tire api; avoid OOM errors

parent 5645549b
......@@ -32,7 +32,7 @@ gem 'mongoid_magic_counter_cache', :git => 'https://github.com/dementrock/mongoi
gem 'kaminari', :require => 'kaminari/sinatra', :git => 'https://github.com/dementrock/kaminari.git'
gem 'faker'
gem 'will_paginate_mongoid'
gem 'rdiscount'
gem 'nokogiri'
......
......@@ -151,6 +151,10 @@ GEM
kgio (~> 2.6)
rack
raindrops (~> 0.7)
will_paginate (3.0.4)
will_paginate_mongoid (1.1.0)
mongoid (>= 2.4)
will_paginate (~> 3.0)
yajl-ruby (1.1.0)
PLATFORMS
......@@ -187,4 +191,5 @@ DEPENDENCIES
tire-contrib
unicorn
voteable_mongo!
will_paginate_mongoid
yajl-ruby
......@@ -225,43 +225,62 @@ namespace :db do
# Rebuilds the search indexes for Comment and CommentThread.
#
# For each class the documents are imported into a new index whose name is the
# class's regular index name plus a timestamp suffix. Once the import is done,
# the alias carrying the regular index name is re-pointed at the new index and
# the indexes it previously referenced are deleted. If no alias exists yet, the
# index with the regular name is deleted and an alias of that name is created.
#
# Progress is written to STDERR. Documents are imported in pages of 200 with
# the Mongoid identity map disabled and a GC run per page; per the commit
# message this is meant to avoid out-of-memory errors on large collections.
task :reindex_search => :environment do
  logger = Logger.new(STDERR)
  # One cutoff for the whole run: it is used both for counting and for
  # importing, so the progress percentage and the imported set agree.
  cutoff_dt = DateTime.now
  klasses = [Comment, CommentThread]
  Mongoid.identity_map_enabled = false
  Mongoid.unit_of_work(disable: :all) do
    klasses.each do |klass|
      base_name = klass.tire.index.name
      ## generate a versioned name for the rebuilt index
      # Built by interpolation rather than `<<`, which would append to the
      # string returned by index.name in place.
      new_index = "#{base_name}_#{Time.now.strftime('%Y%m%d%H%M%S')}"
      logger.info "[IMPORT] *BEGIN* importing #{klass.name}"
      # find the number of docs to be indexed
      tot = klass.where(:updated_at.lte => cutoff_dt).count
      cnt = 0
      t = Time.now
      Tire.index new_index do
        import(klass.where(:updated_at.lte => cutoff_dt), {:method => :paginate, :per_page => 200}) do |documents|
          GC.start
          if (cnt % 1000).zero?
            logger.info "[IMPORT] indexed #{cnt} of #{tot} #{klass.name}s (#{(100 * (cnt/tot.to_f)).round(2)}% complete after #{((Time.now - t) / 60).round(2)} minutes)"
          end
          cnt += documents.length
          # The block's value is handed back to import as the batch to index.
          documents
        end
      end
      logger.info "[IMPORT] *DONE* imported #{klass.name}: #{cnt} documents in #{((Time.now - t) / 60).round(2)} minutes"
      logger.info '[IMPORT] about to swap index'
      index_alias = Tire::Alias.find(base_name)
      if index_alias
        # NOTE(review): the list of old indexes is looked up separately from
        # index_alias, presumably so the collection being iterated is not the
        # one being emptied below - confirm before merging the two lookups.
        old_indices = Tire::Alias.find(base_name).indices
        logger.info "[IMPORT] aliases found: #{old_indices.to_ary.join(',')}. deleting."
        old_indices.each do |index|
          index_alias.indices.delete index
        end
        index_alias.indices.add new_index
        index_alias.save
        # Only after the alias has been saved are the superseded indexes removed.
        old_indices.each do |index|
          logger.info "[IMPORT] deleting index: #{index}"
          stale = Tire::Index.new(index)
          stale.delete if stale.exists?
        end
      else
        logger.info "[IMPORT] no aliases found. deleting index. Creating new one for #{klass} and setting up alias."
        # The regular name is taken by a real index; delete it so the name
        # can be used for the alias instead.
        klass.tire.index.delete
        index_alias = Tire::Alias.new
        index_alias.name(base_name)
        index_alias.index(new_index)
        index_alias.save
      end
      logger.info "[IMPORT] done. Index: '#{new_index}' created."
    end
  end
end
task :add_anonymous_to_peers => :environment do
......@@ -279,4 +298,4 @@ namespace :jobs do
# Starts a Delayed::Worker configured from the environment:
# MIN_PRIORITY / MAX_PRIORITY are passed through as-is, and the queue list is
# the comma-separated value of QUEUES, falling back to QUEUE, then to empty.
task :work => :environment do
  queue_names = (ENV['QUEUES'] || ENV['QUEUE'] || '').split(',')
  worker = Delayed::Worker.new(
    :min_priority => ENV['MIN_PRIORITY'],
    :max_priority => ENV['MAX_PRIORITY'],
    :queues => queue_names,
    :quiet => false
  )
  worker.start
end
end
\ No newline at end of file
end
......@@ -89,11 +89,11 @@ class Comment < Content
as_document.slice(*%w[body course_id endorsed anonymous anonymous_to_peers created_at updated_at at_position_list])
.merge("id" => _id)
.merge("user_id" => author_id)
.merge("username" => author.username)
.merge("username" => author.nil? ? "na" : author.username) # avoid crashing to_hash on orphaned comments
.merge("depth" => depth)
.merge("closed" => comment_thread.closed)
.merge("closed" => comment_thread.nil? ? false : comment_thread.closed) # ditto
.merge("thread_id" => comment_thread_id)
.merge("commentable_id" => comment_thread.commentable_id)
.merge("commentable_id" => comment_thread.nil? ? nil : comment_thread.commentable_id) # ditto
.merge("votes" => votes.slice(*%w[count up_count down_count point]))
.merge("abuse_flaggers" => abuse_flaggers)
.merge("type" => "comment")
......
......@@ -124,6 +124,7 @@ class CommentThread < Content
#so first, find the comment threads associated with comments that hit the query
search = Tire::Search::Search.new 'comment_threads'
search.query {|query| query.text :_all, params["text"]} if params["text"]
search.highlight({title: { number_of_fragments: 0 } } , {body: { number_of_fragments: 0 } }, options: { tag: "<highlight>" })
search.filter(:bool, :must => params["tags"].split(/,/).map{ |tag| { :term => { :tags_array => tag } } }) if params["tags"]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment