Commit 05f285a9 by Kevin Chugh

limit the number of comments searched, prepare for ES and Tire upgrade to allow…

limit the number of comments searched, prepare for ES and Tire upgrade to allow indexing comments comment_thread_id
parent 7fc690b5
...@@ -133,8 +133,8 @@ namespace :db do ...@@ -133,8 +133,8 @@ namespace :db do
task :generate_comments, [:commentable_id, :num_threads, :num_top_comments, :num_subcomments] => :environment do |t, args| task :generate_comments, [:commentable_id, :num_threads, :num_top_comments, :num_subcomments] => :environment do |t, args|
args.with_defaults(:num_threads => THREADS_PER_COMMENTABLE, args.with_defaults(:num_threads => THREADS_PER_COMMENTABLE,
:num_top_comments=>TOP_COMMENTS_PER_THREAD, :num_top_comments=>TOP_COMMENTS_PER_THREAD,
:num_subcomments=> ADDITIONAL_COMMENTS_PER_THREAD) :num_subcomments=> ADDITIONAL_COMMENTS_PER_THREAD)
generate_comments_for(args[:commentable_id], args[:num_threads], args[:num_top_comments], args[:num_subcomments]) generate_comments_for(args[:commentable_id], args[:num_threads], args[:num_top_comments], args[:num_subcomments])
end end
...@@ -152,54 +152,54 @@ namespace :db do ...@@ -152,54 +152,54 @@ namespace :db do
coll = db.collection("contents") coll = db.collection("contents")
args[:num].to_i.times do args[:num].to_i.times do
doc = {"_type" => "CommentThread", "anonymous" => [true, false].sample, "at_position_list" => [], doc = {"_type" => "CommentThread", "anonymous" => [true, false].sample, "at_position_list" => [],
"tags_array" => [], "tags_array" => [],
"comment_count" => 0, "title" => Faker::Lorem.sentence(6), "author_id" => rand(1..10).to_s, "comment_count" => 0, "title" => Faker::Lorem.sentence(6), "author_id" => rand(1..10).to_s,
"body" => Faker::Lorem.paragraphs.join("\n\n"), "course_id" => COURSE_ID, "created_at" => Time.now, "body" => Faker::Lorem.paragraphs.join("\n\n"), "course_id" => COURSE_ID, "created_at" => Time.now,
"commentable_id" => COURSE_ID, "closed" => [true, false].sample, "updated_at" => Time.now, "last_activity_at" => Time.now, "commentable_id" => COURSE_ID, "closed" => [true, false].sample, "updated_at" => Time.now, "last_activity_at" => Time.now,
"votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}} "votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}}
coll.insert(doc)
end
binding.pry
Tire.index('comment_threads').delete
CommentThread.create_elasticsearch_index
Tire.index('comment_threads') { import CommentThread.all }
end
task :seed_fast => :environment do
ADDITIONAL_COMMENTS_PER_THREAD = 20
config = YAML.load_file("config/mongoid.yml")[Sinatra::Base.environment]["sessions"]["default"]
connnection = Mongo::Connection.new(config["hosts"][0].split(":")[0], config["hosts"][0].split(":")[1])
db = Mongo::Connection.new.db(config["database"])
coll = db.collection("contents")
Comment.delete_all
CommentThread.each do |thread|
ADDITIONAL_COMMENTS_PER_THREAD.times do
doc = {"_type" => "Comment", "anonymous" => false, "at_position_list" => [],
"author_id" => rand(1..10).to_s, "body" => Faker::Lorem.paragraphs.join("\n\n"),
"comment_thread_id" => BSON::ObjectId.from_string(thread.id.to_s), "course_id" => COURSE_ID,
"created_at" => Time.now,
"endorsed" => [true, false].sample, "parent_ids" => [], "updated_at" => Time.now,
"votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}}
coll.insert(doc) coll.insert(doc)
end end
binding.pry
Tire.index('comment_threads').delete
CommentThread.create_elasticsearch_index
Tire.index('comment_threads') { import CommentThread.all }
end end
end
task :seed => :environment do task :seed_fast => :environment do
ADDITIONAL_COMMENTS_PER_THREAD = 20
config = YAML.load_file("config/mongoid.yml")[Sinatra::Base.environment]["sessions"]["default"]
connnection = Mongo::Connection.new(config["hosts"][0].split(":")[0], config["hosts"][0].split(":")[1])
db = Mongo::Connection.new.db(config["database"])
coll = db.collection("contents")
Comment.delete_all
CommentThread.each do |thread|
ADDITIONAL_COMMENTS_PER_THREAD.times do
doc = {"_type" => "Comment", "anonymous" => false, "at_position_list" => [],
"author_id" => rand(1..10).to_s, "body" => Faker::Lorem.paragraphs.join("\n\n"),
"comment_thread_id" => BSON::ObjectId.from_string(thread.id.to_s), "course_id" => COURSE_ID,
"created_at" => Time.now,
"endorsed" => [true, false].sample, "parent_ids" => [], "updated_at" => Time.now,
"votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}}
coll.insert(doc)
end
end
end
Comment.delete_all task :seed => :environment do
CommentThread.delete_all
CommentThread.recalculate_all_context_tag_weights!
User.delete_all
Notification.delete_all
Subscription.delete_all
Tire.index 'comment_threads' do delete end
CommentThread.create_elasticsearch_index
beginning_time = Time.now Comment.delete_all
CommentThread.delete_all
CommentThread.recalculate_all_context_tag_weights!
User.delete_all
Notification.delete_all
Subscription.delete_all
Tire.index 'comment_threads' do delete end
CommentThread.create_elasticsearch_index
users = (1..10).map {|id| create_test_user(id)} beginning_time = Time.now
users = (1..10).map {|id| create_test_user(id)}
# 3.times do # 3.times do
# other_user = users[1..9].sample # other_user = users[1..9].sample
# users.first.subscribe(other_user) # users.first.subscribe(other_user)
...@@ -228,7 +228,7 @@ namespace :db do ...@@ -228,7 +228,7 @@ namespace :db do
Mongoid.identity_map_enabled = false Mongoid.identity_map_enabled = false
klasses = [Comment] klasses = [Comment]
klasses.each do |klass| klasses.each do |klass|
ENV['CLASS'] = klass.name ENV['CLASS'] = klass.name
ENV['INDEX'] = new_index = klass.tire.index.name << '_' << Time.now.strftime('%Y%m%d%H%M%S') ENV['INDEX'] = new_index = klass.tire.index.name << '_' << Time.now.strftime('%Y%m%d%H%M%S')
...@@ -251,7 +251,7 @@ namespace :db do ...@@ -251,7 +251,7 @@ namespace :db do
i.delete if i.exists? i.delete if i.exists?
end end
else else
puts "[IMPORT] no aliases found. deleting index. creating new one and setting up alias." puts "[IMPORT] no aliases found. deleting index. Creating new one for #{klass} and setting up alias."
klass.tire.index.delete klass.tire.index.delete
a = Tire::Alias.new a = Tire::Alias.new
a.name(klass.tire.index.name) a.name(klass.tire.index.name)
...@@ -278,4 +278,4 @@ namespace :jobs do ...@@ -278,4 +278,4 @@ namespace :jobs do
task :work => :environment do task :work => :environment do
Delayed::Worker.new(:min_priority => ENV['MIN_PRIORITY'], :max_priority => ENV['MAX_PRIORITY'], :queues => (ENV['QUEUES'] || ENV['QUEUE'] || '').split(','), :quiet => false).start Delayed::Worker.new(:min_priority => ENV['MIN_PRIORITY'], :max_priority => ENV['MAX_PRIORITY'], :queues => (ENV['QUEUES'] || ENV['QUEUE'] || '').split(','), :quiet => false).start
end end
end end
\ No newline at end of file
...@@ -49,8 +49,8 @@ DEFAULT_PER_PAGE = 20 ...@@ -49,8 +49,8 @@ DEFAULT_PER_PAGE = 20
if RACK_ENV.to_s != "test" # disable api_key auth in test environment if RACK_ENV.to_s != "test" # disable api_key auth in test environment
before do before do
#duct tape to avoid 401 on deep search performance test #duct tape to avoid 401 on deep search performance test
error 401 unless params[:api_key] == CommentService.config[:api_key] or true #error 401 unless params[:api_key] == CommentService.config[:api_key] or true
#error 401 unless params[:api_key] == CommentService.config[:api_key] error 401 unless params[:api_key] == CommentService.config[:api_key]
end end
end end
......
...@@ -5,3 +5,4 @@ elasticsearch_server: <%= ENV['SEARCH_SERVER'] || 'http://localhost:9200' %> ...@@ -5,3 +5,4 @@ elasticsearch_server: <%= ENV['SEARCH_SERVER'] || 'http://localhost:9200' %>
cache_timeout: cache_timeout:
threads_search: 10 threads_search: 10
threads_query: 10 threads_query: 10
max_deep_search_comment_count: 5000
...@@ -5,6 +5,16 @@ development: ...@@ -5,6 +5,16 @@ development:
hosts: hosts:
- localhost:27017 - localhost:27017
development2:
sessions:
default:
database: comments-prod-clone
hosts:
- charlotte.mongohq.com:10035
username: kevinchugh@edx.org
password: charlie123
test: test:
sessions: sessions:
default: default:
......
...@@ -25,6 +25,8 @@ class Comment < Content ...@@ -25,6 +25,8 @@ class Comment < Content
mapping do mapping do
indexes :body, type: :string, analyzer: :snowball, stored: true, term_vector: :with_positions_offsets indexes :body, type: :string, analyzer: :snowball, stored: true, term_vector: :with_positions_offsets
indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false
#indexes :comment_thread_id, type: :string, stored: true, index: :not_analyzed, included_in_all: false
#current prod tire doesn't support indexing BSON ids, will reimplement when we upgrade
end end
......
...@@ -155,6 +155,7 @@ class CommentThread < Content ...@@ -155,6 +155,7 @@ class CommentThread < Content
search = Tire::Search::Search.new 'comments' search = Tire::Search::Search.new 'comments'
search.query {|query| query.text :_all, params["text"]} if params["text"] search.query {|query| query.text :_all, params["text"]} if params["text"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"] search.filter(:term, course_id: params["course_id"]) if params["course_id"]
search.size CommentService.config["max_deep_search_comment_count"].to_i
#unfortunately, we cannot paginate here, b/c we don't know how the ordinality is totally #unfortunately, we cannot paginate here, b/c we don't know how the ordinality is totally
#unrelated to that of threads #unrelated to that of threads
...@@ -162,10 +163,15 @@ class CommentThread < Content ...@@ -162,10 +163,15 @@ class CommentThread < Content
c_results = search.results c_results = search.results
comment_ids = c_results.collect{|c| c.id}.uniq comment_ids = c_results.collect{|c| c.id}.uniq
comments = Comment.where(:id.in => comment_ids) comments = Comment.where(:id.in => comment_ids)
thread_ids = comments.collect{|c| c.comment_thread_id} thread_ids = comments.collect{|c| c.comment_thread_id}
#thread_ids = c_results.collect{|c| c.comment_thread_id}
#as soon as we can add comment thread id to the ES index, via Tire upgrade, we'll
#use ES instead of mongo to collect the thread ids
#use the elasticsearch index instead to avoid DB hit
original_thread_ids = results.collect{|r| r.id} original_thread_ids = results.collect{|r| r.id}
#now add the original search thread ids #now add the original search thread ids
......
...@@ -11,13 +11,13 @@ describe "app" do ...@@ -11,13 +11,13 @@ describe "app" do
commentable = Commentable.new("question_1") commentable = Commentable.new("question_1")
random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
thread = CommentThread.new(title: "Test title", body: random_string, course_id: "1", commentable_id: commentable.id) thread = CommentThread.new(title: "Test title", body: random_string, course_id: "1", commentable_id: commentable.id)
thread.author = user thread.author = user
thread.save! thread.save!
sleep 3 sleep 3
get "/api/v1/search/threads", text: random_string get "/api/v1/search/threads", text: random_string
last_response.should be_ok last_response.should be_ok
...@@ -25,32 +25,33 @@ describe "app" do ...@@ -25,32 +25,33 @@ describe "app" do
threads.select{|t| t["id"].to_s == thread.id.to_s}.first.should_not be_nil threads.select{|t| t["id"].to_s == thread.id.to_s}.first.should_not be_nil
end end
end end
end end
describe "comment search" do describe "comment search" do
describe "GET /api/v1/search/threads" do describe "GET /api/v1/search/threads" do
it "returns thread with comment query match" do it "returns thread with comment query match" do
user = User.find 1 user = User.find 1
if user.nil? if user.nil?
user = create_test_user(1) user = create_test_user(1)
end end
commentable = Commentable.new("question_1") commentable = Commentable.new("question_1")
random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
thread = CommentThread.new(title: "Test title", body: "elephant otter", course_id: "1", commentable_id: commentable.id) thread = CommentThread.new(title: "Test title", body: "elephant otter", course_id: "1", commentable_id: commentable.id)
thread.author = user thread.author = user
thread.save! thread.save!
sleep 3 sleep 3
comment = Comment.new(body: random_string, course_id: "1", commentable_id: commentable.id) comment = Comment.new(body: random_string, course_id: "1", commentable_id: commentable.id)
comment.author = user comment.author = user
comment.comment_thread = thread comment.comment_thread = thread
comment.save! comment.save!
sleep 1 sleep 1
get "/api/v1/search/threads", text: random_string get "/api/v1/search/threads", text: random_string
last_response.should be_ok last_response.should be_ok
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment