Commit 55f70cb9 by jimabramson

Revert "Merge pull request #41 from edx/feature/kevin/deep_search"

This reverts commit d692c082, reversing
changes made to c2a53256.
parent 55cc9867
......@@ -133,8 +133,8 @@ namespace :db do
task :generate_comments, [:commentable_id, :num_threads, :num_top_comments, :num_subcomments] => :environment do |t, args|
args.with_defaults(:num_threads => THREADS_PER_COMMENTABLE,
:num_top_comments=>TOP_COMMENTS_PER_THREAD,
:num_subcomments=> ADDITIONAL_COMMENTS_PER_THREAD)
:num_top_comments=>TOP_COMMENTS_PER_THREAD,
:num_subcomments=> ADDITIONAL_COMMENTS_PER_THREAD)
generate_comments_for(args[:commentable_id], args[:num_threads], args[:num_top_comments], args[:num_subcomments])
end
......@@ -152,54 +152,54 @@ namespace :db do
coll = db.collection("contents")
args[:num].to_i.times do
doc = {"_type" => "CommentThread", "anonymous" => [true, false].sample, "at_position_list" => [],
"tags_array" => [],
"comment_count" => 0, "title" => Faker::Lorem.sentence(6), "author_id" => rand(1..10).to_s,
"body" => Faker::Lorem.paragraphs.join("\n\n"), "course_id" => COURSE_ID, "created_at" => Time.now,
"commentable_id" => COURSE_ID, "closed" => [true, false].sample, "updated_at" => Time.now, "last_activity_at" => Time.now,
"votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}}
coll.insert(doc)
end
binding.pry
Tire.index('comment_threads').delete
CommentThread.create_elasticsearch_index
Tire.index('comment_threads') { import CommentThread.all }
"tags_array" => [],
"comment_count" => 0, "title" => Faker::Lorem.sentence(6), "author_id" => rand(1..10).to_s,
"body" => Faker::Lorem.paragraphs.join("\n\n"), "course_id" => COURSE_ID, "created_at" => Time.now,
"commentable_id" => COURSE_ID, "closed" => [true, false].sample, "updated_at" => Time.now, "last_activity_at" => Time.now,
"votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}}
coll.insert(doc)
end
binding.pry
Tire.index('comment_threads').delete
CommentThread.create_elasticsearch_index
Tire.index('comment_threads') { import CommentThread.all }
end
task :seed_fast => :environment do
ADDITIONAL_COMMENTS_PER_THREAD = 20
config = YAML.load_file("config/mongoid.yml")[Sinatra::Base.environment]["sessions"]["default"]
connnection = Mongo::Connection.new(config["hosts"][0].split(":")[0], config["hosts"][0].split(":")[1])
db = Mongo::Connection.new.db(config["database"])
coll = db.collection("contents")
Comment.delete_all
CommentThread.each do |thread|
ADDITIONAL_COMMENTS_PER_THREAD.times do
doc = {"_type" => "Comment", "anonymous" => false, "at_position_list" => [],
"author_id" => rand(1..10).to_s, "body" => Faker::Lorem.paragraphs.join("\n\n"),
"comment_thread_id" => BSON::ObjectId.from_string(thread.id.to_s), "course_id" => COURSE_ID,
"created_at" => Time.now,
"endorsed" => [true, false].sample, "parent_ids" => [], "updated_at" => Time.now,
"votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}}
coll.insert(doc)
end
end
task :seed_fast => :environment do
ADDITIONAL_COMMENTS_PER_THREAD = 20
config = YAML.load_file("config/mongoid.yml")[Sinatra::Base.environment]["sessions"]["default"]
connnection = Mongo::Connection.new(config["hosts"][0].split(":")[0], config["hosts"][0].split(":")[1])
db = Mongo::Connection.new.db(config["database"])
coll = db.collection("contents")
Comment.delete_all
CommentThread.each do |thread|
ADDITIONAL_COMMENTS_PER_THREAD.times do
doc = {"_type" => "Comment", "anonymous" => false, "at_position_list" => [],
"author_id" => rand(1..10).to_s, "body" => Faker::Lorem.paragraphs.join("\n\n"),
"comment_thread_id" => BSON::ObjectId.from_string(thread.id.to_s), "course_id" => COURSE_ID,
"created_at" => Time.now,
"endorsed" => [true, false].sample, "parent_ids" => [], "updated_at" => Time.now,
"votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}}
coll.insert(doc)
end
end
end
task :seed => :environment do
task :seed => :environment do
Comment.delete_all
CommentThread.delete_all
CommentThread.recalculate_all_context_tag_weights!
User.delete_all
Notification.delete_all
Subscription.delete_all
Tire.index 'comment_threads' do delete end
CommentThread.create_elasticsearch_index
Comment.delete_all
CommentThread.delete_all
CommentThread.recalculate_all_context_tag_weights!
User.delete_all
Notification.delete_all
Subscription.delete_all
Tire.index 'comment_threads' do delete end
CommentThread.create_elasticsearch_index
beginning_time = Time.now
beginning_time = Time.now
users = (1..10).map {|id| create_test_user(id)}
users = (1..10).map {|id| create_test_user(id)}
# 3.times do
# other_user = users[1..9].sample
# users.first.subscribe(other_user)
......@@ -224,44 +224,40 @@ namespace :db do
end
task :reindex_search => :environment do
Mongoid.identity_map_enabled = false
#Mongoid.identity_map_enabled = false
#klasses = [CommentThread,Comment]
#klasses.each do |klass|
klass = CommentThread
ENV['CLASS'] = klass.name
ENV['INDEX'] = new_index = klass.tire.index.name << '_' << Time.now.strftime('%Y%m%d%H%M%S')
klass = CommentThread
ENV['CLASS'] = klass.name
ENV['INDEX'] = new_index = klass.tire.index.name << '_' << Time.now.strftime('%Y%m%d%H%M%S')
Rake::Task["tire:import"].invoke
Rake::Task["tire:import"].invoke
puts '[IMPORT] about to swap index'
if a = Tire::Alias.find(klass.tire.index.name)
puts "[IMPORT] aliases found: #{Tire::Alias.find(klass.tire.index.name).indices.to_ary.join(',')}. deleting."
old_indices = Tire::Alias.find(klass.tire.index.name).indices
old_indices.each do |index|
a.indices.delete index
end
puts '[IMPORT] about to swap index'
if a = Tire::Alias.find(klass.tire.index.name)
puts "[IMPORT] aliases found: #{Tire::Alias.find(klass.tire.index.name).indices.to_ary.join(',')}. deleting."
old_indices = Tire::Alias.find(klass.tire.index.name).indices
old_indices.each do |index|
a.indices.delete index
end
a.indices.add new_index
a.save
a.indices.add new_index
a.save
old_indices.each do |index|
puts "[IMPORT] deleting index: #{index}"
i = Tire::Index.new(index)
i.delete if i.exists?
end
else
puts "[IMPORT] no aliases found. deleting index. Creating new one for #{klass} and setting up alias."
klass.tire.index.delete
a = Tire::Alias.new
a.name(klass.tire.index.name)
a.index(new_index)
a.save
old_indices.each do |index|
puts "[IMPORT] deleting index: #{index}"
i = Tire::Index.new(index)
i.delete if i.exists?
end
else
puts "[IMPORT] no aliases found. deleting index. creating new one and setting up alias."
klass.tire.index.delete
a = Tire::Alias.new
a.name(klass.tire.index.name)
a.index(new_index)
a.save
end
puts "[IMPORT] done. Index: '#{new_index}' created."
#end
puts "[IMPORT] done. Index: '#{new_index}' created."
end
task :add_anonymous_to_peers => :environment do
......@@ -279,4 +275,4 @@ namespace :jobs do
task :work => :environment do
Delayed::Worker.new(:min_priority => ENV['MIN_PRIORITY'], :max_priority => ENV['MAX_PRIORITY'], :queues => (ENV['QUEUES'] || ENV['QUEUE'] || '').split(','), :quiet => false).start
end
end
\ No newline at end of file
end
......@@ -52,8 +52,6 @@ DEFAULT_PER_PAGE = 20
if RACK_ENV.to_s != "test" # disable api_key auth in test environment
before do
#duct tape to avoid 401 on deep search performance test
#error 401 unless params[:api_key] == CommentService.config[:api_key] or true
error 401 unless params[:api_key] == CommentService.config[:api_key]
end
end
......
......@@ -5,4 +5,3 @@ elasticsearch_server: <%= ENV['SEARCH_SERVER'] || 'http://localhost:9200' %>
cache_timeout:
threads_search: 10
threads_query: 10
max_deep_search_comment_count: 5000
development:
sessions:
default:
database: cs_comments_service_development
hosts:
- localhost:27017
default:
database: cs_comments_service_development
hosts:
- localhost:27017
test:
sessions:
......
require 'rest_client'
# Map each deployment environment name to the root URL of the comment service.
roots = {}
roots['development'] = "http://localhost:8000"
roots['test'] = "http://localhost:8000"
roots['production'] = "http://edx.org"
roots['staging'] = "http://stage.edx.org"
# Root URL for the environment selected via SINATRA_ENV (nil if unset/unknown).
ROOT = roots[ENV['SINATRA_ENV']]
# Fix: the deep_search:performance task below requests "#{PREFIX}/threads",
# but only ROOT was ever defined, so the task raised NameError at runtime.
# The request's query params (text, sort_key, sort_order, ...) match the
# thread-search endpoint, which the specs exercise as GET /api/v1/search/threads.
PREFIX = "#{ROOT}/api/v1/search"
namespace :deep_search do
# Performance smoke test for deep (thread + comment) search: seeds the DB with
# randomly-bodied threads and comments, then fires 1000 search requests at the
# thread-search endpoint and prints total wall-clock time.
task :performance => :environment do
#USAGE
#SINATRA_ENV=development rake kpis:prolific
#or
#SINATRA_ENV=development bundle exec rake kpis:prolific
#create comment and thread bodies
# 50 random 8-character lowercase strings; reused both as document bodies and
# as search terms, so every query is guaranteed to have potential hits.
bodies = []
50.times do |i|
bodies << (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
end
# NOTE(review): Mongoid criteria are lazy — this is evaluated when .sample is
# called below, i.e. AFTER the new threads are inserted, so statement order
# here matters.
parents = CommentThread.limit(100)
#now create comments and threads with hits
puts "Manufacturing Threads"
# Create 100 batches of 1..5 threads each (~300 threads on average).
100.times do |j|
(1..5).to_a.sample.times do |i|
c = CommentThread.new
c.course_id = 'sample course'
c.title = 'sample title'
c.commentable_id = 'sample commetable'
c.body = bodies.sample
c.author = 1
c.save
end
end
puts "Manufacturing Comments"
# Create 100 batches of 1..5 comments, each attached to a random parent thread.
100.times do |j|
(1..5).to_a.sample.times do |i|
c = Comment.new
c.course_id = 'sample course'
c.body = bodies.sample
c.comment_thread_id = parents.sample.id
c.author = 1
c.save
end
end
sort_keys = %w[date activity votes comments]
sort_order = "desc"
#set the sinatra env to test to avoid 401'ing
# (api_key auth is only enforced outside the test environment)
set :environment, :test
start_time = Time.now
puts "Starting test at #{start_time}"
# Fire 1000 search requests with a random sort key and random search text.
# NOTE(review): PREFIX is not defined anywhere visible in this file (only
# ROOT is) — verify it exists in scope, otherwise this raises NameError.
1000.times do |i|
query_params = { course_id: "1", sort_key: sort_keys.sample, sort_order: sort_order, page: 1, per_page: 5, text: bodies.sample }
RestClient.get "#{PREFIX}/threads", params: query_params
end
end_time = Time.now
puts "Ending test at #{end_time}"
puts "Total Time: #{(end_time - start_time).to_f} seconds"
end
end
......@@ -17,18 +17,6 @@ class Comment < Content
field :at_position_list, type: Array, default: []
index({author_id: 1, course_id: 1})
include Tire::Model::Search
include Tire::Model::Callbacks
mapping do
indexes :body, type: :string, analyzer: :snowball, stored: true, term_vector: :with_positions_offsets
indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false
#indexes :comment_thread_id, type: :string, stored: true, index: :not_analyzed, included_in_all: false
#current prod tire doesn't support indexing BSON ids, will reimplement when we upgrade
end
belongs_to :comment_thread, index: true
belongs_to :author, class_name: "User", index: true
......
......@@ -45,8 +45,7 @@ class CommentThread < Content
indexes :commentable_id, type: :string, index: :not_analyzed, included_in_all: false
indexes :author_id, type: :string, as: 'author_id', index: :not_analyzed, included_in_all: false
indexes :group_id, type: :integer, as: 'group_id', index: :not_analyzed, included_in_all: false
indexes :id, :index => :not_analyzed
indexes :thread_id, :analyzer => :keyword, :as => "_id"
#indexes :pinned, type: :boolean, as: 'pinned', index: :not_analyzed, included_in_all: false
end
belongs_to :author, class_name: "User", inverse_of: :comment_threads, index: true#, autosave: true
......@@ -94,7 +93,6 @@ class CommentThread < Content
end
def self.perform_search(params, options={})
page = [1, options[:page] || 1].max
per_page = options[:per_page] || 20
sort_key = options[:sort_key]
......@@ -106,23 +104,6 @@ class CommentThread < Content
return results
end
end
#Example request:
#GET /api/v1/search/threads?user_id=1&recursive=False&sort_key=date&text=response&sort_order=desc&course_id=HarvardX%2FHLS1xD%2FCopyright&per_page=20&api_key=PUT_YOUR_API_KEY_HERE&page=1
#KChugh - Unfortunately, there's no algorithmically nice way to handle pagination with
#stitching together Comments and CommentThreads, because there is no deterministic relationship
#between the ordinality of comments and threads.
#the best solution is to find all of the thread ids for matching comment hits, and union them
#with the comment thread query, however, Tire does not support ORing a query key with a term filter
#so the 3rd best solution is to run two Tire searches (3 actually, one to query the comments, one to query the threads based on
#thread ids and the original thread search) and merge the results, uniqifying the results in the process.
#so first, find the comment threads associated with comments that hit the query
search = Tire::Search::Search.new 'comment_threads'
search.query {|query| query.text :_all, params["text"]} if params["text"]
search.highlight({title: { number_of_fragments: 0 } } , {body: { number_of_fragments: 0 } }, options: { tag: "<highlight>" })
......@@ -130,79 +111,23 @@ class CommentThread < Content
search.filter(:term, commentable_id: params["commentable_id"]) if params["commentable_id"]
search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
if params["group_id"]
search.filter :or, [
{:not => {:exists => {:field => :group_id}}},
{:term => {:group_id => params["group_id"]}}
]
end
{:term => {:group_id => params["group_id"]}}
]
end
search.sort {|sort| sort.by sort_key, sort_order} if sort_key && sort_order #TODO should have search option 'auto sort or sth'
#again, b/c there is no relationship in ordinality, we cannot paginate if it's a text query
search.size per_page
search.from per_page * (page - 1)
if not params["text"]
search.size per_page
search.from per_page * (page - 1)
end
results = search.results
#if this is a search query, then also search the comments and harvest the matching comments
if params["text"]
search = Tire::Search::Search.new 'comments'
search.query {|query| query.text :_all, params["text"]} if params["text"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
search.size CommentService.config["max_deep_search_comment_count"].to_i
#unfortunately, we cannot paginate here, b/c the ordinality of comments is totally
#unrelated to that of threads
c_results = search.results
comment_ids = c_results.collect{|c| c.id}.uniq
comments = Comment.where(:id.in => comment_ids)
thread_ids = comments.collect{|c| c.comment_thread_id}
#thread_ids = c_results.collect{|c| c.comment_thread_id}
#as soon as we can add comment thread id to the ES index, via Tire upgrade, we'll
#use ES instead of mongo to collect the thread ids
#use the elasticsearch index instead to avoid DB hit
original_thread_ids = results.collect{|r| r.id}
#now add the original search thread ids
thread_ids += original_thread_ids
thread_ids = thread_ids.uniq
#now run one more search to harvest the threads and filter by group
search = Tire::Search::Search.new 'comment_threads'
search.filter(:terms, :thread_id => thread_ids)
search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
search.size per_page
search.from per_page * (page - 1)
if params["group_id"]
search.filter :or, [
{:not => {:exists => {:field => :group_id}}},
{:term => {:group_id => params["group_id"]}}
]
end
search.sort {|sort| sort.by sort_key, sort_order} if sort_key && sort_order
results = search.results
end
if CommentService.config[:cache_enabled]
Sinatra::Application.cache.set(memcached_key, results, CommentService.config[:cache_timeout][:threads_search].to_i)
end
......@@ -256,6 +181,7 @@ class CommentThread < Content
"group_id" => group_id,
"pinned" => pinned?,
"endorsed" => endorsed?)
if params[:recursive]
doc = doc.merge("children" => root_comments.map{|c| c.to_hash(recursive: true)})
end
......@@ -276,8 +202,8 @@ class CommentThread < Content
# unread count
if last_read_time
unread_count = self.comments.where(
:updated_at => {:$gte => last_read_time},
:author_id => {:$ne => params[:user_id]},
:updated_at => {:$gte => last_read_time},
:author_id => {:$ne => params[:user_id]},
).count
read = last_read_time >= self.updated_at
else
......@@ -291,8 +217,8 @@ class CommentThread < Content
end
doc = doc.merge("unread_comments_count" => unread_count)
.merge("read" => read)
.merge("comments_count" => comments_count)
.merge("read" => read)
.merge("comments_count" => comments_count)
doc
......@@ -302,12 +228,13 @@ class CommentThread < Content
!!(tag =~ RE_TAG)
end
private
def comment_thread_id
#so that we can use the comment thread id as a common attribute for flagging
self.id
end
private
RE_HEADCHAR = /[a-z0-9]/
RE_ENDONLYCHAR = /\+/
RE_ENDCHAR = /[a-z0-9\#]/
......
require 'spec_helper'
# Integration specs for the thread-search endpoint, covering both direct
# thread-body matches and "deep" matches found via a comment's body.
describe "app" do
describe "thread search" do
describe "GET /api/v1/search/threads" do
it "returns thread with query match" do
# Reuse user 1 if a previous spec created it; otherwise create it.
user = User.find 1
if user.nil?
user = create_test_user(1)
end
commentable = Commentable.new("question_1")
# Random 8-char body makes the search term effectively unique per run.
random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
thread = CommentThread.new(title: "Test title", body: random_string, course_id: "1", commentable_id: commentable.id)
thread.author = user
thread.save!
# NOTE(review): presumably waits for the Elasticsearch index to refresh
# before querying — confirm; a fixed sleep makes this spec slow/flaky.
sleep 3
get "/api/v1/search/threads", text: random_string
last_response.should be_ok
threads = parse(last_response.body)['collection']
# The seeded thread must appear in the search results.
threads.select{|t| t["id"].to_s == thread.id.to_s}.first.should_not be_nil
end
end
end
describe "comment search" do
describe "GET /api/v1/search/threads" do
it "returns thread with comment query match" do
user = User.find 1
if user.nil?
user = create_test_user(1)
end
commentable = Commentable.new("question_1")
random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
# Thread body deliberately does NOT contain the search term; only the
# comment does, so a hit proves comment-level (deep) search works.
thread = CommentThread.new(title: "Test title", body: "elephant otter", course_id: "1", commentable_id: commentable.id)
thread.author = user
thread.save!
sleep 3
comment = Comment.new(body: random_string, course_id: "1", commentable_id: commentable.id)
comment.author = user
comment.comment_thread = thread
comment.save!
sleep 1
get "/api/v1/search/threads", text: random_string
last_response.should be_ok
threads = parse(last_response.body)['collection']
# Searching the comment's body must surface the parent thread.
threads.select{|t| t["id"].to_s == thread.id.to_s}.first.should_not be_nil
end
end
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment