Commit d2bf94ef by Jim Abramson

Merge pull request #44 from edx/hotfix/jim/revert_ds

Revert "Merge pull request #41 from edx/feature/kevin/deep_search"

un-merge deep search, as we've rolled it back on prod.
parents 9f427757 55f70cb9
......@@ -224,11 +224,8 @@ namespace :db do
end
task :reindex_search => :environment do
Mongoid.identity_map_enabled = false
#Mongoid.identity_map_enabled = false
#klasses = [CommentThread,Comment]
#klasses.each do |klass|
klass = CommentThread
ENV['CLASS'] = klass.name
ENV['INDEX'] = new_index = klass.tire.index.name << '_' << Time.now.strftime('%Y%m%d%H%M%S')
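#note: CLASS and INDEX are the env vars read by Tire's import rake task; the timestamp
#gives each rebuild a unique index name, which the alias handling below then points the
#canonical name at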
......@@ -252,7 +249,7 @@ namespace :db do
i.delete if i.exists?
end
else
puts "[IMPORT] no aliases found. deleting index. Creating new one for #{klass} and setting up alias."
puts "[IMPORT] no aliases found. deleting index. creating new one and setting up alias."
klass.tire.index.delete
a = Tire::Alias.new
a.name(klass.tire.index.name)
......@@ -261,7 +258,6 @@ namespace :db do
end
puts "[IMPORT] done. Index: '#{new_index}' created."
#end
end
task :add_anonymous_to_peers => :environment do
......
......@@ -52,8 +52,6 @@ DEFAULT_PER_PAGE = 20
if RACK_ENV.to_s != "test" # disable api_key auth in test environment
before do
#duct tape to avoid 401 on deep search performance test
#error 401 unless params[:api_key] == CommentService.config[:api_key] or true
error 401 unless params[:api_key] == CommentService.config[:api_key]
end
end
......
......@@ -5,4 +5,3 @@ elasticsearch_server: <%= ENV['SEARCH_SERVER'] || 'http://localhost:9200' %>
cache_timeout:
threads_search: 10
threads_query: 10
max_deep_search_comment_count: 5000
require 'rest_client'
roots = {}
roots['development'] = "http://localhost:8000"
roots['test'] = "http://localhost:8000"
roots['production'] = "http://edx.org"
roots['staging'] = "http://stage.edx.org"
ROOT = roots[ENV['SINATRA_ENV']]
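#PREFIX is used by the requests below but is not defined anywhere in this file;
#the definition here is an assumption, pointing at the search API path used by the specs
PREFIX = "#{ROOT}/api/v1/search"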
namespace :deep_search do
task :performance => :environment do
#USAGE
#SINATRA_ENV=development rake deep_search:performance
#or
#SINATRA_ENV=development bundle exec rake deep_search:performance
#create comment and thread bodies
bodies = []
50.times do |i|
bodies << (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
end
parents = CommentThread.limit(100)
#now create comments and threads with hits
puts "Manufacturing Threads"
100.times do |j|
(1..5).to_a.sample.times do |i|
c = CommentThread.new
c.course_id = 'sample course'
c.title = 'sample title'
c.commentable_id = 'sample commentable'
c.body = bodies.sample
c.author = 1
c.save
end
end
puts "Manufacturing Comments"
100.times do |j|
(1..5).to_a.sample.times do |i|
c = Comment.new
c.course_id = 'sample course'
c.body = bodies.sample
c.comment_thread_id = parents.sample.id
c.author = 1
c.save
end
end
sort_keys = %w[date activity votes comments]
sort_order = "desc"
#set the sinatra env to test to avoid 401'ing
set :environment, :test
start_time = Time.now
puts "Starting test at #{start_time}"
1000.times do |i|
query_params = { course_id: "1", sort_key: sort_keys.sample, sort_order: sort_order, page: 1, per_page: 5, text: bodies.sample }
RestClient.get "#{PREFIX}/threads", params: query_params
end
end_time = Time.now
puts "Ending test at #{end_time}"
puts "Total Time: #{(end_time - start_time).to_f} seconds"
end
end
......@@ -18,18 +18,6 @@ class Comment < Content
index({author_id: 1, course_id: 1})
include Tire::Model::Search
include Tire::Model::Callbacks
mapping do
indexes :body, type: :string, analyzer: :snowball, stored: true, term_vector: :with_positions_offsets
indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false
#indexes :comment_thread_id, type: :string, stored: true, index: :not_analyzed, included_in_all: false
#the Tire version currently in prod doesn't support indexing BSON ids; will reimplement when we upgrade
end
belongs_to :comment_thread, index: true
belongs_to :author, class_name: "User", index: true
......
......@@ -45,8 +45,7 @@ class CommentThread < Content
indexes :commentable_id, type: :string, index: :not_analyzed, included_in_all: false
indexes :author_id, type: :string, as: 'author_id', index: :not_analyzed, included_in_all: false
indexes :group_id, type: :integer, as: 'group_id', index: :not_analyzed, included_in_all: false
indexes :id, :index => :not_analyzed
indexes :thread_id, :analyzer => :keyword, :as => "_id"
#indexes :pinned, type: :boolean, as: 'pinned', index: :not_analyzed, included_in_all: false
end
belongs_to :author, class_name: "User", inverse_of: :comment_threads, index: true#, autosave: true
......@@ -94,7 +93,6 @@ class CommentThread < Content
end
def self.perform_search(params, options={})
page = [1, options[:page] || 1].max
per_page = options[:per_page] || 20
sort_key = options[:sort_key]
......@@ -106,23 +104,6 @@ class CommentThread < Content
return results
end
end
#GET /api/v1/search/threads?user_id=1&recursive=False&sort_key=date&
#text=response&sort_order=desc&course_id=HarvardX%2FHLS1xD%2FCopyright&per_page=20&api_key=PUT_YOUR_API_KEY_HERE&page=1
#KChugh - Unfortunately, there's no algorithmically nice way to handle pagination when
#stitching together Comments and CommentThreads, because there is no deterministic relationship
#between the ordinality of comments and threads.
#the best solution would be to find all of the thread ids for matching comment hits and union them
#with the comment thread query; however, Tire does not support ORing a query key with a term filter,
#so the third-best solution is to run three Tire searches (one to query the comments, one to query the threads based on
#thread ids, and the original thread search) and merge the results, de-duplicating them in the process.
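#in outline: (1) text-search comment_threads; (2) text-search comments and map the hits
#back to their parent thread ids; (3) search comment_threads again, restricted to the
#union of thread ids, applying pagination and the group/course filters there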
#so first, find the comment threads associated with comments that hit the query
search = Tire::Search::Search.new 'comment_threads'
search.query {|query| query.text :_all, params["text"]} if params["text"]
search.highlight({title: { number_of_fragments: 0 } } , {body: { number_of_fragments: 0 } }, options: { tag: "<highlight>" })
......@@ -132,77 +113,21 @@ class CommentThread < Content
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
if params["group_id"]
search.filter :or, [
{:not => {:exists => {:field => :group_id}}},
{:term => {:group_id => params["group_id"]}}
]
end
search.sort {|sort| sort.by sort_key, sort_order} if sort_key && sort_order #TODO: should support an 'auto sort' search option
#again, because there is no relationship in ordinality, we cannot paginate when this is a text query
if not params["text"]
search.size per_page
search.from per_page * (page - 1)
end
results = search.results
#if this is a search query, then also search the comments and harvest the matching comments
if params["text"]
search = Tire::Search::Search.new 'comments'
search.query {|query| query.text :_all, params["text"]} if params["text"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
search.size CommentService.config["max_deep_search_comment_count"].to_i
#unfortunately, we cannot paginate here, because comment ordinality is
#totally unrelated to that of threads
c_results = search.results
comment_ids = c_results.collect{|c| c.id}.uniq
comments = Comment.where(:id.in => comment_ids)
thread_ids = comments.collect{|c| c.comment_thread_id}
#thread_ids = c_results.collect{|c| c.comment_thread_id}
#as soon as we can add comment thread id to the ES index, via a Tire upgrade, we'll
#use ES instead of mongo to collect the thread ids
#use the elasticsearch index instead to avoid DB hit
original_thread_ids = results.collect{|r| r.id}
#now add the original search thread ids
thread_ids += original_thread_ids
thread_ids = thread_ids.uniq
#now run one more search to harvest the threads and filter by group
search = Tire::Search::Search.new 'comment_threads'
search.filter(:terms, :thread_id => thread_ids)
search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
search.size per_page
search.from per_page * (page - 1)
if params["group_id"]
search.filter :or, [
{:not => {:exists => {:field => :group_id}}},
{:term => {:group_id => params["group_id"]}}
]
end
search.sort {|sort| sort.by sort_key, sort_order} if sort_key && sort_order
results = search.results
end
if CommentService.config[:cache_enabled]
Sinatra::Application.cache.set(memcached_key, results, CommentService.config[:cache_timeout][:threads_search].to_i)
end
......@@ -256,6 +181,7 @@ class CommentThread < Content
"group_id" => group_id,
"pinned" => pinned?,
"endorsed" => endorsed?)
if params[:recursive]
doc = doc.merge("children" => root_comments.map{|c| c.to_hash(recursive: true)})
end
......@@ -302,12 +228,13 @@ class CommentThread < Content
!!(tag =~ RE_TAG)
end
private
def comment_thread_id
#so that we can use the comment thread id as a common attribute for flagging
self.id
end
private
RE_HEADCHAR = /[a-z0-9]/
RE_ENDONLYCHAR = /\+/
RE_ENDCHAR = /[a-z0-9\#]/
......
require 'spec_helper'
describe "app" do
describe "thread search" do
describe "GET /api/v1/search/threads" do
it "returns thread with query match" do
user = User.find 1
if user.nil?
user = create_test_user(1)
end
commentable = Commentable.new("question_1")
random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
thread = CommentThread.new(title: "Test title", body: random_string, course_id: "1", commentable_id: commentable.id)
thread.author = user
thread.save!
sleep 3
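#give elasticsearch a moment to index the new thread before searching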
get "/api/v1/search/threads", text: random_string
last_response.should be_ok
threads = parse(last_response.body)['collection']
threads.select{|t| t["id"].to_s == thread.id.to_s}.first.should_not be_nil
end
end
end
describe "comment search" do
describe "GET /api/v1/search/threads" do
it "returns thread with comment query match" do
user = User.find 1
if user.nil?
user = create_test_user(1)
end
commentable = Commentable.new("question_1")
random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
thread = CommentThread.new(title: "Test title", body: "elephant otter", course_id: "1", commentable_id: commentable.id)
thread.author = user
thread.save!
sleep 3
comment = Comment.new(body: random_string, course_id: "1", commentable_id: commentable.id)
comment.author = user
comment.comment_thread = thread
comment.save!
sleep 1
get "/api/v1/search/threads", text: random_string
last_response.should be_ok
threads = parse(last_response.body)['collection']
threads.select{|t| t["id"].to_s == thread.id.to_s}.first.should_not be_nil
end
end
end
end