Commit ead2f5e3 by jsa

use a single index in elasticsearch for both document types.

WARNING: Must rebuild search index!

Starting with this change, the name of the search index used by the
service has changed.  The new search index must be created before
deploying this version of the application.  There are two ways of doing
this:

* Offline (recommended)
Follow [these instructions](https://github.com/edx/cs_comments_service/wiki/Search-Indexes#offline-procedure) to perform an offline rebuild.

* Online
1. Deploy this code to a host which is not serving HTTP requests, and
run `rake search:rebuild`
2. When the rebuild finishes, deploy the updated code on live servers.
3. run `rake search:catchup[MINUTES]` where minutes is the amount of
time it took to run rebuild in step 1.
4. run `rake search:prune`
parent d5523cc7
...@@ -53,6 +53,7 @@ CommentService.config = YAML.load(application_yaml).with_indifferent_access ...@@ -53,6 +53,7 @@ CommentService.config = YAML.load(application_yaml).with_indifferent_access
Tire.configure do Tire.configure do
url CommentService.config[:elasticsearch_server] url CommentService.config[:elasticsearch_server]
logger STDERR if ENV["ENABLE_ELASTICSEARCH_DEBUGGING"]
end end
Mongoid.load!("config/mongoid.yml", environment) Mongoid.load!("config/mongoid.yml", environment)
...@@ -75,6 +76,10 @@ Dir[File.dirname(__FILE__) + '/lib/**/*.rb'].each {|file| require file} ...@@ -75,6 +76,10 @@ Dir[File.dirname(__FILE__) + '/lib/**/*.rb'].each {|file| require file}
Dir[File.dirname(__FILE__) + '/models/*.rb'].each {|file| require file} Dir[File.dirname(__FILE__) + '/models/*.rb'].each {|file| require file}
Dir[File.dirname(__FILE__) + '/presenters/*.rb'].each {|file| require file} Dir[File.dirname(__FILE__) + '/presenters/*.rb'].each {|file| require file}
# Ensure elasticsearch index mappings exist.
Comment.put_search_index_mapping
CommentThread.put_search_index_mapping
# Comment out observers until notifications are actually set up properly. # Comment out observers until notifications are actually set up properly.
#Dir[File.dirname(__FILE__) + '/models/observers/*.rb'].each {|file| require file} #Dir[File.dirname(__FILE__) + '/models/observers/*.rb'].each {|file| require file}
#Mongoid.observers = PostReplyObserver, PostTopicObserver, AtUserObserver #Mongoid.observers = PostReplyObserver, PostTopicObserver, AtUserObserver
......
...@@ -30,9 +30,14 @@ class Comment < Content ...@@ -30,9 +30,14 @@ class Comment < Content
include Tire::Model::Search include Tire::Model::Search
include Tire::Model::Callbacks include Tire::Model::Callbacks
index_name Content::ES_INDEX_NAME
mapping do mapping do
indexes :body, type: :string, analyzer: :english, stored: true, term_vector: :with_positions_offsets indexes :body, type: :string, analyzer: :english, stored: true, term_vector: :with_positions_offsets
indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false
indexes :comment_thread_id, type: :string, index: :not_analyzed, included_in_all: false, as: 'comment_thread_id'
indexes :commentable_id, type: :string, index: :not_analyzed, included_in_all: false, as: 'commentable_id'
indexes :group_id, type: :string, index: :not_analyzed, included_in_all: false, as: 'group_id'
indexes :created_at, type: :date, included_in_all: false indexes :created_at, type: :date, included_in_all: false
indexes :updated_at, type: :date, included_in_all: false indexes :updated_at, type: :date, included_in_all: false
end end
...@@ -111,6 +116,19 @@ class Comment < Content ...@@ -111,6 +116,19 @@ class Comment < Content
t.commentable_id t.commentable_id
end end
end end
rescue Mongoid::Errors::DocumentNotFound
nil
end
def group_id
if self.comment_thread_id
t = CommentThread.find self.comment_thread_id
if t
t.group_id
end
end
rescue Mongoid::Errors::DocumentNotFound
nil
end end
def self.by_date_range_and_thread_ids from_when, to_when, thread_ids def self.by_date_range_and_thread_ids from_when, to_when, thread_ids
......
...@@ -26,6 +26,8 @@ class CommentThread < Content ...@@ -26,6 +26,8 @@ class CommentThread < Content
include Tire::Model::Search include Tire::Model::Search
include Tire::Model::Callbacks include Tire::Model::Callbacks
index_name Content::ES_INDEX_NAME
mapping do mapping do
indexes :title, type: :string, analyzer: :english, boost: 5.0, stored: true, term_vector: :with_positions_offsets indexes :title, type: :string, analyzer: :english, boost: 5.0, stored: true, term_vector: :with_positions_offsets
indexes :body, type: :string, analyzer: :english, stored: true, term_vector: :with_positions_offsets indexes :body, type: :string, analyzer: :english, stored: true, term_vector: :with_positions_offsets
...@@ -97,10 +99,11 @@ class CommentThread < Content ...@@ -97,10 +99,11 @@ class CommentThread < Content
#so first, find the comment threads associated with comments that hit the query #so first, find the comment threads associated with comments that hit the query
search = Tire::Search::Search.new 'comment_threads' search = Tire::Search::Search.new Content::ES_INDEX_NAME
search.query {|query| query.match [:title, :body], params["text"]} if params["text"] search.query {|query| query.match [:title, :body], params["text"]} if params["text"]
search.highlight({title: { number_of_fragments: 0 } } , {body: { number_of_fragments: 0 } }, options: { tag: "<highlight>" }) search.highlight({title: { number_of_fragments: 0 } } , {body: { number_of_fragments: 0 } }, options: { tag: "<highlight>" })
search.filter(:type, value: 'comment_thread')
search.filter(:term, commentable_id: params["commentable_id"]) if params["commentable_id"] search.filter(:term, commentable_id: params["commentable_id"]) if params["commentable_id"]
search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"] search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"] search.filter(:term, course_id: params["course_id"]) if params["course_id"]
...@@ -117,8 +120,9 @@ class CommentThread < Content ...@@ -117,8 +120,9 @@ class CommentThread < Content
#again, b/c there is no relationship in ordinality, we cannot paginate if it's a text query #again, b/c there is no relationship in ordinality, we cannot paginate if it's a text query
results = search.results results = search.results
search = Tire::Search::Search.new 'comments' search = Tire::Search::Search.new Content::ES_INDEX_NAME
search.query {|query| query.match :body, params["text"]} if params["text"] search.query {|query| query.match :body, params["text"]} if params["text"]
search.filter(:type, value: 'comment')
search.filter(:term, course_id: params["course_id"]) if params["course_id"] search.filter(:term, course_id: params["course_id"]) if params["course_id"]
search.size CommentService.config["max_deep_search_comment_count"].to_i search.size CommentService.config["max_deep_search_comment_count"].to_i
...@@ -151,7 +155,8 @@ class CommentThread < Content ...@@ -151,7 +155,8 @@ class CommentThread < Content
end end
#now run one more search to harvest the threads and filter by group #now run one more search to harvest the threads and filter by group
search = Tire::Search::Search.new 'comment_threads' search = Tire::Search::Search.new Content::ES_INDEX_NAME
search.filter(:type, value: 'comment_thread')
search.filter(:terms, :thread_id => thread_ids) search.filter(:terms, :thread_id => thread_ids)
search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"] search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"] search.filter(:term, course_id: params["course_id"]) if params["course_id"]
......
...@@ -16,6 +16,16 @@ class Content ...@@ -16,6 +16,16 @@ class Content
index({comment_thread_id: 1, endorsed: 1}, {sparse: true}) index({comment_thread_id: 1, endorsed: 1}, {sparse: true})
index({commentable_id: 1}, {sparse: true, background: true}) index({commentable_id: 1}, {sparse: true, background: true})
ES_INDEX_NAME = 'content'
def self.put_search_index_mapping(idx=nil)
idx ||= self.tire.index
success = idx.mapping(self.tire.document_type, {:properties => self.tire.mapping})
unless success
logger.warn "WARNING! could not apply search index mapping for #{self.name}"
end
end
before_save :set_username before_save :set_username
def set_username def set_username
# avoid having to look this attribute up later, since it does not change # avoid having to look this attribute up later, since it does not change
......
...@@ -9,8 +9,8 @@ describe "app" do ...@@ -9,8 +9,8 @@ describe "app" do
let(:author) { create_test_user(42) } let(:author) { create_test_user(42) }
describe "GET /api/v1/search/threads" do describe "GET /api/v1/search/threads" do
it "returns the correct values for total_results and num_pages", :focus => true do it "returns the correct values for total_results and num_pages" do
course_id = "test_course_id" course_id = "test/course/id"
for i in 1..100 do for i in 1..100 do
text = "all" text = "all"
text += " half" if i % 2 == 0 text += " half" if i % 2 == 0
...@@ -24,8 +24,7 @@ describe "app" do ...@@ -24,8 +24,7 @@ describe "app" do
end end
# Elasticsearch does not necessarily make newly indexed content # Elasticsearch does not necessarily make newly indexed content
# available immediately, so we must explicitly refresh the index # available immediately, so we must explicitly refresh the index
CommentThread.tire.index.refresh refresh_es_index
Comment.tire.index.refresh
test_text = lambda do |text, expected_total_results, expected_num_pages| test_text = lambda do |text, expected_total_results, expected_num_pages|
get "/api/v1/search/threads", course_id: course_id, text: text, per_page: "10" get "/api/v1/search/threads", course_id: course_id, text: text, per_page: "10"
...@@ -46,12 +45,12 @@ describe "app" do ...@@ -46,12 +45,12 @@ describe "app" do
# Elasticsearch may not be able to handle searching for non-ASCII text, # Elasticsearch may not be able to handle searching for non-ASCII text,
# so prepend the text with an ASCII term we can search for. # so prepend the text with an ASCII term we can search for.
search_term = "artichoke" search_term = "artichoke"
course_id = "unicode_course" course_id = "unicode/course"
thread = make_thread(author, "#{search_term} #{text}", course_id, "unicode_commentable") thread = make_thread(author, "#{search_term} #{text}", course_id, "unicode_commentable")
make_comment(author, thread, text) make_comment(author, thread, text)
# Elasticsearch does not necessarily make newly indexed content # Elasticsearch does not necessarily make newly indexed content
# available immediately, so we must explicitly refresh the index # available immediately, so we must explicitly refresh the index
CommentThread.tire.index.refresh refresh_es_index
get "/api/v1/search/threads", course_id: course_id, text: search_term get "/api/v1/search/threads", course_id: course_id, text: search_term
last_response.should be_ok last_response.should be_ok
result = parse(last_response.body)["collection"] result = parse(last_response.body)["collection"]
......
...@@ -28,6 +28,25 @@ def set_api_key_header ...@@ -28,6 +28,25 @@ def set_api_key_header
current_session.header "X-Edx-Api-Key", TEST_API_KEY current_session.header "X-Edx-Api-Key", TEST_API_KEY
end end
def delete_es_index
Tire.index Content::ES_INDEX_NAME do delete end
end
def create_es_index
new_index = Tire.index Content::ES_INDEX_NAME
new_index.create
[CommentThread, Comment].each do |klass|
klass.put_search_index_mapping
end
end
def refresh_es_index
# we are using the same index for two types, which is against the
# grain of Tire's design. This is why this method works for both
# comment_threads and comments.
CommentThread.tire.index.refresh
end
RSpec.configure do |config| RSpec.configure do |config|
config.include Rack::Test::Methods config.include Rack::Test::Methods
config.treat_symbols_as_metadata_keys_with_true_values = true config.treat_symbols_as_metadata_keys_with_true_values = true
...@@ -36,10 +55,8 @@ RSpec.configure do |config| ...@@ -36,10 +55,8 @@ RSpec.configure do |config|
config.before(:each) do config.before(:each) do
Mongoid::IdentityMap.clear Mongoid::IdentityMap.clear
DatabaseCleaner.clean DatabaseCleaner.clean
[CommentThread, Comment].each do |class_| delete_es_index
class_.tire.index.delete create_es_index
class_.create_elasticsearch_index
end
end end
end end
...@@ -59,8 +76,8 @@ def init_without_subscriptions ...@@ -59,8 +76,8 @@ def init_without_subscriptions
[Comment, CommentThread, User, Notification, Subscription, Activity, Delayed::Backend::Mongoid::Job].each(&:delete_all).each(&:remove_indexes).each(&:create_indexes) [Comment, CommentThread, User, Notification, Subscription, Activity, Delayed::Backend::Mongoid::Job].each(&:delete_all).each(&:remove_indexes).each(&:create_indexes)
Content.mongo_session[:blocked_hash].drop Content.mongo_session[:blocked_hash].drop
Tire.index 'comment_threads' do delete end delete_es_index
CommentThread.create_elasticsearch_index create_es_index
commentable = Commentable.new("question_1") commentable = Commentable.new("question_1")
...@@ -140,8 +157,8 @@ end ...@@ -140,8 +157,8 @@ end
def init_with_subscriptions def init_with_subscriptions
[Comment, CommentThread, User, Notification, Subscription, Activity, Delayed::Backend::Mongoid::Job].each(&:delete_all).each(&:remove_indexes).each(&:create_indexes) [Comment, CommentThread, User, Notification, Subscription, Activity, Delayed::Backend::Mongoid::Job].each(&:delete_all).each(&:remove_indexes).each(&:create_indexes)
Tire.index 'comment_threads' do delete end delete_es_index
CommentThread.create_elasticsearch_index create_es_index
user1 = create_test_user(1) user1 = create_test_user(1)
user2 = create_test_user(2) user2 = create_test_user(2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment