Commit ead2f5e3 by jsa

use a single index in elasticsearch for both document types.

WARNING: Must rebuild search index!

Starting with this change, the name of the search index used by the
service has changed.  The new search index must be created before
deploying this version of the application.  There are two ways of doing
this:

* Offline (recommended)
Follow [these instructions](https://github.com/edx/cs_comments_service/wiki/Search-Indexes#offline-procedure) to perform an offline rebuild.

* Online
1. Deploy this code to a host which is not serving HTTP requests, and
run `rake search:rebuild`
2. When the rebuild finishes, deploy the updated code on live servers.
3. run `rake search:catchup[MINUTES]` where minutes is the amount of
time it took to run rebuild in step 1.
4. run `rake search:prune`
parent d5523cc7
......@@ -53,6 +53,7 @@ CommentService.config = YAML.load(application_yaml).with_indifferent_access
Tire.configure do
url CommentService.config[:elasticsearch_server]
logger STDERR if ENV["ENABLE_ELASTICSEARCH_DEBUGGING"]
end
Mongoid.load!("config/mongoid.yml", environment)
......@@ -75,6 +76,10 @@ Dir[File.dirname(__FILE__) + '/lib/**/*.rb'].each {|file| require file}
Dir[File.dirname(__FILE__) + '/models/*.rb'].each {|file| require file}
Dir[File.dirname(__FILE__) + '/presenters/*.rb'].each {|file| require file}
# Ensure elasticsearch index mappings exist.
Comment.put_search_index_mapping
CommentThread.put_search_index_mapping
# Comment out observers until notifications are actually set up properly.
#Dir[File.dirname(__FILE__) + '/models/observers/*.rb'].each {|file| require file}
#Mongoid.observers = PostReplyObserver, PostTopicObserver, AtUserObserver
......
......@@ -30,9 +30,14 @@ class Comment < Content
include Tire::Model::Search
include Tire::Model::Callbacks
index_name Content::ES_INDEX_NAME
mapping do
indexes :body, type: :string, analyzer: :english, stored: true, term_vector: :with_positions_offsets
indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false
indexes :comment_thread_id, type: :string, index: :not_analyzed, included_in_all: false, as: 'comment_thread_id'
indexes :commentable_id, type: :string, index: :not_analyzed, included_in_all: false, as: 'commentable_id'
indexes :group_id, type: :string, index: :not_analyzed, included_in_all: false, as: 'group_id'
indexes :created_at, type: :date, included_in_all: false
indexes :updated_at, type: :date, included_in_all: false
end
......@@ -111,6 +116,19 @@ class Comment < Content
t.commentable_id
end
end
rescue Mongoid::Errors::DocumentNotFound
nil
end
def group_id
if self.comment_thread_id
t = CommentThread.find self.comment_thread_id
if t
t.group_id
end
end
rescue Mongoid::Errors::DocumentNotFound
nil
end
def self.by_date_range_and_thread_ids from_when, to_when, thread_ids
......
......@@ -26,6 +26,8 @@ class CommentThread < Content
include Tire::Model::Search
include Tire::Model::Callbacks
index_name Content::ES_INDEX_NAME
mapping do
indexes :title, type: :string, analyzer: :english, boost: 5.0, stored: true, term_vector: :with_positions_offsets
indexes :body, type: :string, analyzer: :english, stored: true, term_vector: :with_positions_offsets
......@@ -97,10 +99,11 @@ class CommentThread < Content
#so first, find the comment threads associated with comments that hit the query
search = Tire::Search::Search.new 'comment_threads'
search = Tire::Search::Search.new Content::ES_INDEX_NAME
search.query {|query| query.match [:title, :body], params["text"]} if params["text"]
search.highlight({title: { number_of_fragments: 0 } } , {body: { number_of_fragments: 0 } }, options: { tag: "<highlight>" })
search.filter(:type, value: 'comment_thread')
search.filter(:term, commentable_id: params["commentable_id"]) if params["commentable_id"]
search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
......@@ -117,8 +120,9 @@ class CommentThread < Content
#again, b/c there is no relationship in ordinality, we cannot paginate if it's a text query
results = search.results
search = Tire::Search::Search.new 'comments'
search = Tire::Search::Search.new Content::ES_INDEX_NAME
search.query {|query| query.match :body, params["text"]} if params["text"]
search.filter(:type, value: 'comment')
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
search.size CommentService.config["max_deep_search_comment_count"].to_i
......@@ -151,7 +155,8 @@ class CommentThread < Content
end
#now run one more search to harvest the threads and filter by group
search = Tire::Search::Search.new 'comment_threads'
search = Tire::Search::Search.new Content::ES_INDEX_NAME
search.filter(:type, value: 'comment_thread')
search.filter(:terms, :thread_id => thread_ids)
search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
......
......@@ -16,6 +16,16 @@ class Content
index({comment_thread_id: 1, endorsed: 1}, {sparse: true})
index({commentable_id: 1}, {sparse: true, background: true})
ES_INDEX_NAME = 'content'
def self.put_search_index_mapping(idx=nil)
idx ||= self.tire.index
success = idx.mapping(self.tire.document_type, {:properties => self.tire.mapping})
unless success
logger.warn "WARNING! could not apply search index mapping for #{self.name}"
end
end
before_save :set_username
def set_username
# avoid having to look this attribute up later, since it does not change
......@@ -29,7 +39,7 @@ class Content
(anonymous || anonymous_to_peers) ? attr_when_anonymous : author.send(attr)
end
end
def self.flagged
#return an array of flagged content
holder = []
......
......@@ -9,8 +9,8 @@ describe "app" do
let(:author) { create_test_user(42) }
describe "GET /api/v1/search/threads" do
it "returns the correct values for total_results and num_pages", :focus => true do
course_id = "test_course_id"
it "returns the correct values for total_results and num_pages" do
course_id = "test/course/id"
for i in 1..100 do
text = "all"
text += " half" if i % 2 == 0
......@@ -24,8 +24,7 @@ describe "app" do
end
# Elasticsearch does not necessarily make newly indexed content
# available immediately, so we must explicitly refresh the index
CommentThread.tire.index.refresh
Comment.tire.index.refresh
refresh_es_index
test_text = lambda do |text, expected_total_results, expected_num_pages|
get "/api/v1/search/threads", course_id: course_id, text: text, per_page: "10"
......@@ -46,12 +45,12 @@ describe "app" do
# Elasticsearch may not be able to handle searching for non-ASCII text,
# so prepend the text with an ASCII term we can search for.
search_term = "artichoke"
course_id = "unicode_course"
course_id = "unicode/course"
thread = make_thread(author, "#{search_term} #{text}", course_id, "unicode_commentable")
make_comment(author, thread, text)
# Elasticsearch does not necessarily make newly indexed content
# available immediately, so we must explicitly refresh the index
CommentThread.tire.index.refresh
refresh_es_index
get "/api/v1/search/threads", course_id: course_id, text: search_term
last_response.should be_ok
result = parse(last_response.body)["collection"]
......
......@@ -28,6 +28,25 @@ def set_api_key_header
current_session.header "X-Edx-Api-Key", TEST_API_KEY
end
def delete_es_index
Tire.index Content::ES_INDEX_NAME do delete end
end
def create_es_index
new_index = Tire.index Content::ES_INDEX_NAME
new_index.create
[CommentThread, Comment].each do |klass|
klass.put_search_index_mapping
end
end
def refresh_es_index
# we are using the same index for two types, which is against the
# grain of Tire's design. This is why this method works for both
# comment_threads and comments.
CommentThread.tire.index.refresh
end
RSpec.configure do |config|
config.include Rack::Test::Methods
config.treat_symbols_as_metadata_keys_with_true_values = true
......@@ -36,10 +55,8 @@ RSpec.configure do |config|
config.before(:each) do
Mongoid::IdentityMap.clear
DatabaseCleaner.clean
[CommentThread, Comment].each do |class_|
class_.tire.index.delete
class_.create_elasticsearch_index
end
delete_es_index
create_es_index
end
end
......@@ -59,8 +76,8 @@ def init_without_subscriptions
[Comment, CommentThread, User, Notification, Subscription, Activity, Delayed::Backend::Mongoid::Job].each(&:delete_all).each(&:remove_indexes).each(&:create_indexes)
Content.mongo_session[:blocked_hash].drop
Tire.index 'comment_threads' do delete end
CommentThread.create_elasticsearch_index
delete_es_index
create_es_index
commentable = Commentable.new("question_1")
......@@ -140,8 +157,8 @@ end
def init_with_subscriptions
[Comment, CommentThread, User, Notification, Subscription, Activity, Delayed::Backend::Mongoid::Job].each(&:delete_all).each(&:remove_indexes).each(&:create_indexes)
Tire.index 'comment_threads' do delete end
CommentThread.create_elasticsearch_index
delete_es_index
create_es_index
user1 = create_test_user(1)
user2 = create_test_user(2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment