Commit 640e3f9e by Clinton Blackburn Committed by Robert Raposa

Replaced Tire with elasticsearch-model

- Replaced Tire with elasticsearch-model
- Updated README
- Added task to initialize a new index
- Fix and add tests
- Remove search:reindex and search:prune in favor of search:rebuild_index
- Use milliseconds in auto-generated index name.
- Delete unused application.rb.
parent 488e7fdb
......@@ -15,3 +15,6 @@ services:
volumes:
- ..:/edx/app/forum/cs_comments_service
command: tail -f /dev/null
depends_on:
- "elasticsearch"
- "mongo"
......@@ -6,4 +6,13 @@ cd /edx/app/forum/cs_comments_service
bundle install
bundle exec rspec
# allow dependent services to finish start up (e.g. ElasticSearch, Mongo)
sleep 10
bin/rake search:initialize
# Use 'bin/rspec -fd' to print test names for debugging
# Printing test names can be especially helpful for tracking down test
# failure differences between Travis and Mac, because tests are loaded
# and run in different orders.
bin/rspec
......@@ -3,7 +3,9 @@ ruby "1.9.3"
gem 'pry'
gem 'pry-nav'
# gem 'debugger'
# Use with command-line debugging, but not RubyMine
#gem 'debugger'
gem 'bundler'
......@@ -31,8 +33,8 @@ gem 'will_paginate_mongoid', "~>2.0"
gem 'rdiscount'
gem 'nokogiri', "~>1.6.8"
gem 'tire', "0.6.2"
gem 'tire-contrib'
gem 'elasticsearch', '~> 0.4'
gem 'elasticsearch-model', '~> 0.1.8'
gem 'dalli'
......@@ -47,6 +49,7 @@ group :test do
gem 'guard-unicorn'
gem 'rack-test', :require => 'rack/test'
gem 'rspec', '~> 2.11.0'
gem 'webmock', '~> 1.22'
end
gem 'newrelic_rpm'
......
......@@ -32,7 +32,7 @@ GEM
minitest (~> 5.1)
thread_safe (~> 0.3, >= 0.3.4)
tzinfo (~> 1.1)
ansi (1.5.0)
addressable (2.4.0)
bson (3.2.4)
bson_ext (1.5.1)
builder (3.2.2)
......@@ -41,6 +41,8 @@ GEM
simplecov
url
coderay (1.0.7)
crack (0.4.3)
safe_yaml (~> 1.0.0)
dalli (2.1.0)
delayed_job (4.1.1)
activesupport (>= 3.0, < 5.0)
......@@ -52,18 +54,33 @@ GEM
docile (1.1.5)
domain_name (0.5.24)
unf (>= 0.0.5, < 1.0.0)
elasticsearch (0.4.11)
elasticsearch-api (= 0.4.11)
elasticsearch-transport (= 0.4.11)
elasticsearch-api (0.4.11)
multi_json
elasticsearch-model (0.1.8)
activesupport (> 3)
elasticsearch (> 0.4)
hashie
elasticsearch-transport (0.4.11)
faraday
multi_json
enumerize (0.11.0)
activesupport (>= 3.2)
factory_girl (4.5.0)
activesupport (>= 3.0.0)
faker (1.6.1)
i18n (~> 0.5)
faraday (0.9.2)
multipart-post (>= 1.2, < 3)
guard (1.3.2)
listen (>= 0.4.2)
thor (>= 0.14.6)
guard-unicorn (0.0.7)
guard (>= 1.1)
hashr (0.0.22)
hashdiff (0.2.3)
hashie (3.4.3)
http-cookie (1.0.2)
domain_name (~> 0.5)
i18n (0.7.0)
......@@ -90,6 +107,7 @@ GEM
mongoid
rake
multi_json (1.11.2)
multipart-post (2.0.0)
netrc (0.10.3)
newrelic_rpm (3.16.0.318)
nokogiri (1.6.8)
......@@ -126,6 +144,7 @@ GEM
rspec-expectations (2.11.2)
diff-lcs (~> 1.1.3)
rspec-mocks (2.11.2)
safe_yaml (1.0.4)
simplecov (0.11.1)
docile (~> 1.1.0)
json (~> 1.8)
......@@ -139,16 +158,6 @@ GEM
thor (0.16.0)
thread_safe (0.3.5)
tilt (1.3.3)
tire (0.6.2)
activemodel (>= 3.0)
activesupport
ansi
hashr (~> 0.0.19)
multi_json (~> 1.3)
rake
rest-client (~> 1.6)
tire-contrib (0.1.1)
tire
tzinfo (1.2.2)
thread_safe (~> 0.1)
unf (0.1.4)
......@@ -159,6 +168,10 @@ GEM
rack
raindrops (~> 0.7)
url (0.3.2)
webmock (1.22.3)
addressable (>= 2.3.6)
crack (>= 0.3.2)
hashdiff
will_paginate (3.0.7)
will_paginate_mongoid (2.0.1)
mongoid
......@@ -176,6 +189,8 @@ DEPENDENCIES
dalli
delayed_job
delayed_job_mongoid
elasticsearch (~> 0.4)
elasticsearch-model (~> 0.1.8)
enumerize
factory_girl (~> 4.0)
faker (~> 1.6)
......@@ -200,14 +215,10 @@ DEPENDENCIES
rs_voteable_mongo!
rspec (~> 2.11.0)
sinatra
tire (= 0.6.2)
tire-contrib
unicorn
webmock (~> 1.22)
will_paginate_mongoid (~> 2.0)
yajl-ruby
RUBY VERSION
ruby 1.9.3p551
BUNDLED WITH
1.12.5
......@@ -14,9 +14,8 @@ An independent comment system which supports voting and nested comments. It
also supports features including instructor endorsement for education-aimed
discussion platforms.
Running the Server
------------------
Getting Started
---------------
If you are running cs_comments_service as part of edx-platform__ development under
devstack, it is strongly recommended to read `those setup documents`__ first. Note that
devstack will take care of just about all of the installation, configuration, and
......@@ -30,15 +29,70 @@ This service relies on Elasticsearch and MongoDB. By default the service will us
however, if you wish to change these values, refer to `config/application.yml` and `config/mongoid.yml` for the
environment variables that can be set to override the defaults.
Before the server is first run, ensure gems are installed by doing ``bundle install``.
Install the requisite gems:
.. code-block:: bash
$ bundle install
Set up the search index. Note that the command below creates an index with a unique name (e.g.
content_20161220185820323), and assigns it a known alias: content. If you choose not to use the command below, you
should still opt to reference your index by an alias rather than the actual index name. This will enable you to swap out
indices (e.g. rebuild_index) without having to take downtime or modify code with a new index name.
.. code-block:: bash
$ bin/rake search:initialize
To rebuild a new index without moving the alias and without running catchup, do the following:
.. code-block:: bash
$ bin/rake search:rebuild_index
To rebuild a new index from the database and then point the alias 'content' to it, you can use the
rebuild_index task. This task will also run catchup before and after the alias is moved, to minimize time where the
alias does not contain all documents.
.. code-block:: bash
$ bin/rake search:rebuild_index[true]
You can also adjust the batch size (e.g. 200) and the sleep time (e.g. 2 seconds) between batches to lighten the load
on MongoDB.
To run the server, do ``ruby app.rb [-p PORT]`` where PORT defaults to 4567.
.. code-block:: bash
$ bin/rake search:rebuild_index[true,200,2]
Run the server:
.. code-block::
$ ruby app.rb
By default Sinatra runs on port `4567`. If you'd like to use a different port pass the `-p` parameter:
.. code-block::
$ ruby app.rb -p 5678
Running Tests
-------------
To run tests, do ``bundle exec rspec``. Append ``--help`` or see rspec documentation
for additional options to this command.
Tests are built using the rspec__ framework, and can be run with the command below:
.. code-block::
$ bin/rspec
If you'd like to view additional options for the command, append the `--help` option:
.. code-block::
$ bin/rspec --help
__ http://rspec.info/
Internationalization (i18n) and Localization (l10n)
---------------------------------------------------
......@@ -62,12 +116,12 @@ follow the instructions here__ to set up your ``.transifexrc`` file.
__ http://support.transifex.com/customer/portal/articles/1000855-configuring-the-client
To upload strings to Transifex for translation when you change the set
of translatable strings: ``bundle exec rake i18n:push``
of translatable strings: ``bin/rake i18n:push``
To fetch the latest translations from Transifex: ``bundle exec rake i18n:pull``
To fetch the latest translations from Transifex: ``bin/rake i18n:pull``
The repository includes some translations so they will be available
upon deployment. To commit an update to these: ``bundle exec rake i18n:commit``
upon deployment. To commit an update to these: ``bin/rake i18n:commit``
License
-------
......
......@@ -16,15 +16,18 @@ end
LOG = Logger.new(STDERR)
# True when `search:initialize` is one of the top-level tasks rake was asked to run.
RAKE_SEARCH_INITIALIZE = Rake.application.top_level_tasks.include?('search:initialize')
desc 'Load the environment'
task :environment do
# Load all of app.rb, because it is too easy to introduce bugs otherwise where Rake
# does not have a fix or config that is added to app.rb.
# Load all of app.rb to keep rake and the app as similar as possible.
# Without this, we had run into bugs where certain overriding fixes in app.rb
# were not used from the rake tasks.
require File.dirname(__FILE__) + '/app.rb'
end
Dir.glob('lib/tasks/*.rake').each { |r| import r }
task :console => :environment do
binding.pry
end
Dir.glob('lib/tasks/*.rake').each { |r| import r }
get "#{APIPREFIX}/threads" do # retrieve threads by course
threads = CommentThread.where({"course_id" => params["course_id"]})
if params[:commentable_ids]
threads = threads.in({"commentable_id" => params[:commentable_ids].split(",")})
......
......@@ -53,6 +53,7 @@ end
delete "#{APIPREFIX}/comments/:comment_id" do |comment_id|
parent_id = comment.parent_id
comment_as_json = comment.to_hash.to_json
comment.destroy
unless parent_id.nil?
begin
......@@ -62,5 +63,5 @@ delete "#{APIPREFIX}/comments/:comment_id" do |comment_id|
pass
end
end
comment.to_hash.to_json
comment_as_json
end
get "#{APIPREFIX}/search/threads" do
local_params = params # Necessary for params to be available inside blocks
group_ids = get_group_ids_from_params(local_params)
context = local_params["context"] ? local_params["context"] : "course"
search_text = local_params["text"]
if !search_text
{}.to_json
else
# Because threads and comments are currently separate unrelated documents in
# Elasticsearch, we must first query for all matching documents, then
# extract the set of thread ids, and then sort the threads by the specified
# criteria and paginate. For performance reasons, we currently limit the
# number of documents considered (ordered by update recency), which means
# that matching threads can be missed if the search terms are very common.
def get_thread_ids(context, group_ids, local_params, search_text)
filters = []
filters.push({term: {commentable_id: local_params['commentable_id']}}) if local_params['commentable_id']
filters.push({terms: {commentable_id: local_params['commentable_ids'].split(',')}}) if local_params['commentable_ids']
filters.push({term: {course_id: local_params['course_id']}}) if local_params['course_id']
get_matching_thread_ids = lambda do |search_text|
self.class.trace_execution_scoped(["Custom/get_search_threads/es_search"]) do
search = Tire.search Content::ES_INDEX_NAME do
query do
match [:title, :body], search_text, :operator => "AND"
filtered do
filter :term, :commentable_id => local_params["commentable_id"] if local_params["commentable_id"]
filter :terms, :commentable_id => local_params["commentable_ids"].split(",") if local_params["commentable_ids"]
filter :term, :course_id => local_params["course_id"] if local_params["course_id"]
filter :or, [
{:not => {:exists => {:field => :context}}},
{:term => {:context => context}}
]
filters.push({or: [
{not: {exists: {field: :context}}},
{term: {context: context}}
]})
unless group_ids.empty?
filters.push(
{
or: [
{:not => {:exists => {:field => :group_id}}},
{:terms => {:group_id => group_ids}}
]
}
)
end
if not group_ids.empty?
if group_ids.length > 1
group_id_criteria = {:terms => {:group_id => group_ids}}
else
group_id_criteria = {:term => {:group_id => group_ids[0]}}
end
self.class.trace_execution_scoped(['Custom/get_search_threads/es_search']) do
body = {
size: CommentService.config['max_deep_search_comment_count'].to_i,
sort: [
{updated_at: :desc}
],
query: {
multi_match: {
query: search_text,
fields: [:title, :body],
operator: :AND
},
filtered: {
filter: {
and: filters
}
}
}
}
filter :or, [
{:not => {:exists => {:field => :group_id}}},
group_id_criteria
]
end
response = Elasticsearch::Model.client.search(index: Content::ES_INDEX_NAME, body: body)
end
end
sort do
by "updated_at", "desc"
end
size CommentService.config["max_deep_search_comment_count"].to_i
end
thread_ids = Set.new
search.results.each do |content|
case content.type
when "comment_thread"
thread_ids.add(content.id)
when "comment"
thread_ids.add(content.comment_thread_id)
end
end
thread_ids
thread_ids = Set.new
response['hits']['hits'].each do |hit|
case hit['_type']
when CommentThread.document_type
thread_ids.add(hit['_id'])
when Comment.document_type
thread_ids.add(hit['_source']['comment_thread_id'])
else
# There shouldn't be any other document types. Nevertheless, ignore them, if they are present.
next
end
end
thread_ids
end
end
# Asks Elasticsearch for a phrase suggestion for +search_text+ over the
# +_all+ field of the content index.
#
# Returns the text of the first option of the first suggestion, or nil when
# the response has no suggestions or the first suggestion has no options.
def get_suggested_text(search_text)
  request = {
    suggestions: {
      text: search_text,
      phrase: {field: :_all}
    }
  }
  reply = Elasticsearch::Model.client.suggest(index: Content::ES_INDEX_NAME, body: request)

  candidates = reply.fetch('suggestions', [])
  return nil if candidates.empty?

  choices = candidates[0]['options']
  choices.empty? ? nil : choices[0]['text']
end
def get_threads(context, group_ids, local_params, search_text)
# Because threads and comments are currently separate unrelated documents in
# Elasticsearch, we must first query for all matching documents, then
# extract the set of thread ids, and then sort the threads by the specified
# criteria and paginate. For performance reasons, we currently limit the
# number of documents considered (ordered by update recency), which means
# that matching threads can be missed if the search terms are very common.
thread_ids = get_thread_ids(context, group_ids, local_params, search_text)
corrected_text = nil
if thread_ids.empty?
# Sadly, Elasticsearch does not have a facility for computing suggestions
# with respect to a filter. It would be expensive to determine the best
# suggestion with respect to our filter parameters, so we simply re-query
# with the top suggestion. If that has no results, then we return no results
# and no correction.
thread_ids = get_matching_thread_ids.call(search_text)
corrected_text = nil
if thread_ids.empty?
suggest = Tire.suggest Content::ES_INDEX_NAME do
suggestion "" do
text search_text
phrase :_all
end
end
corrected_text = suggest.results.texts.first
thread_ids = get_matching_thread_ids.call(corrected_text) if corrected_text
corrected_text = nil if thread_ids.empty?
end
corrected_text = get_suggested_text(search_text)
thread_ids = get_thread_ids(context, group_ids, local_params, corrected_text) if corrected_text
corrected_text = nil if thread_ids.empty?
end
result_obj = handle_threads_query(
CommentThread.in({"_id" => thread_ids.to_a}),
local_params["user_id"],
local_params["course_id"],
result_obj = handle_threads_query(
CommentThread.in({_id: thread_ids.to_a}),
local_params['user_id'],
local_params['course_id'],
group_ids,
value_to_boolean(local_params["flagged"]),
value_to_boolean(local_params["unread"]),
value_to_boolean(local_params["unanswered"]),
local_params["sort_key"],
local_params["page"],
local_params["per_page"],
value_to_boolean(local_params['flagged']),
value_to_boolean(local_params['unread']),
value_to_boolean(local_params['unanswered']),
local_params['sort_key'],
local_params['page'],
local_params['per_page'],
context
)
if !result_obj.empty?
result_obj[:corrected_text] = corrected_text
# NOTE this reflects the total results from ES, but does not consider
# any post-filtering that might happen (e.g. unread, flagged...) before
# results are shown to the user.
result_obj[:total_results] = thread_ids.size
end
result_obj.to_json
)
unless result_obj.empty?
result_obj[:corrected_text] = corrected_text
# NOTE this reflects the total results from ES, but does not consider
# any post-filtering that might happen (e.g. unread, flagged...) before
# results are shown to the user.
result_obj[:total_results] = thread_ids.size
end
result_obj.to_json
end
# Thread search endpoint: answers with an empty JSON object when no "text"
# parameter is given, otherwise with whatever get_threads returns. The
# context defaults to "course" when the request does not supply one.
get "#{APIPREFIX}/search/threads" do
  local_params = params # Necessary for params to be available inside blocks
  group_ids = get_group_ids_from_params(local_params)
  context = local_params["context"] || "course"
  search_text = local_params["text"]

  if search_text
    get_threads(context, group_ids, local_params, search_text)
  else
    '{}'
  end
end
......@@ -14,6 +14,10 @@ module CommentService
class << self
attr_accessor :config
attr_accessor :blocked_hashes
# Returns the :enable_search entry of the service configuration.
def search_enabled?
  config[:enable_search]
end
end
API_VERSION = 'v1'
API_PREFIX = "/api/#{API_VERSION}"
......@@ -26,11 +30,6 @@ end
application_yaml = ERB.new(File.read("config/application.yml")).result()
CommentService.config = YAML.load(application_yaml).with_indifferent_access
Tire.configure do
url CommentService.config[:elasticsearch_server]
logger STDERR if ENV["ENABLE_ELASTICSEARCH_DEBUGGING"]
end
Mongoid.load!("config/mongoid.yml", environment)
Mongoid.logger.level = Logger::INFO
Mongo::Logger.logger.level = ENV["ENABLE_MONGO_DEBUGGING"] ? Logger::DEBUG : Logger::INFO
......@@ -48,13 +47,18 @@ helpers do
end
end
Dir[File.dirname(__FILE__) + '/lib/**/*.rb'].each {|file| require file}
Dir[File.dirname(__FILE__) + '/models/*.rb'].each {|file| require file}
Dir[File.dirname(__FILE__) + '/presenters/*.rb'].each {|file| require file}
Dir[File.dirname(__FILE__) + '/lib/**/*.rb'].each { |file| require file }
Dir[File.dirname(__FILE__) + '/models/*.rb'].each { |file| require file }
Dir[File.dirname(__FILE__) + '/presenters/*.rb'].each { |file| require file }
# Ensure elasticsearch index mappings exist.
Comment.put_search_index_mapping
CommentThread.put_search_index_mapping
# Point elasticsearch-model at the server named in the service configuration.
Elasticsearch::Model.client = Elasticsearch::Client.new(host: CommentService.config[:elasticsearch_server], log: false)

# The mappings are put at load time unless RAKE_SEARCH_INITIALIZE is defined
# and true, i.e. unless rake is running search:initialize and creates the
# index itself.
$check_index_mapping_exists = !defined?(RAKE_SEARCH_INITIALIZE) || RAKE_SEARCH_INITIALIZE == false
if $check_index_mapping_exists
  [Comment, CommentThread].each { |model| model.put_search_index_mapping }
end
# Comment out observers until notifications are actually set up properly.
#Dir[File.dirname(__FILE__) + '/models/observers/*.rb'].each {|file| require file}
......@@ -106,7 +110,6 @@ class Time
end
# these files must be required in order
require './api/search'
require './api/commentables'
......@@ -138,55 +141,61 @@ error ArgumentError do
error 400, [env['sinatra.error'].message].to_json
end
CommentService.blocked_hashes = Content.mongo_client[:blocked_hash].find(nil, projection: {hash: 1}).map {|d| d["hash"]}
CommentService.blocked_hashes = Content.mongo_client[:blocked_hash].find(nil, projection: {hash: 1}).map { |d| d["hash"] }
# Sends the isMaster command through the default Mongoid client and returns
# its reply.
def get_db_is_master
  default_client = Mongoid::Clients.default
  default_client.command(isMaster: 1)
end
def get_es_status
res = Tire::Configuration.client.get Tire::Configuration.url
JSON.parse res.body
# Returns the cluster health reported by the shared Elasticsearch client.
def elasticsearch_health
  cluster = Elasticsearch::Model.client.cluster
  cluster.health
end
get '/heartbeat' do
# mongo is reachable and ready to handle requests
db_ok = false
def is_mongo_available?
begin
res = get_db_is_master
db_ok = res.ok? && res.documents.first['ismaster'] == true
response = get_db_is_master
return response.ok? && (response.documents.first['ismaster'] == true)
rescue
# ignored
end
error 500, JSON.generate({"OK" => false, "check" => "db"}) unless db_ok
# E_S is reachable and ready to handle requests
es_ok = false
false
end
def is_elasticsearch_available?
begin
es_status = get_es_status
es_ok = es_status["status"] == 200
health = elasticsearch_health
return !health['timed_out'] && %w(yellow green).include?(health['status'])
rescue
# ignored
end
error 500, JSON.generate({"OK" => false, "check" => "es"}) unless es_ok
JSON.generate({"OK" => true})
false
end
# Liveness endpoint. MongoDB is checked first, then Elasticsearch; the first
# failing check is reported through `error 500` with a JSON body naming it.
# When both checks pass the body is {"OK": true}.
get '/heartbeat' do
  unless is_mongo_available?
    error 500, JSON.generate({OK: false, check: :db})
  end

  unless is_elasticsearch_available?
    error 500, JSON.generate({OK: false, check: :es})
  end

  JSON.generate({OK: true})
end
get '/selftest' do
begin
t1 = Time.now
status = {
"db" => get_db_is_master,
"es" => get_es_status,
"last_post_created" => (Content.last.created_at rescue nil),
"total_posts" => Content.count,
"total_users" => User.count,
"elapsed_time" => Time.now - t1
db: get_db_is_master,
es: elasticsearch_health,
last_post_created: (Content.last.created_at rescue nil),
total_posts: Content.count,
total_users: User.count,
elapsed_time: Time.now - t1
}
JSON.generate(status)
rescue => ex
[ 500,
{'Content-Type' => 'text/plain'},
"#{ex.backtrace.first}: #{ex.message} (#{ex.class})\n\t#{ex.backtrace[1..-1].join("\n\t")}"
[500,
{'Content-Type' => 'text/plain'},
"#{ex.backtrace.first}: #{ex.message} (#{ex.class})\n\t#{ex.backtrace[1..-1].join("\n\t")}"
]
end
end
env_index = ARGV.index("-e")
env_arg = ARGV[env_index + 1] if env_index
env = env_arg || ENV["SINATRA_ENV"] || "development"
module CommentService
class << self; attr_accessor :config; end
end
CommentService.config = YAML.load_file("config/application.yml")
Mongoid.load!("config/mongoid.yml")
Mongoid.logger.level = Logger::INFO
......@@ -2,6 +2,7 @@ level_limit: 3
api_key: <%= ENV['API_KEY'] || 'PUT_YOUR_API_KEY_HERE' %>
elasticsearch_server: <%= ENV['SEARCH_SERVER'] || 'http://localhost:9200' %>
max_deep_search_comment_count: 5000
enable_search: true
default_locale: <%= ENV['SERVICE_LANGUAGE'] || 'en-US' %>
manual_pagination_batch_size: <%= ENV['MANUAL_PAGINATION_BATCH_SIZE'] || 500 %>
thread_response_default_size: <%= ENV['THREAD_RESPONSE_DEFAULT_SIZE'] || 100 %>
......
require 'elasticsearch'
module TaskHelpers
  # Helpers used by the search rake tasks to create, populate, alias, refresh
  # and delete Elasticsearch indices through Elasticsearch::Model.client.
  module ElasticsearchHelper
    LOG = Logger.new(STDERR)

    # Creates a new index and loads data from the database. If an alias name
    # is supplied, it will be pointed to the new index and catch up will be
    # called both before and after the alias switch.
    #
    # Returns the name of the newly created index.
    #
    # Params:
    # +alias_name+:: (optional) The alias to point to the new index.
    # +batch_size+:: (optional) The number of elements to index at a time. Defaults to 500.
    # +sleep_time+:: (optional) The number of seconds to sleep between batches. Defaults to 0.
    def self.rebuild_index(alias_name=nil, batch_size=500, sleep_time=0)
      initial_start_time = Time.now
      index_name = create_index

      [Comment, CommentThread].each do |model|
        import_in_batches(model, index_name, batch_size, sleep_time)
      end

      if alias_name
        # Just in case initial rebuild took days and first catch up takes hours,
        # we catch up once before the alias move and once afterwards.
        first_catchup_start_time = Time.now
        catchup_index(initial_start_time, index_name, batch_size, sleep_time)
        # force_delete is a positional parameter, so it is passed positionally.
        move_alias(alias_name, index_name, true)
        catchup_index(first_catchup_start_time, alias_name, batch_size, sleep_time)
      end

      LOG.info "Rebuild index complete."
      index_name
    end

    # Re-imports every Comment and CommentThread whose updated_at is at or
    # after +start_time+ into +index_name+.
    #
    # Params:
    # +start_time+:: Lower bound (inclusive) on updated_at.
    # +index_name+:: Index (or alias) to import into.
    # +batch_size+:: (optional) The number of elements to index at a time. Defaults to 100.
    # +sleep_time+:: (optional) The number of seconds to sleep between batches. Defaults to 0.
    def self.catchup_index(start_time, index_name, batch_size=100, sleep_time=0)
      [Comment, CommentThread].each do |model|
        import_in_batches(model.where(:updated_at.gte => start_time), index_name, batch_size, sleep_time)
      end
      LOG.info "Catch up from #{start_time} complete."
    end

    # Creates an index carrying the merged mappings of Comment and
    # CommentThread.
    #
    # Params:
    # +name+:: (optional) Index name. Defaults to Content::ES_INDEX_NAME
    #          followed by a millisecond-resolution timestamp.
    #
    # Returns the name of the created index.
    def self.create_index(name=nil)
      name ||= "#{Content::ES_INDEX_NAME}_#{Time.now.strftime('%Y%m%d%H%M%S%L')}"

      mappings = {}
      [Comment, CommentThread].each do |model|
        mappings.merge!(model.mappings.to_hash)
      end

      Elasticsearch::Model.client.indices.create(index: name, body: {mappings: mappings})
      LOG.info "Created new index: #{name}."
      name
    end

    # Deletes the index called +name+. A missing index is logged, not raised.
    def self.delete_index(name)
      begin
        Elasticsearch::Model.client.indices.delete(index: name)
        LOG.info "Deleted index: #{name}."
      rescue Elasticsearch::Transport::Transport::Errors::NotFound
        # NOTE (CCB): Future versions of the Elasticsearch client support the ignore parameter,
        # that can be used to ignore 404 errors.
        LOG.info "Unable to delete non-existent index: #{name}."
      end
    end

    # Logs the outcome of one imported batch and then sleeps.
    #
    # Params:
    # +response+::     Bulk response for the batch; its 'items' entries are
    #                  inspected for an 'error' under the 'index' key.
    # +batch_number+:: Sequence number of the batch, used in the log line.
    # +sleep_time+::   Seconds to sleep after the batch.
    def self.batch_import_post_process(response, batch_number, sleep_time)
      failed_items = (response['items'] || []).select { |item| item['index']['error'] }
      # Log each failed item on its own instead of repeating the whole bulk
      # response once per failure.
      failed_items.each do |item|
        LOG.error "Error indexing. Response was: #{item}"
      end
      LOG.info "Imported batch #{batch_number} into the index"
      sleep(sleep_time)
    end

    # Returns the number_of_shards setting reported for index +name+.
    def self.get_index_shard_count(name)
      settings = Elasticsearch::Model.client.indices.get_settings(index: name)
      settings[name]['settings']['index']['number_of_shards']
    end

    # Returns whether an alias called +alias_name+ exists.
    def self.exists_alias(alias_name)
      Elasticsearch::Model.client.indices.exists_alias(name: alias_name)
    end

    # Returns whether the client reports an index called +index_name+.
    def self.exists_index(index_name)
      Elasticsearch::Model.client.indices.exists(index: index_name)
    end

    # Points +alias_name+ at +index_name+, removing the alias from whatever
    # indices currently carry it in the same update_aliases request.
    #
    # Params:
    # +alias_name+::   The alias to move.
    # +index_name+::   The index the alias should point to.
    # +force_delete+:: (optional) When true, a plain index that already uses
    #                  the alias name is deleted first. Defaults to false.
    #
    # Raises ArgumentError when the two names are equal, when the target index
    # does not exist, or when a plain index already uses the alias name and
    # +force_delete+ is not set.
    def self.move_alias(alias_name, index_name, force_delete=false)
      # Callers that wrote `force_delete: true` hand us a Hash in the positional
      # slot; unwrap it so that `force_delete: false` is not treated as truthy.
      force_delete = force_delete[:force_delete] if force_delete.is_a?(Hash)

      if index_name == alias_name
        raise ArgumentError, "Can't point alias [#{alias_name}] to an index of the same name."
      end
      unless exists_index(index_name)
        raise ArgumentError, "Can't point alias to non-existent index [#{index_name}]."
      end

      # You cannot use an alias name if an index of the same name (that is not an alias) already exists.
      # This could happen if the index was auto-created before the alias was properly set up. In this
      # case, we either warn the user or delete the already existing index.
      if exists_index(alias_name) && !exists_alias(alias_name)
        if force_delete
          delete_index(alias_name)
        else
          raise ArgumentError, "Can't create alias [#{alias_name}] because there is already an " +
            "auto-generated index of the same name. Try again with force_delete=true to first " +
            "delete this pre-existing index."
        end
      end

      actions = [
        {add: {index: index_name, alias: alias_name}}
      ]

      begin
        response = Elasticsearch::Model.client.indices.get_alias(name: alias_name)
        # 0 is truthy in Ruby, so test for emptiness explicitly; an empty
        # response must not produce a remove action with a blank index name.
        unless response.empty?
          actions.unshift({remove: {index: response.keys.join(','), alias: alias_name}})
        end
      rescue Elasticsearch::Transport::Transport::Errors::NotFound
        # NOTE (CCB): Future versions of the Elasticsearch client support the ignore parameter,
        # that can be used to ignore 404 errors.
      end

      body = {actions: actions}
      Elasticsearch::Model.client.indices.update_aliases(body: body)
      LOG.info "Alias [#{alias_name}] now points to index [#{index_name}]."
    end

    # Asks Elasticsearch to refresh the index called +name+.
    def self.refresh_index(name)
      Elasticsearch::Model.client.indices.refresh(index: name)
    end

    # Imports +scope+ (a model class or a criteria built from one) into
    # +index_name+ and post-processes every batch response with a running
    # batch number.
    def self.import_in_batches(scope, index_name, batch_size, sleep_time)
      current_batch = 1
      scope.import(index: index_name, batch_size: batch_size) do |response|
        batch_import_post_process(response, current_batch, sleep_time)
        current_batch += 1
      end
    end
    private_class_method :import_in_batches
  end
end
......@@ -91,11 +91,6 @@ namespace :db do
end
task :seed => [:environment, :clean] do
Tire.index 'comment_threads' do
delete
end
CommentThread.create_elasticsearch_index
beginning_time = Time.now
(1..10).map { |id| create_test_user(id) }
......
require 'task_helpers'
namespace :search do
def import_from_cursor(cursor, index, opts)
tot = cursor.count
cnt = 0
t = Time.now
index.import cursor, {:method => :paginate, :per_page => opts[:batch_size]} do |documents|
if cnt % opts[:batch_size] == 0 then
elapsed_secs = (Time.now - t).round(2)
pct_complete = (100 * (cnt/tot.to_f)).round(2)
LOG.info "#{index.name}: imported #{cnt} of #{tot} (#{pct_complete}% complete after #{elapsed_secs} seconds)"
end
cnt += documents.length
sleep opts[:sleep_time]
documents
end
LOG.info "#{index.name}: finished importing #{cnt} documents"
cnt
end
def move_alias_to(name, index)
# if there was a previous index, switch over the alias to point to the new index
alias_ = Tire::Alias.find name
if alias_
# does the alias already point to this index?
if alias_.indices.include? index.name
return false
end
# remove the alias from wherever it points to now
LOG.info "alias already exists (will move): #{alias_.indices.to_ary.join(',')}"
alias_.indices.each do |old_index_name|
alias_.indices.delete old_index_name unless old_index_name == name
end
else
# create the alias
LOG.info "alias \"#{name}\" does not yet exist - creating."
alias_ = Tire::Alias.new :name => name
end
# point the alias at our new index
alias_.indices.add index.name
alias_.save
LOG.info "alias \"#{name}\" now points to index #{index.name}."
true
end
def do_reindex (opts, in_place=false)
start_time = Time.now
# create the new index with a unique name
new_index = TaskHelpers::ElasticsearchHelper.create_index
# unless the user is forcing a rebuild, or the index does not yet exist, we
# can do a Tire api reindex which is much faster than reimporting documents
# from mongo.
#
# Checking if the index exists is tricky. Tire automatically created an index
# for the model class when the app loaded if one did not already exist. However,
# it won't create an alias, which is what our app uses. So if the index exists
# but not the alias, we know that it's auto-created.
old_index = TaskHelpers::ElasticsearchHelper.get_index
alias_name = old_index.name
alias_ = Tire::Alias.find alias_name
if alias_.nil?
# edge case.
# the alias doesn't exist, so we know the index was auto-created.
# We will delete it and replace it with an alias.
raise RuntimeError, 'Cannot reindex in-place, no valid source index' if in_place
LOG.warn 'deleting auto-created index to make room for the alias'
old_index.delete
# NOTE on the small chance that another process re-auto-creates the index
# we just deleted before we have a chance to create the alias, this next
# call will fail.
move_alias_to(Content::ES_INDEX_NAME, new_index)
end
op = in_place ? 'reindex' : '(re)build index'
LOG.info "preparing to #{op}"
content_types = %w(Comment CommentThread)
if in_place
# reindex, moving source documents directly from old index to new
LOG.info 'copying documents from original index (this may take a while!)'
old_index.reindex new_index.name
LOG.info 'done copying!'
else
# fetch all the documents ever, up til start_time
cursor = Content.where(:_type.in => content_types, :updated_at.lte => start_time)
# import them to the new index
import_from_cursor(cursor, new_index, opts)
end
# move the alias if necessary
did_alias_move = move_alias_to(Content::ES_INDEX_NAME, new_index)
if did_alias_move
# Reimport any source documents that got updated since start_time,
# while the alias still pointed to the old index.
# Elasticsearch understands our document ids, so re-indexing the same
# document won't create duplicates.
LOG.info "importing any documents that changed between #{start_time} and now"
cursor = Content.where(:_type.in => content_types, :updated_at.gte => start_time)
import_from_cursor(cursor, new_index, opts)
end
end
# Re-indexes Comment/CommentThread documents updated in the last N minutes.
#
# Arguments:
#   minutes    - how far back to look (converted with to_i)
#   index_name - index or alias to import into; defaults to Content::ES_INDEX_NAME
#   batch_size - documents per batch; defaults to 500
#   sleep_time - seconds to sleep between batches; defaults to 0
desc 'Indexes content updated in the last N minutes.'
task :catchup, [:minutes, :index_name, :batch_size, :sleep_time] => :environment do |t, args|
  args.with_defaults(:index_name => Content::ES_INDEX_NAME, :batch_size => 500, :sleep_time => 0)
  start_time = Time.now - (args[:minutes].to_i * 60)

  TaskHelpers::ElasticsearchHelper.catchup_index(
    start_time,
    args[:index_name],
    args[:batch_size].to_i,
    args[:sleep_time].to_i
  )
end
def batch_opts(args)
args = args.to_hash
{:batch_size => args[:batch_size].nil? ? 500 : args[:batch_size].to_i,
:sleep_time => args[:sleep_time].nil? ? 0 : args[:sleep_time].to_i}
# Rebuilds a brand-new index from the database and, on request, hands the
# Content::ES_INDEX_NAME alias to the helper so it can be pointed at the result.
#
# Task arguments (rake delivers them as strings):
#   call_move_alias - when set, the alias name is passed to the helper; '', 'false',
#                     '0' and 'no' (case-insensitive) are treated as "do not move"
#   batch_size      - converted with to_i (default 500)
#   sleep_time      - converted with to_i (default 0)
desc 'Rebuilds a new index of all data from the database and then updates alias.'
task :rebuild_index, [:call_move_alias, :batch_size, :sleep_time] => :environment do |t, args|
  args.with_defaults(:call_move_alias => false, :batch_size => 500, :sleep_time => 0)

  # Every String is truthy in Ruby, so `rake search:rebuild_index[false]` (or an
  # empty first argument) used to move the alias anyway. Recognise the negative
  # spellings explicitly instead of relying on truthiness.
  requested = args[:call_move_alias].to_s.strip.downcase
  move_alias_requested = !['', 'false', '0', 'no'].include?(requested)

  alias_name = move_alias_requested ? Content::ES_INDEX_NAME : nil
  TaskHelpers::ElasticsearchHelper.rebuild_index(alias_name, args[:batch_size].to_i, args[:sleep_time].to_i)
end
# Deletes "orphaned" documents from the search index: documents whose ids are
# returned by Elasticsearch but are no longer found in MongoDB.
#
# Task arguments (both optional; defaults are supplied by batch_opts):
#   batch_size - divided by the index's shard count to get the scan size
#   sleep_time - passed to `sleep` after each scanned batch
desc 'Removes any data from Elasticsearch that no longer exists in MongoDB.'
task :prune, [:batch_size, :sleep_time] => :environment do |t, args|
opts = batch_opts args
the_index = TaskHelpers::ElasticsearchHelper.get_index
puts "pruning #{the_index.name}"
# This check makes sure we are working with the index to which
# the desired model's alias presently points.
alias_ = Tire::Alias.find the_index.name
raise RuntimeError, 'could not find live index' if alias_.nil?
# Spread the batch size across shards (presumably because a scan returns up to
# `size` hits per shard -- confirm against the Tire/Elasticsearch scan docs).
# NOTE(review): if the shard count is an Integer this is integer division and
# yields 0 whenever batch_size is smaller than the shard count.
scan_size = opts[:batch_size] / TaskHelpers::ElasticsearchHelper.get_index_shard_count(the_index.name)
# Running total of deleted documents across both document types.
cnt = 0
[CommentThread, Comment].each do |klass|
doc_type = klass.document_type
search = Tire::Search::Scan.new the_index.name, {size: scan_size, type: doc_type}
search.each do |results|
# ids of the documents in this batch of search results
es_ids = results.map(&:id)
# the subset of those ids that MongoDB still knows, as strings for comparison
mongo_ids = klass.where(:id.in => es_ids).map { |d| d.id.to_s }
# whatever is indexed but missing from MongoDB is an orphan
to_delete = es_ids - mongo_ids
if to_delete.size > 0
cnt += to_delete.size
puts "deleting #{to_delete.size} orphaned #{doc_type} documents from elasticsearch"
# Build one {"type", "id"} descriptor per orphan and delete them in a single bulk call.
the_index.bulk_delete (to_delete).map { |v| {"type" => doc_type, "id" => v} }
end
puts "#{the_index.name}/#{doc_type}: processed #{search.seen} of #{search.total}"
# Pause between batches for the configured sleep_time.
sleep opts[:sleep_time]
end
end
puts "done pruning #{the_index.name}, deleted a total of #{cnt} orphaned documents"
end
desc 'Rebuild the content index from MongoDB data.'
task :rebuild, [:batch_size, :sleep_time] => :environment do |t, args|
do_reindex(batch_opts(args))
# Delegates to the Elasticsearch helper to create an index; this task itself
# does nothing else (the alias is handled by the initialize / move_alias tasks).
desc 'Generate a new, empty physical index, without bringing it online.'
task create_index: :environment do
  es_helper = TaskHelpers::ElasticsearchHelper
  es_helper.create_index
end
desc 'Rebuild the content index from already-indexed data (in place).'
task :reindex, [:batch_size, :sleep_time] => :environment do |t, args|
do_reindex(batch_opts(args), true)
# Creates an index through the helper, then asks the helper to point the
# Content::ES_INDEX_NAME alias at whatever create_index returned.
desc 'Creates a new search index and points the "content" alias to it'
task initialize: :environment do
  es_helper = TaskHelpers::ElasticsearchHelper
  fresh_index = es_helper.create_index
  es_helper.move_alias(Content::ES_INDEX_NAME, fresh_index)
end
desc 'Generate a new, empty physical index, without bringing it online.'
task :create_index => :environment do
TaskHelpers::ElasticsearchHelper.create_index
# Points the Content::ES_INDEX_NAME alias at the given index.
#
# Task arguments (rake delivers them as strings):
#   index        - name of the index the alias should point to
#   force_delete - forces delete of an index with the same name as the alias if it
#                  exists; '', 'false', '0' and 'no' (case-insensitive) mean "no"
desc 'Sets/moves an alias to the specified index'
task :move_alias, [:index, :force_delete] => :environment do |t, args|
  args.with_defaults(:force_delete => false)

  # Every String is truthy in Ruby, so forwarding the raw argument meant that
  # `rake search:move_alias[some_index,false]` still asked for a forced delete.
  # Hand the helper a real boolean instead.
  requested = args[:force_delete].to_s.strip.downcase
  force_delete = !['', 'false', '0', 'no'].include?(requested)

  alias_name = Content::ES_INDEX_NAME
  TaskHelpers::ElasticsearchHelper.move_alias(alias_name, args[:index], force_delete)
end
end
require 'new_relic/agent/method_tracer'
require_relative 'concerns/searchable'
require_relative 'content'
require_relative 'constants'
......@@ -7,8 +8,7 @@ class Comment < Content
include Mongoid::Timestamps
include Mongoid::MagicCounterCache
include ActiveModel::MassAssignmentSecurity
include Tire::Model::Search
include Tire::Model::Callbacks
include Searchable
voteable self, :up => +1, :down => -1
......
require 'new_relic/agent/method_tracer'
require_relative 'concerns/searchable'
require_relative 'content'
require_relative 'constants'
......@@ -6,8 +7,7 @@ class CommentThread < Content
include Mongoid::Timestamps
include Mongoid::Attributes::Dynamic
include ActiveModel::MassAssignmentSecurity
include Tire::Model::Search
include Tire::Model::Callbacks
include Searchable
extend Enumerize
voteable self, :up => +1, :down => -1
......@@ -31,7 +31,6 @@ class CommentThread < Content
index({author_id: 1, course_id: 1})
index_name Content::ES_INDEX_NAME
mapping do
......@@ -40,10 +39,8 @@ class CommentThread < Content
indexes :created_at, type: :date, included_in_all: false
indexes :updated_at, type: :date, included_in_all: false
indexes :last_activity_at, type: :date, included_in_all: false
indexes :comment_count, type: :integer, included_in_all: false
indexes :votes_point, type: :integer, as: 'votes_point', included_in_all: false
indexes :context, type: :string, index: :not_analyzed, included_in_all: false
indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false
indexes :commentable_id, type: :string, index: :not_analyzed, included_in_all: false
......
# Concern that connects a model to Elasticsearch through elasticsearch-model.
#
# Including it mixes in Elasticsearch::Model, registers create/update/destroy
# callbacks that keep the search index in step with the record (only while
# CommentService.search_enabled? is true), and adds a class-level
# put_search_index_mapping helper.
module Searchable
  extend ActiveSupport::Concern

  included do
    include Elasticsearch::Model

    # We specify our own callbacks, instead of using Elasticsearch::Model::Callbacks, so that we can disable
    # indexing for tests where search functionality is not needed. This should improve test execution times.
    after_create :index_document
    after_update :update_indexed_document
    after_destroy :delete_document

    # Sends this model's mapping to Elasticsearch.
    #
    # index - name of the target index; defaults to the model's index_name.
    #
    # Logs a warning when the request was not acknowledged. Errors raised by
    # the client itself are not rescued here.
    def self.put_search_index_mapping(index=nil)
      index ||= self.index_name
      response = self.__elasticsearch__.client.indices.put_mapping(index: index, type: self.document_type, body: self.mappings.to_hash)
      # The client hands back the parsed response body (a Hash), which is truthy
      # whatever it contains, so testing the return value itself can never
      # trigger the warning. Look at the "acknowledged" flag instead; any
      # non-Hash return value keeps the plain truthiness test.
      acknowledged = response.is_a?(Hash) ? response['acknowledged'] : response
      unless acknowledged
        logger.warn "WARNING! could not apply search index mapping for #{self.name}"
      end
    end

    # Document body sent to Elasticsearch: the record's as_json output with
    # root: false merged into the given options.
    def as_indexed_json(options={})
      # TODO: Play with the `MyModel.indexes` method -- reject non-mapped attributes, `:as` options, etc
      self.as_json(options.merge root: false)
    end

    private # all methods below are private

    # after_create hook: index the new record when search is enabled.
    def index_document
      __elasticsearch__.index_document if CommentService.search_enabled?
    end

    # This is named in this manner to prevent collisions with Mongoid's update_document method.
    def update_indexed_document
      __elasticsearch__.update_document if CommentService.search_enabled?
    end

    # after_destroy hook: remove the record's document when search is enabled.
    def delete_document
      __elasticsearch__.delete_document if CommentService.search_enabled?
    end
  end
end
class Content
include Mongoid::Document
include Mongo::Voteable
ES_INDEX_NAME = 'content'
field :visible, type: Boolean, default: true
field :abuse_flaggers, type: Array, default: []
field :historical_abuse_flaggers, type: Array, default: [] #preserve abuse flaggers after a moderator unflags
......@@ -16,16 +18,6 @@ class Content
index({comment_thread_id: 1, endorsed: 1}, {sparse: true})
index({commentable_id: 1}, {sparse: true, background: true})
ES_INDEX_NAME = 'content'
# Applies this model's Tire mapping to a search index.
#
# idx - index object to receive the mapping; defaults to self.tire.index.
#
# Logs a warning (and does not raise) when the mapping call returns a falsy value.
def self.put_search_index_mapping(idx=nil)
idx ||= self.tire.index
# Register the model's properties under its Tire document type.
success = idx.mapping(self.tire.document_type, {:properties => self.tire.mapping})
unless success
logger.warn "WARNING! could not apply search index mapping for #{self.name}"
end
end
before_save :set_username
......
require 'spec_helper'
require 'unicode_shared_examples'
describe "app" do
describe "comment threads" do
describe 'app' do
describe 'comment threads' do
before(:each) { set_api_key_header }
......
......@@ -3,6 +3,7 @@ require 'faker'
describe 'app' do
include_context 'search_enabled'
before(:each) { set_api_key_header }
let(:body) { Faker::Lorem.word }
......
......@@ -3,15 +3,14 @@ require 'unicode_shared_examples'
describe "app" do
describe "search" do
include_context 'search_enabled'
before (:each) { set_api_key_header }
let(:author) { create_test_user(42) }
let(:course_id) { "test/course/id" }
def get_result_ids(result)
result["collection"].map {|t| t["id"]}
result["collection"].map { |t| t["id"] }
end
describe "GET /api/v1/search/threads" do
......@@ -31,6 +30,67 @@ describe "app" do
assert_empty_response
end
describe "search for updated/deleted comment/thread works" do
  let(:course_id) { 'test/course/id' }

  # Runs a thread search for +text+ restricted to the course under test.
  def search_threads(text)
    get '/api/v1/search/threads', course_id: course_id, text: text
  end

  # Checks that the latest response succeeded and reports +expected_total+ results.
  def assert_result_total(expected_total)
    last_response.should be_ok
    parse(last_response.body)["total_results"].should == expected_total
  end

  # Builds a record with the given factory, destroys it again, and refreshes
  # the index so the following search observes the deletion.
  def create_and_delete_comment_or_thread(factory_name, text)
    create(factory_name, course_id: course_id, body: text).destroy
    refresh_es_index
  end

  # Builds a record with +original_text+, saves it with +new_text+ as its body,
  # and refreshes the index so the following search observes the update.
  def update_comment_or_thread(factory_name, original_text, new_text)
    create(factory_name, course_id: course_id, body: original_text).tap do |record|
      record.body = new_text
      record.save!
    end
    refresh_es_index
  end

  it 'returns an empty result if thread is deleted' do
    deleted_text = 'thread-to-be-deleted-text'
    create_and_delete_comment_or_thread(:comment_thread, deleted_text)

    search_threads(deleted_text)
    assert_result_total(0)
  end

  it 'returns result only for updated thread' do
    before_text = 'thread-to-be-updated-original-text'
    after_text = 'thread-updated-text'
    update_comment_or_thread(:comment_thread, before_text, after_text)

    search_threads(before_text)
    assert_result_total(0)

    search_threads(after_text)
    assert_result_total(1)
  end

  it 'returns an empty result if comment is deleted' do
    deleted_text = 'comment-to-be-deleted-text'
    create_and_delete_comment_or_thread(:comment, deleted_text)

    search_threads(deleted_text)
    assert_result_total(0)
  end

  it 'returns result only for updated comment' do
    before_text = 'comment-to-be-updated-original-text'
    after_text = 'comment-updated-text'
    update_comment_or_thread(:comment, before_text, after_text)

    search_threads(before_text)
    assert_result_total(0)

    search_threads(after_text)
    assert_result_total(1)
  end
end
describe "filtering works" do
let!(:threads) do
threads = (0..34).map do |i|
......@@ -64,13 +124,13 @@ describe "app" do
last_response.should be_ok
result = parse(last_response.body)
actual_ids = Set.new get_result_ids(result)
expected_ids = Set.new expected_thread_indexes.map {|i| threads[i].id.to_s}
expected_ids = Set.new expected_thread_indexes.map { |i| threads[i].id.to_s }
actual_ids.should == expected_ids
end
it "by course_id" do
get "/api/v1/search/threads", text: "text", course_id: "test/course/id0"
assert_response_contains((0..29).find_all {|i| i % 2 == 0})
assert_response_contains((0..29).find_all { |i| i % 2 == 0 })
end
it "by context" do
......@@ -82,7 +142,7 @@ describe "app" do
user = create_test_user(Random.new)
user.mark_as_read(threads[0])
get "/api/v1/search/threads", text: "text", course_id: "test/course/id0", user_id: user.id, unread: true
assert_response_contains((1..29).find_all {|i| i % 2 == 0})
assert_response_contains((1..29).find_all { |i| i % 2 == 0 })
end
it "with flagged filter" do
......@@ -116,22 +176,22 @@ describe "app" do
it "by commentable_id" do
get "/api/v1/search/threads", text: "text", commentable_id: "commentable0"
assert_response_contains((0..29).find_all {|i| i % 3 == 0})
assert_response_contains((0..29).find_all { |i| i % 3 == 0 })
end
it "by commentable_ids" do
get "/api/v1/search/threads", text: "text", commentable_ids: "commentable0,commentable1"
assert_response_contains((0..29).find_all {|i| i % 3 == 0 || i % 3 == 1})
assert_response_contains((0..29).find_all { |i| i % 3 == 0 || i % 3 == 1 })
end
it "by group_id" do
get "/api/v1/search/threads", text: "text", group_id: "1"
assert_response_contains((0..29).find_all {|i| i % 5 == 0 || i % 5 == 1})
assert_response_contains((0..29).find_all { |i| i % 5 == 0 || i % 5 == 1 })
end
it "by group_ids" do
get "/api/v1/search/threads", text: "text", group_ids: "1,2"
expected_ids = (0..29).find_all {|i| i % 5 == 0 || i % 5 == 1 || i % 5 == 2}
expected_ids = (0..29).find_all { |i| i % 5 == 0 || i % 5 == 1 || i % 5 == 2 }
assert_response_contains(expected_ids)
end
......@@ -143,8 +203,8 @@ describe "app" do
describe "sorting works" do
let!(:threads) do
threads = (0..5).map {|i| make_thread(author, "text", course_id, "dummy")}
[1, 2].map {|i| author.vote(threads[i], :up)}
threads = (0..5).map { |i| make_thread(author, "text", course_id, "dummy") }
[1, 2].map { |i| author.vote(threads[i], :up) }
[1, 3].map do |i|
threads[i].comment_count = 5
threads[i].save!
......@@ -159,7 +219,7 @@ describe "app" do
last_response.should be_ok
result = parse(last_response.body)
actual_ids = get_result_ids(result)
expected_ids = expected_thread_indexes.map {|i| threads[i].id.to_s}
expected_ids = expected_thread_indexes.map { |i| threads[i].id.to_s }
actual_ids.should == expected_ids
end
......@@ -186,7 +246,7 @@ describe "app" do
describe "pagination" do
let!(:threads) do
threads = (1..50).map {|i| make_thread(author, "text", course_id, "dummy")}
threads = (1..50).map { |i| make_thread(author, "text", course_id, "dummy") }
refresh_es_index
threads
end
......@@ -199,7 +259,7 @@ describe "app" do
result = parse(last_response.body)
result_ids += get_result_ids(result)
end
result_ids.should == threads.reverse.map {|t| t.id.to_s}
result_ids.should == threads.reverse.map { |t| t.id.to_s }
end
it "works correctly with page size 1" do
......@@ -216,7 +276,7 @@ describe "app" do
end
describe "spelling correction" do
let(:commentable_id) {"test_commentable"}
let(:commentable_id) { "test_commentable" }
def check_correction(original_text, corrected_text)
get "/api/v1/search/threads", text: original_text
......@@ -281,8 +341,8 @@ describe "app" do
end
end
it "returns the correct values for total_results and num_pages" do
course_id = "test/course/id"
it 'returns the correct values for total_results and num_pages' do
course_id = 'test/course/id'
for i in 1..100 do
text = "all"
text += " half" if i % 2 == 0
......@@ -291,15 +351,14 @@ describe "app" do
text += " one" if i == 100
# There is currently a bug that causes only 10 threads with matching
# titles/bodies to be considered, so this test case uses comments.
thread = make_thread(author, "dummy text", course_id, "dummy_commentable")
make_comment(author, thread, text)
create(:comment, course_id: course_id, body: text)
end
# Elasticsearch does not necessarily make newly indexed content
# available immediately, so we must explicitly refresh the index
refresh_es_index
test_text = lambda do |text, expected_total_results, expected_num_pages|
get "/api/v1/search/threads", course_id: course_id, text: text, per_page: "10"
get '/api/v1/search/threads', course_id: course_id, text: text, per_page: '10'
last_response.should be_ok
result = parse(last_response.body)
result["total_results"].should == expected_total_results
......
require 'spec_helper'
require 'elasticsearch'
describe TaskHelpers do
  describe TaskHelpers::ElasticsearchHelper do
    let(:es_helper) { TaskHelpers::ElasticsearchHelper }
    let(:alias_name) { 'test_alias' }

    # Remove whatever ended up under the test alias name after every example.
    after(:each) { es_helper.delete_index(alias_name) }

    # Expects the first index reported for +alias_name+ to be +index_name+.
    def assert_alias_points_to_index(alias_name, index_name)
      aliased = Elasticsearch::Model.client.indices.get_alias(name: alias_name)
      aliased.keys[0].should == index_name
    end

    # Number of hits a match-all search reports for +index+ (name or alias).
    def hit_count(index)
      Elasticsearch::Model.client.search(index: index)['hits']['total']
    end

    context("#move_alias") do
      # A fresh index per example, removed again afterwards.
      let!(:index_name) { es_helper.create_index }

      after(:each) { es_helper.delete_index(index_name) }

      it "points alias to index" do
        es_helper.move_alias(alias_name, index_name)
        assert_alias_points_to_index(alias_name, index_name)
      end

      it "fails when alias is same as index_name" do
        expect { es_helper.move_alias(index_name, index_name) }.to raise_error
      end

      it "fails when index doesn't exist" do
        expect { es_helper.move_alias(alias_name, 'missing_index') }.to raise_error
      end

      it "fails when index of same name as alias exists" do
        es_helper.create_index(alias_name)
        expect { es_helper.move_alias(alias_name, index_name) }.to raise_error
      end

      it "points alias to index when index of same name as alias is deleted" do
        es_helper.create_index(alias_name)
        es_helper.move_alias(alias_name, index_name, true) # third argument: force_delete
        assert_alias_points_to_index(alias_name, index_name)
      end
    end

    context("#rebuild_index") do
      include_context 'search_enabled'

      # Leaves one thread in the database while removing the content index, so
      # a rebuild has something to index.
      def create_thread_and_delete_index
        create(:comment_thread, body: 'the best test body', course_id: 'test_course_id')
        refresh_es_index
        TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME)
      end

      it "builds new index without switching alias" do
        create_thread_and_delete_index
        rebuilt_index = es_helper.rebuild_index
        refresh_es_index(rebuilt_index)
        hit_count(rebuilt_index).should be > 0
      end

      it "builds new index and points alias to it" do
        create_thread_and_delete_index
        es_helper.rebuild_index(alias_name)
        refresh_es_index(alias_name)
        hit_count(alias_name).should be > 0
      end

      it "builds new index and points alias to it, first deleting index with same name as alias" do
        create_thread_and_delete_index
        es_helper.create_index(alias_name)
        es_helper.rebuild_index(alias_name)
        refresh_es_index(alias_name)
        hit_count(alias_name).should be > 0
      end
    end
  end
end
\ No newline at end of file
require 'spec_helper'
require 'elasticsearch'
describe "search:rebuild_index" do
  include_context "rake"

  let(:es_helper) { TaskHelpers::ElasticsearchHelper }

  # Keep the real helper from running; examples only check how it is called.
  before { es_helper.stub!(:rebuild_index) }

  its(:prerequisites) { should include("environment") }

  it "calls rebuild_index with defaults" do
    es_helper.should_receive(:rebuild_index).with(nil, 500, 0)
    subject.invoke
  end

  it "calls rebuild_index with arguments" do
    # Rake hands task arguments over as strings; the helper is expected to
    # receive the alias name plus integer batch size and sleep time.
    rake_args = { call_move_alias: 'true', batch_size: '100', sleep_time: '2' }
    es_helper.should_receive(:rebuild_index).with(
      Content::ES_INDEX_NAME, rake_args[:batch_size].to_i, rake_args[:sleep_time].to_i
    )
    subject.invoke(rake_args[:call_move_alias], rake_args[:batch_size], rake_args[:sleep_time])
  end
end
describe "search:catchup" do
  include_context "rake"

  let(:es_helper) { TaskHelpers::ElasticsearchHelper }

  # Keep the real helper from running; examples only check how it is called.
  before { es_helper.stub!(:catchup_index) }

  its(:prerequisites) { should include("environment") }

  it "calls catchup with defaults" do
    # With no minutes given, the start time handed to the helper should be "now".
    es_helper.should_receive(:catchup_index).with(
      anything, Content::ES_INDEX_NAME, 500, 0
    ) do |start_time_arg|
      start_time_arg.should be_within(1.second).of Time.now
    end

    subject.invoke
  end

  it "calls catchup with arguments" do
    # Rake hands task arguments over as strings.
    rake_args = { minutes: '2', index_name: 'some_index', batch_size: '100', sleep_time: '2' }
    # The start time lies `minutes` in the past; allow one extra second of slack.
    tolerance = (60 * rake_args[:minutes].to_i + 1).second

    es_helper.should_receive(:catchup_index).with(
      anything, rake_args[:index_name], rake_args[:batch_size].to_i, rake_args[:sleep_time].to_i
    ) do |start_time_arg|
      start_time_arg.should be_within(tolerance).of Time.now
    end

    subject.invoke(rake_args[:minutes], rake_args[:index_name], rake_args[:batch_size], rake_args[:sleep_time])
  end
end
\ No newline at end of file
......@@ -16,6 +16,10 @@ require 'yajl'
require 'support/database_cleaner'
require 'support/elasticsearch'
require 'support/factory_girl'
require 'support/rake'
require 'webmock/rspec'
WebMock.allow_net_connect!
# setup test environment
set :environment, :test
......
def delete_es_index
Tire.index Content::ES_INDEX_NAME do
delete
end
require 'task_helpers'
# Asks the Elasticsearch helper to refresh an index.
#
# index_name - index (or alias) to refresh; Content::ES_INDEX_NAME is used
#              when it is nil or false.
def refresh_es_index(index_name=nil)
  TaskHelpers::ElasticsearchHelper.refresh_index(index_name || Content::ES_INDEX_NAME)
end
def create_es_index
new_index = Tire.index Content::ES_INDEX_NAME
new_index.create
[CommentThread, Comment].each do |klass|
klass.put_search_index_mapping
RSpec.shared_context 'search_enabled' do
before(:all) do
CommentService.config[:enable_search] = true
# Delete any previously created index to ensure our search tests start
# with a clean slate. Each test will recreate the index.
TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME)
end
after(:each) do
# Delete the index after each test so it will be re-created.
TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME)
end
end
def refresh_es_index
es_index_name = Content::ES_INDEX_NAME
Tire.index es_index_name do
refresh
after(:all) do
# Ensure that subsequent tests, that do not require search, are unaffected by search.
CommentService.config[:enable_search] = false
# Ensure (once more) the index was deleted.
TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME)
end
end
RSpec.configure do |config|
config.before(:suite) do
CommentService.config[:enable_search] = false
end
config.before(:each) do
delete_es_index
create_es_index
# Create the index before each test if it doesn't exist.
if not TaskHelpers::ElasticsearchHelper.exists_alias(Content::ES_INDEX_NAME)
test_index = TaskHelpers::ElasticsearchHelper.create_index
TaskHelpers::ElasticsearchHelper.move_alias(Content::ES_INDEX_NAME, test_index)
end
end
config.after(:suite) do
TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME)
end
end
require "rake"
# Shared setup for rake task specs. The top-level example group description is
# taken as the task name (e.g. "search:catchup"); the rake file to load is
# lib/tasks/<part before the first colon>, and `subject` is the task itself.
shared_context "rake" do
  let(:rake) { Rake::Application.new }
  let(:task_name) { self.class.top_level_description }
  let(:task_path) { "lib/tasks/#{task_name.split(":").first}" }

  subject { rake[task_name] }

  # Everything already loaded except the rake file under test; handed to
  # rake_require as its list of loaded files.
  def loaded_files_excluding_current_rake_file
    current_rake_file = File.absolute_path("#{task_path}.rake").to_s
    $LOADED_FEATURES.reject { |loaded| loaded == current_rake_file }
  end

  before do
    Rake.application = rake
    rake.rake_require(task_path, [rake.original_dir], loaded_files_excluding_current_rake_file)
    # Stand-in for the :environment prerequisite the tasks depend on.
    Rake::Task.define_task(:environment)
  end
end
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment