Commit a1f28e9e by Robert Raposa

Adjust and add search rake tasks.

- Make search:initialize idempotent.
- Add rake tasks for put_mappings and validate_index.
parent 13f5ffac
......@@ -9,8 +9,6 @@ bundle install
# allow dependent services to finish start up (e.g. ElasticSearch, Mongo)
sleep 10
bin/rake search:initialize
# Use 'bin/rspec -fd' to print test names for debugging
# Printing test names can be especially helpful for tracking down test
# failure differences between Travis and Mac, because tests are loaded
......
......@@ -35,6 +35,8 @@ Install the requisite gems:
$ bundle install
To initialize the index:
Set up the search index. Note that the command below creates an index with a unique name (e.g.
content_20161220185820323), and assigns it a known alias: content. If you choose not to use the command below, you
should still opt to reference your index by an alias rather than the actual index name. This will enable you to swap out
......@@ -44,11 +46,13 @@ indices (e.g. rebuild_index) without having to take downtime or modify code with
$ bin/rake search:initialize
To rebuild a new index without moving the alias and without running catchup, do the following:
To validate the 'content' alias exists and contains the proper mappings:
.. code-block:: bash
$ bin/rake search:rebuild_index
$ bin/rake search:validate_index
To rebuild the index:
To rebuild a new index from the database and then point the alias 'content' to it, you can use the
rebuild_index task. This task will also run catchup before and after the alias is moved, to minimize time where the
......@@ -56,7 +60,13 @@ alias does not contain all documents.
.. code-block:: bash
$ bin/rake search:rebuild_index[true]
$ bin/rake search:rebuild_index
To rebuild a new index without moving the alias and without running catchup, use the following:
.. code-block:: bash
$ bin/rake search:rebuild_index[false]
You can also adjust the batch size (e.g. 200) and the sleep time (e.g. 2 seconds) between batches to lighten the load
on MongoDB.
......
......@@ -16,9 +16,6 @@ end
LOG = Logger.new(STDERR)
# Indicates whether this is being run from within a 'search:' task in rake.
RAKE_SEARCH = (Rake.application.top_level_tasks.select {|task| task.include? 'search:'}).any?
desc 'Load the environment'
task :environment do
# Load all of app.rb to keep rake and the app as similar as possible.
......
......@@ -48,10 +48,12 @@ Mongo::Logger.logger.level = ENV["ENABLE_MONGO_DEBUGGING"] ? Logger::DEBUG : Log
# Setup Elasticsearch
# NOTE (CCB): If you want to see all data sent to Elasticsearch (e.g. for debugging purposes), set the tracer argument
# to the value of a logger.
# Example: Elascisearch.Client.new(tracer: get_logger('elasticsearch.tracer'))
# Example: Elasticsearch::Client.new(tracer: get_logger('elasticsearch.tracer'))
# NOTE: You can also add a logger, but it will log some FATAL warning during index creation.
# Example: Elasticsearch::Client.new(logger: get_logger('elasticsearch', Logger::WARN))
Elasticsearch::Model.client = Elasticsearch::Client.new(
host: CommentService.config[:elasticsearch_server],
logger: get_logger('elasticsearch', Logger::WARN)
log: false
)
# Setup i18n
......@@ -71,14 +73,6 @@ Dir[File.dirname(__FILE__) + '/lib/**/*.rb'].each { |file| require file }
Dir[File.dirname(__FILE__) + '/models/*.rb'].each { |file| require file }
Dir[File.dirname(__FILE__) + '/presenters/*.rb'].each { |file| require file }
$check_index_mapping_exists = defined?(RAKE_SEARCH) === nil || RAKE_SEARCH === false
if $check_index_mapping_exists
# Ensure Elasticsearch index mappings exist, unless we are creating it in the rake search initialize
Comment.put_search_index_mapping
CommentThread.put_search_index_mapping
end
# Comment out observers until notifications are actually set up properly.
#Dir[File.dirname(__FILE__) + '/models/observers/*.rb'].each {|file| require file}
#Mongoid.observers = PostReplyObserver, PostTopicObserver, AtUserObserver
......
......@@ -57,12 +57,9 @@ module TaskHelpers
# Creates a new Elasticsearch index, applies the Comment/CommentThread mappings to it,
# logs the new index name and returns it.
#
# Params:
# +name+:: Name of the index to create. When nil, a name is generated from
#          Content::ES_INDEX_NAME followed by a millisecond-resolution timestamp.
#
# NOTE(review): this listing contains two indices.create calls for the same name (one bare
# call followed by put_mappings, one passing the merged mappings as the request body). It
# looks like the removed and added sides of a change are shown together — confirm which
# variant is live before relying on this method.
def self.create_index(name=nil)
name ||= "#{Content::ES_INDEX_NAME}_#{Time.now.strftime('%Y%m%d%H%M%S%L')}"
# Merge the mapping hashes of both searchable models into a single hash.
mappings = {}
[Comment, CommentThread].each do |model|
mappings.merge! model.mappings.to_hash
end
Elasticsearch::Model.client.indices.create(index: name)
put_mappings(name)
Elasticsearch::Model.client.indices.create(index: name, body: {mappings: mappings})
LOG.info "Created new index: #{name}."
name
end
......@@ -142,5 +139,71 @@ module TaskHelpers
def self.refresh_index(name)
Elasticsearch::Model.client.indices.refresh(index: name)
end
# Ensures that +alias_name+ points at a search index, creating a fresh index when needed.
# When the alias already exists and no new index is forced, nothing is done.
#
# Params:
# +alias_name+:: The alias to create or repoint.
# +force_new_index+:: When true, a fresh index is created for the alias even if the alias
#                     already exists. Rake hands task arguments over as strings, so nil,
#                     false, "", "false" and "0" (any letter case) all count as false.
def self.initialize_index(alias_name, force_new_index)
  # A bare truthiness check would treat the string "false" (as supplied by
  # `rake search:initialize[false]`) as true and rebuild the index unexpectedly.
  force = !['', 'false', '0'].include?(force_new_index.to_s.strip.downcase)
  return unless force || !exists_alias(alias_name)

  index_name = create_index
  # WARNING: if an index exists with the same name as the intended alias, it
  # will be deleted.
  move_alias(alias_name, index_name, force_delete: true)
end
# Pushes the field mappings of each searchable model to the given index.
#
# Params:
# +name+:: The index whose mappings are to be added or updated.
def self.put_mappings(name)
  # Order matters as of ES 0.9: CommentThread must come before Comment, otherwise unit
  # tests fail. Whether that is an ES defect, a test defect, or neither is unclear.
  ordered_models = [CommentThread, Comment]
  indices_api = Elasticsearch::Model.client.indices
  ordered_models.each do |searchable|
    indices_api.put_mapping(
      index: name,
      type: searchable.document_type,
      body: searchable.mappings.to_hash
    )
  end
  LOG.info "Added mappings to index: #{name}."
end
# Validates that the alias exists and its index includes the proper mappings.
# There is no return value, but an exception is raised if the alias is invalid.
#
# Params:
# +alias_name+:: The alias name to be validated.
#
# Raises RuntimeError with a descriptive message when the alias does not exist, when the
# document types (or their order) differ from those of the models, or when an expected
# field is missing or has a different type.
def self.validate_index(alias_name)
  fail "Alias #{alias_name} does not exist." unless exists_alias(alias_name)

  # The response is presumably keyed by the concrete index behind the alias, so the first
  # value is used. Fall back to an empty hash so that an index without any mappings is
  # reported as a mismatch below instead of crashing with NoMethodError.
  actual_mapping = Elasticsearch::Model.client.indices.get_mapping(index: alias_name).values.first || {}

  expected_mapping = [CommentThread, Comment].each_with_object({}) do |model, merged|
    merged.merge! model.mappings.to_hash
  end

  # As of ES 0.9, the order the mappings are created in matters. See put_mappings.
  # Compare document types and order
  expected_mapping_keys = expected_mapping.keys.map(&:to_s)
  if actual_mapping.keys != expected_mapping_keys
    fail "Actual mapping types [#{actual_mapping.keys}] does not match expected mapping types (including order) [#{expected_mapping.keys}]."
  end

  # Check that expected field mappings of the correct type exist
  expected_mapping.each do |doc_type, type_mapping|
    # A type without any 'properties' simply has every expected field missing.
    actual_properties = actual_mapping[doc_type.to_s]['properties'] || {}
    missing_fields = []
    invalid_field_types = []

    type_mapping[:properties].each do |property, definition|
      unless actual_properties.key?(property.to_s)
        missing_fields.push(property)
        next
      end

      expected_type = definition[:type].to_s
      actual_type = actual_properties[property.to_s]['type']
      if actual_type != expected_type
        invalid_field_types.push("'#{property}' type '#{actual_type}' should be '#{expected_type}'")
      end
    end

    if missing_fields.any? || invalid_field_types.any?
      fail "Document type '#{doc_type}' has missing or invalid field mappings. Missing fields: #{missing_fields}. Invalid types: #{invalid_field_types}."
    end
  end

  nil
end
end
end
......@@ -31,9 +31,19 @@ namespace :search do
end
desc 'Creates a new search index and points the "content" alias to it'
task :initialize => :environment do
index = TaskHelpers::ElasticsearchHelper.create_index
TaskHelpers::ElasticsearchHelper.move_alias(Content::ES_INDEX_NAME, index)
# Usage: bin/rake search:initialize[force_new_index]
# NOTE(review): rake passes bracketed task arguments as strings, so an explicit
# search:initialize[false] hands the truthy string "false" to the helper — confirm that
# the helper (or this task) converts it to a boolean before it is tested.
task :initialize, [:force_new_index] => :environment do |t, args|
# When force_new_index is true, a fresh index for "content" alias is created even if the
# "content" alias already exists.
args.with_defaults(:force_new_index => false)
# WARNING: if "content" is an index and not an alias, it will be deleted and recreated
# no matter what is supplied for the force argument
TaskHelpers::ElasticsearchHelper.initialize_index(Content::ES_INDEX_NAME, args[:force_new_index])
end
desc 'Updates field mappings for the given index.'
# Usage: bin/rake search:put_mappings[index]
task :put_mappings, [:index] => :environment do |_task, task_args|
  # Without an explicit argument, fall back to the name held in Content::ES_INDEX_NAME.
  task_args.with_defaults(index: Content::ES_INDEX_NAME)
  target_index = task_args[:index]
  TaskHelpers::ElasticsearchHelper.put_mappings(target_index)
end
desc 'Sets/moves an alias to the specified index'
......@@ -43,4 +53,10 @@ namespace :search do
alias_name = Content::ES_INDEX_NAME
TaskHelpers::ElasticsearchHelper.move_alias(alias_name, args[:index], args[:force_delete])
end
desc 'Validates that the "content" alias exists with expected field mappings and types.'
task validate_index: :environment do
  # Delegates to the helper with the name held in Content::ES_INDEX_NAME.
  alias_name = Content::ES_INDEX_NAME
  TaskHelpers::ElasticsearchHelper.validate_index(alias_name)
end
end
......@@ -10,14 +10,6 @@ module Searchable
after_update :update_indexed_document
after_destroy :delete_document
# Applies this model's search mapping to an Elasticsearch index.
#
# Params:
# +index+:: Name of the target index; defaults to the model's own index_name when nil.
#
# A falsy result from put_mapping is only logged as a warning; nothing is raised here.
def self.put_search_index_mapping(index=nil)
index ||= self.index_name
# The mapping is sent for this model's document_type, with its mappings hash as the body.
success = self.__elasticsearch__.client.indices.put_mapping(index: index, type: self.document_type, body: self.mappings.to_hash)
unless success
logger.warn "WARNING! could not apply search index mapping for #{self.name}"
end
end
def as_indexed_json(options={})
# TODO: Play with the `MyModel.indexes` method -- reject non-mapped attributes, `:as` options, etc
self.as_json(options.merge root: false)
......
......@@ -86,7 +86,27 @@ describe TaskHelpers do
Elasticsearch::Model.client.search(index: alias_name)['hits']['total'].should be > 0
end
end
# Covers ElasticsearchHelper.validate_index for the alias named by Content::ES_INDEX_NAME.
context "#validate_index" do
  include_context 'search_enabled'

  subject { TaskHelpers::ElasticsearchHelper.validate_index(Content::ES_INDEX_NAME) }

  it "validates the 'content' alias exists with proper mappings" do
    # Passes as long as evaluating the subject does not raise.
    subject
  end

  it "fails if the alias doesn't exist" do
    TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME)

    expect { subject }.to raise_error(RuntimeError)
  end

  it "fails if the alias has the wrong mappings" do
    # Remove the Comment mapping so the index no longer has every expected document type.
    indices_api = Elasticsearch::Model.client.indices
    indices_api.delete_mapping(index: Content::ES_INDEX_NAME, type: Comment.document_type)

    expect { subject }.to raise_error(RuntimeError)
  end
end
end
end
\ No newline at end of file
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment