Commit 752c9a11 by Max Rothman Committed by Feanil Patel

Add ES migration scripts

parent 7542cd25
#!/usr/bin/env bash
set -euo pipefail
#
# Thin wrapper around logstash. You will first have to install logstash. Simply
# downloading the tar.gz from their site is sufficient. Note that logstash may have
# different JVM version requirements than what is available on your machine.
#
# https://www.elastic.co/products/logstash
#
# Assumes that logstash is in your path.
#
# Copies an index from an elasticsearch source server to a target server.
# The target server can be the same as the source.
#
# Usage:
# copy-index.sh SOURCE_SERVER SOURCE_INDEX TARGET_SERVER TARGET_INDEX [WORKERS]
#
# Example:
# ./copy-index.sh http://localhost source_index http://localhost target_index
#
# Fail early with a usage message: under `set -u` a missing positional
# parameter would otherwise abort with only a bare "unbound variable" error.
if [[ $# -lt 4 ]]; then
  printf 'Usage: %s SOURCE_SERVER SOURCE_INDEX TARGET_SERVER TARGET_INDEX [WORKERS]\n' "${0##*/}" >&2
  exit 1
fi

SOURCE_SERVER=$1
SOURCE_INDEX=$2
TARGET_SERVER=$3
TARGET_INDEX=$4
# Number of logstash pipeline workers (-w); defaults to 6 when omitted or empty.
WORKERS="${5:-6}"

if [[ ! "$WORKERS" =~ ^[1-9][0-9]*$ ]]; then
  printf 'ERROR: WORKERS must be a positive integer, got: %s\n' "$WORKERS" >&2
  exit 1
fi

# The header documents that logstash must be on PATH; check it before doing
# any work so the failure is explicit.
if ! command -v logstash >/dev/null 2>&1; then
  printf 'ERROR: logstash not found in PATH\n' >&2
  exit 127
fi

# Build the complete logstash pipeline config (input, output and filter
# sections) as one string. The here-doc delimiter is unquoted on purpose so
# the server and index variables are expanded into the config.
# -r keeps any backslashes in the expanded values intact.
read -r -d '' logstash_config <<EOF || true #read won't find its delimiter and exit with status 1, this is intentional
input {
elasticsearch {
hosts => "$SOURCE_SERVER"
index => "$SOURCE_INDEX" #content for forums
scroll => "12h" #must be as long as the run takes to complete
scan => true #scan through all indexes efficiently
docinfo => true #necessary to move document_type and document_id over
}
}
output {
elasticsearch {
hosts => "$TARGET_SERVER"
index => "$TARGET_INDEX" #same as above
manage_template => false
document_type => "%{[@metadata][_type]}"
document_id => "%{[@metadata][_id]}"
}
stdout {
codec => "dots" #Print a dot when stuff gets moved so we know it's working
}
}
filter {
mutate {
remove_field => ["@timestamp", "@version"] #these fields get added by logstash for some reason
}
}
EOF

# Run logstash with the inline config (-e) and the requested worker count (-w).
logstash -w "$WORKERS" -e "$logstash_config"
#!/usr/bin/env bash
set -euo pipefail
#
# Thin wrapper around rake search:catchup for cs_comment_service (forums).
#
# Reindexes documents created since WINDOW ago.
# If SLEEP_TIME is set to any number greater than 0, loops indefinitely. Since re-
# indexing can only yield correct results, the only risk of setting WINDOW too large
# is poor performance.
#
# Usage:
# source ../forum_env; ./incremental-reindex.sh INDEX [WINDOW] [SLEEP_TIME] [BATCH_SIZE]
#
# Args:
# INDEX The index to re-index
# WINDOW Number of minutes ago to re-index from
# SLEEP_TIME Number of seconds to sleep between re-indexing
# BATCH_SIZE Number of documents to index per batch
#
# Example:
# ./incremental-reindex.sh content 30
#
# Fail early with a usage message: under `set -u` a missing INDEX would
# otherwise abort with only a bare "unbound variable" error.
if [[ $# -lt 1 ]]; then
  printf 'Usage: %s INDEX [WINDOW] [SLEEP_TIME] [BATCH_SIZE]\n' "${0##*/}" >&2
  exit 1
fi

INDEX="$1"
WINDOW="${2:-5}"        # minutes
SLEEP_TIME="${3:-60}"   # seconds; 0 means run once and exit
BATCH_SIZE="${4:-500}"  # documents per batch

# Validate the numeric arguments explicitly. A non-numeric value would make
# the comparison below error out inside `if`, which is treated as "false" and
# would silently skip the safety check.
for numeric_arg in WINDOW SLEEP_TIME BATCH_SIZE; do
  if [[ ! "${!numeric_arg}" =~ ^[0-9]+$ ]]; then
    printf 'ERROR: %s must be a non-negative integer, got: %s\n' \
      "$numeric_arg" "${!numeric_arg}" >&2
    exit 1
  fi
done

# The sleep between runs must be strictly shorter than the re-index window,
# otherwise documents created in the gap are never picked up.
# The 10# prefix forces base 10 so values like "08" are not parsed as octal.
if (( 10#$SLEEP_TIME >= 10#$WINDOW * 60 )); then
  echo 'ERROR: SLEEP_TIME must be shorter than WINDOW, or else documents may be missed.' >&2
  exit 1
fi

while :; do
  echo "reindexing documents newer than $WINDOW minutes..."
  # Quote the whole task spec: unquoted [...] is a glob pattern and could be
  # expanded against file names in the current directory.
  rake "search:catchup[$WINDOW,$INDEX,$BATCH_SIZE]"

  # SLEEP_TIME of 0 means a single pass: stop without sleeping.
  if (( 10#$SLEEP_TIME <= 0 )); then
    echo "done."
    break
  fi

  echo "done. Sleeping $SLEEP_TIME seconds..."
  sleep "$SLEEP_TIME"
done
deepdiff==3.1.0
elasticsearch==0.4.5
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment