Commit 66d14ff2 by Will Daly

Add a role for installing NLTK data

Add dependencies for edxapp and ora on the NLTK role
Install minimal NLTK data used by EASE
parent 76ca39e5
......@@ -6,6 +6,7 @@ dependencies:
rbenv_dir: "{{ edxapp_app_dir }}"
rbenv_ruby_version: "{{ edxapp_ruby_version }}"
- devpi
- nltk
- role: user
user_info:
- name: "{{ EDXAPP_AUTOMATOR_NAME }}"
......
---
# Base directory that holds the NLTK data archives and their unpacked contents.
NLTK_DATA_DIR: "/usr/local/share/nltk_data"

# Data packages to install. Each entry provides:
#   path: package location relative to NLTK_DATA_DIR
#   url:  zip archive to fetch for that package
#
# An archive that has already been downloaded is never fetched again.
# To roll out a new revision of a data file, upload your own copy with
# the version appended to the filename and point `url` at it.
NLTK_DATA:
  - path: "taggers/maxent_treebank_pos_tagger"
    url: "http://nltk.github.com/nltk_data/packages/taggers/maxent_treebank_pos_tagger.zip"
  - path: "corpora/stopwords"
    url: "http://nltk.github.com/nltk_data/packages/corpora/stopwords.zip"
  - path: "corpora/wordnet"
    url: "http://nltk.github.com/nltk_data/packages/corpora/wordnet.zip"
---
# Install the NLTK data packages listed in NLTK_DATA under NLTK_DATA_DIR:
# create the target directories, download each zip archive, then unpack it.

# Each archive is unpacked into the parent directory of its `path`
# (e.g. "corpora" for "corpora/wordnet"), so that directory must exist first.
- name: create the nltk data directory and subdirectories
  file: path={{ NLTK_DATA_DIR }}/{{ item.path|dirname }} state=directory
  with_items: NLTK_DATA
  tags:
    - deploy

# The archive is stored directly in NLTK_DATA_DIR under its original
# filename. The result is registered so the unarchive step can tell
# whether anything new was fetched.
- name: download nltk data
  get_url: >
    dest={{ NLTK_DATA_DIR }}/{{ item.url|basename }}
    url={{ item.url }}
  with_items: NLTK_DATA
  register: nltk_download
  tags:
    - deploy

# copy=no: the archive already lives on the target host (the download task
# put it there). Without it, unarchive treats `src` as a path on the
# control machine and tries to copy it over, which fails because the
# file does not exist there.
# Unpacking only runs when the download task reported a change.
- name: unarchive nltk data
  unarchive: >
    src={{ NLTK_DATA_DIR }}/{{ item.url|basename }}
    dest={{ NLTK_DATA_DIR }}/{{ item.path|dirname }}
    copy=no
  with_items: NLTK_DATA
  when: nltk_download|changed
  tags:
    - deploy
......@@ -11,10 +11,6 @@ ora_venv_dir: "{{ ora_venvs_dir }}/ora"
ora_venv_bin: "{{ ora_venv_dir }}/bin"
ora_user: "ora"
ora_deploy_path: "{{ ora_venv_bin }}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
ora_nltk_data_dir: "{{ ora_data_dir}}/nltk_data"
ora_nltk_download_url: http://edx-static.s3.amazonaws.com/nltk/nltk-data-20131113.tar.gz
ora_nltk_tmp_file: "{{ ora_data_dir }}/nltk.tmp.tar.tz"
ora_source_repo: https://github.com/edx/edx-ora.git
ora_version: 'master'
......@@ -143,7 +139,6 @@ ora_auth_config:
ora_environment:
SERVICE_VARIANT: ora
NLTK_DATA: $ora_nltk_data_dir
LANG: $ORA_LANG
PATH: $ora_deploy_path
......
---
# Roles this role depends on.
dependencies:
  - role: supervisor
  # Installs the NLTK data.
  - role: nltk
......@@ -43,17 +43,3 @@
notify:
- restart ora
- restart ora_celery
- name: download and install nltk
shell: |
set -e
curl -o {{ ora_nltk_tmp_file }} {{ ora_nltk_download_url }}
tar zxf {{ ora_nltk_tmp_file }}
rm -f {{ ora_nltk_tmp_file }}
touch {{ ora_nltk_download_url|basename }}-installed
creates={{ ora_data_dir }}/{{ ora_nltk_download_url|basename }}-installed
chdir={{ ora_data_dir }}
sudo_user: "{{ common_web_user }}"
notify:
- restart ora
- restart ora_celery
......@@ -5,7 +5,7 @@ command={{ ora_venv_bin }}/gunicorn --preload -b {{ ora_gunicorn_host }}:{{ ora_
user={{ common_web_user }}
directory={{ ora_code_dir }}
environment=PID=/var/run/gunicorn/edx-ora.pid,WORKERS={{ ora_gunicorn_workers }},PORT={{ ora_gunicorn_port }},ADDRESS={{ ora_gunicorn_host }},LANG={{ ORA_LANG }},DJANGO_SETTINGS_MODULE=edx_ora.aws,SERVICE_VARIANT=ora,NLTK_DATA={{ ora_nltk_data_dir }}
environment=PID=/var/run/gunicorn/edx-ora.pid,WORKERS={{ ora_gunicorn_workers }},PORT={{ ora_gunicorn_port }},ADDRESS={{ ora_gunicorn_host }},LANG={{ ORA_LANG }},DJANGO_SETTINGS_MODULE=edx_ora.aws,SERVICE_VARIANT=ora
stdout_logfile={{ supervisor_log_dir }}/%(program_name)-stdout.log
stderr_logfile={{ supervisor_log_dir }}/%(program_name)-stderr.log
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment