---
#
# edX Configuration
#
# github:     https://github.com/edx/configuration
# wiki:       https://openedx.atlassian.net/wiki/display/OpenOPS
# code style: https://openedx.atlassian.net/wiki/display/OpenOPS/Ansible+Code+Conventions
# license:    https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
# Tasks for role analytics_pipeline
#
# Overview:
#
# Prepare the machine to run the edX Analytics Data Pipeline. The pipeline currently "installs itself"
# via an Ansible playbook that is not included in the edx/configuration repo, so this role performs
# only the configuration needed to run the pipeline in a devstack environment. In a production
# environment many of these config files are stored on S3 instead.
#
# Dependencies:
#
# common: some of the variables from the common role are used here
# hadoop_master: ensures the Hadoop services are installed
# hive: the pipeline makes extensive use of Hive, so it must be installed as well
# sqoop: like Hive, Sqoop is used extensively by the pipeline
#
# Example play:
#
# - name: Deploy all dependencies of edx-analytics-pipeline to the node
#   hosts: all
#   become: True
#   gather_facts: True
#   roles:
#     - analytics_pipeline
#
# ansible-playbook -i 'localhost,' ./analytics_pipeline.yml -e@/ansible/vars/deployment.yml -e@/ansible/vars/env-deployment.yml
#

- name: Create config directory
  file:
    path: "{{ ANALYTICS_PIPELINE_CONFIG_DIR }}"
    state: directory
    owner: "{{ hadoop_common_user }}"
    group: "{{ hadoop_common_group }}"
    mode: "0755"
  tags:
    - install
    - install:configuration

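# Both credential files below are simply the JSON serialization of the corresponding
# variable, so the pipeline sees whatever keys the dict carries. An illustrative shape
# (field values are made up; only username/password are also relied on by the
# mysql_user task near the end of this file):
#
#   {"username": "pipeline001", "password": "example", "host": "localhost", "port": 3306}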
- name: Store output database credentials for analytics pipeline
  copy:
    content: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE | to_json }}"
    dest: "{{ COMMON_CFG_DIR }}/edx-analytics-pipeline/output.json"
    owner: "{{ hadoop_common_user }}"
    group: "{{ hadoop_common_group }}"
    mode: "0644"
  tags:
    - install
    - install:configuration

- name: Store input database credentials for analytics pipeline
  copy:
    content: "{{ ANALYTICS_PIPELINE_INPUT_DATABASE | to_json }}"
    dest: "{{ COMMON_CFG_DIR }}/edx-analytics-pipeline/input.json"
    owner: "{{ hadoop_common_user }}"
    group: "{{ hadoop_common_group }}"
    mode: "0644"
  tags:
    - install
    - install:configuration

- name: Luigi configuration directory created
  file:
    path: /etc/luigi
    state: directory
    mode: "0755"
  tags:
    - install
    - install:configuration

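# Luigi reads an INI-style config file; the contents come entirely from client.cfg.j2.
# Purely as an illustration of the format (section and option names here are examples,
# not necessarily what the template renders):
#
#   [hadoop]
#   version = cdh4
#
#   [core]
#   logging_conf_file = /etc/luigi/logging.cfg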
- name: Luigi configuration file written
  template:
    src: client.cfg.j2
    dest: /etc/luigi/client.cfg
    mode: "0644"
  tags:
    - install
    - install:configuration

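# analytics_pipeline_util_library (repo URL, checkout path, and ref) is expected to come
# from this role's defaults; it provides the ManifestTextInputFormat source compiled below.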
- name: Util library source checked out
  git:
    repo: "{{ analytics_pipeline_util_library.repo }}"
    dest: "{{ analytics_pipeline_util_library.path }}"
    version: "{{ analytics_pipeline_util_library.version }}"
  tags:
    - install
    - install:code

- name: Lib directory created
  file:
    path: "{{ HADOOP_COMMON_USER_HOME }}/lib"
    owner: "{{ hadoop_common_user }}"
    group: "{{ hadoop_common_group }}"
    state: directory
  tags:
    - install
    - install:app-requirements

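# The stat/when pair below is an idempotence guard: the expensive javac step only runs
# when the jar is not already on disk.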
- name: Check if the util library needs to be built
  stat:
    path: "{{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar"
  register: util_lib_built
  tags:
    - install
    - install:app-requirements

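# Compile the custom input format against the full Hadoop classpath (via `hadoop classpath`),
# package the class into a jar under the hadoop user's lib directory, and hand over ownership.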
- name: Util library built
  shell: >
    {{ hadoop_common_java_home }}/bin/javac -cp `{{ HADOOP_COMMON_HOME }}/bin/hadoop classpath` org/edx/hadoop/input/ManifestTextInputFormat.java &&
    {{ hadoop_common_java_home }}/bin/jar cf {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar org/edx/hadoop/input/ManifestTextInputFormat.class &&
    chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar
  args:
    chdir: "{{ analytics_pipeline_util_library.path }}"
  when: not util_lib_built.stat.exists
  tags:
    - install
    - install:app-requirements

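# Unit files for the services managed below are installed by the hadoop roles, so systemd
# has to re-read its configuration first. Note that `command` always reports changed here.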
- name: Reload systemd configuration
  command: systemctl daemon-reload
  tags:
    - install
    - install:configuration

- name: Enable Hadoop services
  service:
    name: "{{ item }}"
    enabled: yes
  with_items: "{{ hadoop_common_services }}"
  tags:
    - install
    - install:configuration

- name: Start Hadoop services
  service:
    name: "{{ item }}"
    state: started
  with_items: "{{ hadoop_common_services }}"
  tags:
    - manage
    - manage:start

- name: Stop Hadoop services
  service:
    name: "{{ item }}"
    state: stopped
  with_items: "{{ hadoop_common_services }}"
  tags:
    - manage
    - manage:stop

- name: Restart Hadoop services
  service:
    name: "{{ item }}"
    state: restarted
  with_items: "{{ hadoop_common_services }}"
  tags:
    - manage
    - manage:restart
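
# The manage:* tags allow controlling the Hadoop services without a full redeploy, e.g.:
#
#   ansible-playbook -i 'localhost,' ./analytics_pipeline.yml --tags manage:restart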

- name: Ensure package dir exists in HDFS
  shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/"
  become_user: "{{ hadoop_common_user }}"
  tags:
    - install
    - install:app-requirements

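# Retried because HDFS may still be coming up at this point (the namenode rejects writes
# while it is in safe mode shortly after startup), so early puts can fail transiently.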
- name: Ensure util library is in HDFS
  shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -put -f {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/"
  become_user: "{{ hadoop_common_user }}"
  tags:
    - install
    - install:app-requirements
  register: libcp
  until: libcp is succeeded
  retries: 6
  delay: 10

- name: Ensure the data directory exists
  shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}"
  become_user: "{{ hadoop_common_user }}"
  tags:
    - install
    - install:base

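# ignore_errors tolerates a tracking log that does not exist yet, e.g. on a fresh box
# where the LMS has not emitted any events.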
- name: Ensure tracking log file can be read
  file:
    path: "{{ COMMON_LOG_DIR }}/tracking/tracking.log"
    mode: "0644"
  ignore_errors: yes
  tags:
    - install
    - install:configuration

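# With no schedule fields given, the cron module defaults minute/hour/etc. to '*',
# so this sync runs once a minute.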
- name: Cron job syncs tracking log file to HDFS
  cron:
    user: "{{ hadoop_common_user }}"
    name: "Sync tracking log to HDFS"
    job: "{{ HADOOP_COMMON_HOME }}/bin/hdfs dfs -put -f {{ COMMON_LOG_DIR }}/tracking/tracking.log {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}/tracking.log"
  tags:
    - install
    - install:configuration

- name: Store configuration for acceptance tests
  copy:
    src: acceptance.json
    dest: /var/tmp/acceptance.json
    mode: "0644"
  tags:
    - install
    - install:configuration

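# 'acceptance%.*' is a MySQL wildcard grant: ALL privileges on every database whose name
# begins with "acceptance", which the acceptance tests presumably create on the fly.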
- name: Grant access to acceptance test databases on the output database server
  mysql_user:
    user: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.username }}"
    password: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.password }}"
    priv: 'acceptance%.*:ALL'
    append_privs: yes
  tags:
    - install
    - install:configuration

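# Run a query that forces a metastore round-trip; merging stderr and piping through tr
# flattens the output for the substring match below and makes the task exit with tr's
# status (0), so an uninitialized schema does not abort the play.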
- name: Test if Hive metadata store schema exists
  shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && {{ HIVE_HOME }}/bin/hive | tr '\n' ' '"
  become_user: "{{ hadoop_common_user }}"
  register: hive_metastore_info
  tags:
    - install
    - install:configuration

- name: Initialize Hive metadata store schema
  shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && {{ HIVE_HOME }}/bin/schematool -dbType mysql -initSchema"
  become_user: "{{ hadoop_common_user }}"
  when: "'Version information not found in metastore' in hive_metastore_info.stderr"
  tags:
    - install
    - install:configuration