---
#
# edX Configuration
#
# github:     https://github.com/edx/configuration
# wiki:       https://openedx.atlassian.net/wiki/display/OpenOPS
# code style: https://openedx.atlassian.net/wiki/display/OpenOPS/Ansible+Code+Conventions
# license:    https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
# Tasks for role analytics_pipeline
#
# Overview:
#
# Prepare the machine to run the edX Analytics Data Pipeline. The pipeline currently "installs itself"
# via an ansible playbook that is not included in the edx/configuration repo. However, in order to
# run the pipeline in a devstack environment, some configuration needs to be performed. In a production
# environment many of these config files are stored on S3.
#
# Dependencies:
#
# common: some of the variables from the common role are used here
# hadoop_master: ensures hadoop services are installed
# hive: the pipeline makes extensive usage of hive, so that needs to be installed as well
# sqoop: similarly to hive, the pipeline uses this tool extensively
#
# Example play:
#
# - name: Deploy all dependencies of edx-analytics-pipeline to the node
#   hosts: all
#   become: True
#   gather_facts: True
#   roles:
#     - analytics_pipeline
#
# ansible-playbook -i 'localhost,' ./analytics_pipeline.yml -e@/ansible/vars/deployment.yml -e@/ansible/vars/env-deployment.yml
#
# Directory that holds the pipeline's config files (e.g. database credential JSON).
- name: Create config directory
  file:
    path: "{{ ANALYTICS_PIPELINE_CONFIG_DIR }}"
    state: directory
    owner: "{{ hadoop_common_user }}"
    group: "{{ hadoop_common_group }}"
    mode: "0755"
  tags:
    - install
    - install:configuration

# Serialize the output (result-store) DB credentials to JSON where the pipeline
# expects to read them. World-readable (0644) — NOTE(review): credentials on disk;
# acceptable for devstack, confirm for other environments.
- name: Store output database credentials for analytics pipeline
  copy:
    content: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE | to_json }}"
    dest: "{{ COMMON_CFG_DIR }}/edx-analytics-pipeline/output.json"
    owner: "{{ hadoop_common_user }}"
    group: "{{ hadoop_common_group }}"
    mode: "0644"
  tags:
    - install
    - install:configuration

# Serialize the input (source) DB credentials to JSON, mirroring the output.json task.
- name: Store input database credentials for analytics pipeline
  copy:
    content: "{{ ANALYTICS_PIPELINE_INPUT_DATABASE | to_json }}"
    dest: "{{ COMMON_CFG_DIR }}/edx-analytics-pipeline/input.json"
    owner: "{{ hadoop_common_user }}"
    group: "{{ hadoop_common_group }}"
    mode: "0644"
  tags:
    - install
    - install:configuration

# Luigi (the pipeline's task scheduler) reads its config from /etc/luigi.
- name: luigi configuration directory created
  file:
    path: /etc/luigi
    state: directory
    mode: "0755"
  tags:
    - install
    - install:configuration

# Render the luigi client configuration from the role's template.
- name: luigi configuration file written
  template:
    src: client.cfg.j2
    dest: /etc/luigi/client.cfg
    mode: "0644"
  tags:
    - install
    - install:configuration

# Fetch the Java source for the edX Hadoop utility library (built below).
# git_2_0_1 is an edx/configuration-local wrapper around the git module.
- name: Util library source checked out
  git_2_0_1:
    repo: "{{ analytics_pipeline_util_library.repo }}"
    dest: "{{ analytics_pipeline_util_library.path }}"
    version: "{{ analytics_pipeline_util_library.version }}"
  tags:
    - install
    - install:code

# Destination for the built utility jar; owned by the hadoop user so the
# pipeline can read it. Explicit mode added for consistency with the other
# directory-creation tasks in this role.
- name: lib directory created
  file:
    path: "{{ HADOOP_COMMON_USER_HOME }}/lib"
    owner: "{{ hadoop_common_user }}"
    group: "{{ hadoop_common_group }}"
    state: directory
    mode: "0755"
  tags:
    - install
    - install:app-requirements

# Probe for the built jar so the compile step below can be skipped when it
# already exists (poor man's idempotence for the shell task).
- name: Check if the util library needs to be built
  stat:
    path: "{{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar"
  register: util_lib_built
  tags:
    - install
    - install:app-requirements

# Compile ManifestTextInputFormat against the hadoop classpath, package it into
# the jar, and hand ownership to the hadoop user. Runs from the checked-out
# source tree; skipped entirely when the jar already exists (see stat above).
- name: Util library built
  shell: >
    {{ hadoop_common_java_home }}/bin/javac -cp `{{ HADOOP_COMMON_HOME }}/bin/hadoop classpath` org/edx/hadoop/input/ManifestTextInputFormat.java &&
    {{ hadoop_common_java_home }}/bin/jar cf {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar org/edx/hadoop/input/ManifestTextInputFormat.class &&
    chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar
  args:
    chdir: "{{ analytics_pipeline_util_library.path }}"
  when: not util_lib_built.stat.exists
  tags:
    - install
    - install:app-requirements

# HDFS must be up before the hdfs dfs commands below can run.
- name: Ensure hdfs services are started
  service:
    name: hdfs
    state: started
  tags:
    - manage
    - manage:start

# YARN provides the map-reduce execution layer the pipeline jobs run on.
- name: Ensure map reduce services are started
  service:
    name: yarn
    state: started
  tags:
    - manage
    - manage:start

# Create the HDFS directory that holds pipeline packages. hadoop-env.sh is
# sourced so hdfs is on PATH; -mkdir -p is idempotent. Runs as the hadoop user
# (play is expected to run with become — see example play in the header).
- name: Ensure package dir exists in HDFS
  shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/"
  become_user: "{{ hadoop_common_user }}"
  tags:
    - install
    - install:app-requirements

# Upload the built jar into HDFS; -put -f overwrites, so re-runs are safe.
- name: Ensure util library is in HDFS
  shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -put -f {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/"
  become_user: "{{ hadoop_common_user }}"
  tags:
    - install
    - install:app-requirements

# HDFS directory the tracking-log sync (cron task below) writes into.
- name: Ensure the data directory exists
  shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}"
  become_user: "{{ hadoop_common_user }}"
  tags:
    - install
    - install:base

# Make the tracking log world-readable so the hadoop user's cron job can copy
# it. Errors ignored because the file may not exist yet on a fresh box.
- name: Ensure tracking log file can be read
  file:
    path: "{{ COMMON_LOG_DIR }}/tracking/tracking.log"
    mode: "0644"
  ignore_errors: true
  tags:
    - install
    - install:configuration

# Periodically copy the tracking log into HDFS for the pipeline to consume.
# No minute/hour given, so the cron module's defaults apply (every minute).
- name: Cron job syncs tracking log file to hdfs
  cron:
    user: "{{ hadoop_common_user }}"
    name: "Sync tracking log to HDFS"
    job: "{{ HADOOP_COMMON_HOME }}/bin/hdfs dfs -put -f {{ COMMON_LOG_DIR }}/tracking/tracking.log {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}/tracking.log"
  tags:
    - install
    - install:configuration

# Drop the static acceptance-test config where the test suite looks for it.
- name: store configuration for acceptance tests
  copy:
    src: acceptance.json
    dest: /var/tmp/acceptance.json
    mode: "0644"
  tags:
    - install
    - install:configuration

# Grant the pipeline's output-DB user full rights on acceptance-test databases
# (any database whose name starts with "acceptance"). append_privs preserves
# the user's existing grants instead of replacing them.
- name: Grant access to table storing test data in output database
  mysql_user:
    user: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.username }}"
    password: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.password }}"
    priv: 'acceptance%.*:ALL'
    append_privs: true
  tags:
    - install
    - install:configuration