migrate.py 8.88 KB
Newer Older
1 2 3 4
#
# migration tools for content team to go from stable-edx4edx to LMS+CMS
#

5
import json
6
import logging
7
import os
8 9 10 11 12 13
from pprint import pprint
import xmodule.modulestore.django as xmodule_django
from xmodule.modulestore.django import modulestore

from django.http import HttpResponse
from django.conf import settings
14 15 16 17 18 19
import track.views

try:
    from django.views.decorators.csrf import csrf_exempt
except ImportError:
    from django.contrib.csrf.middleware import csrf_exempt
20 21 22 23 24

# Module-level logger, registered under the "mitx.lms_migrate" name.
log = logging.getLogger("mitx.lms_migrate")
# When true, manage_modulestores adds the requester's IP address, user and
# the server pid to its page, and logs the IP and user at debug level.
LOCAL_DEBUG = True
# IP addresses allowed to use manage_modulestores without being staff; an
# 'any' entry admits every address.  Read from settings at import time.
ALLOWED_IPS = settings.LMS_MIGRATION_ALLOWED_IPS

Calen Pennington committed
25

26 27
def escape(s):
    """
    Escape HTML special characters in a string.

    Non-string values are converted with str() first.  Besides '<' and '>',
    the ampersand and both quote characters are escaped so the result is
    safe inside element content as well as inside quoted attribute values.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3

    # Keep text values as they are: calling str() on a non-ASCII unicode
    # object raises UnicodeEncodeError under Python 2.
    text = s if isinstance(s, string_types) else str(s)

    # '&' must be replaced first, otherwise the entities produced by the
    # later replacements would themselves be escaped again.
    return (text.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;')
                .replace("'", '&#39;'))

30

31 32 33 34
def getip(request):
    '''
    Extract the IP address of the requester from the request headers,
    even when the server sits behind a reverse proxy.

    The 'HTTP_X_REAL_IP' entry of request.META (set by the nginx reverse
    proxy) is preferred; when it is missing or empty, 'REMOTE_ADDR' is used
    instead.  If neither is present the string 'None' is returned.
    '''
    meta = request.META
    return meta.get('HTTP_X_REAL_IP', '') or meta.get('REMOTE_ADDR', 'None')

40 41

def get_commit_id(course):
    '''
    Return the git commit id recorded on a course object.

    The id is read from the course's GIT_COMMIT_ID attribute (the one
    written by set_commit_id); the string 'No commit id' is returned when
    the attribute has not been set.
    '''
    return getattr(course, 'GIT_COMMIT_ID', 'No commit id')


Calen Pennington committed
47
def set_commit_id(course, commit_id):
    '''
    Record a git commit id on a course object.

    The id is stored in the course's GIT_COMMIT_ID attribute, which is
    where get_commit_id reads it back from.
    '''
    course.GIT_COMMIT_ID = commit_id

Calen Pennington committed
52

53
def _git_head_commit_id(gdir):
    '''
    Return the hash of the HEAD commit of the git checkout in gdir.

    Returns None when git cannot be run or gdir is not a usable git
    checkout, so that the caller can report the problem instead of crashing.
    '''
    # Imported here so this block does not depend on the module's import section.
    import subprocess

    try:
        # Argument list + cwd: no shell is involved, so the directory name
        # is never interpreted as shell syntax.
        output = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=gdir)
    except (OSError, subprocess.CalledProcessError):
        return None
    return output.decode('utf-8', 'replace').strip() or None


def manage_modulestores(request, reload_dir=None, commit_id=None):
    '''
    Manage the static in-memory modulestores.

    Renders an HTML page listing the courses loaded in the default
    modulestore together with some of their fields.

    If reload_dir is not None, then instruct the xml loader to reload that
    course directory.  If commit_id is also given and equals the commit id
    already recorded for that course, the reload is skipped.

    Access is granted when the requester's IP is in ALLOWED_IPS (or
    ALLOWED_IPS contains 'any'), or when the requesting user is staff;
    otherwise a 403 response is returned.

    Returns an HttpResponse containing the HTML page.
    '''
    html = "<html><body>"

    def_ms = modulestore()
    courses = def_ms.get_courses()

    #----------------------------------------
    # check on IP address of requester

    ip = getip(request)

    if LOCAL_DEBUG:
        # The IP may come straight from a client-supplied header, so it is
        # escaped before being written into the page.
        html += '<h3>IP address: %s </h3>' % escape(ip)
        html += '<h3>User: %s </h3>' % escape(request.user)
        html += '<h3>My pid: %s</h3>' % os.getpid()
        log.debug('request from ip=%s, user=%s', ip, request.user)

    if not (ip in ALLOWED_IPS or 'any' in ALLOWED_IPS):
        if request.user and request.user.is_staff:
            log.debug('request allowed because user=%s is staff', request.user)
        else:
            html += 'Permission denied'
            html += "</body></html>"
            log.debug('request denied, ALLOWED_IPS=%s', ALLOWED_IPS)
            return HttpResponse(html, status=403)

    #----------------------------------------
    # reload course if specified; handle optional commit_id

    if reload_dir is not None:
        if reload_dir not in def_ms.courses:
            # reload_dir is arbitrary request input at this point: escape it.
            html += ('<h2 class="inline-error">Error: "%s" is not a valid course directory</h2>'
                     % escape(reload_dir))
        else:
            # reloading based on commit_id is needed when running multiple worker threads,
            # so that a given thread doesn't reload the same commit multiple times
            current_commit_id = get_commit_id(def_ms.courses[reload_dir])
            log.debug('commit_id="%s"', commit_id)
            log.debug('current_commit_id="%s"', current_commit_id)

            if (commit_id is not None) and (commit_id == current_commit_id):
                html += "<h2>Already at commit id %s for %s</h2>" % (escape(commit_id),
                                                                     escape(reload_dir))
                track.views.server_track(
                    request,
                    'reload %s skipped already at %s (pid=%s)' % (reload_dir, commit_id, os.getpid()),
                    {}, page='migrate')
            else:
                html += '<h2>Reloaded course directory "%s"</h2>' % escape(reload_dir)
                def_ms.try_load_course(reload_dir)
                gdir = settings.DATA_DIR / reload_dir
                new_commit_id = _git_head_commit_id(gdir)
                if new_commit_id is None:
                    # Nothing to record; show the same placeholder that
                    # get_commit_id reports for a course without a commit id.
                    log.warning('could not determine git commit id for %s', gdir)
                    new_commit_id = 'No commit id'
                else:
                    set_commit_id(def_ms.courses[reload_dir], new_commit_id)
                html += '<p>commit_id=%s</p>' % new_commit_id
                track.views.server_track(
                    request,
                    'reloaded %s now at %s (pid=%s)' % (reload_dir, new_commit_id, os.getpid()),
                    {}, page='migrate')

    #----------------------------------------
    # list the loaded courses, each linking to its reload URL

    html += '<h2>Courses loaded in the modulestore</h2>'
    html += '<ol>'
    for cdir, course in def_ms.courses.items():
        html += '<li><a href="%s/migrate/reload/%s">%s</a> (%s)</li>' % (settings.MITX_ROOT_URL,
                                                                         escape(cdir),
                                                                         escape(cdir),
                                                                         course.location.url())
    html += '</ol>'

    #----------------------------------------
    # dump selected fields of every course

    dumpfields = ['location', 'metadata']

    for cdir, course in def_ms.courses.items():
        html += '<hr width="100%"/>'
        html += '<h2>Course: %s (%s)</h2>' % (course.display_name_with_default, cdir)

        html += '<p>commit_id=%s</p>' % get_commit_id(course)

        for field in dumpfields:
            data = getattr(course, field, None)
            html += '<h3>%s</h3>' % field
            if isinstance(data, dict):
                html += '<ul>'
                for k, v in data.items():
                    html += '<li>%s:%s</li>' % (escape(k), escape(v))
                html += '</ul>'
            else:
                html += '<ul><li>%s</li></ul>' % escape(data)

    #----------------------------------------
    # raw dumps of the modulestore state, for debugging

    html += '<hr width="100%"/>'
    html += "courses: <pre>%s</pre>" % escape(courses)

    ms = xmodule_django._MODULESTORES
    html += "modules: <pre>%s</pre>" % escape(ms)
    html += "default modulestore: <pre>%s</pre>" % escape(unicode(def_ms))

    #----------------------------------------

    log.debug('_MODULESTORES=%s', ms)
    log.debug('courses=%s', courses)
    log.debug('def_ms=%s', unicode(def_ms))

    html += "</body></html>"
    return HttpResponse(html)
167

Calen Pennington committed
168

169 170 171 172 173 174 175 176 177 178 179 180 181
def _refresh_git_checkout(gdir):
    '''
    Bring the git checkout in gdir up to date with its origin.

    Discards local changes, removes untracked files, pulls from origin and
    makes course.xml group-writable.  Every step is attempted even if an
    earlier one fails; the output of each step is written to the debug log.
    '''
    # Imported here so this block does not depend on the module's import section.
    import subprocess

    steps = (
        ['git', 'reset', '--hard', 'HEAD'],
        ['git', 'clean', '-f', '-d'],
        ['git', 'pull', 'origin'],
        ['chmod', 'g+w', 'course.xml'],
    )
    for args in steps:
        try:
            # Argument lists + cwd: no shell is involved, so the directory
            # name is never interpreted as shell syntax.
            proc = subprocess.Popen(args, cwd=gdir,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            output, _ = proc.communicate()
        except OSError as err:
            log.error('gitreload: could not run %s in %s: %s', ' '.join(args), gdir, err)
            continue
        log.debug('%s: %s', ' '.join(args), output.decode('utf-8', 'replace'))


@csrf_exempt
def gitreload(request, reload_dir=None):
    '''
    This can be used as a github WebHook Service Hook, for reloading of the content repo used by the LMS.

    If reload_dir is not None, then instruct the xml loader to reload that course directory.

    If reload_dir is None and the request carries a POSTed 'payload' (JSON,
    as sent by github), the repository name in the payload selects the
    course directory: its git checkout under settings.DATA_DIR is updated
    first, then settings.GITRELOAD_HOOK (if set) is requested, and finally
    the course is reloaded.

    Access is granted when the requester's IP is in
    settings.ALLOWED_GITRELOAD_IPS (or that list contains 'any'), or when
    the requesting user is staff; otherwise a 403 response is returned.

    Returns an HttpResponse containing a short HTML report.
    '''
    html = "<html><body>"
    ip = getip(request)

    # The IP may come straight from a client-supplied header, so it is
    # escaped before being written into the page.
    html += '<h3>IP address: %s </h3>' % escape(ip)
    html += '<h3>User: %s </h3>' % escape(request.user)

    # allow none by default; allow override in settings
    allowed_ips = getattr(settings, 'ALLOWED_GITRELOAD_IPS', [])

    if not (ip in allowed_ips or 'any' in allowed_ips):
        if request.user and request.user.is_staff:
            log.debug('request allowed because user=%s is staff', request.user)
        else:
            html += 'Permission denied'
            html += "</body></html>"
            log.debug('request denied from %s, ALLOWED_IPS=%s', ip, allowed_ips)
            return HttpResponse(html, status=403)

    #----------------------------------------
    # see if request is from github (POST with JSON)

    if reload_dir is None and 'payload' in request.POST:
        payload = request.POST['payload']
        log.debug("payload=%s", payload)
        try:
            gitargs = json.loads(payload)
            log.debug("gitargs=%s", gitargs)
            reload_dir = gitargs['repository']['name']
            # The name is joined onto DATA_DIR below; refuse anything that
            # is not a plain directory name so it cannot point elsewhere.
            if os.path.basename(reload_dir) != reload_dir or reload_dir in ('', '.', '..'):
                raise ValueError('invalid repository name')
        except (ValueError, KeyError, TypeError, AttributeError):
            log.debug("====> ERROR in gitreload - malformed payload")
            return HttpResponse('Error', status=400)
        log.debug("github reload_dir=%s", reload_dir)

        gdir = settings.DATA_DIR / reload_dir
        if not os.path.exists(gdir):
            log.debug("====> ERROR in gitreload - no such directory %s", reload_dir)
            return HttpResponse('Error')
        _refresh_git_checkout(gdir)

        # hit this hook after reload, if set
        gh = getattr(settings, 'GITRELOAD_HOOK', None)
        if gh:
            # The module never imports requests; import it where it is
            # needed so installations without a hook do not require it.
            import requests
            ghurl = '%s/%s' % (gh, reload_dir)
            r = requests.get(ghurl)
            log.debug("GITRELOAD_HOOK to %s: %s", ghurl, r.text)

    #----------------------------------------
    # reload course if specified

    if reload_dir is not None:
        def_ms = modulestore()
        if reload_dir not in def_ms.courses:
            # reload_dir is arbitrary request input at this point: escape it.
            html += ('<h2 class="inline-error">Error: "%s" is not a valid course directory</h2>'
                     % escape(reload_dir))
        else:
            html += "<h2>Reloaded course directory '%s'</h2>" % escape(reload_dir)
            def_ms.try_load_course(reload_dir)
            track.views.server_track(request, 'reloaded %s' % reload_dir, {}, page='migrate')

    html += "</body></html>"
    return HttpResponse(html)