migrate.py 8.72 KB
Newer Older
1 2 3 4
#
# migration tools for content team to go from stable-edx4edx to LMS+CMS
#

5
import json
6
import logging
7
import os
8 9 10 11 12
import xmodule.modulestore.django as xmodule_django
from xmodule.modulestore.django import modulestore

from django.http import HttpResponse
from django.conf import settings
13 14 15 16 17 18
import track.views

try:
    from django.views.decorators.csrf import csrf_exempt
except ImportError:
    from django.contrib.csrf.middleware import csrf_exempt
19

20
# Module-level logger; everything in this file logs under "edx.lms_migrate".
log = logging.getLogger("edx.lms_migrate")
21 22 23
# When true, manage_modulestores() adds the requester's IP, the user and the
# server pid to the page and logs the request.
# NOTE(review): hard-coded to True -- confirm this is intended outside development.
LOCAL_DEBUG = True
# IPs allowed to use manage_modulestores() without being staff; an entry of
# 'any' allows every address (see the check in manage_modulestores).
ALLOWED_IPS = settings.LMS_MIGRATION_ALLOWED_IPS

Calen Pennington committed
24

25 26
def escape(s):
    """
    Escape HTML special characters in s and return the escaped text.

    Text input is used as-is; any other value is first converted with str().
    The characters & < > " ' are replaced with entities so that the result is
    safe both as element content and inside a quoted attribute value (this
    module places escaped values inside href="...").
    """
    # Keep text as text: forcing str() on non-ASCII text raises on Python 2.
    text = s if isinstance(s, type(u'')) else str(s)
    replacements = (
        ('&', '&amp;'),  # must come first so the entities added below survive
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for char, entity in replacements:
        text = text.replace(char, entity)
    return text

29

30 31 32 33
def getip(request):
    '''
    Extract IP address of requester from header, even if behind proxy.

    Returns request.META['HTTP_X_REAL_IP'] when it is present and non-empty
    (set by the nginx reverse proxy); otherwise request.META['REMOTE_ADDR'],
    or the string 'None' when that key is missing too.
    '''
    real_ip = request.META.get('HTTP_X_REAL_IP', '')  # nginx reverse proxy
    return real_ip or request.META.get('REMOTE_ADDR', 'None')

39 40

def get_commit_id(course):
    '''
    Return the git commit id recorded on course.

    The id is read from the course's GIT_COMMIT_ID attribute; when the
    attribute is absent the string 'No commit id' is returned instead.
    '''
    return getattr(course, 'GIT_COMMIT_ID', 'No commit id')


Calen Pennington committed
46
def set_commit_id(course, commit_id):
    '''
    Record commit_id on course as its GIT_COMMIT_ID attribute.

    This is the value later read back by get_commit_id().
    '''
    course.GIT_COMMIT_ID = commit_id
50

Calen Pennington committed
51

52
def _read_git_commit_id(gdir):
    '''
    Return the commit id on the first line of `git log` run in gdir, or None
    when git printed nothing usable (not a repository, git missing, ...).
    '''
    # The only caller builds gdir from a reload_dir it has already found among
    # the modulestore's course directories, so the path is not free-form input.
    first_line = os.popen('cd %s; git log -n 1 | head -1' % gdir).read()
    # Expected shape: "commit <sha>"
    fields = first_line.split()
    return fields[1] if len(fields) > 1 else None


def manage_modulestores(request, reload_dir=None, commit_id=None):
    '''
    Manage the static in-memory modulestores.

    Returns an HttpResponse holding an HTML page that lists the courses loaded
    in the default modulestore, dumps a few fields of each course and shows the
    modulestore objects themselves.

    If reload_dir is not None, then instruct the xml loader to reload that course directory.
    If commit_id is also given and equals the commit id already recorded for
    that course, the reload is skipped.

    Access is granted when the requester's IP is in ALLOWED_IPS, when
    ALLOWED_IPS contains 'any', or when request.user is staff; otherwise the
    response is "Permission denied" with status 403.
    '''
    html = "<html><body>"

    def_ms = modulestore()
    courses = def_ms.get_courses()

    #----------------------------------------
    # check on IP address of requester

    ip = getip(request)

    if LOCAL_DEBUG:
        # ip may come straight from a request header, so escape it like any
        # other request-derived value before putting it in the page.
        html += '<h3>IP address: %s </h3>' % escape(ip)
        html += '<h3>User: %s </h3>' % escape(request.user)
        html += '<h3>My pid: %s</h3>' % os.getpid()
        log.debug(u'request from ip=%s, user=%s', ip, request.user)

    if not (ip in ALLOWED_IPS or 'any' in ALLOWED_IPS):
        if request.user and request.user.is_staff:
            log.debug(u'request allowed because user=%s is staff', request.user)
        else:
            html += 'Permission denied'
            html += "</body></html>"
            # Pass ALLOWED_IPS as a logging argument: '%s' % ALLOWED_IPS raises
            # TypeError when the setting is a tuple with more than one entry.
            log.debug('request denied, ALLOWED_IPS=%s', ALLOWED_IPS)
            return HttpResponse(html, status=403)

    #----------------------------------------
    # reload course if specified; handle optional commit_id

    if reload_dir is not None:
        if reload_dir not in def_ms.courses:
            html += '<h2 class="inline-error">Error: "%s" is not a valid course directory</h2>' % escape(reload_dir)
        else:
            # reloading based on commit_id is needed when running multiple worker threads,
            # so that a given thread doesn't reload the same commit multiple times
            current_commit_id = get_commit_id(def_ms.courses[reload_dir])
            log.debug('commit_id="%s"', commit_id)
            log.debug('current_commit_id="%s"', current_commit_id)

            if (commit_id is not None) and (commit_id == current_commit_id):
                html += "<h2>Already at commit id %s for %s</h2>" % (escape(commit_id), escape(reload_dir))
                track.views.server_track(
                    request,
                    'reload %s skipped already at %s (pid=%s)' % (reload_dir, commit_id, os.getpid()),
                    {},
                    page='migrate',
                )
            else:
                html += '<h2>Reloaded course directory "%s"</h2>' % escape(reload_dir)
                def_ms.try_load_course(reload_dir)
                gdir = settings.DATA_DIR / reload_dir
                new_commit_id = _read_git_commit_id(gdir)
                if new_commit_id is None:
                    # git gave no commit line; report whatever is recorded on
                    # the course instead of failing the whole request.
                    log.warning('could not determine git commit id for %s', gdir)
                    new_commit_id = get_commit_id(def_ms.courses[reload_dir])
                else:
                    set_commit_id(def_ms.courses[reload_dir], new_commit_id)
                html += '<p>commit_id=%s</p>' % new_commit_id
                track.views.server_track(
                    request,
                    'reloaded %s now at %s (pid=%s)' % (reload_dir, new_commit_id, os.getpid()),
                    {},
                    page='migrate',
                )

    #----------------------------------------
    # list the loaded courses, each linking to its reload URL

    html += '<h2>Courses loaded in the modulestore</h2>'
    html += '<ol>'
    for cdir, course in def_ms.courses.items():
        html += '<li><a href="%s/migrate/reload/%s">%s</a> (%s)</li>' % (
            settings.EDX_ROOT_URL,
            escape(cdir),
            escape(cdir),
            course.location.to_deprecated_string()
        )
    html += '</ol>'

    #----------------------------------------
    # dump selected fields of every course

    dumpfields = ['location', 'metadata']

    for cdir, course in def_ms.courses.items():
        html += '<hr width="100%"/>'
        html += '<h2>Course: %s (%s)</h2>' % (course.display_name_with_default, escape(cdir))

        html += '<p>commit_id=%s</p>' % get_commit_id(course)

        for field in dumpfields:
            data = getattr(course, field, None)
            html += '<h3>%s</h3>' % field
            if isinstance(data, dict):
                html += '<ul>'
                for k, v in data.items():
                    html += '<li>%s:%s</li>' % (escape(k), escape(v))
                html += '</ul>'
            else:
                html += '<ul><li>%s</li></ul>' % escape(data)

    #----------------------------------------
    # dump the modulestore objects themselves

    html += '<hr width="100%"/>'
    html += "courses: <pre>%s</pre>" % escape(courses)

    ms = xmodule_django._MODULESTORES
    html += "modules: <pre>%s</pre>" % escape(ms)
    html += "default modulestore: <pre>%s</pre>" % escape(unicode(def_ms))

    #----------------------------------------

    log.debug('_MODULESTORES=%s', ms)
    log.debug('courses=%s', courses)
    log.debug('def_ms=%s', unicode(def_ms))

    html += "</body></html>"
    return HttpResponse(html)
167

Calen Pennington committed
168

169 170 171 172 173 174 175 176 177 178 179 180 181
def _update_course_checkout(gdir):
    '''
    Bring the git checkout in gdir up to date with its origin and make
    course.xml group-writable, logging the output of each command.
    '''
    import subprocess

    commands = (
        ['git', 'reset', '--hard', 'HEAD'],
        ['git', 'clean', '-f', '-d'],
        ['git', 'pull', 'origin'],
        ['chmod', 'g+w', 'course.xml'],
    )
    for cmd in commands:
        # No shell is involved and every command runs with cwd=gdir, so a bad
        # directory makes the command fail instead of running somewhere else.
        try:
            proc = subprocess.Popen(cmd, cwd=gdir, stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError as err:
            log.debug("could not run %s in %s: %s", cmd, gdir, err)
            continue  # like the original ';' chain, keep going
        output, _ = proc.communicate()
        log.debug(output)


@csrf_exempt
def gitreload(request, reload_dir=None):
    '''
    This can be used as a github WebHook Service Hook, for reloading of the content repo used by the LMS.

    If reload_dir is not None, then instruct the xml loader to reload that course directory.

    If reload_dir is None and the POST data has a 'payload' field, the payload
    is parsed as JSON and its repository name is taken as reload_dir; the
    matching directory under settings.DATA_DIR is updated from git first, and
    settings.GITRELOAD_HOOK (if set) is then fetched as "<hook>/<reload_dir>".

    Access is granted when the requester's IP is in
    settings.ALLOWED_GITRELOAD_IPS (no address is allowed when the setting is
    absent), when that list contains 'any', or when request.user is staff;
    otherwise the response is "Permission denied" with status 403.

    Returns an HttpResponse: an HTML page on the normal path, or the bare text
    'Error' when the payload is unusable or the directory does not exist.
    '''
    html = "<html><body>"
    ip = getip(request)

    html += '<h3>IP address: %s </h3>' % escape(ip)
    html += '<h3>User: %s </h3>' % escape(request.user)

    # allow none by default; allow override in settings
    allowed_ips = getattr(settings, 'ALLOWED_GITRELOAD_IPS', [])

    if not (ip in allowed_ips or 'any' in allowed_ips):
        if request.user and request.user.is_staff:
            log.debug(u'request allowed because user=%s is staff', request.user)
        else:
            html += 'Permission denied'
            html += "</body></html>"
            log.debug('request denied from %s, ALLOWED_IPS=%s', ip, allowed_ips)
            # same status as manage_modulestores uses for a denied request
            return HttpResponse(html, status=403)

    #----------------------------------------
    # see if request is from github (POST with JSON)

    if reload_dir is None and 'payload' in request.POST:
        payload = request.POST['payload']
        log.debug("payload=%s", payload)
        try:
            gitargs = json.loads(payload)
            log.debug("gitargs=%s", gitargs)
            reload_dir = gitargs['repository']['name']
        except (ValueError, KeyError, TypeError):
            log.debug("====> ERROR in gitreload - malformed payload")
            return HttpResponse('Error')
        log.debug("github reload_dir=%s", reload_dir)

        # The name comes from the request body: accept only a single, plain
        # directory name so it cannot point outside settings.DATA_DIR.
        is_plain_name = (
            isinstance(reload_dir, (str, type(u''))) and
            reload_dir not in ('', '.', '..') and
            os.path.basename(reload_dir) == reload_dir
        )
        if not is_plain_name:
            log.debug("====> ERROR in gitreload - invalid directory name %r", reload_dir)
            return HttpResponse('Error')

        gdir = settings.DATA_DIR / reload_dir
        if not os.path.exists(gdir):
            log.debug("====> ERROR in gitreload - no such directory %s", reload_dir)
            return HttpResponse('Error')
        _update_course_checkout(gdir)

        # hit this hook after reload, if set
        gh = getattr(settings, 'GITRELOAD_HOOK', None)
        if gh:
            # requests was used here without ever being imported by this module
            import requests
            ghurl = '%s/%s' % (gh, reload_dir)
            r = requests.get(ghurl)
            log.debug("GITRELOAD_HOOK to %s: %s", ghurl, r.text)

    #----------------------------------------
    # reload course if specified

    if reload_dir is not None:
        def_ms = modulestore()
        if reload_dir not in def_ms.courses:
            html += '<h2 class="inline-error">Error: "%s" is not a valid course directory</h2>' % escape(reload_dir)
        else:
            html += "<h2>Reloaded course directory '%s'</h2>" % escape(reload_dir)
            def_ms.try_load_course(reload_dir)
            track.views.server_track(request, 'reloaded %s' % reload_dir, {}, page='migrate')

    html += "</body></html>"
    return HttpResponse(html)