git_import.py 10.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
"""
Provides a function for importing a git repository into the lms
instance when using a mongo modulestore
"""

import os
import re
import StringIO
import subprocess
import logging

from django.conf import settings
from django.core import management
from django.core.management.base import CommandError
from django.utils import timezone
from django.utils.translation import ugettext as _
import mongoengine

from dashboard.models import CourseImportLog
20
from opaque_keys import InvalidKeyError
21 22
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.locations import SlashSeparatedCourseKey
23 24 25

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

26
# Directory that holds the git checkouts used for course imports.  Taken from
# the GIT_REPO_DIR Django setting when present, otherwise the default below.
GIT_REPO_DIR = getattr(settings, 'GIT_REPO_DIR', '/edx/var/app/edxapp/course_repos')
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
# Taken from the GIT_IMPORT_STATIC Django setting (default True).  add_repo
# passes its negation to the import command as the `nostatic` option.
GIT_IMPORT_STATIC = getattr(settings, 'GIT_IMPORT_STATIC', True)


class GitImportError(Exception):
    """
    Raised when any step of a git based course import fails.

    The class attributes below are the translated, user-facing messages that
    are passed to the exception when it is raised.
    """

    NO_DIR = _(
        "Path {0} doesn't exist, please create it, or configure a "
        "different path with GIT_REPO_DIR"
    ).format(GIT_REPO_DIR)
    URL_BAD = _(
        'Non usable git url provided. '
        'Expecting something like: git@github.com:mitocw/edx4edx_lite.git'
    )
    BAD_REPO = _('Unable to get git log')
    CANNOT_PULL = _('git clone or pull failed!')
    XML_IMPORT_FAILED = _('Unable to run import command.')
    UNSUPPORTED_STORE = _('The underlying module store does not support import.')
    # Translators: This is an error message when they ask for a
    # particular version of a git repository and that version isn't
    # available from the remote source they specified
    REMOTE_BRANCH_MISSING = _('The specified remote branch is not available.')
    # Translators: Error message shown when they have asked for a git
    # repository branch, a specific version within a repository, that
    # doesn't exist, or there is a problem changing to it.
    CANNOT_BRANCH = _(
        'Unable to switch to specified branch. Please check your branch name.'
    )
53

54

55 56 57 58 59 60 61
def cmd_log(cmd, cwd):
    """
    Run `cmd` (a list of arguments) inside `cwd` and return what it printed.

    stderr is folded into stdout, so the returned value contains both.  The
    command line, the working directory and the captured output are written
    to the debug log once the command has finished.

    Raises subprocess.CalledProcessError when the command exits non-zero.
    """
    captured = subprocess.check_output(
        cmd,
        cwd=cwd,
        stderr=subprocess.STDOUT,
    )

    command_line = ' '.join(cmd)
    log.debug(u'Command was: %r. Working directory was: %r', command_line, cwd)
    log.debug(u'Command output was: %r', captured)
    return captured


68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
def switch_branch(branch, rdir):
    """
    Switch the checkout in `rdir` to `branch` at the remote's newest commit.

    The steps are: fetch from the remote, confirm that `origin` advertises the
    branch, make sure the branch is checked out locally (creating a tracking
    branch when it only exists on the remote), and finally hard-reset it to
    `origin/<branch>`.

    Arguments:
        branch: name of the branch to switch to.
        rdir: path of the git working directory to operate on.

    Raises:
        GitImportError: with REMOTE_BRANCH_MISSING when the remote does not
            list the branch, or CANNOT_BRANCH when any git command fails.
    """
    # Get the latest remote
    _git_for_branch(['git', 'fetch', ], rdir, 'Unable to fetch remote: %r')

    # Check if the branch is available from the remote.
    output = _git_for_branch(
        ['git', 'ls-remote', 'origin', '-h', 'refs/heads/{0}'.format(branch), ],
        rdir,
        'Getting a list of remote branches failed: %r',
    )
    if branch not in output:
        raise GitImportError(GitImportError.REMOTE_BRANCH_MISSING)

    # Check if the remote branch has already been made locally
    output = _git_for_branch(
        ['git', 'branch', '-a', ],
        rdir,
        'Getting a list of local branches failed: %r',
    )
    # `git branch` marks the current branch with '*'; drop the marker and the
    # padding so the names can be compared exactly.
    branches = [line.replace('*', '').strip() for line in output.split('\n')]

    if branch not in branches:
        # Checkout with -b since it is remote only
        cmd = ['git', 'checkout', '--force', '--track',
               '-b', branch, 'origin/{0}'.format(branch), ]
        failure = 'Unable to checkout remote branch: %r'
    else:
        # The branch exists locally but may not be the current one.  It has to
        # be checked out first, otherwise the reset below would move whichever
        # branch happens to be checked out.  The trailing '--' keeps git from
        # treating the name as a path.
        cmd = ['git', 'checkout', '--force', branch, '--', ]
        failure = 'Unable to checkout local branch: %r'
    _git_for_branch(cmd, rdir, failure)

    # Go ahead and reset hard to the newest version of the branch now that we
    # know it is local and checked out.
    _git_for_branch(
        ['git', 'reset', '--hard', 'origin/{0}'.format(branch), ],
        rdir,
        'Unable to reset to branch: %r',
    )


def _git_for_branch(cmd, rdir, failure_message):
    """
    Run one git command for switch_branch and return its output.

    A failing command is logged with `failure_message` (a %-style template
    that receives the command's output) and reported to the caller as
    GitImportError(CANNOT_BRANCH).
    """
    try:
        return cmd_log(cmd, rdir)
    except subprocess.CalledProcessError as ex:
        log.exception(failure_message, ex.output)
        raise GitImportError(GitImportError.CANNOT_BRANCH)


121 122 123 124 125 126
def add_repo(repo, rdir_in, branch=None):
    """
    Import a git repository into the mongo modulestore.

    The repository is cloned into GIT_REPO_DIR (or pulled, when a checkout
    already exists there), optionally switched to `branch`, and then handed
    to the `import` management command.  The log output captured during the
    import is stored, together with the git output, in a CourseImportLog.

    Arguments:
        repo: URL of the git repository.  It must end with '.git' or start
            with 'http:', 'https:', 'git:' or 'file:'.
        rdir_in: optional name for the checkout directory; only its basename
            is used.  When falsy, the name is derived from `repo`.
        branch: optional branch to switch to before importing.  If left as
            None, the most recent version of the current branch is used.

    Raises:
        GitImportError: when GIT_REPO_DIR is missing, the URL is not usable,
            a git command fails, or the import command cannot be run.
    """
    # pylint: disable=too-many-statements

    # Set defaults even if it isn't defined in settings
    mongo_db = {
        'host': 'localhost',
        'port': 27017,
        'user': '',
        'password': '',
        'db': 'xlog',
    }

    # Allow overrides
    if hasattr(settings, 'MONGODB_LOG'):
        for config_item in ['host', 'user', 'password', 'db', 'port']:
            mongo_db[config_item] = settings.MONGODB_LOG.get(
                config_item, mongo_db[config_item])

    if not os.path.isdir(GIT_REPO_DIR):
        raise GitImportError(GitImportError.NO_DIR)
    # pull from git
    if not (repo.endswith('.git') or
            repo.startswith(('http:', 'https:', 'git:', 'file:'))):
        raise GitImportError(GitImportError.URL_BAD)

    if rdir_in:
        rdir = os.path.basename(rdir_in)
    else:
        rdir = repo.rsplit('/', 1)[-1].rsplit('.git', 1)[0]
    log.debug('rdir = %s', rdir)

    rdirp = '{0}/{1}'.format(GIT_REPO_DIR, rdir)
    if os.path.exists(rdirp):
        log.info('directory already exists, doing a git pull instead '
                 'of git clone')
        cmd = ['git', 'pull', ]
        cwd = rdirp
    else:
        # Name the target directory explicitly so the clone always lands in
        # rdirp, which is where every later step looks for it.
        cmd = ['git', 'clone', repo, rdir, ]
        cwd = GIT_REPO_DIR

    cwd = os.path.abspath(cwd)
    try:
        ret_git = cmd_log(cmd, cwd=cwd)
    except subprocess.CalledProcessError as ex:
        log.exception('Error running git pull: %r', ex.output)
        raise GitImportError(GitImportError.CANNOT_PULL)

    if branch:
        switch_branch(branch, rdirp)

    # get commit id
    cmd = ['git', 'log', '-1', '--format=%H', ]
    try:
        commit_id = cmd_log(cmd, cwd=rdirp)
    except subprocess.CalledProcessError as ex:
        log.exception('Unable to get git log: %r', ex.output)
        raise GitImportError(GitImportError.BAD_REPO)

    ret_git += '\nCommit ID: {0}'.format(commit_id)

    # get branch
    cmd = ['git', 'symbolic-ref', '--short', 'HEAD', ]
    try:
        branch = cmd_log(cmd, cwd=rdirp)
    except subprocess.CalledProcessError as ex:
        # I can't discover a way to exercise this, but git is complex
        # so still logging and raising here in case.
        log.exception('Unable to determine branch: %r', ex.output)
        raise GitImportError(GitImportError.BAD_REPO)

    ret_git += '{0}Branch: {1}'.format('   \n', branch)

    # Get XML logging logger and capture debug to parse results
    output = StringIO.StringIO()
    import_log_handler = logging.StreamHandler(output)
    import_log_handler.setLevel(logging.DEBUG)

    logger_names = ['xmodule.modulestore.xml_importer', 'git_add_course',
                    'xmodule.modulestore.xml', 'xmodule.seq_module', ]
    # Remember each logger's level so it can be put back afterwards.
    hijacked = []

    for logger_name in logger_names:
        logger = logging.getLogger(logger_name)
        hijacked.append((logger, logger.level))
        logger.setLevel(logging.DEBUG)
        logger.addHandler(import_log_handler)

    try:
        management.call_command('import', GIT_REPO_DIR, rdir,
                                nostatic=not GIT_IMPORT_STATIC)
    except CommandError:
        raise GitImportError(GitImportError.XML_IMPORT_FAILED)
    except NotImplementedError:
        raise GitImportError(GitImportError.UNSUPPORTED_STORE)
    finally:
        # Remove handler hijacks, also when the import failed, so the capture
        # handler and the DEBUG level do not stay attached to the loggers.
        for logger, previous_level in hijacked:
            logger.setLevel(previous_level)
            logger.removeHandler(import_log_handler)

    ret_import = output.getvalue()

    course_key = None
    location = 'unknown'

    # extract course ID from output of import-command-run and make symlink
    # this is needed in order for custom course scripts to work
    match = re.search(r'(?ms)===> IMPORTING courselike (\S+)', ret_import)
    if match:
        course_id = match.group(1)
        try:
            course_key = CourseKey.from_string(course_id)
        except InvalidKeyError:
            course_key = SlashSeparatedCourseKey.from_deprecated_string(course_id)
        cdir = '{0}/{1}'.format(GIT_REPO_DIR, course_key.course)
        log.debug('Studio course dir = %s', cdir)

        if os.path.exists(cdir) and not os.path.islink(cdir):
            log.debug('   -> exists, but is not symlink')
            log.debug(subprocess.check_output(['ls', '-l', ],
                                              cwd=os.path.abspath(cdir)))
            try:
                os.rmdir(os.path.abspath(cdir))
            except OSError:
                log.exception('Failed to remove course directory')

        if not os.path.exists(cdir):
            log.debug('   -> creating symlink between %s and %s', rdirp, cdir)
            try:
                os.symlink(os.path.abspath(rdirp), os.path.abspath(cdir))
            except OSError:
                log.exception('Unable to create course symlink')
            else:
                # Only list the directory when the link was created; running
                # `ls` in a missing directory would raise.
                log.debug(subprocess.check_output(['ls', '-l', ],
                                                  cwd=os.path.abspath(cdir)))

    # store import-command-run output in mongo
    # mdb stays None when the connection attempt fails, so the cleanup below
    # does not hit an unbound name.
    mdb = None
    try:
        if mongo_db['user'] and mongo_db['password']:
            mongouri = 'mongodb://{user}:{password}@{host}:{port}/{db}'.format(**mongo_db)
            mdb = mongoengine.connect(mongo_db['db'], host=mongouri)
        else:
            mdb = mongoengine.connect(mongo_db['db'], host=mongo_db['host'], port=mongo_db['port'])
    except mongoengine.connection.ConnectionError:
        log.exception('Unable to connect to mongodb to save log, please '
                      'check MONGODB_LOG settings')
    cil = CourseImportLog(
        course_id=course_key,
        location=location,
        repo_dir=rdir,
        created=timezone.now(),
        import_log=ret_import,
        git_log=ret_git,
    )
    try:
        cil.save()
        log.debug('saved CourseImportLog for %s', cil.course_id)
    finally:
        # Close the connection even if saving the log document failed.
        if mdb is not None:
            mdb.disconnect()