"""
Provides a function for importing a git repository into the LMS
instance when using a mongo modulestore.
"""

import os
import re
import StringIO
import subprocess
import logging

from django.conf import settings
from django.core import management
from django.core.management.base import CommandError
from django.utils import timezone
from django.utils.translation import ugettext_lazy as _
import mongoengine

from dashboard.models import CourseImportLog
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.locations import SlashSeparatedCourseKey

# Module-level logger used by every helper in this file.
log = logging.getLogger(__name__)

# Directory holding the course checkouts; used by add_repo when
# settings.GIT_REPO_DIR is not defined.
DEFAULT_GIT_REPO_DIR = '/edx/var/app/edxapp/course_repos'


class GitImportError(Exception):
    """
    Exception class for handling the typical errors in a git import.

    Subclasses set the class attribute ``MESSAGE`` to their default error
    text. That text is used as the exception message whenever the
    constructor is called without an explicit ``message``.
    """
    # Default error text; subclasses override this.
    MESSAGE = None

    def __init__(self, message=None):
        if message is None:
            # Fall back to the class-level default. This must read MESSAGE
            # (upper case): ``self.message`` is not the attribute that the
            # subclasses define.
            message = self.MESSAGE
        super(GitImportError, self).__init__(message)


class GitImportErrorNoDir(GitImportError):
    """
    GitImportError raised when the configured repository directory is
    missing; the offending path is interpolated into the message.
    """
    def __init__(self, repo_dir):
        template = _(
            "Path {0} doesn't exist, please create it, "
            "or configure a different path with "
            "GIT_REPO_DIR"
        )
        message = template.format(repo_dir)
        super(GitImportErrorNoDir, self).__init__(message)


class GitImportErrorUrlBad(GitImportError):
    """
    GitImportError raised when the supplied git url is not in a form
    that can be used for cloning.
    """
    MESSAGE = _(
        'Non usable git url provided. '
        'Expecting something like: '
        'git@github.com:mitocw/edx4edx_lite.git'
    )


class GitImportErrorBadRepo(GitImportError):
    """
    GitImportError raised when the checked-out repository cannot be
    inspected (for example, its git log cannot be read).
    """
    MESSAGE = _('Unable to get git log')


class GitImportErrorCannotPull(GitImportError):
    """
    GitImportError raised when cloning the repository, or pulling into
    an existing checkout, fails.
    """
    MESSAGE = _('git clone or pull failed!')


class GitImportErrorXmlImportFailed(GitImportError):
    """
    GitImportError raised when the course import management command
    reports a failure.
    """
    MESSAGE = _('Unable to run import command.')


class GitImportErrorUnsupportedStore(GitImportError):
    """
    GitImportError raised when the underlying modulestore has no
    support for importing courses.
    """
    MESSAGE = _('The underlying module store does not support import.')


class GitImportErrorRemoteBranchMissing(GitImportError):
    """
    GitImportError when the remote branch doesn't exist.
    """
    # Translators: This is an error message when they ask for a
    # particular version of a git repository and that version isn't
    # available from the remote source they specified
    MESSAGE = _('The specified remote branch is not available.')


class GitImportErrorCannotBranch(GitImportError):
    """
    GitImportError when the requested branch cannot be switched to,
    either because it doesn't exist or because a git command failed
    while changing to it.
    """
    # Translators: Error message shown when they have asked for a git
    # repository branch, a specific version within a repository, that
    # doesn't exist, or there is a problem changing to it.
    MESSAGE = _('Unable to switch to specified branch. Please check your branch name.')

def cmd_log(cmd, cwd):
    """
    Helper function to redirect stderr to stdout and log the command
    used along with the output.

    `cmd` is the command as a list of arguments and `cwd` is the directory
    it is run in. Returns the command's combined stdout/stderr output.

    Will raise subprocess.CalledProcessError if the command doesn't
    return 0; in that case nothing is logged here and the caller is
    expected to report the failure.
    """
    output = subprocess.check_output(cmd, cwd=cwd, stderr=subprocess.STDOUT)

    log.debug(u'Command was: %r. Working directory was: %r', ' '.join(cmd), cwd)
    log.debug(u'Command output was: %r', output)
    return output


def switch_branch(branch, rdir):
    """
    This will determine how to change the branch of the repo, and then
    use the appropriate git commands to do so.

    `branch` is the name of the branch to switch to and `rdir` is the
    path of the local checkout. The steps are: fetch from the remote,
    verify that `origin` has the branch, check the branch out (creating a
    local tracking branch when none exists yet), and finally hard-reset it
    to `origin/<branch>`.

    Raises GitImportErrorRemoteBranchMissing if the branch is not available
    from the remote, and GitImportErrorCannotBranch if there are any issues
    with the git commands used to change branches.
    """
    # Get the latest remote
    try:
        cmd_log(['git', 'fetch', ], rdir)
    except subprocess.CalledProcessError as ex:
        log.exception('Unable to fetch remote: %r', ex.output)
        raise GitImportErrorCannotBranch()

    # Check if the branch is available from the remote.
    cmd = ['git', 'ls-remote', 'origin', '-h', 'refs/heads/{0}'.format(branch), ]
    try:
        output = cmd_log(cmd, rdir)
    except subprocess.CalledProcessError as ex:
        log.exception('Getting a list of remote branches failed: %r', ex.output)
        raise GitImportErrorCannotBranch()
    if branch not in output:
        raise GitImportErrorRemoteBranchMissing()

    # Check if the remote branch has already been made locally
    cmd = ['git', 'branch', '-a', ]
    try:
        output = cmd_log(cmd, rdir)
    except subprocess.CalledProcessError as ex:
        log.exception('Getting a list of local branches failed: %r', ex.output)
        raise GitImportErrorCannotBranch()
    # Strip the "current branch" marker and padding from each listed name.
    branches = [line.replace('*', '').strip() for line in output.split('\n')]

    if branch not in branches:
        # Checkout with -b since it is remote only
        cmd = ['git', 'checkout', '--force', '--track',
               '-b', branch, 'origin/{0}'.format(branch), ]
        failure_message = 'Unable to checkout remote branch: %r'
    else:
        # The branch already exists locally but may not be the one that is
        # currently checked out. Switch to it first, otherwise the hard
        # reset below would rewrite whichever branch happens to be current.
        # The trailing '--' makes git treat the name as a branch, not a path.
        cmd = ['git', 'checkout', '--force', branch, '--', ]
        failure_message = 'Unable to checkout local branch: %r'
    try:
        cmd_log(cmd, rdir)
    except subprocess.CalledProcessError as ex:
        log.exception(failure_message, ex.output)
        raise GitImportErrorCannotBranch()

    # Go ahead and reset hard to the newest version of the branch now that we know
    # it is local.
    try:
        cmd_log(['git', 'reset', '--hard', 'origin/{0}'.format(branch), ], rdir)
    except subprocess.CalledProcessError as ex:
        log.exception('Unable to reset to branch: %r', ex.output)
        raise GitImportErrorCannotBranch()


def add_repo(repo, rdir_in, branch=None):
    """
    This will add a git repo into the mongo modulestore.
    If branch is left as None, it will fetch the most recent
    version of the current branch.

    `repo` is the git url to clone or pull from. `rdir_in` optionally names
    the checkout directory (only its basename is used); when it is empty
    the directory name is derived from the repository url.

    The repository is cloned (or pulled if already present) below
    GIT_REPO_DIR, imported with the `import` management command, symlinked
    under its course name, and a CourseImportLog record holding the git
    and import output is saved to mongo.

    Raises:
        GitImportErrorNoDir: GIT_REPO_DIR is not an existing directory.
        GitImportErrorUrlBad: `repo` doesn't look like a git url.
        GitImportErrorCannotPull: the git clone/pull command failed.
        GitImportErrorBadRepo: the commit id or branch couldn't be read.
        GitImportErrorXmlImportFailed: the import command failed.
        GitImportErrorUnsupportedStore: the modulestore can't import.
        Any GitImportError raised by switch_branch when `branch` is given.
    """
    # pylint: disable=too-many-statements

    git_repo_dir = getattr(settings, 'GIT_REPO_DIR', DEFAULT_GIT_REPO_DIR)
    git_import_static = getattr(settings, 'GIT_IMPORT_STATIC', True)

    # Set defaults even if it isn't defined in settings
    mongo_db = {
        'host': 'localhost',
        'port': 27017,
        'user': '',
        'password': '',
        'db': 'xlog',
    }

    # Allow overrides
    if hasattr(settings, 'MONGODB_LOG'):
        for config_item in ['host', 'user', 'password', 'db', 'port']:
            mongo_db[config_item] = settings.MONGODB_LOG.get(
                config_item, mongo_db[config_item])

    if not os.path.isdir(git_repo_dir):
        raise GitImportErrorNoDir(git_repo_dir)
    # pull from git
    if not (repo.endswith('.git') or
            repo.startswith(('http:', 'https:', 'git:', 'file:'))):
        raise GitImportErrorUrlBad()

    if rdir_in:
        rdir = os.path.basename(rdir_in)
    else:
        # Last path component of the url, without its '.git' suffix.
        rdir = repo.rsplit('/', 1)[-1].rsplit('.git', 1)[0]
    log.debug('rdir = %s', rdir)

    rdirp = '{0}/{1}'.format(git_repo_dir, rdir)
    if os.path.exists(rdirp):
        log.info('directory already exists, doing a git pull instead '
                 'of git clone')
        cmd = ['git', 'pull', ]
        cwd = rdirp
    else:
        cmd = ['git', 'clone', repo, ]
        cwd = git_repo_dir

    cwd = os.path.abspath(cwd)
    try:
        ret_git = cmd_log(cmd, cwd=cwd)
    except subprocess.CalledProcessError as ex:
        log.exception('Error running git pull: %r', ex.output)
        raise GitImportErrorCannotPull()

    if branch:
        switch_branch(branch, rdirp)

    # get commit id
    cmd = ['git', 'log', '-1', '--format=%H', ]
    try:
        commit_id = cmd_log(cmd, cwd=rdirp)
    except subprocess.CalledProcessError as ex:
        log.exception('Unable to get git log: %r', ex.output)
        raise GitImportErrorBadRepo()

    ret_git += '\nCommit ID: {0}'.format(commit_id)

    # get branch
    cmd = ['git', 'symbolic-ref', '--short', 'HEAD', ]
    try:
        branch = cmd_log(cmd, cwd=rdirp)
    except subprocess.CalledProcessError as ex:
        # I can't discover a way to exercise this, but git is complex
        # so still logging and raising here in case.
        log.exception('Unable to determine branch: %r', ex.output)
        raise GitImportErrorBadRepo()

    ret_git += '{0}Branch: {1}'.format('   \n', branch)

    # Get XML logging logger and capture debug to parse results
    output = StringIO.StringIO()
    import_log_handler = logging.StreamHandler(output)
    import_log_handler.setLevel(logging.DEBUG)

    logger_names = ['xmodule.modulestore.xml_importer', 'git_add_course',
                    'xmodule.modulestore.xml', 'xmodule.seq_module', ]
    loggers = []

    for logger_name in logger_names:
        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.DEBUG)
        logger.addHandler(import_log_handler)
        loggers.append(logger)

    try:
        management.call_command('import', git_repo_dir, rdir,
                                nostatic=not git_import_static)
    except CommandError:
        raise GitImportErrorXmlImportFailed()
    except NotImplementedError:
        raise GitImportErrorUnsupportedStore()
    finally:
        # Remove handler hijacks. Done in `finally` so a failed import
        # doesn't leave the capture handler and DEBUG level attached to
        # these shared loggers.
        for logger in loggers:
            logger.setLevel(logging.NOTSET)
            logger.removeHandler(import_log_handler)

    ret_import = output.getvalue()

    course_key = None
    location = 'unknown'

    # extract course ID from output of import-command-run and make symlink
    # this is needed in order for custom course scripts to work
    match = re.search(r'(?ms)===> IMPORTING courselike (\S+)', ret_import)
    if match:
        course_id = match.group(1)
        try:
            course_key = CourseKey.from_string(course_id)
        except InvalidKeyError:
            course_key = SlashSeparatedCourseKey.from_deprecated_string(course_id)
        cdir = '{0}/{1}'.format(git_repo_dir, course_key.course)
        log.debug('Studio course dir = %s', cdir)

        if os.path.exists(cdir) and not os.path.islink(cdir):
            log.debug('   -> exists, but is not symlink')
            log.debug(subprocess.check_output(['ls', '-l', ],
                                              cwd=os.path.abspath(cdir)))
            try:
                os.rmdir(os.path.abspath(cdir))
            except OSError:
                log.exception('Failed to remove course directory')

        if not os.path.exists(cdir):
            log.debug('   -> creating symlink between %s and %s', rdirp, cdir)
            try:
                os.symlink(os.path.abspath(rdirp), os.path.abspath(cdir))
            except OSError:
                log.exception('Unable to create course symlink')
            else:
                # Only list the directory when the symlink exists; running
                # `ls` with a missing cwd would raise an uncaught OSError.
                log.debug(subprocess.check_output(['ls', '-l', ],
                                                  cwd=os.path.abspath(cdir)))

    # store import-command-run output in mongo
    # NOTE(review): user and password are interpolated without URL-escaping;
    # credentials containing characters such as '@' or ':' would presumably
    # yield a malformed URI -- confirm.
    mongouri = 'mongodb://{user}:{password}@{host}:{port}/{db}'.format(**mongo_db)

    # Stays None when the connection attempt fails, so the final
    # disconnect can be skipped instead of hitting an unbound name.
    mdb = None
    try:
        if mongo_db['user'] and mongo_db['password']:
            mdb = mongoengine.connect(mongo_db['db'], host=mongouri)
        else:
            mdb = mongoengine.connect(mongo_db['db'], host=mongo_db['host'], port=mongo_db['port'])
    except mongoengine.connection.ConnectionError:
        log.exception('Unable to connect to mongodb to save log, please '
                      'check MONGODB_LOG settings')
    cil = CourseImportLog(
        course_id=course_key,
        location=location,
        repo_dir=rdir,
        created=timezone.now(),
        import_log=ret_import,
        git_log=ret_git,
    )
    cil.save()

    log.debug('saved CourseImportLog for %s', cil.course_id)
    if mdb is not None:
        mdb.disconnect()