git_import.py 11.3 KB
Newer Older
1 2 3 4 5
"""
Provides a function for importing a git repository into the lms
instance when using a mongo modulestore
"""

6
import logging
7 8 9 10 11
import os
import re
import StringIO
import subprocess

12
import mongoengine
13 14 15 16
from django.conf import settings
from django.core import management
from django.core.management.base import CommandError
from django.utils import timezone
17
from django.utils.translation import ugettext_lazy as _
18
from opaque_keys.edx.keys import CourseKey
19

20 21
from dashboard.models import CourseImportLog

22 23
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

24
# Directory that holds the cloned course repositories when the Django
# settings do not define GIT_REPO_DIR.
DEFAULT_GIT_REPO_DIR = '/edx/var/app/edxapp/course_repos'
25 26 27 28 29 30


class GitImportError(Exception):
    """
    Base exception for the typical errors in a git import.

    Subclasses set ``MESSAGE`` to their default (translatable) error text,
    which is used whenever the exception is raised without an explicit
    message.
    """
    # Default error text; subclasses override this.
    MESSAGE = None

    def __init__(self, message=None):
        """
        Build the exception.

        ``message`` replaces the class-level ``MESSAGE`` when it is given.
        """
        if message is None:
            # Read the class constant ``MESSAGE``; the lowercase ``message``
            # attribute is not defined by this class hierarchy.
            message = self.MESSAGE
        super(GitImportError, self).__init__(message)


class GitImportErrorNoDir(GitImportError):
    """
    GitImportError for a repository directory that is missing on disk.

    The offending path is interpolated into the error message.
    """
    def __init__(self, repo_dir):
        template = _(
            "Path {0} doesn't exist, please create it, "
            "or configure a different path with "
            "GIT_REPO_DIR"
        )
        super(GitImportErrorNoDir, self).__init__(template.format(repo_dir))


class GitImportErrorUrlBad(GitImportError):
    """
    GitImportError for a repository URL that cannot be used for an import.
    """
    MESSAGE = _(
        'Non usable git url provided. Expecting something like: '
        'git@github.com:mitocw/edx4edx_lite.git'
    )


class GitImportErrorBadRepo(GitImportError):
    """
    GitImportError when the cloned repository was malformed.

    Raised by add_repo when the commit id or the current branch cannot be
    read from the repository.
    """
    MESSAGE = _('Unable to get git log')


class GitImportErrorCannotPull(GitImportError):
    """
    GitImportError when the clone or pull of the repository failed.

    Raised by add_repo when the ``git clone`` / ``git pull`` command exits
    with a non-zero status.
    """
    MESSAGE = _('git clone or pull failed!')


class GitImportErrorXmlImportFailed(GitImportError):
    """
    GitImportError when the course import command failed.

    Raised by add_repo when the ``import`` management command raises a
    CommandError.
    """
    MESSAGE = _('Unable to run import command.')


class GitImportErrorUnsupportedStore(GitImportError):
    """
    GitImportError when the modulestore doesn't support imports.

    Raised by add_repo when the ``import`` management command raises
    NotImplementedError.
    """
    MESSAGE = _('The underlying module store does not support import.')


class GitImportErrorRemoteBranchMissing(GitImportError):
    """
    GitImportError for a requested branch that the remote does not offer.
    """
    # Translators: This is an error message when they ask for a
    # particular version of a git repository and that version isn't
    # available from the remote source they specified
    MESSAGE = _(
        'The specified remote branch is not available.'
    )


class GitImportErrorCannotBranch(GitImportError):
    """
    GitImportError for a branch that could not be switched to locally.
    """
    # Translators: Error message shown when they have asked for a git
    # repository branch, a specific version within a repository, that
    # doesn't exist, or there is a problem changing to it.
    MESSAGE = _(
        'Unable to switch to specified branch. '
        'Please check your branch name.'
    )
109

110

111 112 113 114 115 116 117
def cmd_log(cmd, cwd):
    """
    Run ``cmd`` inside ``cwd`` with stderr folded into stdout, write the
    command and everything it printed to the debug log, and hand the
    captured output back to the caller.

    A non-zero exit status surfaces as subprocess.CalledProcessError.
    """
    captured = subprocess.check_output(
        cmd,
        stderr=subprocess.STDOUT,
        cwd=cwd,
    )
    command_line = ' '.join(cmd)

    log.debug(u'Command was: %r. Working directory was: %r', command_line, cwd)
    log.debug(u'Command output was: %r', captured)
    return captured


124 125 126 127 128 129 130 131 132 133 134 135 136
def switch_branch(branch, rdir):
    """
    Put the repository checked out in ``rdir`` onto ``branch`` and sync it
    with ``origin``.

    The remote is fetched, the branch is looked up on ``origin``, a local
    tracking branch is created when none is listed yet, and the working tree
    is finally hard-reset to ``origin/<branch>``.

    Raises GitImportErrorRemoteBranchMissing when the branch does not show up
    in the remote listing, and GitImportErrorCannotBranch when any of the git
    commands fails.
    """
    # Refresh our view of the remote before anything else.
    try:
        cmd_log(['git', 'fetch'], rdir)
    except subprocess.CalledProcessError as err:
        log.exception('Unable to fetch remote: %r', err.output)
        raise GitImportErrorCannotBranch()

    # Ask the remote whether it knows the requested branch at all.
    ls_remote = [
        'git', 'ls-remote', 'origin', '-h',
        'refs/heads/{0}'.format(branch),
    ]
    try:
        remote_heads = cmd_log(ls_remote, rdir)
    except subprocess.CalledProcessError as err:
        log.exception('Getting a list of remote branches failed: %r', err.output)
        raise GitImportErrorCannotBranch()
    if branch not in remote_heads:
        raise GitImportErrorRemoteBranchMissing()

    # Find out whether the branch already shows up in the branch listing.
    try:
        listing = cmd_log(['git', 'branch', '-a'], rdir)
    except subprocess.CalledProcessError as err:
        log.exception('Getting a list of local branches failed: %r', err.output)
        raise GitImportErrorCannotBranch()
    # Drop the "current branch" marker and the surrounding padding.
    known_branches = [
        entry.replace('*', '').strip() for entry in listing.split('\n')
    ]

    tracking_ref = 'origin/{0}'.format(branch)

    if branch not in known_branches:
        # Remote-only so far: create the local branch with -b and track it.
        checkout = [
            'git', 'checkout', '--force', '--track',
            '-b', branch, tracking_ref,
        ]
        try:
            cmd_log(checkout, rdir)
        except subprocess.CalledProcessError as err:
            log.exception('Unable to checkout remote branch: %r', err.output)
            raise GitImportErrorCannotBranch()

    # The branch exists locally now; hard-reset it to the newest remote state.
    try:
        cmd_log(['git', 'reset', '--hard', tracking_ref], rdir)
    except subprocess.CalledProcessError as err:
        log.exception('Unable to reset to branch: %r', err.output)
        raise GitImportErrorCannotBranch()
175 176


177 178 179 180 181 182
def add_repo(repo, rdir_in, branch=None):
    """
    This will add a git repo into the mongo modulestore.
    If branch is left as None, it will fetch the most recent
    version of the current branch.

    The steps are:

    1. clone ``repo`` into the configured GIT_REPO_DIR, or ``git pull`` if
       the target directory already exists;
    2. optionally switch to ``branch``;
    3. run the ``import`` management command while capturing the importer's
       log output;
    4. symlink ``<GIT_REPO_DIR>/<course>`` to the checkout when the course id
       can be found in the captured output;
    5. store the git and import logs in a CourseImportLog document.

    Arguments:
        repo: git URL; must end with ``.git`` or start with ``http:``,
            ``https:``, ``git:`` or ``file:``.
        rdir_in: optional directory name for the checkout (only its basename
            is used); when falsy the name is derived from ``repo``.
        branch: optional branch to switch to before importing.

    Raises:
        GitImportErrorNoDir: GIT_REPO_DIR is not an existing directory.
        GitImportErrorUrlBad: ``repo`` does not look like a git URL.
        GitImportErrorCannotPull: the clone/pull command failed.
        GitImportErrorRemoteBranchMissing, GitImportErrorCannotBranch:
            switching to ``branch`` failed (see switch_branch).
        GitImportErrorBadRepo: the commit id or branch could not be read.
        GitImportErrorXmlImportFailed: the import command raised CommandError.
        GitImportErrorUnsupportedStore: the import command raised
            NotImplementedError.
    """
    # pylint: disable=too-many-statements

    git_repo_dir = getattr(settings, 'GIT_REPO_DIR', DEFAULT_GIT_REPO_DIR)
    git_import_static = getattr(settings, 'GIT_IMPORT_STATIC', True)

    # Set defaults even if it isn't defined in settings
    mongo_db = {
        'host': 'localhost',
        'port': 27017,
        'user': '',
        'password': '',
        'db': 'xlog',
    }

    # Allow overrides
    if hasattr(settings, 'MONGODB_LOG'):
        for config_item in ['host', 'user', 'password', 'db', 'port']:
            mongo_db[config_item] = settings.MONGODB_LOG.get(
                config_item, mongo_db[config_item])

    if not os.path.isdir(git_repo_dir):
        raise GitImportErrorNoDir(git_repo_dir)
    # pull from git
    if not (repo.endswith('.git') or
            repo.startswith(('http:', 'https:', 'git:', 'file:'))):
        raise GitImportErrorUrlBad()

    if rdir_in:
        rdir = os.path.basename(rdir_in)
    else:
        # Last path component of the URL without its ".git" suffix
        rdir = repo.rsplit('/', 1)[-1].rsplit('.git', 1)[0]
    log.debug('rdir = %s', rdir)

    rdirp = '{0}/{1}'.format(git_repo_dir, rdir)
    if os.path.exists(rdirp):
        log.info('directory already exists, doing a git pull instead '
                 'of git clone')
        cmd = ['git', 'pull', ]
        cwd = rdirp
    else:
        cmd = ['git', 'clone', repo, ]
        cwd = git_repo_dir

    cwd = os.path.abspath(cwd)
    try:
        ret_git = cmd_log(cmd, cwd=cwd)
    except subprocess.CalledProcessError as ex:
        log.exception('Error running git pull: %r', ex.output)
        raise GitImportErrorCannotPull()

    if branch:
        switch_branch(branch, rdirp)

    # get commit id
    cmd = ['git', 'log', '-1', '--format=%H', ]
    try:
        commit_id = cmd_log(cmd, cwd=rdirp)
    except subprocess.CalledProcessError as ex:
        log.exception('Unable to get git log: %r', ex.output)
        raise GitImportErrorBadRepo()

    ret_git += '\nCommit ID: {0}'.format(commit_id)

    # get branch
    cmd = ['git', 'symbolic-ref', '--short', 'HEAD', ]
    try:
        branch = cmd_log(cmd, cwd=rdirp)
    except subprocess.CalledProcessError as ex:
        # I can't discover a way to exercise this, but git is complex
        # so still logging and raising here in case.
        log.exception('Unable to determine branch: %r', ex.output)
        raise GitImportErrorBadRepo()

    ret_git += '{0}Branch: {1}'.format('   \n', branch)

    # Get XML logging logger and capture debug to parse results
    output = StringIO.StringIO()
    import_log_handler = logging.StreamHandler(output)
    import_log_handler.setLevel(logging.DEBUG)

    logger_names = ['xmodule.modulestore.xml_importer', 'git_add_course',
                    'xmodule.modulestore.xml', 'xmodule.seq_module', ]
    loggers = []

    for logger_name in logger_names:
        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.DEBUG)
        logger.addHandler(import_log_handler)
        loggers.append(logger)

    try:
        management.call_command('import', git_repo_dir, rdir,
                                nostatic=not git_import_static)
    except CommandError:
        raise GitImportErrorXmlImportFailed()
    except NotImplementedError:
        raise GitImportErrorUnsupportedStore()
    finally:
        # Remove handler hijacks. Done in a finally block so that a failed
        # import does not leave the capture handler (and the DEBUG level)
        # attached to these loggers.
        for logger in loggers:
            logger.setLevel(logging.NOTSET)
            logger.removeHandler(import_log_handler)

    ret_import = output.getvalue()

    course_key = None
    location = 'unknown'

    # extract course ID from output of import-command-run and make symlink
    # this is needed in order for custom course scripts to work
    match = re.search(r'(?ms)===> IMPORTING courselike (\S+)', ret_import)
    if match:
        course_id = match.group(1)
        course_key = CourseKey.from_string(course_id)
        cdir = '{0}/{1}'.format(git_repo_dir, course_key.course)
        log.debug('Studio course dir = %s', cdir)

        if os.path.exists(cdir) and not os.path.islink(cdir):
            log.debug('   -> exists, but is not symlink')
            log.debug(subprocess.check_output(['ls', '-l', ],
                                              cwd=os.path.abspath(cdir)))
            try:
                # Only succeeds for an empty directory; anything else is
                # logged and left in place.
                os.rmdir(os.path.abspath(cdir))
            except OSError:
                log.exception('Failed to remove course directory')

        if not os.path.exists(cdir):
            log.debug('   -> creating symlink between %s and %s', rdirp, cdir)
            try:
                os.symlink(os.path.abspath(rdirp), os.path.abspath(cdir))
            except OSError:
                log.exception('Unable to create course symlink')
            else:
                # Only list the directory when the symlink exists; running
                # ``ls`` with a missing cwd would raise OSError.
                log.debug(subprocess.check_output(['ls', '-l', ],
                                                  cwd=os.path.abspath(cdir)))

    # store import-command-run output in mongo
    mongouri = 'mongodb://{user}:{password}@{host}:{port}/{db}'.format(**mongo_db)

    try:
        if mongo_db['user'] and mongo_db['password']:
            mdb = mongoengine.connect(mongo_db['db'], host=mongouri)
        else:
            mdb = mongoengine.connect(mongo_db['db'], host=mongo_db['host'], port=mongo_db['port'])
    except mongoengine.connection.ConnectionError:
        log.exception('Unable to connect to mongodb to save log, please '
                      'check MONGODB_LOG settings')
        # Without a connection there is nothing to save to and nothing to
        # disconnect from; the course import itself has already completed.
        return

    try:
        cil = CourseImportLog(
            course_id=course_key,
            location=location,
            repo_dir=rdir,
            created=timezone.now(),
            import_log=ret_import,
            git_log=ret_git,
        )
        cil.save()

        log.debug('saved CourseImportLog for %s', cil.course_id)
    finally:
        # Always release the connection, even if saving the log failed.
        mdb.disconnect()