Merge pull request #866 from MITx/feature/cdodge/cms-import

Feature/cdodge/cms import

Merge pull request #866 from MITx/feature/cdodge/cms-import
Feature/cdodge/cms import
c8147587 · Calen Pennington · d1b7bcef · 82abdd07 · c8147587 · c8147587
Commit c8147587 authored Oct 11, 2012 by Calen Pennington
5 changed files
--- a/cms/djangoapps/contentstore/management/commands/import.py
+++ b/cms/djangoapps/contentstore/management/commands/import.py
@@ -5,6 +5,7 @@
 from django.core.management.base import BaseCommand, CommandError
 from xmodule.modulestore.xml_importer import import_from_xml
 from xmodule.modulestore.django import modulestore
+from xmodule.contentstore.django import contentstore
 unnamed_modules = 0
@@ -26,4 +27,4 @@ class Command(BaseCommand):
        print "Importing.  Data_dir={data}, course_dirs={courses}".format(
            data=data_dir,
            courses=course_dirs)
-        import_from_xml(modulestore('direct'), data_dir, course_dirs, load_error_modules=False)
+        import_from_xml(modulestore('direct'), data_dir, course_dirs, load_error_modules=False,static_content_store=contentstore())
--- a/cms/djangoapps/contentstore/views.py
+++ b/cms/djangoapps/contentstore/views.py
@@ -636,74 +636,26 @@ def upload_asset(request, org, course, coursename):
    # nomenclature since we're using a FileSystem paradigm here. We're just imposing
    # the Location string formatting expectations to keep things a bit more consistent
-    name = request.FILES['file'].name
+    filename = request.FILES['file'].name
    mime_type = request.FILES['file'].content_type
    filedata = request.FILES['file'].read()
-    thumbnail_file_location = None
+    content_loc = StaticContent.compute_location(org, course, filename)
+    content = StaticContent(content_loc, filename, mime_type, filedata)
-    # if the upload asset is an image, we can generate a thumbnail from it
-    # let's do so now, so that we have the thumbnail location which we need 
+    # first let's save a thumbnail so we can get back a thumbnail location
-    # so that the asset can point to it
+    thumbnail_content = contentstore().generate_thumbnail(content)
-    if mime_type.split('/')[0] == 'image':
-        try:
-            # not sure if this is necessary, but let's rewind the stream just in case
-            request.FILES['file'].seek(0)
-            # use PIL to do the thumbnail generation (http://www.pythonware.com/products/pil/)
-            # My understanding is that PIL will maintain aspect ratios while restricting
-            # the max-height/width to be whatever you pass in as 'size'
-            # @todo: move the thumbnail size to a configuration setting?!?
-            im = Image.open(request.FILES['file'])
-            # I've seen some exceptions from the PIL library when trying to save palletted 
-            # PNG files to JPEG. Per the google-universe, they suggest converting to RGB first.
-            im = im.convert('RGB')
-            size = 128, 128
-            im.thumbnail(size, Image.ANTIALIAS)
-            thumbnail_file = StringIO.StringIO()
-            im.save(thumbnail_file, 'JPEG')
-            thumbnail_file.seek(0)
-            # use a naming convention to associate originals with the thumbnail
-            thumbnail_name = StaticContent.generate_thumbnail_name(name)
-            # then just store this thumbnail as any other piece of content
-            thumbnail_file_location = StaticContent.compute_location(org, course, 
-                                                                              thumbnail_name, is_thumbnail=True)
-            thumbnail_content = StaticContent(thumbnail_file_location, thumbnail_name, 
-                                              'image/jpeg', thumbnail_file)
-            contentstore().save(thumbnail_content)
-            # remove any cached content at this location, as thumbnails are treated just like any
-            # other bit of static content
-            del_cached_content(thumbnail_content.location)
-            # not sure if this is necessary, but let's rewind the stream just in case
-            request.FILES['file'].seek(0)
-        except:
-            # catch, log, and continue as thumbnails are not a hard requirement
-            logging.error('Failed to generate thumbnail for {0}. Continuing...'.format(name))
-            thumbnail_file_location = None
-    file_location = StaticContent.compute_location(org, course, name)
-    # create a StaticContent entity and point to the thumbnail
-    content = StaticContent(file_location, name, mime_type, filedata, thumbnail_location = thumbnail_file_location)
-    # first commit to the DB
-    contentstore().save(content)
-    # then remove the cache so we're not serving up stale content
+    if thumbnail_content is not None:
-    # NOTE: we're not re-populating the cache here as the DB owns the last-modified timestamp
+        content.thumbnail_location = thumbnail_content.location
-    # which is used when serving up static content. This integrity is needed for
+        del_cached_content(thumbnail_content.location)
-    # browser-side caching support. We *could* re-fetch the saved content so that we have the
-    # timestamp populated, but we might as well wait for the first real request to come in
+    #then commit the content 
-    # to re-populate the cache.
+    contentstore().save(content)
    del_cached_content(content.location)
    response = HttpResponse('Upload completed')
-    response['asset_url'] = StaticContent.get_url_path_from_location(file_location)
+    response['asset_url'] = StaticContent.get_url_path_from_location(content.location)
    return response
 '''

--- a/common/lib/xmodule/xmodule/contentstore/content.py
+++ b/common/lib/xmodule/xmodule/contentstore/content.py
@@ -5,7 +5,11 @@ XASSET_THUMBNAIL_TAIL_NAME = '.jpg'
 import os
 import logging
+import StringIO
 from xmodule.modulestore import Location
+from .django import contentstore
+from PIL import Image
 class StaticContent(object):
    def __init__(self, loc, name, content_type, data, last_modified_at=None, thumbnail_location=None):
@@ -26,6 +30,7 @@ class StaticContent(object):
    @staticmethod
    def compute_location(org, course, name, revision=None, is_thumbnail=False):
+        name = name.replace('/', '_')
        return Location([XASSET_LOCATION_TAG, org, course, 'asset' if not is_thumbnail else 'thumbnail', Location.clean(name), revision])
    def get_id(self):
@@ -90,3 +95,43 @@ class ContentStore(object):
            ]
        '''
        raise NotImplementedError
+    def generate_thumbnail(self, content):
+        thumbnail_content = None
+        # if we're uploading an image, then let's generate a thumbnail so that we can
+        # serve it up when needed without having to rescale on the fly
+        if content.content_type is not None and content.content_type.split('/')[0] == 'image':
+            try:
+                # use PIL to do the thumbnail generation (http://www.pythonware.com/products/pil/)
+                # My understanding is that PIL will maintain aspect ratios while restricting
+                # the max-height/width to be whatever you pass in as 'size'
+                # @todo: move the thumbnail size to a configuration setting?!?
+                im = Image.open(StringIO.StringIO(content.data))
+                # I've seen some exceptions from the PIL library when trying to save palletted 
+                # PNG files to JPEG. Per the google-universe, they suggest converting to RGB first.
+                im = im.convert('RGB')
+                size = 128, 128
+                im.thumbnail(size, Image.ANTIALIAS)
+                thumbnail_file = StringIO.StringIO()
+                im.save(thumbnail_file, 'JPEG')
+                thumbnail_file.seek(0)
+                # use a naming convention to associate originals with the thumbnail
+                thumbnail_name = StaticContent.generate_thumbnail_name(content.location.name)
+                # then just store this thumbnail as any other piece of content
+                thumbnail_file_location = StaticContent.compute_location(content.location.org, content.location.course, 
+                                                                                  thumbnail_name, is_thumbnail = True)
+                thumbnail_content = StaticContent(thumbnail_file_location, thumbnail_name, 
+                                                  'image/jpeg', thumbnail_file)
+                contentstore().save(thumbnail_content)
+            except:
+                raise
+        return thumbnail_content
--- a/common/lib/xmodule/xmodule/contentstore/mongo.py
+++ b/common/lib/xmodule/xmodule/contentstore/mongo.py
@@ -18,7 +18,7 @@ class MongoContentStore(ContentStore):
        logging.debug( 'Using MongoDB for static content serving at host={0} db={1}'.format(host,db))
        _db = Connection(host=host, port=port, **kwargs)[db]
-        if self.user is not None and self.password is not None:
+        if user is not None and password is not None:
            _db.authenticate(user, password)
        self.fs = gridfs.GridFS(_db)

--- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py
+++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py
 import logging
+import os
+import mimetypes
 from .xml import XMLModuleStore
 from .exceptions import DuplicateItemError
+from xmodule.modulestore import Location
+from xmodule.contentstore.content import StaticContent, XASSET_SRCREF_PREFIX
 log = logging.getLogger(__name__)
+def import_static_content(modules, data_dir, static_content_store):
+    remap_dict = {}
+    course_data_dir = None
+    course_loc = None
+    # quick scan to find the course module and pull out the data_dir and location
+    # maybe there an easier way to look this up?!?
+    for module in modules.itervalues():
+        if module.category == 'course':
+            course_loc = module.location
+            course_data_dir = module.metadata['data_dir']
+    if course_data_dir is None or course_loc is None:
+        return remap_dict
+    # now import all static assets
+    static_dir = '{0}/{1}/static/'.format(data_dir, course_data_dir)
+    for dirname, dirnames, filenames in os.walk(static_dir):
+        for filename in filenames:
+            try:
+                content_path = os.path.join(dirname, filename)
+                fullname_with_subpath = content_path.replace(static_dir, '')  # strip away leading path from the name
+                content_loc = StaticContent.compute_location(course_loc.org, course_loc.course, fullname_with_subpath)
+                mime_type = mimetypes.guess_type(filename)[0]
+                print 'importing static asset {0} of mime-type {1} from path {2}'.format(content_loc, 
+                    mime_type, content_path)
+                f = open(content_path, 'rb')
+                data = f.read()
+                f.close()
+                content = StaticContent(content_loc, filename, mime_type, data)
+                # first let's save a thumbnail so we can get back a thumbnail location
+                thumbnail_content = static_content_store.generate_thumbnail(content)
+                if thumbnail_content is not None:
+                    content.thumbnail_location = thumbnail_content.location
+                #then commit the content
+                static_content_store.save(content)
+                #store the remapping information which will be needed to subsitute in the module data
+                remap_dict[fullname_with_subpath] = content_loc.name
+            except:
+                raise    
+    return remap_dict
 def import_from_xml(store, data_dir, course_dirs=None, 
                    default_class='xmodule.raw_module.RawDescriptor',
-                    load_error_modules=True):
+                    load_error_modules=True, static_content_store=None):
    """
    Import the specified xml data_dir into the "store" modulestore,
    using org and course as the location org and course.
@@ -23,15 +83,45 @@ def import_from_xml(store, data_dir, course_dirs=None,
        course_dirs=course_dirs,
        load_error_modules=load_error_modules,
    )
+    # NOTE: the XmlModuleStore does not implement get_items() which would be a preferable means
+    # to enumerate the entire collection of course modules. It will be left as a TBD to implement that
+    # method on XmlModuleStore.
    for course_id in module_store.modules.keys():
+        remap_dict = {}
+        if static_content_store is not None:
+            remap_dict = import_static_content(module_store.modules[course_id], data_dir, static_content_store)
        for module in module_store.modules[course_id].itervalues():
+            if module.category == 'course':
+                # HACK: for now we don't support progress tabs. There's a special metadata configuration setting for this.
+                module.metadata['hide_progress_tab'] = True
            if 'data' in module.definition:
-                store.update_item(module.location, module.definition['data'])
+                module_data = module.definition['data']
+                # cdodge: update any references to the static content paths
+                # This is a bit brute force - simple search/replace - but it's unlikely that such references to '/static/....'
+                # would occur naturally (in the wild)
+                # @TODO, sorry a bit of technical debt here. There are some helper methods in xmodule_modifiers.py and static_replace.py which could
+                # better do the url replace on the html rendering side rather than on the ingest side
+                try:
+                    if '/static/' in module_data:
+                        for subkey in remap_dict.keys():
+                            module_data = module_data.replace('/static/' + subkey, 'xasset:' + remap_dict[subkey])
+                except:
+                    pass    # part of the techincal debt is that module_data might not be a string (e.g. ABTest)
+                store.update_item(module.location, module_data)
            if 'children' in module.definition:
                store.update_children(module.location, module.definition['children'])
            # NOTE: It's important to use own_metadata here to avoid writing
            # inherited metadata everywhere.
            store.update_metadata(module.location, dict(module.own_metadata))
    return module_store