Commit c8147587 by Calen Pennington

Merge pull request #866 from MITx/feature/cdodge/cms-import

Feature/cdodge/cms import
parents d1b7bcef 82abdd07
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
from xmodule.modulestore.xml_importer import import_from_xml from xmodule.modulestore.xml_importer import import_from_xml
from xmodule.modulestore.django import modulestore from xmodule.modulestore.django import modulestore
from xmodule.contentstore.django import contentstore
unnamed_modules = 0 unnamed_modules = 0
...@@ -26,4 +27,4 @@ class Command(BaseCommand): ...@@ -26,4 +27,4 @@ class Command(BaseCommand):
print "Importing. Data_dir={data}, course_dirs={courses}".format( print "Importing. Data_dir={data}, course_dirs={courses}".format(
data=data_dir, data=data_dir,
courses=course_dirs) courses=course_dirs)
import_from_xml(modulestore('direct'), data_dir, course_dirs, load_error_modules=False) import_from_xml(modulestore('direct'), data_dir, course_dirs, load_error_modules=False,static_content_store=contentstore())
...@@ -636,74 +636,26 @@ def upload_asset(request, org, course, coursename): ...@@ -636,74 +636,26 @@ def upload_asset(request, org, course, coursename):
# nomenclature since we're using a FileSystem paradigm here. We're just imposing # nomenclature since we're using a FileSystem paradigm here. We're just imposing
# the Location string formatting expectations to keep things a bit more consistent # the Location string formatting expectations to keep things a bit more consistent
name = request.FILES['file'].name filename = request.FILES['file'].name
mime_type = request.FILES['file'].content_type mime_type = request.FILES['file'].content_type
filedata = request.FILES['file'].read() filedata = request.FILES['file'].read()
thumbnail_file_location = None content_loc = StaticContent.compute_location(org, course, filename)
content = StaticContent(content_loc, filename, mime_type, filedata)
# if the upload asset is an image, we can generate a thumbnail from it
# let's do so now, so that we have the thumbnail location which we need # first let's save a thumbnail so we can get back a thumbnail location
# so that the asset can point to it thumbnail_content = contentstore().generate_thumbnail(content)
if mime_type.split('/')[0] == 'image':
try:
# not sure if this is necessary, but let's rewind the stream just in case
request.FILES['file'].seek(0)
# use PIL to do the thumbnail generation (http://www.pythonware.com/products/pil/)
# My understanding is that PIL will maintain aspect ratios while restricting
# the max-height/width to be whatever you pass in as 'size'
# @todo: move the thumbnail size to a configuration setting?!?
im = Image.open(request.FILES['file'])
# I've seen some exceptions from the PIL library when trying to save palletted
# PNG files to JPEG. Per the google-universe, they suggest converting to RGB first.
im = im.convert('RGB')
size = 128, 128
im.thumbnail(size, Image.ANTIALIAS)
thumbnail_file = StringIO.StringIO()
im.save(thumbnail_file, 'JPEG')
thumbnail_file.seek(0)
# use a naming convention to associate originals with the thumbnail
thumbnail_name = StaticContent.generate_thumbnail_name(name)
# then just store this thumbnail as any other piece of content
thumbnail_file_location = StaticContent.compute_location(org, course,
thumbnail_name, is_thumbnail=True)
thumbnail_content = StaticContent(thumbnail_file_location, thumbnail_name,
'image/jpeg', thumbnail_file)
contentstore().save(thumbnail_content)
# remove any cached content at this location, as thumbnails are treated just like any
# other bit of static content
del_cached_content(thumbnail_content.location)
# not sure if this is necessary, but let's rewind the stream just in case
request.FILES['file'].seek(0)
except:
# catch, log, and continue as thumbnails are not a hard requirement
logging.error('Failed to generate thumbnail for {0}. Continuing...'.format(name))
thumbnail_file_location = None
file_location = StaticContent.compute_location(org, course, name)
# create a StaticContent entity and point to the thumbnail
content = StaticContent(file_location, name, mime_type, filedata, thumbnail_location = thumbnail_file_location)
# first commit to the DB
contentstore().save(content)
# then remove the cache so we're not serving up stale content if thumbnail_content is not None:
# NOTE: we're not re-populating the cache here as the DB owns the last-modified timestamp content.thumbnail_location = thumbnail_content.location
# which is used when serving up static content. This integrity is needed for del_cached_content(thumbnail_content.location)
# browser-side caching support. We *could* re-fetch the saved content so that we have the
# timestamp populated, but we might as well wait for the first real request to come in #then commit the content
# to re-populate the cache. contentstore().save(content)
del_cached_content(content.location) del_cached_content(content.location)
response = HttpResponse('Upload completed') response = HttpResponse('Upload completed')
response['asset_url'] = StaticContent.get_url_path_from_location(file_location) response['asset_url'] = StaticContent.get_url_path_from_location(content.location)
return response return response
''' '''
......
...@@ -5,7 +5,11 @@ XASSET_THUMBNAIL_TAIL_NAME = '.jpg' ...@@ -5,7 +5,11 @@ XASSET_THUMBNAIL_TAIL_NAME = '.jpg'
import os import os
import logging import logging
import StringIO
from xmodule.modulestore import Location from xmodule.modulestore import Location
from .django import contentstore
from PIL import Image
class StaticContent(object): class StaticContent(object):
def __init__(self, loc, name, content_type, data, last_modified_at=None, thumbnail_location=None): def __init__(self, loc, name, content_type, data, last_modified_at=None, thumbnail_location=None):
...@@ -26,6 +30,7 @@ class StaticContent(object): ...@@ -26,6 +30,7 @@ class StaticContent(object):
@staticmethod @staticmethod
def compute_location(org, course, name, revision=None, is_thumbnail=False): def compute_location(org, course, name, revision=None, is_thumbnail=False):
name = name.replace('/', '_')
return Location([XASSET_LOCATION_TAG, org, course, 'asset' if not is_thumbnail else 'thumbnail', Location.clean(name), revision]) return Location([XASSET_LOCATION_TAG, org, course, 'asset' if not is_thumbnail else 'thumbnail', Location.clean(name), revision])
def get_id(self): def get_id(self):
...@@ -90,3 +95,43 @@ class ContentStore(object): ...@@ -90,3 +95,43 @@ class ContentStore(object):
] ]
''' '''
raise NotImplementedError raise NotImplementedError
def generate_thumbnail(self, content):
    """
    Create a JPEG thumbnail for an image asset and save it in this store.

    `content` is expected to expose `content_type`, `data` and `location`
    (with `org`, `course` and `name`). Returns the saved thumbnail
    StaticContent, or None when `content.content_type` is missing or is
    not an `image/*` type.

    Any exception raised while decoding, scaling or saving the thumbnail
    propagates to the caller.
    """
    # if we're handling an image, then let's generate a thumbnail so that we can
    # serve it up when needed without having to rescale on the fly
    if content.content_type is None or content.content_type.split('/')[0] != 'image':
        return None

    # use PIL to do the thumbnail generation (http://www.pythonware.com/products/pil/)
    # My understanding is that PIL will maintain aspect ratios while restricting
    # the max-height/width to be whatever you pass in as 'size'
    # @todo: move the thumbnail size to a configuration setting?!?
    im = Image.open(StringIO.StringIO(content.data))

    # I've seen some exceptions from the PIL library when trying to save paletted
    # PNG files to JPEG. Per the google-universe, they suggest converting to RGB first.
    im = im.convert('RGB')
    size = 128, 128
    im.thumbnail(size, Image.ANTIALIAS)

    thumbnail_file = StringIO.StringIO()
    im.save(thumbnail_file, 'JPEG')
    thumbnail_file.seek(0)

    # use a naming convention to associate originals with the thumbnail
    thumbnail_name = StaticContent.generate_thumbnail_name(content.location.name)

    # then just store this thumbnail as any other piece of content
    thumbnail_file_location = StaticContent.compute_location(
        content.location.org, content.location.course, thumbnail_name, is_thumbnail=True)
    thumbnail_content = StaticContent(thumbnail_file_location, thumbnail_name,
                                      'image/jpeg', thumbnail_file)

    # save into the store this method was invoked on, not the globally configured
    # one, so the thumbnail always lands next to the asset that points at it
    self.save(thumbnail_content)

    return thumbnail_content
...@@ -18,7 +18,7 @@ class MongoContentStore(ContentStore): ...@@ -18,7 +18,7 @@ class MongoContentStore(ContentStore):
logging.debug( 'Using MongoDB for static content serving at host={0} db={1}'.format(host,db)) logging.debug( 'Using MongoDB for static content serving at host={0} db={1}'.format(host,db))
_db = Connection(host=host, port=port, **kwargs)[db] _db = Connection(host=host, port=port, **kwargs)[db]
if self.user is not None and self.password is not None: if user is not None and password is not None:
_db.authenticate(user, password) _db.authenticate(user, password)
self.fs = gridfs.GridFS(_db) self.fs = gridfs.GridFS(_db)
......
import logging import logging
import os
import mimetypes
from .xml import XMLModuleStore from .xml import XMLModuleStore
from .exceptions import DuplicateItemError from .exceptions import DuplicateItemError
from xmodule.modulestore import Location
from xmodule.contentstore.content import StaticContent, XASSET_SRCREF_PREFIX
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def import_static_content(modules, data_dir, static_content_store):
    """
    Import every file under a course's static directory into the content store.

    `modules` is the collection of modules for a single course; it is scanned
    for the module whose category is 'course' to obtain the course location
    and its 'data_dir' metadata entry. Each file found under
    `<data_dir>/<course data_dir>/static/` is saved to `static_content_store`,
    together with a thumbnail when the store generates one.

    Returns a dict mapping each file's path relative to the static directory
    to the name of the content location it was stored under. The dict is empty
    when no course module is found.
    """
    remap_dict = {}

    course_data_dir = None
    course_loc = None

    # quick scan to find the course module and pull out the data_dir and location
    # maybe there is an easier way to look this up?!?
    for module in modules.itervalues():
        if module.category == 'course':
            course_loc = module.location
            course_data_dir = module.metadata['data_dir']

    if course_data_dir is None or course_loc is None:
        return remap_dict

    # now import all static assets
    static_dir = '{0}/{1}/static/'.format(data_dir, course_data_dir)

    for dirname, _dirnames, filenames in os.walk(static_dir):
        for filename in filenames:
            content_path = os.path.join(dirname, filename)
            # strip away the leading static directory so only the sub-path remains
            fullname_with_subpath = os.path.relpath(content_path, static_dir)

            content_loc = StaticContent.compute_location(course_loc.org, course_loc.course, fullname_with_subpath)
            mime_type = mimetypes.guess_type(filename)[0]

            print('importing static asset {0} of mime-type {1} from path {2}'.format(
                content_loc, mime_type, content_path))

            # the context manager closes the file even if the read fails
            with open(content_path, 'rb') as asset_file:
                data = asset_file.read()

            content = StaticContent(content_loc, filename, mime_type, data)

            # first let's save a thumbnail so we can get back a thumbnail location
            thumbnail_content = static_content_store.generate_thumbnail(content)

            if thumbnail_content is not None:
                content.thumbnail_location = thumbnail_content.location

            # then commit the content
            static_content_store.save(content)

            # store the remapping information which will be needed to substitute in the module data
            remap_dict[fullname_with_subpath] = content_loc.name

    return remap_dict
def import_from_xml(store, data_dir, course_dirs=None, def import_from_xml(store, data_dir, course_dirs=None,
default_class='xmodule.raw_module.RawDescriptor', default_class='xmodule.raw_module.RawDescriptor',
load_error_modules=True): load_error_modules=True, static_content_store=None):
""" """
Import the specified xml data_dir into the "store" modulestore, Import the specified xml data_dir into the "store" modulestore,
using org and course as the location org and course. using org and course as the location org and course.
...@@ -23,15 +83,45 @@ def import_from_xml(store, data_dir, course_dirs=None, ...@@ -23,15 +83,45 @@ def import_from_xml(store, data_dir, course_dirs=None,
course_dirs=course_dirs, course_dirs=course_dirs,
load_error_modules=load_error_modules, load_error_modules=load_error_modules,
) )
# NOTE: the XmlModuleStore does not implement get_items() which would be a preferable means
# to enumerate the entire collection of course modules. It will be left as a TBD to implement that
# method on XmlModuleStore.
for course_id in module_store.modules.keys(): for course_id in module_store.modules.keys():
remap_dict = {}
if static_content_store is not None:
remap_dict = import_static_content(module_store.modules[course_id], data_dir, static_content_store)
for module in module_store.modules[course_id].itervalues(): for module in module_store.modules[course_id].itervalues():
if module.category == 'course':
# HACK: for now we don't support progress tabs. There's a special metadata configuration setting for this.
module.metadata['hide_progress_tab'] = True
if 'data' in module.definition: if 'data' in module.definition:
store.update_item(module.location, module.definition['data']) module_data = module.definition['data']
# cdodge: update any references to the static content paths
# This is a bit brute force - simple search/replace - but it's unlikely that such references to '/static/....'
# would occur naturally (in the wild)
# @TODO, sorry a bit of technical debt here. There are some helper methods in xmodule_modifiers.py and static_replace.py which could
# better do the url replace on the html rendering side rather than on the ingest side
try:
if '/static/' in module_data:
for subkey in remap_dict.keys():
module_data = module_data.replace('/static/' + subkey, 'xasset:' + remap_dict[subkey])
except:
pass # part of the techincal debt is that module_data might not be a string (e.g. ABTest)
store.update_item(module.location, module_data)
if 'children' in module.definition: if 'children' in module.definition:
store.update_children(module.location, module.definition['children']) store.update_children(module.location, module.definition['children'])
# NOTE: It's important to use own_metadata here to avoid writing # NOTE: It's important to use own_metadata here to avoid writing
# inherited metadata everywhere. # inherited metadata everywhere.
store.update_metadata(module.location, dict(module.own_metadata)) store.update_metadata(module.location, dict(module.own_metadata))
return module_store return module_store
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment