Commit cfa873cc by zubair-arbi

Ignore Mac OS metadata files on import and also remove any such files from the course

STUD-1725
parent d53a6669
"""
Script for removing all redundant Mac OS metadata files (with filename ".DS_Store"
or with filename which starts with "._") for all courses
"""
import logging
from django.core.management.base import BaseCommand
from xmodule.contentstore.django import contentstore
log = logging.getLogger(__name__)
class Command(BaseCommand):
    """
    Remove all Mac OS related redundant files for all courses in contentstore
    """
    help = 'Remove all Mac OS related redundant file/files for all courses in contentstore'

    def handle(self, *args, **options):
        """
        Execute the command.

        Asks the content store to remove the redundant Mac OS metadata assets of
        every course and logs how many assets were deleted. A failure is logged
        (with its traceback) and is not re-raised.
        """
        content_store = contentstore()

        log.info(u"-" * 80)
        log.info(u"Cleaning up assets for all courses")
        try:
            # Remove all redundant Mac OS metadata files
            assets_deleted = content_store.remove_redundant_content_for_courses()
        except Exception:  # pylint: disable=broad-except
            # Best-effort command: report the failure instead of propagating it, but
            # log at ERROR level with the traceback so the failure is not lost among
            # ordinary INFO output.
            log.exception(u"=" * 30 + u"> failed to cleanup")
        else:
            log.info(u"=" * 80)
            log.info(u"Total number of assets deleted: {0}".format(assets_deleted))
"""
Test for assets cleanup of courses for Mac OS metadata files (with filename ".DS_Store"
or with filename which starts with "._")
"""
from django.core.management import call_command
from opaque_keys.edx.locations import SlashSeparatedCourseKey
from xmodule.contentstore.content import XASSET_LOCATION_TAG
from xmodule.contentstore.django import contentstore
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.mongo.base import location_to_query
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
from xmodule.modulestore.xml_importer import import_from_xml
class ExportAllCourses(ModuleStoreTestCase):
    """
    Exercises the ``cleanup_assets`` command against an imported course.
    """
    def setUp(self):
        """ Keep handles on the content store and the module store. """
        self.content_store = contentstore()
        self.module_store = modulestore()

    def _assert_course_asset_names(self, course_key, expected_names):
        """
        Fetch all content for `course_key`, assert that the reported count and the
        asset names (position by position) equal `expected_names`, and return the
        fetched asset list.
        """
        all_assets, count = self.content_store.get_all_content_for_course(course_key)
        self.assertEqual(count, len(expected_names))
        for position, expected_name in enumerate(expected_names):
            self.assertEqual(all_assets[position]['_id']['name'], expected_name)
        return all_assets

    def test_export_all_courses(self):
        """
        Redundant Mac metadata files ('._example.txt', '.DS_Store') are dropped on
        import, and the cleanup command removes such files that were added afterwards
        """
        proper_names = [u'.example.txt', u'example.txt']
        redundant_names = [u'._example_test.txt', u'.DS_Store']

        import_from_xml(
            self.module_store,
            '**replace_user**',
            'common/test/data/',
            ['dot-underscore'],
            static_content_store=self.content_store,
            do_import_static=True,
            verbose=True
        )

        course_key = SlashSeparatedCourseKey('edX', 'dot-underscore', '2014_Fall')
        course = self.module_store.get_course(course_key)
        self.assertIsNotNone(course)

        # right after import the contentstore holds only the two proper assets
        all_assets = self._assert_course_asset_names(course.id, proper_names)

        # clone an existing asset document under each redundant name
        # (file ".DS_Store" and a filename starting with "._")
        asset_filter = course.id.make_asset_key("asset", None)
        lookup = location_to_query(asset_filter, wildcard=True, tag=XASSET_LOCATION_TAG)
        lookup['_id.name'] = all_assets[0]['_id']['name']
        template_doc = self.content_store.fs_files.find_one(lookup)
        for redundant_name in redundant_names:
            template_doc['_id']['name'] = redundant_name
            self.content_store.fs_files.insert(template_doc)

        # the course now reports four assets
        self._assert_course_asset_names(course.id, proper_names + redundant_names)

        # after the cleanup command only the two proper assets remain
        call_command('cleanup_assets')
        self._assert_course_asset_names(course.id, proper_names)
...@@ -151,6 +151,8 @@ TECH_SUPPORT_EMAIL = ENV_TOKENS.get('TECH_SUPPORT_EMAIL', TECH_SUPPORT_EMAIL) ...@@ -151,6 +151,8 @@ TECH_SUPPORT_EMAIL = ENV_TOKENS.get('TECH_SUPPORT_EMAIL', TECH_SUPPORT_EMAIL)
COURSES_WITH_UNSAFE_CODE = ENV_TOKENS.get("COURSES_WITH_UNSAFE_CODE", []) COURSES_WITH_UNSAFE_CODE = ENV_TOKENS.get("COURSES_WITH_UNSAFE_CODE", [])
# Take ASSET_IGNORE_REGEX from ENV_TOKENS when present; otherwise keep the value already in scope
ASSET_IGNORE_REGEX = ENV_TOKENS.get('ASSET_IGNORE_REGEX', ASSET_IGNORE_REGEX)
# Theme overrides # Theme overrides
THEME_NAME = ENV_TOKENS.get('THEME_NAME', None) THEME_NAME = ENV_TOKENS.get('THEME_NAME', None)
......
...@@ -31,7 +31,7 @@ import lms.envs.common ...@@ -31,7 +31,7 @@ import lms.envs.common
# Although this module itself may not use these imported variables, other dependent modules may. # Although this module itself may not use these imported variables, other dependent modules may.
from lms.envs.common import ( from lms.envs.common import (
USE_TZ, TECH_SUPPORT_EMAIL, PLATFORM_NAME, BUGS_EMAIL, DOC_STORE_CONFIG, ALL_LANGUAGES, WIKI_ENABLED, MODULESTORE, USE_TZ, TECH_SUPPORT_EMAIL, PLATFORM_NAME, BUGS_EMAIL, DOC_STORE_CONFIG, ALL_LANGUAGES, WIKI_ENABLED, MODULESTORE,
update_module_store_settings update_module_store_settings, ASSET_IGNORE_REGEX
) )
from path import path from path import path
from warnings import simplefilter from warnings import simplefilter
......
...@@ -13,6 +13,7 @@ import os ...@@ -13,6 +13,7 @@ import os
import json import json
from bson.son import SON from bson.son import SON
from opaque_keys.edx.keys import AssetKey from opaque_keys.edx.keys import AssetKey
from xmodule.modulestore.django import ASSET_IGNORE_REGEX
class MongoContentStore(ContentStore): class MongoContentStore(ContentStore):
...@@ -170,6 +171,26 @@ class MongoContentStore(ContentStore): ...@@ -170,6 +171,26 @@ class MongoContentStore(ContentStore):
course_key, start=start, maxresults=maxresults, get_thumbnails=False, sort=sort course_key, start=start, maxresults=maxresults, get_thumbnails=False, sort=sort
) )
def remove_redundant_content_for_courses(self):
    """
    Delete, for all courses, every asset whose name matches ASSET_IGNORE_REGEX
    (Mac OS metadata files such as ".DS_Store" or names starting with "._").

    Documents are matched twice: once on the fields under `_id` and once on the
    fields under `content_son`.

    Returns the total number of matching documents found across both passes.
    """
    removed_total = 0
    for key_field in ['_id', 'content_son']:
        redundant_query = SON([
            ('{}.tag'.format(key_field), XASSET_LOCATION_TAG),
            ('{}.category'.format(key_field), 'asset'),
            ('{}.name'.format(key_field), {'$regex': ASSET_IGNORE_REGEX}),
        ])
        matches = self.fs_files.find(redundant_query)
        removed_total += matches.count()
        # delete each stored file first, then drop the matching file documents
        for file_doc in matches:
            self.fs.delete(file_doc[key_field])
        self.fs_files.remove(redundant_query)

    return removed_total
def _get_all_content_for_course(self, course_key, get_thumbnails=False, start=0, maxresults=-1, sort=None): def _get_all_content_for_course(self, course_key, get_thumbnails=False, start=0, maxresults=-1, sort=None):
''' '''
Returns a list of all static assets for a course. The return format is a list of asset data dictionary elements. Returns a list of all static assets for a course. The return format is a list of asset data dictionary elements.
......
...@@ -8,6 +8,8 @@ from __future__ import absolute_import ...@@ -8,6 +8,8 @@ from __future__ import absolute_import
from importlib import import_module from importlib import import_module
from django.conf import settings from django.conf import settings
# Configure Django settings (with no overrides) at import time when nothing has
# configured them yet, so the settings lookups in this module can run.
# NOTE(review): presumably needed when this module is imported outside a
# configured Django process - confirm.
if not settings.configured:
settings.configure()
from django.core.cache import get_cache, InvalidCacheBackendError from django.core.cache import get_cache, InvalidCacheBackendError
import django.utils import django.utils
...@@ -25,6 +27,8 @@ try: ...@@ -25,6 +27,8 @@ try:
except ImportError: except ImportError:
HAS_REQUEST_CACHE = False HAS_REQUEST_CACHE = False
# Regex for asset filenames to ignore; read from Django settings when defined there.
# The fallback matches names starting with "._", the name ".DS_Store", and names ending in "~".
ASSET_IGNORE_REGEX = getattr(settings, "ASSET_IGNORE_REGEX", r"(^\._.*$)|(^\.DS_Store$)|(^.*~$)")
def load_function(path): def load_function(path):
""" """
......
...@@ -3,6 +3,7 @@ import os ...@@ -3,6 +3,7 @@ import os
import mimetypes import mimetypes
from path import path from path import path
import json import json
import re
from .xml import XMLModuleStore, ImportSystem, ParentTracker from .xml import XMLModuleStore, ImportSystem, ParentTracker
from xblock.runtime import KvsFieldData, DictKeyValueStore from xblock.runtime import KvsFieldData, DictKeyValueStore
...@@ -15,6 +16,7 @@ from xmodule.errortracker import make_error_tracker ...@@ -15,6 +16,7 @@ from xmodule.errortracker import make_error_tracker
from .store_utilities import rewrite_nonportable_content_links from .store_utilities import rewrite_nonportable_content_links
import xblock import xblock
from xmodule.tabs import CourseTabList from xmodule.tabs import CourseTabList
from xmodule.modulestore.django import ASSET_IGNORE_REGEX
from xmodule.modulestore.exceptions import InvalidLocationError from xmodule.modulestore.exceptions import InvalidLocationError
from xmodule.modulestore.mongo.base import MongoRevisionKey from xmodule.modulestore.mongo.base import MongoRevisionKey
from xmodule.modulestore import ModuleStoreEnum from xmodule.modulestore import ModuleStoreEnum
...@@ -49,7 +51,7 @@ def import_static_content( ...@@ -49,7 +51,7 @@ def import_static_content(
content_path = os.path.join(dirname, filename) content_path = os.path.join(dirname, filename)
if filename.endswith('~'): if re.match(ASSET_IGNORE_REGEX, filename):
if verbose: if verbose:
log.debug('skipping static content %s...', content_path) log.debug('skipping static content %s...', content_path)
continue continue
......
...@@ -21,3 +21,21 @@ class IgnoredFilesTestCase(unittest.TestCase): ...@@ -21,3 +21,21 @@ class IgnoredFilesTestCase(unittest.TestCase):
self.assertIn("example.txt", name_val) self.assertIn("example.txt", name_val)
self.assertNotIn("example.txt~", name_val) self.assertNotIn("example.txt~", name_val)
self.assertIn("GREEN", name_val["example.txt"]) self.assertIn("GREEN", name_val["example.txt"])
def test_ignore_dot_underscore_static_files(self):
    """
    Mac OS metadata files (".DS_Store" and names starting with "._") must not be
    saved by import_static_content, while regular dot-files still are
    """
    source_dir = DATA_DIR / "dot-underscore"
    course_key = SlashSeparatedCourseKey("edX", "dot-underscore", "2014_Fall")
    store = Mock()
    store.generate_thumbnail.return_value = ("content", "location")

    import_static_content(source_dir, store, course_key)

    # map each saved asset's name to its data, taken from the first positional
    # argument of every recorded save() call
    data_by_name = {}
    for save_call in store.save.call_args_list:
        saved_item = save_call[0][0]
        data_by_name[saved_item.name] = saved_item.data

    self.assertIn("example.txt", data_by_name)
    self.assertIn(".example.txt", data_by_name)
    self.assertNotIn("._example.txt", data_by_name)
    self.assertNotIn(".DS_Store", data_by_name)
    self.assertIn("GREEN", data_by_name["example.txt"])
    self.assertIn("BLUE", data_by_name[".example.txt"])
IGNORE MAC METADATA FILES
This course simulates an import of a course from Mac OS that has some unnecessary
metadata files (filenames starting with "._") in its assets (static/._example.txt).
These files do not belong with the content, so they are skipped on import, and a
cleanup is also done for any such assets that were already added.
<course org="edX" course="dot-underscore" slug="2014_Fall"/>
...@@ -253,6 +253,8 @@ for name, value in ENV_TOKENS.get("CODE_JAIL", {}).items(): ...@@ -253,6 +253,8 @@ for name, value in ENV_TOKENS.get("CODE_JAIL", {}).items():
COURSES_WITH_UNSAFE_CODE = ENV_TOKENS.get("COURSES_WITH_UNSAFE_CODE", []) COURSES_WITH_UNSAFE_CODE = ENV_TOKENS.get("COURSES_WITH_UNSAFE_CODE", [])
# Take ASSET_IGNORE_REGEX from ENV_TOKENS when present; otherwise keep the value already in scope
ASSET_IGNORE_REGEX = ENV_TOKENS.get('ASSET_IGNORE_REGEX', ASSET_IGNORE_REGEX)
# Event Tracking # Event Tracking
if "TRACKING_IGNORE_URL_PATTERNS" in ENV_TOKENS: if "TRACKING_IGNORE_URL_PATTERNS" in ENV_TOKENS:
TRACKING_IGNORE_URL_PATTERNS = ENV_TOKENS.get("TRACKING_IGNORE_URL_PATTERNS") TRACKING_IGNORE_URL_PATTERNS = ENV_TOKENS.get("TRACKING_IGNORE_URL_PATTERNS")
......
...@@ -266,6 +266,9 @@ FEATURES = { ...@@ -266,6 +266,9 @@ FEATURES = {
} }
# Ignore static asset files on import which match this pattern: names starting
# with "._", the name ".DS_Store", and names ending in "~"
ASSET_IGNORE_REGEX = r"(^\._.*$)|(^\.DS_Store$)|(^.*~$)"
# Used for A/B testing # Used for A/B testing
DEFAULT_GROUPS = [] DEFAULT_GROUPS = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment