Commit fb73888e by Nimisha Asthagiri

Storage-backed versioned Block Structures: Models

parent d439da44
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
import django.utils.timezone
import openedx.core.djangoapps.xmodule_django.models
import model_utils.fields
import openedx.core.djangoapps.content.block_structure.models
class Migration(migrations.Migration):
    """
    Creates the ``block_structure`` table that backs BlockStructureModel.
    """

    dependencies = [
        ('block_structure', '0001_config'),
    ]

    operations = [
        migrations.CreateModel(
            name='BlockStructureModel',
            fields=[
                (
                    'id',
                    models.AutoField(
                        verbose_name='ID',
                        serialize=False,
                        auto_created=True,
                        primary_key=True,
                    ),
                ),
                (
                    'created',
                    model_utils.fields.AutoCreatedField(
                        default=django.utils.timezone.now,
                        verbose_name='created',
                        editable=False,
                    ),
                ),
                (
                    'modified',
                    model_utils.fields.AutoLastModifiedField(
                        default=django.utils.timezone.now,
                        verbose_name='modified',
                        editable=False,
                    ),
                ),
                (
                    'data_usage_key',
                    openedx.core.djangoapps.xmodule_django.models.UsageKeyField(
                        unique=True,
                        max_length=255,
                        verbose_name='Identifier of the data being collected.',
                    ),
                ),
                (
                    'data_version',
                    models.CharField(
                        max_length=255,
                        null=True,
                        verbose_name='Version of the data at the time of collection.',
                        blank=True,
                    ),
                ),
                (
                    'data_edit_timestamp',
                    models.DateTimeField(
                        null=True,
                        verbose_name='Edit timestamp of the data at the time of collection.',
                        blank=True,
                    ),
                ),
                (
                    'transformers_schema_version',
                    models.CharField(
                        max_length=255,
                        verbose_name='Representation of the schema version of the transformers used during collection.',
                    ),
                ),
                (
                    'block_structure_schema_version',
                    models.CharField(
                        max_length=255,
                        verbose_name='Version of the block structure schema at the time of collection.',
                    ),
                ),
                (
                    'data',
                    models.FileField(
                        max_length=500,
                        upload_to=openedx.core.djangoapps.content.block_structure.models._path_name,
                    ),
                ),
            ],
            options={
                'db_table': 'block_structure',
            },
        ),
    ]
"""
Models used by the block structure framework.
"""
from datetime import datetime
from django.conf import settings
from django.core.files.base import ContentFile
from django.db import models
from logging import getLogger
from model_utils.models import TimeStampedModel
from openedx.core.djangoapps.xmodule_django.models import UsageKeyField
from openedx.core.lib.block_structure.exceptions import BlockStructureNotFound
from openedx.core.storage import get_storage
import openedx.core.djangoapps.content.block_structure.config as config
# Module-level logger, named after this module.
log = getLogger(__name__)
def _create_path(directory, filename):
"""
Returns the full path for the given directory and filename.
"""
return '{}/{}'.format(directory, filename)
def _directory_name(data_usage_key):
    """
    Builds the storage directory name for the given data_usage_key:
    the optional DIRECTORY_PREFIX from BLOCK_STRUCTURES_SETTINGS
    (empty when not configured) followed by the key rendered as text.
    """
    prefix = settings.BLOCK_STRUCTURES_SETTINGS.get('DIRECTORY_PREFIX', '')
    key_text = unicode(data_usage_key)
    return '{}{}'.format(prefix, key_text)
def _path_name(bs_model, filename):  # pylint:disable=unused-argument
    """
    Computes the storage path for the data file of the given
    BlockStructureModel instance.

    The incoming filename is ignored; the file is named after the
    current UTC time and placed in the directory derived from the
    instance's data_usage_key.
    """
    timestamp_name = datetime.utcnow().strftime('%Y-%m-%d-%H:%M:%S-%f')
    directory = _directory_name(bs_model.data_usage_key)
    return _create_path(directory, timestamp_name)
def _bs_model_storage():
    """
    Returns the django Storage object used for BlockStructureModel
    files, built from the STORAGE_CLASS and STORAGE_KWARGS entries
    of BLOCK_STRUCTURES_SETTINGS.
    """
    storage_class = settings.BLOCK_STRUCTURES_SETTINGS.get('STORAGE_CLASS')
    storage_kwargs = settings.BLOCK_STRUCTURES_SETTINGS.get('STORAGE_KWARGS', {})
    return get_storage(storage_class, **storage_kwargs)
class BlockStructureModel(TimeStampedModel):
    """
    Model for storing Block Structure information.

    There is at most one row per data_usage_key (the field is unique).
    The serialized block structure itself is kept in a file referenced
    by the ``data`` field; every save writes a new timestamp-named file,
    and older files may be pruned afterwards.
    """
    # Fields that describe the version of the stored data.
    VERSION_FIELDS = [
        u'data_version',
        u'data_edit_timestamp',
        u'transformers_schema_version',
        u'block_structure_schema_version',
    ]
    # Fields rendered by __unicode__ to identify an entry.
    UNIQUENESS_FIELDS = [u'data_usage_key'] + VERSION_FIELDS

    class Meta(object):
        db_table = 'block_structure'

    data_usage_key = UsageKeyField(
        u'Identifier of the data being collected.',
        blank=False,
        max_length=255,
        unique=True,
    )
    data_version = models.CharField(
        u'Version of the data at the time of collection.',
        blank=True,
        null=True,
        max_length=255,
    )
    data_edit_timestamp = models.DateTimeField(
        u'Edit timestamp of the data at the time of collection.',
        blank=True,
        null=True,
    )
    transformers_schema_version = models.CharField(
        u'Representation of the schema version of the transformers used during collection.',
        blank=False,
        max_length=255,
    )
    block_structure_schema_version = models.CharField(
        u'Version of the block structure schema at the time of collection.',
        blank=False,
        max_length=255,
    )
    data = models.FileField(
        upload_to=_path_name,
        max_length=500,  # allocate enough for base path + prefix + usage_key + timestamp in filepath
    )

    def get_serialized_data(self):
        """
        Returns the collected data for this instance, as read
        from the file referenced by the ``data`` field.
        """
        serialized_data = self.data.read()
        log.info("BlockStructure: Read data from store; %r, size: %d", self, len(serialized_data))
        return serialized_data

    @classmethod
    def get(cls, data_usage_key):
        """
        Returns the entry associated with the given data_usage_key.

        Raises:
             BlockStructureNotFound if an entry for data_usage_key is not found.
        """
        try:
            return cls.objects.get(data_usage_key=data_usage_key)
        except cls.DoesNotExist:
            log.info("BlockStructure: Not found in table; %r.", data_usage_key)
            raise BlockStructureNotFound(data_usage_key)

    @classmethod
    def update_or_create(cls, serialized_data, data_usage_key, **kwargs):
        """
        Updates or creates the BlockStructureModel entry for the
        given data_usage_key, uploading serialized_data as the
        content data.

        Arguments:
            serialized_data: the content to store in the entry's file.
            data_usage_key: identifies the entry to update or create.
            **kwargs: remaining field values, applied as defaults.

        Returns:
            A (bs_model, created) tuple, where created is True when
            a new entry was created rather than an existing one updated.
        """
        bs_model, created = cls.objects.update_or_create(defaults=kwargs, data_usage_key=data_usage_key)
        # The name passed here is a placeholder; the field's upload_to
        # callable (_path_name) ignores it and generates the real path.
        bs_model.data.save('', ContentFile(serialized_data))
        log.info(
            'BlockStructure: %s in store; %r, size: %d',
            'Created' if created else 'Updated',
            bs_model,
            len(serialized_data),
        )
        # Only an update can leave earlier file versions behind.
        if not created:
            cls._prune_files(data_usage_key)

        return bs_model, created

    def __unicode__(self):
        """
        Returns a string representation of this model, listing
        each of the UNIQUENESS_FIELDS with its value.
        """
        return u', '.join(
            u'{}: {}'.format(field_name, unicode(getattr(self, field_name)))
            for field_name in self.UNIQUENESS_FIELDS
        )

    @classmethod
    def _prune_files(cls, data_usage_key, num_to_keep=None):
        """
        Deletes previous file versions for data_usage_key.

        Does nothing unless the PRUNE_OLD_VERSIONS config is enabled.
        When num_to_keep is None, the configured number of versions
        to keep is used. Any exception raised while pruning is logged
        and swallowed, so pruning is best-effort.
        """
        if not config.is_enabled(config.PRUNE_OLD_VERSIONS):
            return

        if num_to_keep is None:
            num_to_keep = config.num_versions_to_keep()

        try:
            # File names are UTC timestamps (see _path_name), so sorting
            # the paths orders them from oldest to newest.
            all_files_by_date = sorted(cls._get_all_files(data_usage_key))
            # A slice of [:-0] would be empty, so "keep none" is handled explicitly.
            files_to_delete = all_files_by_date[:-num_to_keep] if num_to_keep > 0 else all_files_by_date
            cls._delete_files(files_to_delete)
            log.info(
                'BlockStructure: Deleted %d out of total %d files in store; data_usage_key: %r, num_to_keep: %d.',
                len(files_to_delete),
                len(all_files_by_date),
                data_usage_key,
                num_to_keep,
            )
        except Exception as error:  # pylint: disable=broad-except
            log.exception(
                'BlockStructure: Exception when deleting old files; data_usage_key: %r, %r',
                data_usage_key,
                error,
            )

    @classmethod
    def _delete_files(cls, files):
        """
        Deletes the given files from storage.
        """
        storage = _bs_model_storage()
        # Explicit loop rather than map(): map() is lazy on Python 3, so
        # using it only for its side effects would delete nothing there.
        for file_path in files:
            storage.delete(file_path)

    @classmethod
    def _get_all_files(cls, data_usage_key):
        """
        Returns the paths of all files that exist in storage for the
        given key, skipping empty names and names starting with a dot.
        """
        directory = _directory_name(data_usage_key)
        _, filenames = _bs_model_storage().listdir(directory)
        return [
            _create_path(directory, filename)
            for filename in filenames
            if filename and not filename.startswith('.')
        ]
"""
Unit tests for Block Structure models.
"""
# pylint: disable=protected-access
import ddt
from django.test import TestCase
from django.utils.timezone import now
from itertools import product
from mock import patch, Mock
from uuid import uuid4
from opaque_keys.edx.locator import CourseLocator, BlockUsageLocator
from openedx.core.lib.block_structure.exceptions import BlockStructureNotFound
from ..config import PRUNE_OLD_VERSIONS
from ..models import BlockStructureModel
from .helpers import override_config_setting
@ddt.ddt
class BlockStructureModelTestCase(TestCase):
    """
    Exercises creation, retrieval, update and file pruning
    of BlockStructureModel entries.
    """
    def setUp(self):
        super(BlockStructureModelTestCase, self).setUp()
        # A random run value gives every test its own usage key.
        unique_run = unicode(uuid4())
        self.course_key = CourseLocator('org', 'course', unique_run)
        self.usage_key = BlockUsageLocator(
            course_key=self.course_key,
            block_type='course',
            block_id='course',
        )
        self.params = self._create_bsm_params()

    def tearDown(self):
        # Remove every file written for this test's usage key.
        with override_config_setting(PRUNE_OLD_VERSIONS, active=True):
            BlockStructureModel._prune_files(self.usage_key, num_to_keep=0)
        super(BlockStructureModelTestCase, self).tearDown()

    def _assert_bsm_fields(self, bsm, expected_serialized_data):
        """
        Checks that the given bsm carries the field values from
        self.params, holds the expected serialized data, and stores
        its file under a name containing the usage key.
        """
        for name, expected_value in self.params.iteritems():
            actual_value = getattr(bsm, name)
            self.assertEqual(expected_value, actual_value)

        self.assertEqual(bsm.get_serialized_data(), expected_serialized_data)
        self.assertIn(unicode(self.usage_key), bsm.data.name)

    def _assert_file_count_equal(self, expected_count):
        """
        Checks that the number of stored files for
        self.usage_key matches expected_count.
        """
        stored_files = BlockStructureModel._get_all_files(self.usage_key)
        self.assertEqual(len(stored_files), expected_count)

    def _create_bsm_params(self):
        """
        Builds the keyword arguments used to create
        a BlockStructureModel in these tests.
        """
        bsm_params = dict(
            data_usage_key=self.usage_key,
            data_version='DV',
            data_edit_timestamp=now(),
            transformers_schema_version='TV',
            block_structure_schema_version=unicode(1),
        )
        return bsm_params

    def _verify_update_or_create_call(self, serialized_data, mock_log=None, expect_created=None):
        """
        Invokes BlockStructureModel.update_or_create with
        self.params and checks the outcome, including the
        logged message when a mock_log is supplied.
        """
        bsm, created = BlockStructureModel.update_or_create(serialized_data, **self.params)

        if mock_log:
            expected_action = "Created" if expect_created else "Updated"
            logged_args = mock_log.info.call_args[0]
            self.assertEqual(expected_action, logged_args[1])
            self.assertEqual(len(serialized_data), logged_args[3])

        self._assert_bsm_fields(bsm, serialized_data)

        if expect_created is not None:
            self.assertEqual(created, expect_created)

        return bsm

    @patch('openedx.core.djangoapps.content.block_structure.models.log')
    def test_update_or_create(self, mock_log):
        """
        Covers the not-found, create, get and update paths in sequence.
        """
        initial_data = 'initial data'

        # shouldn't already exist
        with self.assertRaises(BlockStructureNotFound):
            BlockStructureModel.get(self.usage_key)
        self.assertIn("BlockStructure: Not found in table;", mock_log.info.call_args[0][0])

        # create an entry
        created_bsm = self._verify_update_or_create_call(initial_data, mock_log, expect_created=True)

        # get entry
        fetched_bsm = BlockStructureModel.get(self.usage_key)
        self._assert_bsm_fields(fetched_bsm, initial_data)
        self.assertIn("BlockStructure: Read data from store;", mock_log.info.call_args[0][0])

        # update entry
        self.params['data_version'] = 'new version'
        new_data = 'updated data'
        updated_bsm = self._verify_update_or_create_call(new_data, mock_log, expect_created=False)
        self.assertNotEqual(created_bsm.data.name, updated_bsm.data.name)

        # old files not pruned
        self._assert_file_count_equal(2)

    @override_config_setting(PRUNE_OLD_VERSIONS, active=True)
    @patch('openedx.core.djangoapps.content.block_structure.config.num_versions_to_keep', Mock(return_value=1))
    def test_prune_files(self):
        """
        With pruning enabled and one version kept, an update leaves one file.
        """
        self._verify_update_or_create_call('test data', expect_created=True)
        self._verify_update_or_create_call('updated data', expect_created=False)
        self._assert_file_count_equal(1)

    @override_config_setting(PRUNE_OLD_VERSIONS, active=True)
    @patch('openedx.core.djangoapps.content.block_structure.config.num_versions_to_keep', Mock(return_value=1))
    @patch('openedx.core.djangoapps.content.block_structure.models.BlockStructureModel._delete_files')
    @patch('openedx.core.djangoapps.content.block_structure.models.log')
    def test_prune_exception(self, mock_log, mock_delete):
        """
        A failure while deleting files is logged and leaves the files in place.
        """
        mock_delete.side_effect = Exception
        self._verify_update_or_create_call('test data', expect_created=True)
        self._verify_update_or_create_call('updated data', expect_created=False)

        logged_message = mock_log.exception.call_args[0][0]
        self.assertIn('BlockStructure: Exception when deleting old files', logged_message)
        self._assert_file_count_equal(2)  # old files not pruned

    @ddt.data(
        *product(
            range(1, 3),  # prune_keep_count
            range(4),  # num_prior_edits
        )
    )
    @ddt.unpack
    def test_prune_keep_count(self, prune_keep_count, num_prior_edits):
        """
        After one pruning save, at most prune_keep_count files remain.
        """
        with patch(
            'openedx.core.djangoapps.content.block_structure.config.num_versions_to_keep',
            return_value=prune_keep_count,
        ):
            # Saves made while pruning is not enabled accumulate files.
            for _edit in range(num_prior_edits):
                self._verify_update_or_create_call('data')
            if num_prior_edits:
                self._assert_file_count_equal(num_prior_edits)

            with override_config_setting(PRUNE_OLD_VERSIONS, active=True):
                self._verify_update_or_create_call('data')
                expected_count = min(prune_keep_count, num_prior_edits + 1)
                self._assert_file_count_equal(expected_count)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment