Commit 2b0ff8e3 by Nimisha Asthagiri

Storage-backed Block Structures: Use correct bucket

TNL-6645
parent bf08c6ed
@@ -165,5 +165,5 @@ class TestGetBlocksQueryCounts(SharedModuleStoreTestCase):
         self._get_blocks(
             course,
             expected_mongo_queries,
-            expected_sql_queries=9 if with_storage_backing else 3,
+            expected_sql_queries=11 if with_storage_backing else 3,
         )
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import migrations, models

import openedx.core.djangoapps.content.block_structure.models


class Migration(migrations.Migration):

    dependencies = [
        ('block_structure', '0002_blockstructuremodel'),
    ]

    operations = [
        migrations.AlterField(
            model_name='blockstructuremodel',
            name='data',
            field=openedx.core.djangoapps.content.block_structure.models.CustomizableFileField(),
        ),
    ]
@@ -2,10 +2,12 @@
 Models used by the block structure framework.
 """
+from contextlib import contextmanager
 from datetime import datetime

 from django.conf import settings
+from django.core.exceptions import SuspiciousOperation
 from django.core.files.base import ContentFile
-from django.db import models
+from django.db import models, transaction
 from logging import getLogger

 from model_utils.models import TimeStampedModel
@@ -59,6 +61,63 @@ def _bs_model_storage():
     )


+class CustomizableFileField(models.FileField):
+    """
+    Subclass of FileField that allows custom settings to not
+    be serialized (hard-coded) in migrations. Otherwise,
+    migrations include optional settings for storage (such as
+    the storage class and bucket name); we don't want to
+    create new migration files for each configuration change.
+    """
+    def __init__(self, *args, **kwargs):
+        kwargs.update(dict(
+            upload_to=_path_name,
+            storage=_bs_model_storage(),
+            max_length=500,  # allocate enough for base path + prefix + usage_key + timestamp in filepath
+        ))
+        super(CustomizableFileField, self).__init__(*args, **kwargs)
+
+    def deconstruct(self):
+        name, path, args, kwargs = super(CustomizableFileField, self).deconstruct()
+        del kwargs['upload_to']
+        del kwargs['storage']
+        del kwargs['max_length']
+        return name, path, args, kwargs
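For context, a minimal sketch (not part of this commit) of what the deconstruct() override works around: a plain FileField serializes its upload_to, storage, and max_length arguments into the migration, so every storage configuration change would otherwise generate a new migration file. The settings.configure() call and the field arguments below are illustrative assumptions, not values from this repository.

# Illustrative sketch only; run outside edx-platform with a bare Django install.
from django.conf import settings
settings.configure()  # minimal settings so a storage backend can be instantiated

from django.core.files.storage import FileSystemStorage
from django.db import models

plain = models.FileField(upload_to='block_structures/', storage=FileSystemStorage(), max_length=500)
print(sorted(plain.deconstruct()[3].keys()))
# ['max_length', 'storage', 'upload_to'] -- all baked into the generated migration

# CustomizableFileField.deconstruct() deletes those three kwargs, so the migration
# in this commit serializes simply as CustomizableFileField() for every configuration.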
+@contextmanager
+def _storage_error_handling(bs_model, operation, is_read_operation=False):
+    """
+    Helpful context manager that handles various errors
+    from the backend storage.
+
+    Typical errors at read time on configuration changes:
+        IOError:
+            - File not found (S3 or FS)
+            - Bucket name changed (S3)
+        SuspiciousOperation
+            - Path mismatches when changing backends
+
+    Other known errors:
+        OSError
+            - Access issues in creating files (FS)
+        S3ResponseError
+            - Incorrect credentials with 403 status (S3)
+            - Non-existent bucket with 404 status (S3)
+    """
+    try:
+        yield
+    except Exception as error:  # pylint: disable=broad-except
+        log.exception(u'BlockStructure: Exception %s on store %s; %s.', error.__class__, operation, bs_model)
+        if is_read_operation and isinstance(error, (IOError, SuspiciousOperation)):
+            # May have been caused by one of the possible error
+            # situations listed above. Raise BlockStructureNotFound
+            # so the block structure can be regenerated and restored.
+            raise BlockStructureNotFound(bs_model.data_usage_key)
+        else:
+            raise
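As a quick illustration of the contract this gives callers, mirroring the parametrized test added at the bottom of this commit: read-time IOError and SuspiciousOperation are translated into BlockStructureNotFound so the caller can regenerate the structure, while everything else propagates unchanged. In the sketch below, bs_model stands in for a saved BlockStructureModel and regenerate_block_structure() is a hypothetical caller-side recovery step.

# Sketch only; assumes the module-level names above (ContentFile, BlockStructureNotFound) are in scope.
try:
    with _storage_error_handling(bs_model, u'Read', is_read_operation=True):
        serialized_data = bs_model.data.read()   # may raise IOError if the bucket or path changed
except BlockStructureNotFound:
    serialized_data = regenerate_block_structure()   # hypothetical caller-side recovery

# Write-path errors are never masked:
with _storage_error_handling(bs_model, u'Created'):
    bs_model.data.save('', ContentFile(serialized_data))   # OSError/S3ResponseError propagate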
 class BlockStructureModel(TimeStampedModel):
     """
     Model for storing Block Structure information.
@@ -101,17 +160,17 @@ class BlockStructureModel(TimeStampedModel):
         blank=False,
         max_length=255,
     )
-    data = models.FileField(
-        upload_to=_path_name,
-        max_length=500,  # allocate enough for base path + prefix + usage_key + timestamp in filepath
-    )
+    data = CustomizableFileField()

     def get_serialized_data(self):
         """
         Returns the collected data for this instance.
         """
-        serialized_data = self.data.read()
-        log.info("BlockStructure: Read data from store; %r, size: %d", unicode(self), len(serialized_data))
+        operation = u'Read'
+        with _storage_error_handling(self, operation, is_read_operation=True):
+            serialized_data = self.data.read()
+
+        self._log(self, operation, serialized_data)
         return serialized_data

     @classmethod
@@ -124,7 +183,7 @@ class BlockStructureModel(TimeStampedModel):
         try:
             return cls.objects.get(data_usage_key=data_usage_key)
         except cls.DoesNotExist:
-            log.info("BlockStructure: Not found in table; %r.", data_usage_key)
+            log.info(u'BlockStructure: Not found in table; %s.', data_usage_key)
             raise BlockStructureNotFound(data_usage_key)

     @classmethod
@@ -134,14 +193,17 @@ class BlockStructureModel(TimeStampedModel):
         for the given data_usage_key in the kwargs,
         uploading serialized_data as the content data.
         """
-        bs_model, created = cls.objects.update_or_create(defaults=kwargs, data_usage_key=data_usage_key)
-        bs_model.data.save('', ContentFile(serialized_data))
-        log.info(
-            'BlockStructure: %s in store; %r, size: %d',
-            'Created' if created else 'Updated',
-            unicode(bs_model),
-            len(serialized_data),
-        )
+        # Use an atomic transaction so the model isn't updated
+        # unless the file is successfully persisted.
+        with transaction.atomic():
+            bs_model, created = cls.objects.update_or_create(defaults=kwargs, data_usage_key=data_usage_key)
+            operation = u'Created' if created else u'Updated'
+
+            with _storage_error_handling(bs_model, operation):
+                bs_model.data.save('', ContentFile(serialized_data))
+
+        cls._log(bs_model, operation, serialized_data)

         if not created:
             cls._prune_files(data_usage_key)
@@ -172,19 +234,15 @@ class BlockStructureModel(TimeStampedModel):
             files_to_delete = all_files_by_date[:-num_to_keep] if num_to_keep > 0 else all_files_by_date
             cls._delete_files(files_to_delete)
             log.info(
-                'BlockStructure: Deleted %d out of total %d files in store; data_usage_key: %r, num_to_keep: %d.',
+                u'BlockStructure: Deleted %d out of total %d files in store; data_usage_key: %s, num_to_keep: %d.',
                 len(files_to_delete),
                 len(all_files_by_date),
                 data_usage_key,
                 num_to_keep,
             )
-        except Exception as error:  # pylint: disable=broad-except
-            log.exception(
-                'BlockStructure: Exception when deleting old files; data_usage_key: %r, %r',
-                data_usage_key,
-                error,
-            )
+        except Exception:  # pylint: disable=broad-except
+            log.exception(u'BlockStructure: Exception when deleting old files; data_usage_key: %s.', data_usage_key)

     @classmethod
     def _delete_files(cls, files):
@@ -206,3 +264,18 @@ class BlockStructureModel(TimeStampedModel):
             for filename in filenames
             if filename and not filename.startswith('.')
         ]
+
+    @classmethod
+    def _log(cls, bs_model, operation, serialized_data):
+        """
+        Writes log information for the given values.
+        """
+        log.info(
+            u'BlockStructure: %s in store %s at %s%s; %s, size: %d',
+            operation,
+            bs_model.data.storage.__class__,
+            getattr(bs_model.data.storage, 'bucket_name', ''),
+            getattr(bs_model.data.storage, 'location', ''),
+            bs_model,
+            len(serialized_data),
+        )
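This new log line is what makes the bucket fix observable: the storage class plus the two getattr() fallbacks record exactly which bucket (S3) or directory (filesystem) the data was written to or read from. A small standalone illustration, with made-up path and bucket names, of how those getattr() calls resolve for each backend:

# Illustration only; the path and bucket names are invented.
from django.conf import settings
settings.configure()  # minimal settings just for this standalone sketch

from django.core.files.storage import FileSystemStorage

fs = FileSystemStorage(location='/edx/var/block_structures')
print(repr(getattr(fs, 'bucket_name', '')))   # ''  -- filesystem storage has no bucket
print(repr(getattr(fs, 'location', '')))      # '/edx/var/block_structures'

# With django-storages' S3 backend, bucket_name is the configured bucket and
# location an optional key prefix, so the log pinpoints the exact bucket/prefix
# used -- the "correct bucket" this commit is concerned with.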
@@ -108,7 +108,7 @@ class BlockStructureStore(object):
         bs_model = self._get_model(root_block_usage_key)
         self._cache.delete(self._encode_root_cache_key(bs_model))
         bs_model.delete()
-        logger.info("BlockStructure: Deleted from cache and store; %r.", unicode(bs_model))
+        logger.info("BlockStructure: Deleted from cache and store; %s.", bs_model)

     def is_up_to_date(self, root_block_usage_key, modulestore):
         """
@@ -157,7 +157,7 @@ class BlockStructureStore(object):
         """
         cache_key = self._encode_root_cache_key(bs_model)
         self._cache.set(cache_key, serialized_data, timeout=config.cache_timeout_in_seconds())
-        logger.info("BlockStructure: Added to cache; %r, size: %d", unicode(bs_model), len(serialized_data))
+        logger.info("BlockStructure: Added to cache; %s, size: %d", bs_model, len(serialized_data))

     def _get_from_cache(self, bs_model):
         """
@@ -167,13 +167,13 @@ class BlockStructureStore(object):
         BlockStructureNotFound if not found.
         """
         cache_key = self._encode_root_cache_key(bs_model)
         serialized_data = self._cache.get(cache_key)

         if not serialized_data:
-            logger.info("BlockStructure: Not found in cache; %r.", unicode(bs_model))
+            logger.info("BlockStructure: Not found in cache; %s.", bs_model)
             raise BlockStructureNotFound(bs_model.data_usage_key)
         else:
-            logger.info("BlockStructure: Read from cache; %r, size: %d", unicode(bs_model), len(serialized_data))
+            logger.info("BlockStructure: Read from cache; %s, size: %d", bs_model, len(serialized_data))
             return serialized_data

     def _get_from_store(self, bs_model):
@@ -3,6 +3,7 @@ Unit tests for Block Structure models.
 """
 # pylint: disable=protected-access
 import ddt
+from django.core.exceptions import SuspiciousOperation
 from django.test import TestCase
 from django.utils.timezone import now
 from itertools import product
@@ -13,7 +14,7 @@ from opaque_keys.edx.locator import CourseLocator, BlockUsageLocator
 from ..config import PRUNE_OLD_VERSIONS
 from ..exceptions import BlockStructureNotFound
-from ..models import BlockStructureModel
+from ..models import BlockStructureModel, _storage_error_handling
 from .helpers import override_config_setting
@@ -72,7 +73,7 @@ class BlockStructureModelTestCase(TestCase):
         bsm, created = BlockStructureModel.update_or_create(serialized_data, **self.params)
         if mock_log:
             self.assertEqual("Created" if expect_created else "Updated", mock_log.info.call_args[0][1])
-            self.assertEqual(len(serialized_data), mock_log.info.call_args[0][3])
+            self.assertEqual(len(serialized_data), mock_log.info.call_args[0][6])
         self._assert_bsm_fields(bsm, serialized_data)
         if expect_created is not None:
             self.assertEqual(created, expect_created)
@@ -93,7 +94,7 @@ class BlockStructureModelTestCase(TestCase):
         # get entry
         found_bsm = BlockStructureModel.get(self.usage_key)
         self._assert_bsm_fields(found_bsm, serialized_data)
-        self.assertIn("BlockStructure: Read data from store;", mock_log.info.call_args[0][0])
+        self.assertIn("Read", mock_log.info.call_args[0][1])

         # update entry
         self.params.update(dict(data_version='new version'))
@@ -144,3 +145,18 @@ class BlockStructureModelTestCase(TestCase):
         with override_config_setting(PRUNE_OLD_VERSIONS, active=True):
             self._verify_update_or_create_call('data')
         self._assert_file_count_equal(min(prune_keep_count, num_prior_edits + 1))
+
+    @ddt.data(
+        (IOError, BlockStructureNotFound, True),
+        (IOError, IOError, False),
+        (SuspiciousOperation, BlockStructureNotFound, True),
+        (SuspiciousOperation, SuspiciousOperation, False),
+        (OSError, OSError, True),
+        (OSError, OSError, False),
+    )
+    @ddt.unpack
+    def test_error_handling(self, error_raised_in_operation, expected_error_raised, is_read_operation):
+        bs_model, _ = BlockStructureModel.update_or_create('test data', **self.params)
+        with self.assertRaises(expected_error_raised):
+            with _storage_error_handling(bs_model, 'operation', is_read_operation):
+                raise error_raised_in_operation