Commit f3e089bd by John Eskew

Mgmt commands to clear data from historical tables.

parent 5557eae2
"""
Command to delete all rows from these tables:
microsite_configuration_historicalmicrositeorganizationmapping
microsite_configuration_historicalmicrositetemplate
"""
import logging
from common.djangoapps.microsite_configuration.models import MicrositeOrganizationMapping, MicrositeTemplate
from openedx.core.djangoapps.util.row_delete import delete_rows, BaseDeletionCommand
log = logging.getLogger(__name__)
class Command(BaseDeletionCommand):
"""
Example usage: ./manage.py lms --settings=devstack delete_historical_microsite_data
"""
help = 'Deletes all historical MicrositeOrganizationMapping and MicrositeTemplate rows (in chunks).'
def handle(self, *args, **options):
"""
Deletes rows, chunking the deletes to avoid long table/row locks.
"""
chunk_size, sleep_between = super(Command, self).handle(*args, **options)
delete_rows(
MicrositeOrganizationMapping.objects,
'microsite_configuration_historicalmicrositeorganizationmapping',
'history_id',
chunk_size, sleep_between
)
delete_rows(
MicrositeTemplate.objects,
'microsite_configuration_historicalmicrositetemplate',
'history_id',
chunk_size, sleep_between
)
"""
Command to delete all rows from the student_historicalcourseenrollment table.
"""
import logging
from student.models import CourseEnrollment
from openedx.core.djangoapps.util.row_delete import delete_rows, BaseDeletionCommand
log = logging.getLogger(__name__)
class Command(BaseDeletionCommand):
"""
Example usage: ./manage.py lms --settings=devstack delete_historical_enrollment_data
"""
help = 'Deletes all historical CourseEnrollment rows (in chunks).'
def handle(self, *args, **options):
"""
Deletes rows, chunking the deletes to avoid long table/row locks.
"""
chunk_size, sleep_between = super(Command, self).handle(*args, **options)
delete_rows(
CourseEnrollment.objects,
'student_historicalcourseenrollment',
'history_id',
chunk_size, sleep_between
)
"""
Command to delete all rows from the verify_student_historicalverificationdeadline table.
"""
import logging
from verify_student.models import VerificationDeadline
from openedx.core.djangoapps.util.row_delete import delete_rows, BaseDeletionCommand
log = logging.getLogger(__name__)
class Command(BaseDeletionCommand):
"""
Example usage: ./manage.py lms --settings=devstack delete_historical_verify_student_data
"""
help = 'Deletes all historical VerificationDeadline rows (in chunks).'
def handle(self, *args, **options):
"""
Deletes rows, chunking the deletes to avoid long table/row locks.
"""
chunk_size, sleep_between = super(Command, self).handle(*args, **options)
delete_rows(
VerificationDeadline.objects,
'verify_student_historicalverificationdeadline',
'history_id',
chunk_size, sleep_between
)
"""
Command to delete all rows from the api_admin_historicalapiaccessrequest table.
"""
import logging
from openedx.core.djangoapps.api_admin.models import ApiAccessRequest
from openedx.core.djangoapps.util.row_delete import delete_rows, BaseDeletionCommand
log = logging.getLogger(__name__)
class Command(BaseDeletionCommand):
"""
Example usage: ./manage.py lms --settings=devstack delete_historical_api_admin_data
"""
help = 'Deletes all historical ApiAccessRequest rows (in chunks).'
def handle(self, *args, **options):
"""
Deletes rows, chunking the deletes to avoid long table/row locks.
"""
chunk_size, sleep_between = super(Command, self).handle(*args, **options)
delete_rows(
ApiAccessRequest.objects,
'api_admin_historicalapiaccessrequest',
'history_id',
chunk_size, sleep_between
)
"""
Command to delete all rows from the credit_historicalcreditrequest and
credit_historicalcreditrequirementstatus tables.
"""
import logging
from openedx.core.djangoapps.credit.models import CreditRequest, CreditRequirementStatus
from openedx.core.djangoapps.util.row_delete import delete_rows, BaseDeletionCommand
log = logging.getLogger(__name__)
class Command(BaseDeletionCommand):
"""
Example usage: ./manage.py lms --settings=devstack delete_historical_credit_data
"""
help = 'Deletes all historical CreditRequest and CreditRequirementStatus rows (in chunks).'
def handle(self, *args, **options):
"""
Deletes rows, chunking the deletes to avoid long table/row locks.
"""
chunk_size, sleep_between = super(Command, self).handle(*args, **options)
delete_rows(
CreditRequest.objects,
'credit_historicalcreditrequest',
'history_id',
chunk_size, sleep_between
)
delete_rows(
CreditRequirementStatus.objects,
'credit_historicalcreditrequirementstatus',
'history_id',
chunk_size, sleep_between
)
"""
Code to delete rows from a table within a Django mgmt command using best practices.
"""
import logging
import time
from django.core.management.base import CommandError, BaseCommand
from django.db import transaction
log = logging.getLogger(__name__)
def delete_rows(model_mgr,
table_name,
primary_id_name,
chunk_size,
sleep_between):
"""
Deletes *ALL* rows from table, chunking the deletes to avoid long table/row locks.
Args:
model_mgr (django.db.models.manager.Manager): Django ORM mgr for the table's model.
table_name (str): Name of table from which to delete all rows.
primary_id_name (str): Name of primary ID autoincrement column from table.
chunk_size (int): Number of rows to delete in each transaction.
sleep_between (float): Number of seconds to sleep between transactions.
"""
if chunk_size <= 0:
raise CommandError('Only positive chunk size is allowed ({}).'.format(chunk_size))
if sleep_between < 0:
raise CommandError('Only non-negative sleep between seconds is allowed ({}).'.format(sleep_between))
# The "as id" below fools Django raw query into thinking the primary key is being queried.
# It's necessary because Django will throw an exception if the raw SQL does not query the primary key.
min_max_ids = model_mgr.raw(
'SELECT MIN({}) as id, MAX({}) as max_id FROM {}'.format(primary_id_name, primary_id_name, table_name)
)[0]
min_id = min_max_ids.id
max_id = min_max_ids.max_id
if not min_id or not max_id:
log.info("No data exists in table %s - skipping.", table_name)
return
log.info(
"STARTED: Deleting around %s rows with chunk size of %s and %s seconds between chunks.",
max_id - min_id + 1, chunk_size, sleep_between
)
lower_id = min_id
while lower_id <= max_id:
deletions_now = min(chunk_size, max_id - lower_id + 1)
upper_id = lower_id + deletions_now
log.info("Deleting around %s rows between ids %s and %s...", deletions_now, lower_id, upper_id)
with transaction.atomic():
# xss-lint: disable=python-wrap-html
delete_sql = 'DELETE FROM {} WHERE {} >= {} AND {} < {}'.format(
table_name, primary_id_name, lower_id, primary_id_name, upper_id
)
log.info(delete_sql)
try:
list(model_mgr.raw(delete_sql))
except TypeError:
# The list() above is simply to get the RawQuerySet to be evaluated.
# Without evaluation, the raw DELETE SQL will *not* actually execute.
# But - it will cause a "TypeError: 'NoneType' object is not iterable" to be ignored.
pass
lower_id += deletions_now
log.info("Sleeping %s seconds...", sleep_between)
time.sleep(sleep_between)
log.info("FINISHED: Deleted at most %s rows total.", max_id - min_id + 1)
class BaseDeletionCommand(BaseCommand):
"""
Base command used to delete all rows from a table.
"""
# Default maximum number of rows to delete in a single transaction.
DEFAULT_CHUNK_SIZE = 10000
# Default seconds to sleep between chunked deletes of rows.
DEFAULT_SLEEP_BETWEEN_DELETES = 0
def add_arguments(self, parser):
parser.add_argument(
'--chunk_size',
default=self.DEFAULT_CHUNK_SIZE,
type=int,
help='Maximum number of rows to delete in each DB transaction. Choose this value carefully to avoid DB outages!'
)
parser.add_argument(
'--sleep_between',
default=self.DEFAULT_SLEEP_BETWEEN_DELETES,
type=float,
help='Seconds to sleep between chunked delete of rows.'
)
def handle(self, *args, **options):
"""
Deletes rows, chunking the deletes to avoid long table/row locks.
"""
return (
options.get('chunk_size', self.DEFAULT_CHUNK_SIZE),
options.get('sleep_between', self.DEFAULT_SLEEP_BETWEEN_DELETES)
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment