Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
07aad296
Commit
07aad296
authored
Jul 12, 2013
by
Ned Batchelder
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
The clean_history management command to remove excess courseware_studentmodulehistory records.
parent
8300bb5e
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
217 additions
and
0 deletions
+217
-0
lms/djangoapps/courseware/management/commands/clean_history.py
+217
-0
lms/djangoapps/courseware/management/tests/test_clean_history.py
+0
-0
No files found.
lms/djangoapps/courseware/management/commands/clean_history.py
0 → 100644
View file @
07aad296
"""A command to clean the StudentModuleHistory table.
When we added XBlock storage, each field modification wrote a new history row
to the db. Now that we have bulk saves to avoid that database hammering, we
need to clean out the unnecessary rows from the database.
This is the command that does that.
"""
import
datetime
import
json
from
optparse
import
make_option
import
traceback
from
django.core.management.base
import
NoArgsCommand
from
django.db
import
connection
class Command(NoArgsCommand):
    """Entry point for the clean_history management command.

    Parses the command-line options and hands the actual work to
    `StudentModuleHistoryCleaner`.
    """

    help = "Deletes unneeded rows from the StudentModuleHistory table."

    option_list = NoArgsCommand.option_list + (
        make_option(
            '--dry-run',
            action='store_true',
            default=False,
            help="Don't change the database, just show what would be done.",
        ),
    )

    def handle_noargs(self, **options):
        """Run the cleaner, honoring the `--dry-run` option.

        `options`: the parsed command-line options; only `dry_run` is read.
        """
        StudentModuleHistoryCleaner(dry_run=options["dry_run"]).main()
class StudentModuleHistoryCleaner(object):
    """Logic to clean rows from the StudentModuleHistory table.

    Student module ids are walked in increasing order, in batches.  For each
    id, every history row that is followed by another row less than
    `DELETE_GAP_SECS` seconds later is deleted.  After each batch the
    transaction is committed and the next id to process is written to
    `STATE_FILE`; `main` reads that file on start-up and continues from the
    stored id.
    """

    DELETE_GAP_SECS = 0.5   # Rows this close can be discarded.
    STATE_FILE = "clean_history.json"
    BATCH_SIZE = 100

    def __init__(self, dry_run=False):
        """
        `dry_run`: if true, report what would be deleted but delete nothing.
        """
        self.dry_run = dry_run
        self.next_student_module_id = 0
        self.last_student_module_id = 0

    def main(self, batch_size=None):
        """Invoked from the management command to do all the work.

        `batch_size`: how many student module ids to process between
        commits.  Defaults to `BATCH_SIZE` when omitted (or zero).

        Raises ValueError if `batch_size` is negative.
        """
        batch_size = batch_size or self.BATCH_SIZE
        if batch_size < 1:
            # A batch that yields no ids would never advance
            # `next_student_module_id`, so the loop below would spin forever.
            raise ValueError(
                "batch_size must be positive, got {!r}".format(batch_size)
            )

        self.last_student_module_id = self.get_last_student_module_id()
        self.load_state()

        if self.last_student_module_id is None:
            # The history table is empty, so there is nothing to clean.
            # Comparing an int with None raises TypeError on Python 3.
            return

        while self.next_student_module_id <= self.last_student_module_id:
            for smid in self.module_ids_to_check(batch_size):
                try:
                    self.clean_one_student_module(smid)
                except Exception:       # pylint: disable=W0703
                    # Best effort: report the failure and move on to the
                    # next student module rather than aborting the run.
                    trace = traceback.format_exc()
                    self.say("Couldn't clean student_module_id {}:\n{}".format(smid, trace))
            self.commit()
            self.save_state()

    def say(self, message):
        """
        Display a message to the user.

        The message will have a trailing newline added to it.
        """
        # With a single argument this form prints the same text whether
        # `print` is a statement (Python 2) or a function (Python 3).
        print(message)

    def commit(self):
        """
        Commit the transaction.
        """
        self.say("Committing")
        connection.commit()

    def load_state(self):
        """
        Load the latest state from disk.

        Sets `next_student_module_id` from `STATE_FILE`, or to 0 if the file
        cannot be opened.
        """
        try:
            state_file = open(self.STATE_FILE)
        except IOError:
            self.say("No stored state")
            self.next_student_module_id = 0
        else:
            with state_file:
                state = json.load(state_file)
            self.say(
                "Loaded stored state: {}".format(
                    json.dumps(state, sort_keys=True)
                )
            )
            self.next_student_module_id = state['next_student_module_id']

    def save_state(self):
        """
        Save the state to disk.

        Writes `next_student_module_id` to `STATE_FILE` as JSON.
        """
        state = {
            'next_student_module_id': self.next_student_module_id,
        }
        with open(self.STATE_FILE, "w") as state_file:
            json.dump(state, state_file)
        self.say("Saved state: {}".format(json.dumps(state, sort_keys=True)))

    def get_last_student_module_id(self):
        """
        Return the id of the last student_module.

        This is the largest student_module_id present in the history table;
        the value is None when the query finds no rows to take a max over.
        """
        cursor = connection.cursor()
        cursor.execute("""
            SELECT max(student_module_id) FROM courseware_studentmodulehistory
            """)
        last = cursor.fetchone()[0]
        self.say("Last student_module_id is {}".format(last))
        return last

    def module_ids_to_check(self, batch_size):
        """Produce a sequence of student module ids to check.

        `batch_size` is how many module ids to produce, max.

        The sequence starts with `next_student_module_id`, and goes up to
        and including `last_student_module_id`.

        `next_student_module_id` is updated as each id is yielded.
        """
        start = self.next_student_module_id
        for smid in range(start, start + batch_size):
            if smid > self.last_student_module_id:
                break
            yield smid
            # Only advance once the consumer asks for the next id, i.e.
            # after it has finished with this one.
            self.next_student_module_id = smid + 1

    def get_history_for_student_modules(self, student_module_id):
        """
        Get the history rows for a student module.

        `student_module_id`: the id of the student module we're
        interested in.

        Return a list: [(id, created), ...], all the rows of history,
        ordered by `created`.
        """
        cursor = connection.cursor()
        cursor.execute("""
            SELECT id, created FROM courseware_studentmodulehistory
            WHERE student_module_id = %s
            ORDER BY created
            """,
            [student_module_id]
        )
        history = cursor.fetchall()
        return history

    def delete_history(self, ids_to_delete):
        """
        Delete history rows.

        `ids_to_delete`: a non-empty list (or set...) of history row ids to
        delete.

        Raises AssertionError if `ids_to_delete` is empty, and ValueError or
        TypeError if an id cannot be converted to an integer.
        """
        assert ids_to_delete
        # Ids are coerced to int so that only integer literals are ever
        # interpolated into the SQL text.
        id_list = ",".join(str(int(i)) for i in ids_to_delete)
        cursor = connection.cursor()
        cursor.execute("""
            DELETE FROM courseware_studentmodulehistory
            WHERE id IN ({ids})
            """.format(ids=id_list)
        )

    def clean_one_student_module(self, student_module_id):
        """Clean one StudentModule's-worth of history.

        `student_module_id`: the id of the StudentModule to process.

        A row is discarded when the row after it (by `created`) is less than
        `DELETE_GAP_SECS` seconds later.  The most recent row is never
        discarded.  In dry-run mode the count is reported but nothing is
        deleted.
        """
        delete_gap = datetime.timedelta(seconds=self.DELETE_GAP_SECS)

        history = self.get_history_for_student_modules(student_module_id)
        if not history:
            self.say("No history for student_module_id {}".format(student_module_id))
            return

        ids_to_delete = []
        next_created = None
        # Walk from newest to oldest so each row can be compared with the
        # row that follows it in time.
        for history_id, created in reversed(history):
            if next_created is not None:
                # Compare this timestamp with the next one.
                if (next_created - created) < delete_gap:
                    # This row is followed closely by another, we can discard
                    # this one.
                    ids_to_delete.append(history_id)

            next_created = created

        verb = "Would have deleted" if self.dry_run else "Deleting"
        self.say("{verb} {to_delete} rows of {total} for student_module_id {id}".format(
            verb=verb,
            to_delete=len(ids_to_delete),
            total=len(history),
            id=student_module_id,
        ))

        if ids_to_delete and not self.dry_run:
            self.delete_history(ids_to_delete)
lms/djangoapps/courseware/management/tests/test_clean_history.py
0 → 100644
View file @
07aad296
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment