Commit 90940d48 by Daniel Friedman

Batch the bulk_creates in bulk_create_notes

TNL-2428
parent f048f5ed
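For context before the diff: the change threads a new `--batch_size` option through the `bulk_create_notes` management command so notes are inserted in bounded chunks rather than one giant `bulk_create`. A hedged sketch of driving the command programmatically (not part of the commit; assumes the command file is installed under an app's `management/commands/`):

```python
# Hypothetical usage sketch, not from the commit.
from django.core.management import call_command

# Create 10000 notes total, inserted 500 rows per INSERT statement so no
# single query exceeds the production maximum SQL query size.
call_command('bulk_create_notes', '10000', batch_size=500)
```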
+import itertools
 import json
 from optparse import make_option
 import os
@@ -33,18 +34,30 @@ class Command(BaseCommand):
             default=['edX/DemoX/Demo_Course'],
             help='comma-separated list of course_ids for which notes should be randomly attributed'
         ),
+        make_option(
+            '--batch_size',
+            action='store',
+            type='int',
+            default=1000,
+            help='number of notes that should be bulk inserted at a time - useful for getting around the maximum SQL '
+                 'query size'
+        )
     )
 
     help = 'Add N random notes to the database'
 
     def handle(self, *args, **options):
         if len(args) != 1:
-            raise CommandError("generate_random_notes takes the following arguments: " + self.args)
+            raise CommandError("bulk_create_notes takes the following arguments: " + self.args)
 
         total_notes = int(args[0])
         notes_per_user = options['per_user']
         course_ids = options['course_ids']
-        Note.objects.bulk_create(note_iter(total_notes, notes_per_user, course_ids))
+        batch_size = options['batch_size']
+        # In production, there is a max SQL query size. Batch the bulk inserts
+        # such that we don't exceed this limit.
+        for notes_chunk in grouper_it(note_iter(total_notes, notes_per_user, course_ids), batch_size):
+            Note.objects.bulk_create(notes_chunk)
 
 def note_iter(total_notes, notes_per_user, course_ids):
     """
@@ -89,3 +102,18 @@ def note_iter(total_notes, notes_per_user, course_ids):
         ranges=json.dumps([{"start": "/div[1]/p[1]", "end": "/div[1]/p[1]", "startOffset": 0, "endOffset": 6}]),
         tags=json.dumps(weighted_get_words([(1, 40), (2, 30), (5, 15), (10, 10), (15, 5)]))
     )
+
+
+def grouper_it(iterable, batch_size):
+    """
+    Return an iterator of iterators. Each child iterator yields the
+    next `batch_size`-many elements from `iterable`.
+    """
+    iterator = iter(iterable)
+    while True:
+        chunk_it = itertools.islice(iterator, batch_size)
+        try:
+            first_el = next(chunk_it)
+        except StopIteration:
+            break
+        yield itertools.chain((first_el,), chunk_it)
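Since the diff shows `grouper_it` only in isolation, here is a minimal standalone sketch of its behavior (sample data invented for illustration). The key detail is that `islice` is applied to the single shared `iterator`, so each chunk resumes where the previous one stopped; slicing the raw iterable instead would restart from the beginning on list-like inputs and loop forever.

```python
import itertools


def grouper_it(iterable, batch_size):
    """Yield lazy chunks of up to `batch_size` elements from `iterable`."""
    iterator = iter(iterable)
    while True:
        # Slice the shared iterator so each chunk picks up where the last ended.
        chunk_it = itertools.islice(iterator, batch_size)
        try:
            # Pull one element eagerly: an empty slice means the source is exhausted.
            first_el = next(chunk_it)
        except StopIteration:
            break
        yield itertools.chain((first_el,), chunk_it)


# Splits a 10-element generator into lazy batches of 4, 4, and 2.
for chunk in grouper_it((n for n in range(10)), 4):
    print(list(chunk))
# [0, 1, 2, 3]
# [4, 5, 6, 7]
# [8, 9]
```

Because each chunk is itself a lazy iterator, the command never materializes all `total_notes` `Note` objects at once; each `bulk_create` call consumes one chunk of at most `batch_size` rows, keeping every INSERT under the max query size the in-code comment mentions.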