Commit 2f9e9e24, authored by Chris Rossi, committed by Diana Huang

Refactor findusers script to be fully lazy, in hopes we don't run out of memory…

Refactor findusers script to be fully lazy, in hopes we don't run out of memory with huge numbers of users.
parent 7a48e935
...@@ -12,6 +12,8 @@ from django.utils import timezone ...@@ -12,6 +12,8 @@ from django.utils import timezone
from optparse import make_option from optparse import make_option
from ...models import LinkedIn
FRIDAY = 4 FRIDAY = 4
...@@ -71,32 +73,45 @@ class Command(BaseCommand): ...@@ -71,32 +73,45 @@ class Command(BaseCommand):
if not max_checks: if not max_checks:
raise CommandError("No checks allowed during this time.") raise CommandError("No checks allowed during this time.")
check_users = [] def batch_users():
"Generator to lazily generate batches of users to query."
count = 0
batch = []
for user in User.objects.all(): for user in User.objects.all():
checked = (hasattr(user, 'linkedin') and if not hasattr(user, 'linkedin'):
user.linkedin.has_linkedin_account is not None) LinkedIn(user=user).save()
checked = user.linkedin.has_linkedin_account is not None
if recheck or not checked: if recheck or not checked:
check_users.append(user) batch.append(user)
if len(batch) == checks_per_call:
if max_checks != -1 and len(check_users) > max_checks: yield batch
batch = []
count += 1
if max_checks != 1 and count == max_checks:
self.stderr.write( self.stderr.write(
"WARNING: limited to checking only %d users today." % "WARNING: limited to checking only %d users today."
max_checks) % max_checks)
check_users = check_users[:max_checks] break
batches = [check_users[i:i + checks_per_call] if batch:
for i in xrange(0, len(check_users), checks_per_call)] yield batch
def do_batch(batch): def do_batch(batch):
"Process a batch of users." "Process a batch of users."
emails = [u.email for u in batch] emails = (u.email for u in batch)
for user, has_account in zip(batch, api.batch(emails)): for user, has_account in zip(batch, api.batch(emails)):
user.linkedin.has_linkedin_account = has_account linkedin = user.linkedin
if linkedin.has_linkedin_account != has_account:
if batches: linkedin.has_linkedin_account = has_account
do_batch(batches.pop(0)) linkedin.save()
batches = batch_users()
try:
do_batch(batches.next()) # may raise StopIteration
for batch in batches: for batch in batches:
time.sleep(time_between_calls) time.sleep(time_between_calls)
do_batch(batch) do_batch(batch)
except StopIteration:
pass
class LinkedinAPI(object): class LinkedinAPI(object):
...@@ -108,4 +123,4 @@ class LinkedinAPI(object): ...@@ -108,4 +123,4 @@ class LinkedinAPI(object):
""" """
Get the LinkedIn status for a batch of emails. Get the LinkedIn status for a batch of emails.
""" """
pass return (True for email in emails)
...@@ -110,6 +110,7 @@ class FindUsersTests(unittest.TestCase): ...@@ -110,6 +110,7 @@ class FindUsersTests(unittest.TestCase):
def dummy_batch(emails): def dummy_batch(emails):
"Mock LinkedIn API." "Mock LinkedIn API."
emails = list(emails)
self.assertEqual(len(emails), 4) self.assertEqual(len(emails), 4)
return [email % 2 == 0 for email in emails] return [email % 2 == 0 for email in emails]
api.batch = dummy_batch api.batch = dummy_batch
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment