Commit 08a08448, authored Sep 25, 2013 by Brian Wilson

Add some handling for SES exceptions.

parent c133fd91
Showing 4 changed files with 135 additions and 36 deletions:

  lms/djangoapps/bulk_email/tasks.py              +63  -16
  lms/djangoapps/instructor_task/api_helper.py     +1   -1
  lms/djangoapps/instructor_task/subtasks.py      +69  -14
  lms/djangoapps/instructor_task/tasks_helper.py   +2   -5
lms/djangoapps/bulk_email/tasks.py

```diff
@@ -4,6 +4,7 @@ to a course.
 """
 import math
 import re
+import random
 from uuid import uuid4
 from time import sleep
```
```diff
@@ -12,6 +13,8 @@ from traceback import format_exc
 from dogapi import dog_stats_api
 from smtplib import SMTPServerDisconnected, SMTPDataError, SMTPConnectError
+from boto.ses.exceptions import SESDailyQuotaExceededError, SESMaxSendingRateExceededError
+from boto.exception import AWSConnectionError
 from celery import task, current_task, group
 from celery.utils.log import get_task_logger
```
```diff
@@ -34,12 +37,26 @@ from instructor_task.models import InstructorTask
 from instructor_task.subtasks import (
     update_subtask_status,
     create_subtask_result,
     increment_subtask_result,
     update_instructor_task_for_subtasks,
 )

 log = get_task_logger(__name__)

+# Exceptions that, if caught, should cause the task to be re-tried.
+# These errors will be caught a maximum of 5 times before the task fails.
+RETRY_ERRORS = (SMTPDataError, SMTPConnectError, SMTPServerDisconnected, AWSConnectionError)
+
+# Errors that involve exceeding a quota of sent email
+QUOTA_EXCEEDED_ERRORS = (SESDailyQuotaExceededError, )
+
+# Errors that mail is being sent too quickly. When caught by a task, it
+# triggers an exponential backoff and retry. Retries happen continuously until
+# the email is sent.
+SENDING_RATE_ERRORS = (SESMaxSendingRateExceededError, )
+

 def _get_recipient_queryset(user_id, to_option, course_id, course_location):
     """
     Generates a query set corresponding to the requested category.
```
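These three tuples drive all of the new error handling: Python's except clauses and isinstance() both accept a tuple of exception classes, so each policy (retry, quota, rate limit) is named once and reused. A minimal, self-contained sketch of that mechanism, with stand-in exception classes in place of the real smtplib/boto imports:

```python
# Stand-in exception classes; the real code imports these from smtplib and boto.
class SMTPDataError(Exception):
    pass

class SESMaxSendingRateExceededError(Exception):
    pass

RETRY_ERRORS = (SMTPDataError,)
SENDING_RATE_ERRORS = (SESMaxSendingRateExceededError,)

def classify(exc):
    # isinstance() accepts a tuple of classes, just like an except clause.
    if isinstance(exc, SENDING_RATE_ERRORS):
        return 'backoff-and-retry'
    if isinstance(exc, RETRY_ERRORS):
        return 'retry-up-to-5-times'
    return 'fail'

print(classify(SESMaxSendingRateExceededError()))  # backoff-and-retry
print(classify(SMTPDataError()))                   # retry-up-to-5-times
```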
```diff
@@ -154,7 +171,12 @@ def perform_delegate_email_batches(entry_id, course_id, task_input, action_name)
         num_tasks_this_query = int(math.ceil(float(num_emails_this_query) / float(settings.EMAILS_PER_TASK)))
         chunk = int(math.ceil(float(num_emails_this_query) / float(num_tasks_this_query)))
         for i in range(num_tasks_this_query):
-            to_list = recipient_sublist[i * chunk:i * chunk + chunk]
+            if i == num_tasks_this_query - 1:
+                # Avoid cutting off the very last email when chunking a task that divides perfectly
+                # (eg num_emails_this_query = 297 and EMAILS_PER_TASK is 100)
+                to_list = recipient_sublist[i * chunk:]
+            else:
+                to_list = recipient_sublist[i * chunk:i * chunk + chunk]
             subtask_id = str(uuid4())
             subtask_id_list.append(subtask_id)
             task_list.append(send_course_email.subtask((
```
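Worked numbers for the chunking arithmetic, including the guard on the final slice; the values below are illustrative:

```python
import math

num_emails_this_query = 297
EMAILS_PER_TASK = 100

# Two ceiling divisions: first the task count, then the per-task chunk size.
num_tasks_this_query = int(math.ceil(float(num_emails_this_query) / float(EMAILS_PER_TASK)))  # 3
chunk = int(math.ceil(float(num_emails_this_query) / float(num_tasks_this_query)))            # 99

recipient_sublist = list(range(num_emails_this_query))
for i in range(num_tasks_this_query):
    if i == num_tasks_this_query - 1:
        # The final slice runs to the end of the list, so no recipient can be
        # dropped regardless of how the chunk size rounded.
        to_list = recipient_sublist[i * chunk:]
    else:
        to_list = recipient_sublist[i * chunk:i * chunk + chunk]
    print(i, len(to_list))  # 0 99 / 1 99 / 2 99 -- all 297 recipients covered
```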
```diff
@@ -165,7 +187,6 @@ def perform_delegate_email_batches(entry_id, course_id, task_input, action_name)
                 ),
                 task_id=subtask_id,
                 routing_key=settings.HIGH_PRIORITY_QUEUE,
-                queue=settings.HIGH_PRIORITY_QUEUE,
             ))
         num_workers += num_tasks_this_query
```
```diff
@@ -177,7 +198,7 @@ def perform_delegate_email_batches(entry_id, course_id, task_input, action_name)
     # now group the subtasks, and start them running:
     task_group = group(task_list)
-    task_group.apply_async(routing_key=settings.HIGH_PRIORITY_QUEUE, queue=settings.HIGH_PRIORITY_QUEUE)
+    task_group.apply_async(routing_key=settings.HIGH_PRIORITY_QUEUE)

     # We want to return progress here, as this is what will be stored in the
     # AsyncResult for the parent task as its return value.
```
```diff
@@ -220,8 +241,9 @@ def send_course_email(entry_id, email_id, to_list, global_email_context):
     # Get information from current task's request:
     current_task_id = _get_current_task().request.id
-    log.info("Preparing to send email as subtask %s for instructor task %d: request = %s", current_task_id, entry_id, _get_current_task().request)
+    num_to_send = len(to_list)
+    log.info("Preparing to send %s emails as subtask %s for instructor task %d: request = %s", num_to_send, current_task_id, entry_id, _get_current_task().request)
     send_exception = None
     course_email_result_value = None
```
```diff
@@ -239,9 +261,10 @@ def send_course_email(entry_id, email_id, to_list, global_email_context):
         _, send_exception, traceback = exc_info()
         traceback_string = format_exc(traceback) if traceback is not None else ''
         log.error("background task (%s) failed unexpectedly: %s %s", current_task_id, send_exception, traceback_string)
-        # consider all emails to not be sent, and update stats:
-        num_error = len(to_list)
-        course_email_result_value = create_subtask_result(0, num_error, 0)
+        # We got here for really unexpected reasons. Since we don't know how far
+        # the task got in emailing, we count all recipients as having failed.
+        # It at least keeps the counts consistent.
+        course_email_result_value = create_subtask_result(0, num_to_send, 0)

     if send_exception is None:
         # Update the InstructorTask object that is storing its progress.
```
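The failure path captures the in-flight exception with sys.exc_info() so it can be both logged and carried in the result. A minimal sketch of that capture pattern, calling format_exc() with its standard signature; the helper name is illustrative:

```python
from sys import exc_info
from traceback import format_exc

def run_and_report(fn):
    send_exception = None
    try:
        fn()
    except Exception:
        # Keep the exception object for the result, and a formatted
        # traceback string for the log.
        _, send_exception, _ = exc_info()
        traceback_string = format_exc()
        print("failed unexpectedly: %s %s" % (send_exception, traceback_string))
    return send_exception

run_and_report(lambda: 1 / 0)  # prints the ZeroDivisionError and its traceback
```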
```diff
@@ -391,13 +414,19 @@ def _send_course_email(entry_id, email_id, to_list, global_email_context):
             # Pop the user that was emailed off the end of the list:
             to_list.pop()

-    except (SMTPDataError, SMTPConnectError, SMTPServerDisconnected) as exc:
+    except SENDING_RATE_ERRORS as exc:
+        subtask_progress = create_subtask_result(num_sent, num_error, num_optout)
+        return _submit_for_retry(
+            entry_id, email_id, to_list, global_email_context, exc, subtask_progress, True
+        )
+
+    except RETRY_ERRORS as exc:
         # Errors caught here cause the email to be retried. The entire task is actually retried
         # without popping the current recipient off of the existing list.
+        # Errors caught are those that indicate a temporary condition that might succeed on retry.
         subtask_progress = create_subtask_result(num_sent, num_error, num_optout)
         return _submit_for_retry(
-            entry_id, email_id, to_list, global_email_context, exc, subtask_progress
+            entry_id, email_id, to_list, global_email_context, exc, subtask_progress, False
         )

     except Exception as exc:
```
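Clause order matters here: SENDING_RATE_ERRORS has to be tested before the broader RETRY_ERRORS tuple, and both paths return before popping the current recipient, so a retried task re-attempts everyone still in to_list. A toy version of that loop, with illustrative names:

```python
class RateError(Exception):
    pass

def send_all(to_list, send_one):
    while to_list:
        try:
            send_one(to_list[-1])
        except RateError:
            # Leave the current recipient in place; the retried task will
            # re-attempt everyone still in the list.
            return ('retry', list(to_list))
        # Pop the user that was emailed off the end of the list.
        to_list.pop()
    return ('done', [])

def flaky(user):
    if user == 'b@example.com':
        raise RateError()

print(send_all(['c@example.com', 'b@example.com', 'a@example.com'], flaky))
# ('retry', ['c@example.com', 'b@example.com'])  -- only 'a' was sent and popped
```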
```diff
@@ -406,8 +435,14 @@ def _send_course_email(entry_id, email_id, to_list, global_email_context):
         # If we're going to just mark it as failed
         # And the log message below should indicate which task_id is failing, so we have a chance to
         # reconstruct the problems.
-        log.exception('Task %s: email with id %d caused send_course_email task to fail with uncaught exception. To list: %s', task_id, email_id, [i['email'] for i in to_list])
+        if isinstance(exc, QUOTA_EXCEEDED_ERRORS):
+            log.exception('WARNING: Course "%s" exceeded quota!', course_title)
+            log.exception('Email with id %d not sent due to exceeding quota. To list: %s',
+                          email_id, [i['email'] for i in to_list])
+        else:
+            log.exception('Task %s: email with id %d caused send_course_email task to fail with uncaught exception. To list: %s',
+                          task_id, email_id, [i['email'] for i in to_list])
         num_error += len(to_list)
         return create_subtask_result(num_sent, num_error, num_optout), exc
     else:
```
```diff
@@ -418,7 +453,7 @@ def _send_course_email(entry_id, email_id, to_list, global_email_context):
             connection.close()


-def _submit_for_retry(entry_id, email_id, to_list, global_email_context, current_exception, subtask_progress):
+def _submit_for_retry(entry_id, email_id, to_list, global_email_context, current_exception, subtask_progress, is_sending_rate_error):
     """
     Helper function to requeue a task for retry, using the new version of arguments provided.
```
```diff
@@ -443,6 +478,15 @@ def _submit_for_retry(entry_id, email_id, to_list, global_email_context, current
     log.warning('Task %s: email with id %d not delivered due to temporary error %s, retrying send to %d recipients',
                 task_id, email_id, current_exception, len(to_list))

+    # Don't resend emails that have already succeeded.
+    # Retry the email at increasing exponential backoff.
+    if is_sending_rate_error:
+        countdown = ((2 ** retry_index) * 15) * random.uniform(.5, 1.5)
+    else:
+        countdown = ((2 ** retry_index) * 15) * random.uniform(.75, 1.5)
+
     try:
         send_course_email.retry(
             args=[
```
```diff
@@ -452,7 +496,7 @@ def _submit_for_retry(entry_id, email_id, to_list, global_email_context, current
                 global_email_context,
             ],
             exc=current_exception,
-            countdown=(2 ** retry_index) * 15,
+            countdown=countdown,
             throw=True,
         )
     except RetryTaskError as retry_error:
```
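Illustrative countdown values for the backoff formula: the base delay starts at 15 seconds and doubles with each retry_index, and sending-rate errors get a wider jitter band (0.5-1.5) than the other retryable errors (0.75-1.5), spreading retries out further:

```python
import random

for retry_index in range(4):
    base = (2 ** retry_index) * 15           # 15, 30, 60, 120 seconds
    rate_limited = base * random.uniform(.5, 1.5)
    other = base * random.uniform(.75, 1.5)
    print(retry_index, base, round(rate_limited, 1), round(other, 1))
```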
```diff
@@ -464,10 +508,13 @@ def _submit_for_retry(entry_id, email_id, to_list, global_email_context, current
         # If there are no more retries, because the maximum has been reached,
         # we expect the original exception to be raised. We catch it here
         # (and put it in retry_exc just in case it's different, but it shouldn't be),
-        # and update status as if it were any other failure.
+        # and update status as if it were any other failure. That means that
+        # the recipients still in the to_list are counted as failures.
         log.exception('Task %s: email with id %d caused send_course_email task to fail to retry. To list: %s',
                       task_id, email_id, [i['email'] for i in to_list])
-        return subtask_progress, retry_exc
+        num_failed = len(to_list)
+        new_subtask_progress = increment_subtask_result(subtask_progress, 0, num_failed, 0)
+        return new_subtask_progress, retry_exc


 def _statsd_tag(course_title):
```
lms/djangoapps/instructor_task/api_helper.py

```diff
@@ -115,7 +115,7 @@ def _update_instructor_task(instructor_task, task_result):
     task_output = None
     entry_needs_updating = True

-    if result_state == SUCCESS and instructor_task.task_state == PROGRESS and len(instructor_task.subtasks) > 0:
+    if instructor_task.task_state == PROGRESS and len(instructor_task.subtasks) > 0:
         # This happens when running subtasks: the result object is marked with SUCCESS,
         # meaning that the subtasks have successfully been defined. However, the InstructorTask
         # will be marked as in PROGRESS, until the last subtask completes and marks it as SUCCESS.
```
lms/djangoapps/instructor_task/subtasks.py

```diff
@@ -14,27 +14,61 @@ from instructor_task.models import InstructorTask, PROGRESS, QUEUING
 TASK_LOG = get_task_logger(__name__)


-def create_subtask_result(new_num_sent, new_num_error, new_num_optout):
-    """Return the result of course_email sending as a dict (not a string)."""
-    attempted = new_num_sent + new_num_error
-    current_result = {'attempted': attempted, 'succeeded': new_num_sent, 'skipped': new_num_optout, 'failed': new_num_error}
+def create_subtask_result(num_sent, num_error, num_optout):
+    """
+    Create a result of a subtask.
+
+    Keys are: 'attempted', 'succeeded', 'skipped', 'failed'.
+
+    Object must be JSON-serializable.
+    """
+    attempted = num_sent + num_error
+    current_result = {'attempted': attempted, 'succeeded': num_sent, 'skipped': num_optout, 'failed': num_error}
     return current_result


+def increment_subtask_result(subtask_result, new_num_sent, new_num_error, new_num_optout):
+    """
+    Update the result of a subtask with additional results.
+
+    Keys are: 'attempted', 'succeeded', 'skipped', 'failed'.
+    """
+    new_result = create_subtask_result(new_num_sent, new_num_error, new_num_optout)
+    for keyname in new_result:
+        if keyname in subtask_result:
+            new_result[keyname] += subtask_result[keyname]
+    return new_result
+
+
 def update_instructor_task_for_subtasks(entry, action_name, total_num, subtask_id_list):
     """
     Store initial subtask information to InstructorTask object.

-    # Before we actually start running the tasks we've defined,
-    # the InstructorTask needs to be updated with their information.
-    # So we update the InstructorTask object here, not in the return.
-    # The monitoring code knows that it shouldn't go to the InstructorTask's task's
-    # Result for its progress when there are subtasks. So we accumulate
-    # the results of each subtask as it completes into the InstructorTask.
-    # At this point, we have some status that we can report, as to the magnitude of the overall
-    # task. That is, we know the total. Set that, and our subtasks should work towards that goal.
-    # Note that we add start_time in here, so that it can be used
-    # by subtasks to calculate duration_ms values:
+    The InstructorTask's "task_output" field is initialized. This is a JSON-serialized dict.
+    Counters for 'attempted', 'succeeded', 'failed', 'skipped' keys are initialized to zero,
+    as is the 'duration_ms' value. A 'start_time' is stored for later duration calculations,
+    and the total number of "things to do" is set, so the user can be told how much needs to be
+    done overall. The `action_name` is also stored, to also help with constructing more readable
+    progress messages.
+
+    The InstructorTask's "subtasks" field is also initialized. This is also a JSON-serialized dict.
+    Keys include 'total', 'succeeded', 'retried', 'failed', which are counters for the number of
+    subtasks. 'Total' is set here to the total number, while the other three are initialized to zero.
+    Once the counters for 'succeeded' and 'failed' match the 'total', the subtasks are done and
+    the InstructorTask's "status" will be changed to SUCCESS.
+
+    The "subtasks" field also contains a 'status' key, that contains a dict that stores status
+    information for each subtask. At the moment, the value for each subtask (keyed by its task_id)
+    is the value of `status`, which is initialized here to QUEUING.
+
+    This information needs to be set up in the InstructorTask before any of the subtasks start
+    running. If not, there is a chance that the subtasks could complete before the parent task
+    is done creating subtasks. Doing so also simplifies the save() here, as it avoids the need
+    for locking.
+
+    Monitoring code should assume that if an InstructorTask has subtask information, that it should
+    rely on the status stored in the InstructorTask object, rather than status stored in the
+    corresponding AsyncResult.
     """
     progress = {
         'action_name': action_name,
```
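How the two helpers compose: increment_subtask_result() folds the existing result dict into a freshly created one, which is how the retry-exhaustion path counts the remaining recipients as failures. A self-contained demo with the function bodies inlined:

```python
def create_subtask_result(num_sent, num_error, num_optout):
    attempted = num_sent + num_error
    return {'attempted': attempted, 'succeeded': num_sent,
            'skipped': num_optout, 'failed': num_error}

def increment_subtask_result(subtask_result, new_num_sent, new_num_error, new_num_optout):
    # Build a fresh result, then add in the counts accumulated so far.
    new_result = create_subtask_result(new_num_sent, new_num_error, new_num_optout)
    for keyname in new_result:
        if keyname in subtask_result:
            new_result[keyname] += subtask_result[keyname]
    return new_result

progress = create_subtask_result(2, 1, 0)               # 2 sent, 1 error so far
progress = increment_subtask_result(progress, 0, 3, 0)  # 3 more failures on retry exhaustion
print(progress)  # {'attempted': 6, 'succeeded': 2, 'skipped': 0, 'failed': 4}
```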
```diff
@@ -64,6 +98,27 @@ def update_instructor_task_for_subtasks(entry, action_name, total_num, subtask_i
 def update_subtask_status(entry_id, current_task_id, status, subtask_result):
     """
     Update the status of the subtask in the parent InstructorTask object tracking its progress.
+
+    Uses select_for_update to lock the InstructorTask object while it is being updated.
+    The operation is surrounded by a try/except/else that permit the manual transaction to be
+    committed on completion, or rolled back on error.
+
+    The InstructorTask's "task_output" field is updated. This is a JSON-serialized dict.
+    Accumulates values for 'attempted', 'succeeded', 'failed', 'skipped' from `subtask_result`
+    into the corresponding values in the InstructorTask's task_output. Also updates the 'duration_ms'
+    value with the current interval since the original InstructorTask started.
+
+    The InstructorTask's "subtasks" field is also updated. This is also a JSON-serialized dict.
+    Keys include 'total', 'succeeded', 'retried', 'failed', which are counters for the number of
+    subtasks. 'Total' is expected to have been set at the time the subtasks were created.
+    The other three counters are incremented depending on the value of `status`. Once the counters
+    for 'succeeded' and 'failed' match the 'total', the subtasks are done and the InstructorTask's
+    "status" is changed to SUCCESS.
+
+    The "subtasks" field also contains a 'status' key, that contains a dict that stores status
+    information for each subtask. At the moment, the value for each subtask (keyed by its task_id)
+    is the value of `status`, but could be expanded in future to store information about failure
+    messages, progress made, etc.
     """
     TASK_LOG.info("Preparing to update status for email subtask %s for instructor task %d with status %s", current_task_id, entry_id, subtask_result)
```
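A sketch of the locking pattern this docstring describes, assuming Django's transaction.commit_manually decorator (current in 2013, removed in later Django releases) and the InstructorTask model; the helper name and structure are illustrative, not the actual implementation:

```python
from django.db import transaction
from instructor_task.models import InstructorTask

@transaction.commit_manually
def _locked_update(entry_id, apply_update):
    try:
        # select_for_update() holds a row lock until the transaction ends, so
        # concurrent subtasks cannot overwrite each other's progress updates.
        entry = InstructorTask.objects.select_for_update().get(pk=entry_id)
        apply_update(entry)
        entry.save()
    except Exception:
        transaction.rollback()
        raise
    else:
        transaction.commit()
```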
lms/djangoapps/instructor_task/tasks_helper.py

```diff
@@ -271,11 +271,8 @@ def run_main_task(entry_id, task_fcn, action_name):
     task_input = json.loads(entry.task_input)

     # construct log message:
-    # TODO: generalize this beyond just problem and student, so it includes email_id and to_option.
-    # Can we just loop over all keys and output them all? Just print the task_input dict itself?
-    module_state_key = task_input.get('problem_url')
-    fmt = 'task "{task_id}": course "{course_id}" problem "{state_key}"'
-    task_info_string = fmt.format(task_id=task_id, course_id=course_id, state_key=module_state_key)
+    fmt = 'task "{task_id}": course "{course_id}" input "{task_input}"'
+    task_info_string = fmt.format(task_id=task_id, course_id=course_id, task_input=task_input)

     TASK_LOG.info('Starting update (nothing %s yet): %s', action_name, task_info_string)
```