Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
course-discovery
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
course-discovery
Commits
5cd58ec7
Commit
5cd58ec7
authored
Feb 21, 2017
by
Matthew Piatetsky
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add config model to control max threads for data loaders
ECOM-7188
parent
ae50ef24
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
53 additions
and
30 deletions
+53
-30
course_discovery/apps/course_metadata/admin.py
+1
-1
course_discovery/apps/course_metadata/management/commands/refresh_course_metadata.py
+15
-25
course_discovery/apps/course_metadata/management/commands/tests/test_refresh_course_metadata.py
+4
-4
course_discovery/apps/course_metadata/migrations/0048_dataloaderconfig.py
+25
-0
course_discovery/apps/course_metadata/models.py
+8
-0
No files found.
course_discovery/apps/course_metadata/admin.py
View file @
5cd58ec7
...
...
@@ -222,5 +222,5 @@ for model in (LevelType, Prerequisite,):
# Register remaining models using basic ModelAdmin classes
for
model
in
(
Image
,
Video
,
ExpectedLearningItem
,
SyllabusItem
,
PersonSocialNetwork
,
CourseRunSocialNetwork
,
JobOutlookItem
,):
JobOutlookItem
,
DataLoaderConfig
):
admin
.
site
.
register
(
model
)
course_discovery/apps/course_metadata/management/commands/refresh_course_metadata.py
View file @
5cd58ec7
...
...
@@ -16,7 +16,7 @@ from course_discovery.apps.course_metadata.data_loaders.marketing_site import (
CourseMarketingSiteDataLoader
,
PersonMarketingSiteDataLoader
,
SchoolMarketingSiteDataLoader
,
SponsorMarketingSiteDataLoader
,
SubjectMarketingSiteDataLoader
,
XSeriesMarketingSiteDataLoader
)
from
course_discovery.apps.course_metadata.models
import
Course
from
course_discovery.apps.course_metadata.models
import
Course
,
DataLoaderConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -59,18 +59,7 @@ class Command(BaseCommand):
help
=
'The short code for a specific partner to refresh.'
)
parser
.
add_argument
(
'-w'
,
'--max_workers'
,
type
=
int
,
action
=
'store'
,
dest
=
'max_workers'
,
default
=
7
,
help
=
'Number of worker threads to use when traversing paginated responses.'
)
def
handle
(
self
,
*
args
,
**
options
):
max_workers
=
options
.
get
(
'max_workers'
)
# For each partner defined...
partners
=
Partner
.
objects
.
all
()
...
...
@@ -129,6 +118,7 @@ class Command(BaseCommand):
# as an update, significantly lowering the probability of race conditions.
courses_exist
=
Course
.
objects
.
filter
(
partner
=
partner
)
.
exists
()
is_threadsafe
=
courses_exist
and
waffle
.
switch_is_active
(
'threaded_metadata_write'
)
max_workers
=
DataLoaderConfig
.
get_solo
()
.
max_workers
logger
.
info
(
'Command is{negation} using threads to write data.'
.
format
(
negation
=
''
if
is_threadsafe
else
' not'
)
...
...
@@ -136,31 +126,31 @@ class Command(BaseCommand):
pipeline
=
(
(
(
SubjectMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
None
),
(
SchoolMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
None
),
(
SponsorMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
None
),
(
PersonMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
None
),
(
SubjectMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
max_workers
),
(
SchoolMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
max_workers
),
(
SponsorMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
max_workers
),
(
PersonMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
max_workers
),
),
(
(
CourseMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
None
),
(
OrganizationsApiDataLoader
,
partner
.
organizations_api_url
,
None
),
(
CourseMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
max_workers
),
(
OrganizationsApiDataLoader
,
partner
.
organizations_api_url
,
max_workers
),
),
(
(
CoursesApiDataLoader
,
partner
.
courses_api_url
,
None
),
(
CoursesApiDataLoader
,
partner
.
courses_api_url
,
max_workers
),
),
(
(
EcommerceApiDataLoader
,
partner
.
ecommerce_api_url
,
1
),
(
ProgramsApiDataLoader
,
partner
.
programs_api_url
,
None
),
(
ProgramsApiDataLoader
,
partner
.
programs_api_url
,
max_workers
),
),
(
(
XSeriesMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
None
),
(
XSeriesMarketingSiteDataLoader
,
partner
.
marketing_site_url_root
,
max_workers
),
),
)
if
waffle
.
switch_is_active
(
'parallel_refresh_pipeline'
):
for
stage
in
pipeline
:
with
concurrent
.
futures
.
ProcessPoolExecutor
()
as
executor
:
for
loader_class
,
api_url
,
max_workers
_override
in
stage
:
for
loader_class
,
api_url
,
max_workers
in
stage
:
if
api_url
:
executor
.
submit
(
execute_parallel_loader
,
...
...
@@ -169,13 +159,13 @@ class Command(BaseCommand):
api_url
,
access_token
,
token_type
,
(
max_workers_override
or
max_workers
)
,
max_workers
,
is_threadsafe
,
**
kwargs
,
)
else
:
# Flatten pipeline and run serially.
for
loader_class
,
api_url
,
max_workers
_override
in
itertools
.
chain
(
*
(
stage
for
stage
in
pipeline
)):
for
loader_class
,
api_url
,
max_workers
in
itertools
.
chain
(
*
(
stage
for
stage
in
pipeline
)):
if
api_url
:
execute_loader
(
loader_class
,
...
...
@@ -183,7 +173,7 @@ class Command(BaseCommand):
api_url
,
access_token
,
token_type
,
(
max_workers_override
or
max_workers
)
,
max_workers
,
is_threadsafe
,
**
kwargs
,
)
...
...
course_discovery/apps/course_metadata/management/commands/tests/test_refresh_course_metadata.py
View file @
5cd58ec7
...
...
@@ -135,8 +135,8 @@ class RefreshCourseMetadataCommandTests(TransactionTestCase):
# Set up expected calls
expected_calls
=
[
mock
.
call
(
loader_class
,
self
.
partner
,
api_url
,
ACCESS_TOKEN
,
'JWT'
,
max_workers
_override
or
7
,
False
,
**
self
.
kwargs
)
for
loader_class
,
api_url
,
max_workers
_override
in
self
.
pipeline
]
ACCESS_TOKEN
,
'JWT'
,
max_workers
or
7
,
False
,
**
self
.
kwargs
)
for
loader_class
,
api_url
,
max_workers
in
self
.
pipeline
]
mock_executor
.
assert_has_calls
(
expected_calls
)
def
test_refresh_course_metadata_parallel
(
self
):
...
...
@@ -157,8 +157,8 @@ class RefreshCourseMetadataCommandTests(TransactionTestCase):
# Set up expected calls
expected_calls
=
[
mock
.
call
(
execute_parallel_loader
,
loader_class
,
self
.
partner
,
api_url
,
ACCESS_TOKEN
,
'JWT'
,
max_workers
_override
or
7
,
True
,
**
self
.
kwargs
)
for
loader_class
,
api_url
,
max_workers
_override
in
self
.
pipeline
]
'JWT'
,
max_workers
or
7
,
True
,
**
self
.
kwargs
)
for
loader_class
,
api_url
,
max_workers
in
self
.
pipeline
]
mock_executor
.
assert_has_calls
(
expected_calls
,
any_order
=
True
)
def
test_refresh_course_metadata_with_invalid_partner_code
(
self
):
...
...
course_discovery/apps/course_metadata/migrations/0048_dataloaderconfig.py
0 → 100644
View file @
5cd58ec7
# -*- coding: utf-8 -*-
# Generated by Django 1.9.11 on 2017-02-21 20:11
from
__future__
import
unicode_literals
from
django.db
import
migrations
,
models
class Migration(migrations.Migration):
    """Schema migration that creates the ``DataLoaderConfig`` model in the ``course_metadata`` app."""

    # Applied after migration 0047_personwork of the same app.
    dependencies = [
        ('course_metadata', '0047_personwork'),
    ]

    operations = [
        migrations.CreateModel(
            name='DataLoaderConfig',
            fields=[
                # Auto-created integer primary key.
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                # Configurable worker count; new rows default to 7.
                ('max_workers', models.PositiveSmallIntegerField(default=7)),
            ],
            options={
                'abstract': False,
            },
        ),
    ]
course_discovery/apps/course_metadata/models.py
View file @
5cd58ec7
...
...
@@ -15,6 +15,7 @@ from django_extensions.db.fields import AutoSlugField
from
django_extensions.db.models
import
TimeStampedModel
from
haystack
import
connections
from
haystack.query
import
SearchQuerySet
from
solo.models
import
SingletonModel
from
sortedm2m.fields
import
SortedManyToManyField
from
stdimage.models
import
StdImageField
from
stdimage.utils
import
UploadToAutoSlug
...
...
@@ -902,3 +903,10 @@ class CourseRunSocialNetwork(AbstractSocialNetworkModel):
class PersonWork(AbstractValueModel):
    """ Person Works model. """
    # The Person this work belongs to; reachable from a Person instance via `person_works`.
    person = models.ForeignKey(Person, related_name='person_works')
class DataLoaderConfig(SingletonModel):
    """
    Configuration for data loaders used in the refresh_course_metadata command.

    The command reads the value with ``DataLoaderConfig.get_solo().max_workers``
    and passes it to the data loaders in its pipeline.
    """
    # Worker count handed to the data loaders; defaults to 7.
    # NOTE(review): PositiveSmallIntegerField also accepts 0 -- confirm the loaders
    # cope with a zero worker count, or restrict the value when editing the config.
    max_workers = models.PositiveSmallIntegerField(default=7)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment