Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
6e873a48
Commit
6e873a48
authored
Jul 10, 2017
by
rabiaiftikhar
Committed by
rabia23
Jul 12, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
EDUCATOR-810 fix UnicodeDecodeError
parent
ead5e9f9
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
53 additions
and
0 deletions
+53
-0
cms/envs/common.py
+3
-0
openedx/core/djangoapps/crawlers/models.py
+9
-0
openedx/core/djangoapps/crawlers/tests/__init__.py
+0
-0
openedx/core/djangoapps/crawlers/tests/test_models.py
+41
-0
No files found.
cms/envs/common.py
View file @
6e873a48
...
...
@@ -932,6 +932,9 @@ INSTALLED_APPS = (
# Site configuration for theming and behavioral modification
'openedx.core.djangoapps.site_configuration'
,
# Ability to detect and special-case crawler behavior
'openedx.core.djangoapps.crawlers'
,
# comment common
'django_comment_common'
,
...
...
openedx/core/djangoapps/crawlers/models.py
View file @
6e873a48
...
...
@@ -2,6 +2,7 @@
This module handles the detection of crawlers, so that we can handle them
appropriately in other parts of the code.
"""
import
six
from
config_models.models
import
ConfigurationModel
from
django.db
import
models
...
...
@@ -39,6 +40,14 @@ class CrawlersConfig(ConfigurationModel):
if
(
not
req_user_agent
)
or
(
not
crawler_agents
):
return
False
# The crawler_agents list we pull from our model always has unicode objects, but the
# req_user_agent we get from HTTP headers ultimately comes to us via WSGI. That
# value is an ISO-8859-1 encoded byte string in Python 2.7 (and in the HTTP spec), but
# it will be a unicode str when we move to Python 3.x. This code should work under
# either version.
if
isinstance
(
req_user_agent
,
six
.
binary_type
):
crawler_agents
=
[
crawler_agent
.
encode
(
'iso-8859-1'
)
for
crawler_agent
in
crawler_agents
]
# We perform prefix matching of the crawler agent here so that we don't
# have to worry about version bumps.
return
any
(
...
...
openedx/core/djangoapps/crawlers/tests/__init__.py
0 → 100644
View file @
6e873a48
openedx/core/djangoapps/crawlers/tests/test_models.py
0 → 100644
View file @
6e873a48
# -*- coding: utf-8 -*-
"""
Tests for detecting whether a request came from a crawler.
"""
import
ddt
from
django.test
import
TestCase
from
django.http
import
HttpRequest
from
..models
import
CrawlersConfig
@ddt.ddt
class CrawlersConfigTest(TestCase):
    """
    Exercises CrawlersConfig.is_crawler with crawler and non-crawler User-Agent values.
    """

    def setUp(self):
        super(CrawlersConfigTest, self).setUp()
        # Persist an enabled configuration whose known agents are the ones
        # the positive test below expects to be recognised.
        crawlers_config = CrawlersConfig(
            enabled=True,
            known_user_agents='edX-downloader,crawler_foo',
        )
        crawlers_config.save()

    def _request_with_user_agent(self, user_agent):
        """
        Return a bare HttpRequest whose HTTP_USER_AGENT entry is ``user_agent``.
        """
        request = HttpRequest()
        request.META['HTTP_USER_AGENT'] = user_agent
        return request

    @ddt.data(
        "Mozilla/5.0 (Linux; Android 5.1; Nexus 5 Build/LMY47I; wv) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Version/4.0 Chrome/47.0.2526.100 Mobile Safari/537.36 edX/org.edx.mobile/2.0.0",
        "Le Héros des Deux Mondes",
    )
    def test_req_user_agent_is_not_crawler(self, req_user_agent):
        """
        A User-Agent that matches no configured agent (including a non-ASCII
        value) must not be reported as a crawler.
        """
        request = self._request_with_user_agent(req_user_agent)
        self.assertFalse(CrawlersConfig.is_crawler(request))

    @ddt.data(
        u"edX-downloader",
        "crawler_foo".encode("utf-8"),
    )
    def test_req_user_agent_is_crawler(self, req_user_agent):
        """
        A User-Agent equal to a configured agent must be reported as a crawler,
        whether it is passed as text or as an encoded value.
        """
        request = self._request_with_user_agent(req_user_agent)
        self.assertTrue(CrawlersConfig.is_crawler(request))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment