Commit 6e873a48 by rabiaiftikhar Committed by rabia23

EDUCATOR-810 fix UnicodeDecodeError

parent ead5e9f9
...@@ -932,6 +932,9 @@ INSTALLED_APPS = ( ...@@ -932,6 +932,9 @@ INSTALLED_APPS = (
# Site configuration for theming and behavioral modification # Site configuration for theming and behavioral modification
'openedx.core.djangoapps.site_configuration', 'openedx.core.djangoapps.site_configuration',
# Ability to detect and special-case crawler behavior
'openedx.core.djangoapps.crawlers',
# comment common # comment common
'django_comment_common', 'django_comment_common',
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
This module handles the detection of crawlers, so that we can handle them This module handles the detection of crawlers, so that we can handle them
appropriately in other parts of the code. appropriately in other parts of the code.
""" """
import six
from config_models.models import ConfigurationModel from config_models.models import ConfigurationModel
from django.db import models from django.db import models
...@@ -39,6 +40,14 @@ class CrawlersConfig(ConfigurationModel): ...@@ -39,6 +40,14 @@ class CrawlersConfig(ConfigurationModel):
if (not req_user_agent) or (not crawler_agents): if (not req_user_agent) or (not crawler_agents):
return False return False
# The crawler_agents list we pull from our model always has unicode objects, but the
# req_user_agent we get from HTTP headers ultimately comes to us via WSGI. That
# value is an ISO-8859-1 encoded byte string in Python 2.7 (and in the HTTP spec), but
# it will be a unicode str when we move to Python 3.x. This code should work under
# either version.
if isinstance(req_user_agent, six.binary_type):
crawler_agents = [crawler_agent.encode('iso-8859-1') for crawler_agent in crawler_agents]
# We perform prefix matching of the crawler agent here so that we don't # We perform prefix matching of the crawler agent here so that we don't
# have to worry about version bumps. # have to worry about version bumps.
return any( return any(
......
# -*- coding: utf-8 -*-
"""
Tests for detecting whether or not a request came from a crawler.
"""
import ddt
from django.test import TestCase
from django.http import HttpRequest
from ..models import CrawlersConfig
@ddt.ddt
class CrawlersConfigTest(TestCase):
    """
    Checks CrawlersConfig.is_crawler against several User-Agent header values.
    """

    def setUp(self):
        """
        Save an enabled CrawlersConfig that lists two known crawler agents.
        """
        super(CrawlersConfigTest, self).setUp()
        config = CrawlersConfig(known_user_agents='edX-downloader,crawler_foo', enabled=True)
        config.save()

    def _is_crawler_for(self, user_agent):
        """
        Return what CrawlersConfig.is_crawler reports for a bare HttpRequest
        whose HTTP_USER_AGENT entry in META is set to `user_agent`.
        """
        request = HttpRequest()
        request.META['HTTP_USER_AGENT'] = user_agent
        return CrawlersConfig.is_crawler(request)

    # Neither value starts with one of the agents saved in setUp; the second
    # one contains a non-ASCII character.
    @ddt.data(
        "Mozilla/5.0 (Linux; Android 5.1; Nexus 5 Build/LMY47I; wv) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Version/4.0 Chrome/47.0.2526.100 Mobile Safari/537.36 edX/org.edx.mobile/2.0.0",
        "Le Héros des Deux Mondes",
    )
    def test_req_user_agent_is_not_crawler(self, req_user_agent):
        """
        A user agent that matches no configured crawler must not be flagged.
        """
        detected = self._is_crawler_for(req_user_agent)
        self.assertFalse(detected)

    # One value is a unicode literal, the other is explicitly encoded, so
    # both kinds of header value are exercised.
    @ddt.data(
        u"edX-downloader",
        "crawler_foo".encode("utf-8")
    )
    def test_req_user_agent_is_crawler(self, req_user_agent):
        """
        A user agent equal to a configured crawler agent must be flagged.
        """
        detected = self._is_crawler_for(req_user_agent)
        self.assertTrue(detected)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment