Commit 6e873a48 by rabiaiftikhar Committed by rabia23

EDUCATOR-810 fix UnicodeDecodeError

parent ead5e9f9
......@@ -932,6 +932,9 @@ INSTALLED_APPS = (
# Site configuration for theming and behavioral modification
'openedx.core.djangoapps.site_configuration',
# Ability to detect and special-case crawler behavior
'openedx.core.djangoapps.crawlers',
# comment common
'django_comment_common',
......
......@@ -2,6 +2,7 @@
This module handles the detection of crawlers, so that we can handle them
appropriately in other parts of the code.
"""
import six
from config_models.models import ConfigurationModel
from django.db import models
......@@ -39,6 +40,14 @@ class CrawlersConfig(ConfigurationModel):
if (not req_user_agent) or (not crawler_agents):
return False
# The crawler_agents list we pull from our model always has unicode objects, but the
# req_user_agent we get from HTTP headers ultimately comes to us via WSGI. That
# value is an ISO-8859-1 encoded byte string in Python 2.7 (and in the HTTP spec), but
# it will be a unicode str when we move to Python 3.x. This code should work under
# either version.
if isinstance(req_user_agent, six.binary_type):
crawler_agents = [crawler_agent.encode('iso-8859-1') for crawler_agent in crawler_agents]
# We perform prefix matching of the crawler agent here so that we don't
# have to worry about version bumps.
return any(
......
# -*- coding: utf-8 -*-
"""
Tests that the request came from a crawler or not.
"""
import ddt
from django.test import TestCase
from django.http import HttpRequest
from ..models import CrawlersConfig
@ddt.ddt
class CrawlersConfigTest(TestCase):
    """
    Exercises CrawlersConfig.is_crawler with crawler and non-crawler user agents.
    """

    def setUp(self):
        super(CrawlersConfigTest, self).setUp()
        # Every test runs against an enabled config that knows two crawler agents.
        crawlers_config = CrawlersConfig(enabled=True, known_user_agents='edX-downloader,crawler_foo')
        crawlers_config.save()

    def _is_crawler(self, user_agent):
        """
        Build a bare request whose User-Agent header is ``user_agent`` and
        return what CrawlersConfig.is_crawler reports for it.
        """
        request = HttpRequest()
        request.META['HTTP_USER_AGENT'] = user_agent
        return CrawlersConfig.is_crawler(request)

    @ddt.data(
        "Mozilla/5.0 (Linux; Android 5.1; Nexus 5 Build/LMY47I; wv) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Version/4.0 Chrome/47.0.2526.100 Mobile Safari/537.36 edX/org.edx.mobile/2.0.0",
        "Le Héros des Deux Mondes",
    )
    def test_req_user_agent_is_not_crawler(self, req_user_agent):
        """
        User agents that match none of the configured crawlers (including one
        containing a non-ASCII character) must not be reported as crawlers.
        """
        self.assertFalse(self._is_crawler(req_user_agent))

    @ddt.data(
        u"edX-downloader",
        "crawler_foo".encode("utf-8")
    )
    def test_req_user_agent_is_crawler(self, req_user_agent):
        """
        User agents equal to a configured crawler name must be reported as
        crawlers, whether supplied as a unicode literal or as an encoded value.
        """
        self.assertTrue(self._is_crawler(req_user_agent))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment