Addresses design review feedback

35e7a733 · Marko Jevtic · 67b64a80
Commit 35e7a733 authored Aug 13, 2015 by Marko Jevtic
Showing 4 changed files with 52 additions and 4 deletions

common/lib/xmodule/xmodule/capa_module.py
+14 -2

common/lib/xmodule/xmodule/html_module.py
+13 -1

common/lib/xmodule/xmodule/tests/test_capa_module.py
+0 -0

common/lib/xmodule/xmodule/tests/test_html_module.py
+25 -1

No files found.
--- a/common/lib/xmodule/xmodule/capa_module.py
+++ b/common/lib/xmodule/xmodule/capa_module.py
@@ -196,8 +196,20 @@ class CapaDescriptor(CapaFields, RawDescriptor):
        Return dictionary prepared with module content and type for indexing.
        """
        xblock_body = super(CapaDescriptor, self).index_dictionary()
-        # Removing solution
+        # Removing solutions and hints, as well as script and style
-        capa_content = re.sub(re.compile(r"<solution>.*</solution>", re.DOTALL), "", self.data)
+        capa_content = re.sub(
+            re.compile(
+                r"""
+                    <solution>.*?</solution> |
+                    <script>.*?</script> |
+                    <style>.*?</style> |
+                    <[a-z]*hint.*?>.*?</[a-z]*hint>
+                """,
+                re.DOTALL |
+                re.VERBOSE),
+            "",
+            self.data
+        )
        # Removing HTML-encoded non-breaking space characters
        capa_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(capa_content))
        # Removing HTML CDATA

--- a/common/lib/xmodule/xmodule/html_module.py
+++ b/common/lib/xmodule/xmodule/html_module.py
@@ -275,8 +275,20 @@ class HtmlDescriptor(HtmlFields, XmlDescriptor, EditingDescriptor):  # pylint: d
    def index_dictionary(self):
        xblock_body = super(HtmlDescriptor, self).index_dictionary()
+        # Removing script and style
+        html_content = re.sub(
+            re.compile(
+                r"""
+                    <script>.*?</script> |
+                    <style>.*?</style>
+                """,
+                re.DOTALL |
+                re.VERBOSE),
+            "",
+            self.data
+        )
        # Removing HTML-encoded non-breaking space characters
-        html_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(self.data))
+        html_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(html_content))
        # Removing HTML CDATA
        html_content = re.sub(r"<!\[CDATA\[.*\]\]>", "", html_content)
        # Removing HTML comments

--- a/common/lib/xmodule/xmodule/tests/test_capa_module.py
+++ b/common/lib/xmodule/xmodule/tests/test_capa_module.py
--- a/common/lib/xmodule/xmodule/tests/test_html_module.py
+++ b/common/lib/xmodule/xmodule/tests/test_html_module.py
@@ -59,7 +59,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
    Make sure that HtmlDescriptor can format data for indexing as expected.
    """
-    def test_index_dictionary(self):
+    def test_index_dictionary_simple_html_module(self):
        sample_xml = '''
            <html>
                <p>Hello World!</p>
@@ -71,6 +71,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
            "content_type": "Text"
        })
+    def test_index_dictionary_cdata_html_module(self):
        sample_xml_cdata = '''
            <html>
                <p>This has CDATA in it.</p>
@@ -83,6 +84,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
            "content_type": "Text"
        })
+    def test_index_dictionary_multiple_spaces_html_module(self):
        sample_xml_tab_spaces = '''
            <html>
                <p>     Text has spaces :)  </p>
@@ -94,6 +96,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
            "content_type": "Text"
        })
+    def test_index_dictionary_html_module_with_comment(self):
        sample_xml_comment = '''
            <html>
                <p>This has HTML comment in it.</p>
@@ -106,6 +109,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
            "content_type": "Text"
        })
+    def test_index_dictionary_html_module_with_both_comments_and_cdata(self):
        sample_xml_mix_comment_cdata = '''
            <html>
                <!-- Beginning of the html -->
@@ -120,3 +124,23 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
            "content": {"html_content": " This has HTML comment in it. HTML end. ", "display_name": "Text"},
            "content_type": "Text"
        })
+    def test_index_dictionary_html_module_with_script_and_style_tags(self):
+        sample_xml_style_script_tags = '''
+            <html>
+                <style>p {color: green;}</style>
+                <!-- Beginning of the html -->
+                <p>This has HTML comment in it.<!-- Commenting Content --></p>
+                <!-- Here comes CDATA -->
+                <![CDATA[This is just a CDATA!]]>
+                <p>HTML end.</p>
+                <script>
+                    var message = "Hello world!"
+                </script>
+            </html>
+        '''
+        descriptor = instantiate_descriptor(data=sample_xml_style_script_tags)
+        self.assertEqual(descriptor.index_dictionary(), {
+            "content": {"html_content": " This has HTML comment in it. HTML end. ", "display_name": "Text"},
+            "content_type": "Text"
+        })