Commit 35e7a733 by Marko Jevtic

Addresses design review feedback

parent 67b64a80
...@@ -196,8 +196,20 @@ class CapaDescriptor(CapaFields, RawDescriptor): ...@@ -196,8 +196,20 @@ class CapaDescriptor(CapaFields, RawDescriptor):
Return dictionary prepared with module content and type for indexing. Return dictionary prepared with module content and type for indexing.
""" """
xblock_body = super(CapaDescriptor, self).index_dictionary() xblock_body = super(CapaDescriptor, self).index_dictionary()
# Removing solution # Removing solutions and hints, as well as script and style
capa_content = re.sub(re.compile(r"<solution>.*</solution>", re.DOTALL), "", self.data) capa_content = re.sub(
re.compile(
r"""
<solution>.*?</solution> |
<script>.*?</script> |
<style>.*?</style> |
<[a-z]*hint.*?>.*?</[a-z]*hint>
""",
re.DOTALL |
re.VERBOSE),
"",
self.data
)
# Removing HTML-encoded non-breaking space characters # Removing HTML-encoded non-breaking space characters
capa_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(capa_content)) capa_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(capa_content))
# Removing HTML CDATA # Removing HTML CDATA
......
...@@ -275,8 +275,20 @@ class HtmlDescriptor(HtmlFields, XmlDescriptor, EditingDescriptor): # pylint: d ...@@ -275,8 +275,20 @@ class HtmlDescriptor(HtmlFields, XmlDescriptor, EditingDescriptor): # pylint: d
def index_dictionary(self): def index_dictionary(self):
xblock_body = super(HtmlDescriptor, self).index_dictionary() xblock_body = super(HtmlDescriptor, self).index_dictionary()
# Removing script and style
html_content = re.sub(
re.compile(
r"""
<script>.*?</script> |
<style>.*?</style>
""",
re.DOTALL |
re.VERBOSE),
"",
self.data
)
# Removing HTML-encoded non-breaking space characters # Removing HTML-encoded non-breaking space characters
html_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(self.data)) html_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(html_content))
# Removing HTML CDATA # Removing HTML CDATA
html_content = re.sub(r"<!\[CDATA\[.*\]\]>", "", html_content) html_content = re.sub(r"<!\[CDATA\[.*\]\]>", "", html_content)
# Removing HTML comments # Removing HTML comments
......
...@@ -59,7 +59,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase): ...@@ -59,7 +59,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
Make sure that HtmlDescriptor can format data for indexing as expected. Make sure that HtmlDescriptor can format data for indexing as expected.
""" """
def test_index_dictionary(self): def test_index_dictionary_simple_html_module(self):
sample_xml = ''' sample_xml = '''
<html> <html>
<p>Hello World!</p> <p>Hello World!</p>
...@@ -71,6 +71,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase): ...@@ -71,6 +71,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
"content_type": "Text" "content_type": "Text"
}) })
def test_index_dictionary_cdata_html_module(self):
sample_xml_cdata = ''' sample_xml_cdata = '''
<html> <html>
<p>This has CDATA in it.</p> <p>This has CDATA in it.</p>
...@@ -83,6 +84,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase): ...@@ -83,6 +84,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
"content_type": "Text" "content_type": "Text"
}) })
def test_index_dictionary_multiple_spaces_html_module(self):
sample_xml_tab_spaces = ''' sample_xml_tab_spaces = '''
<html> <html>
<p> Text has spaces :) </p> <p> Text has spaces :) </p>
...@@ -94,6 +96,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase): ...@@ -94,6 +96,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
"content_type": "Text" "content_type": "Text"
}) })
def test_index_dictionary_html_module_with_comment(self):
sample_xml_comment = ''' sample_xml_comment = '''
<html> <html>
<p>This has HTML comment in it.</p> <p>This has HTML comment in it.</p>
...@@ -106,6 +109,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase): ...@@ -106,6 +109,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
"content_type": "Text" "content_type": "Text"
}) })
def test_index_dictionary_html_module_with_both_comments_and_cdata(self):
sample_xml_mix_comment_cdata = ''' sample_xml_mix_comment_cdata = '''
<html> <html>
<!-- Beginning of the html --> <!-- Beginning of the html -->
...@@ -120,3 +124,23 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase): ...@@ -120,3 +124,23 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
"content": {"html_content": " This has HTML comment in it. HTML end. ", "display_name": "Text"}, "content": {"html_content": " This has HTML comment in it. HTML end. ", "display_name": "Text"},
"content_type": "Text" "content_type": "Text"
}) })
def test_index_dictionary_html_module_with_script_and_style_tags(self):
    # Markup mixing <style>, <script>, HTML comments and a CDATA section;
    # the expected index below keeps only the two paragraph texts.
    markup_with_style_and_script = '''
<html>
<style>p {color: green;}</style>
<!-- Beginning of the html -->
<p>This has HTML comment in it.<!-- Commenting Content --></p>
<!-- Here comes CDATA -->
<![CDATA[This is just a CDATA!]]>
<p>HTML end.</p>
<script>
var message = "Hello world!"
</script>
</html>
'''
    expected_index = {
        "content": {"html_content": " This has HTML comment in it. HTML end. ", "display_name": "Text"},
        "content_type": "Text"
    }
    # Build the descriptor from the sample markup and compare what it would index.
    indexed = instantiate_descriptor(data=markup_with_style_and_script).index_dictionary()
    self.assertEqual(indexed, expected_index)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment