Merge pull request #9283 from edx/mjevtic/SOL-1040

(SOL-1040) Indexing capa problems

Merge pull request #9283 from edx/mjevtic/SOL-1040
(SOL-1040) Indexing capa problems
45832b98 · Marko Jevtić · 7b346f7d · 8faff973 · 45832b98 · 45832b98
Commit 45832b98 authored Aug 24, 2015 by Marko Jevtić
7 changed files
--- a/common/lib/xmodule/xmodule/capa_module.py
+++ b/common/lib/xmodule/xmodule/capa_module.py
@@ -2,6 +2,7 @@
 import json
 import logging
 import sys
+import re
 from lxml import etree
 from pkg_resources import resource_string
@@ -10,6 +11,7 @@ import dogstats_wrapper as dog_stats_api
 from .capa_base import CapaMixin, CapaFields, ComplexEncoder
 from capa import responsetypes
 from .progress import Progress
+from xmodule.util.misc import escape_html_characters
 from xmodule.x_module import XModule, module_attr, DEPRECATION_VSCOMPAT_EVENT
 from xmodule.raw_module import RawDescriptor
 from xmodule.exceptions import NotFoundError, ProcessingError
@@ -193,16 +195,33 @@ class CapaDescriptor(CapaFields, RawDescriptor):
        """
        Return dictionary prepared with module content and type for indexing.
        """
-        result = super(CapaDescriptor, self).index_dictionary()
+        xblock_body = super(CapaDescriptor, self).index_dictionary()
-        if not result:
+        # Removing solution, script, style and *hint elements (tags plus enclosed text) before indexing
-            result = {}
+        capa_content = re.sub(
-        index = {
+            re.compile(
-            'content_type': self.INDEX_CONTENT_TYPE,
+                r"""
-            'problem_types': list(self.problem_types),
+                    <solution>.*?</solution> |
-            "display_name": self.display_name
+                    <script>.*?</script> |
+                    <style>.*?</style> |
+                    <[a-z]*hint.*?>.*?</[a-z]*hint>
+                """,
+                re.DOTALL |
+                re.VERBOSE),
+            "",
+            self.data
+        )
+        capa_content = escape_html_characters(capa_content)
+        capa_body = {
+            "capa_content": capa_content,
+            "display_name": self.display_name,
        }
-        result.update(index)
+        if "content" in xblock_body:
-        return result
+            xblock_body["content"].update(capa_body)
+        else:
+            xblock_body["content"] = capa_body
+        xblock_body["content_type"] = self.INDEX_CONTENT_TYPE
+        xblock_body["problem_types"] = list(self.problem_types)
+        return xblock_body
    def has_support(self, view, functionality):
        """

--- a/common/lib/xmodule/xmodule/html_module.py
+++ b/common/lib/xmodule/xmodule/html_module.py
@@ -10,7 +10,7 @@ from fs.errors import ResourceNotFoundError
 from pkg_resources import resource_string
 import dogstats_wrapper as dog_stats_api
-from xmodule.annotator_mixin import html_to_text
+from xmodule.util.misc import escape_html_characters
 from xmodule.contentstore.content import StaticContent
 from xmodule.editing_module import EditingDescriptor
 from xmodule.edxnotes_utils import edxnotes
@@ -275,12 +275,19 @@ class HtmlDescriptor(HtmlFields, XmlDescriptor, EditingDescriptor):  # pylint: d
    def index_dictionary(self):
        xblock_body = super(HtmlDescriptor, self).index_dictionary()
-        # Removing HTML-encoded non-breaking space characters
+        # Removing <script>...</script> and <style>...</style> blocks (tags plus enclosed text)
-        html_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(self.data))
+        html_content = re.sub(
-        # Removing HTML CDATA
+            re.compile(
-        html_content = re.sub(r"<!\[CDATA\[.*\]\]>", "", html_content)
+                r"""
-        # Removing HTML comments
+                    <script>.*?</script> |
-        html_content = re.sub(r"<!--.*-->", "", html_content)
+                    <style>.*?</style>
+                """,
+                re.DOTALL |
+                re.VERBOSE),
+            "",
+            self.data
+        )
+        html_content = escape_html_characters(html_content)
        html_body = {
            "html_content": html_content,
            "display_name": self.display_name,

--- a/common/lib/xmodule/xmodule/tests/test_capa_module.py
+++ b/common/lib/xmodule/xmodule/tests/test_capa_module.py
@@ -1707,6 +1707,363 @@ class CapaModuleTest(unittest.TestCase):
 @ddt.ddt
 class CapaDescriptorTest(unittest.TestCase):
+    sample_checkbox_problem_xml = textwrap.dedent("""
+        <problem>
+            <p>Title</p>
+            <p>Description</p>
+            <p>Example</p>
+            <p>The following languages are in the Indo-European family:</p>
+            <choiceresponse>
+              <checkboxgroup label="The following languages are in the Indo-European family:">
+                <choice correct="true">Urdu</choice>
+                <choice correct="false">Finnish</choice>
+                <choice correct="true">Marathi</choice>
+                <choice correct="true">French</choice>
+                <choice correct="false">Hungarian</choice>
+              </checkboxgroup>
+            </choiceresponse>
+            <p>Note: Make sure you select all of the correct options—there may be more than one!</p>
+            <solution>
+            <div class="detailed-solution">
+            <p>Explanation</p>
+            <p>Solution for CAPA problem</p>
+            </div>
+            </solution>
+        </problem>
+    """)
+    sample_dropdown_problem_xml = textwrap.dedent("""
+        <problem>
+            <p>Dropdown problems allow learners to select only one option from a list of options.</p>
+            <p>Description</p>
+            <p>You can use the following example problem as a model.</p>
+            <p> Which of the following countries celebrates its independence on August 15?</p>
+            <optionresponse>
+              <optioninput label="lbl" options="('India','Spain','China','Bermuda')" correct="India"></optioninput>
+            </optionresponse>
+             <solution>
+            <div class="detailed-solution">
+            <p>Explanation</p>
+            <p> India became an independent nation on August 15, 1947.</p>
+            </div>
+            </solution>
+        </problem>
+    """)
+    sample_multichoice_problem_xml = textwrap.dedent("""
+        <problem>
+            <p>Multiple choice problems allow learners to select only one option.</p>
+            <p>When you add the problem, be sure to select Settings to specify a Display Name and other values.</p>
+            <p>You can use the following example problem as a model.</p>
+            <p>Which of the following countries has the largest population?</p>
+            <multiplechoiceresponse>
+              <choicegroup label="Which of the following countries has the largest population?" type="MultipleChoice">
+                <choice correct="false">Brazil
+                    <choicehint>timely feedback -- explain why an almost correct answer is wrong</choicehint>
+                </choice>
+                <choice correct="false">Germany</choice>
+                <choice correct="true">Indonesia</choice>
+                <choice correct="false">Russia</choice>
+              </choicegroup>
+            </multiplechoiceresponse>
+            <solution>
+            <div class="detailed-solution">
+            <p>Explanation</p>
+            <p>According to September 2014 estimates:</p>
+            <p>The population of Indonesia is approximately 250 million.</p>
+            <p>The population of Brazil  is approximately 200 million.</p>
+            <p>The population of Russia is approximately 146 million.</p>
+            <p>The population of Germany is approximately 81 million.</p>
+            </div>
+            </solution>
+        </problem>
+    """)
+    sample_numerical_input_problem_xml = textwrap.dedent("""
+        <problem>
+            <p>In a numerical input problem, learners enter numbers or a specific and relatively simple mathematical
+            expression. Learners enter the response in plain text, and the system then converts the text to a symbolic
+            expression that learners can see below the response field.</p>
+            <p>The system can handle several types of characters, including basic operators, fractions, exponents, and
+            common constants such as "i". You can refer learners to "Entering Mathematical and Scientific Expressions"
+            in the edX Guide for Students for more information.</p>
+            <p>When you add the problem, be sure to select Settings to specify a Display Name and other values that
+            apply.</p>
+            <p>You can use the following example problems as models.</p>
+            <p>How many miles away from Earth is the sun? Use scientific notation to answer.</p>
+            <numericalresponse answer="9.3*10^7">
+              <formulaequationinput label="How many miles away from Earth is the sun?
+              Use scientific notation to answer." />
+            </numericalresponse>
+            <p>The square of what number is -100?</p>
+            <numericalresponse answer="10*i">
+              <formulaequationinput label="The square of what number is -100?" />
+            </numericalresponse>
+            <solution>
+            <div class="detailed-solution">
+            <p>Explanation</p>
+            <p>The sun is 93,000,000, or 9.3*10^7, miles away from Earth.</p>
+            <p>-100 is the square of 10 times the imaginary number, i.</p>
+            </div>
+            </solution>
+        </problem>
+    """)
+    sample_text_input_problem_xml = textwrap.dedent("""
+        <problem>
+            <p>In text input problems, also known as "fill-in-the-blank" problems, learners enter text into a response
+            field. The text can include letters and characters such as punctuation marks. The text that the learner
+            enters must match your specified answer text exactly. You can specify more than one correct answer.
+            Learners must enter a response that matches one of the correct answers exactly.</p>
+            <p>When you add the problem, be sure to select Settings to specify a Display Name and other values that
+            apply.</p>
+            <p>You can use the following example problem as a model.</p>
+            <p>What was the first post-secondary school in China to allow both male and female students?</p>
+            <stringresponse answer="Nanjing Higher Normal Institute" type="ci" >
+              <additional_answer answer="National Central University"></additional_answer>
+              <additional_answer answer="Nanjing University"></additional_answer>
+              <textline label="What was the first post-secondary school in China to allow both male and female
+              students?" size="20"/>
+            </stringresponse>
+            <solution>
+            <div class="detailed-solution">
+            <p>Explanation</p>
+            <p>Nanjing Higher Normal Institute first admitted female students in 1920.</p>
+            </div>
+            </solution>
+        </problem>
+    """)
+    sample_checkboxes_with_hints_and_feedback_problem_xml = textwrap.dedent("""
+        <problem>
+            <p>You can provide feedback for each option in a checkbox problem, with distinct feedback depending on
+            whether or not the learner selects that option.</p>
+            <p>You can also provide compound feedback for a specific combination of answers. For example, if you have
+            three possible answers in the problem, you can configure specific feedback for when a learner selects each
+            combination of possible answers.</p>
+            <p>You can also add hints for learners.</p>
+            <p>Be sure to select Settings to specify a Display Name and other values that apply.</p>
+            <p>Use the following example problem as a model.</p>
+            <p>Which of the following is a fruit? Check all that apply.</p>
+            <choiceresponse>
+              <checkboxgroup label="Which of the following is a fruit? Check all that apply.">
+                <choice correct="true">apple
+                  <choicehint selected="true">You are correct that an apple is a fruit because it is the fertilized
+                  ovary that comes from an apple tree and contains seeds.</choicehint>
+                  <choicehint selected="false">Remember that an apple is also a fruit.</choicehint></choice>
+                <choice correct="true">pumpkin
+                  <choicehint selected="true">You are correct that a pumpkin is a fruit because it is the fertilized
+                  ovary of a squash plant and contains seeds.</choicehint>
+                  <choicehint selected="false">Remember that a pumpkin is also a fruit.</choicehint></choice>
+                <choice correct="false">potato
+                  <choicehint selected="true">A potato is a vegetable, not a fruit, because it does not come from a
+                  flower and does not contain seeds.</choicehint>
+                  <choicehint selected="false">You are correct that a potato is a vegetable because it is an edible
+                  part of a plant in tuber form.</choicehint></choice>
+                <choice correct="true">tomato
+                  <choicehint selected="true">You are correct that a tomato is a fruit because it is the fertilized
+                  ovary of a tomato plant and contains seeds.</choicehint>
+                  <choicehint selected="false">Many people mistakenly think a tomato is a vegetable. However, because
+                  a tomato is the fertilized ovary of a tomato plant and contains seeds, it is a fruit.</choicehint>
+                  </choice>
+                <compoundhint value="A B D">An apple, pumpkin, and tomato are all fruits as they all are fertilized
+                ovaries of a plant and contain seeds.</compoundhint>
+                <compoundhint value="A B C D">You are correct that an apple, pumpkin, and tomato are all fruits as they
+                all are fertilized ovaries of a plant and contain seeds. However, a potato is not a fruit as it is an
+                edible part of a plant in tuber form and is a vegetable.</compoundhint>
+              </checkboxgroup>
+            </choiceresponse>
+            <demandhint>
+              <hint>A fruit is the fertilized ovary from a flower.</hint>
+              <hint>A fruit contains seeds of the plant.</hint>
+            </demandhint>
+        </problem>
+    """)
+    sample_dropdown_with_hints_and_feedback_problem_xml = textwrap.dedent("""
+        <problem>
+            <p>You can provide feedback for each available option in a dropdown problem.</p>
+            <p>You can also add hints for learners.</p>
+            <p>Be sure to select Settings to specify a Display Name and other values that apply.</p>
+            <p>Use the following example problem as a model.</p>
+            <p> A/an ________ is a vegetable.</p>
+            <optionresponse>
+              <optioninput label=" A/an ________ is a vegetable.">
+                <option correct="False">apple <optionhint>An apple is the fertilized ovary that comes from an apple
+                tree and contains seeds, meaning it is a fruit.</optionhint></option>
+                <option correct="False">pumpkin <optionhint>A pumpkin is the fertilized ovary of a squash plant and
+                contains seeds, meaning it is a fruit.</optionhint></option>
+                <option correct="True">potato <optionhint>A potato is an edible part of a plant in tuber form and is a
+                vegetable.</optionhint></option>
+                <option correct="False">tomato <optionhint>Many people mistakenly think a tomato is a vegetable.
+                However, because a tomato is the fertilized ovary of a tomato plant and contains seeds, it is a fruit.
+                </optionhint></option>
+              </optioninput>
+            </optionresponse>
+            <demandhint>
+              <hint>A fruit is the fertilized ovary from a flower.</hint>
+              <hint>A fruit contains seeds of the plant.</hint>
+            </demandhint>
+        </problem>
+    """)
+    sample_multichoice_with_hints_and_feedback_problem_xml = textwrap.dedent("""
+        <problem>
+            <p>You can provide feedback for each option in a multiple choice problem.</p>
+            <p>You can also add hints for learners.</p>
+            <p>Be sure to select Settings to specify a Display Name and other values that apply.</p>
+            <p>Use the following example problem as a model.</p>
+            <p>Which of the following is a vegetable?</p>
+            <multiplechoiceresponse>
+              <choicegroup label="Which of the following is a vegetable?" type="MultipleChoice">
+                <choice correct="false">apple <choicehint>An apple is the fertilized ovary that comes from an apple
+                tree and contains seeds, meaning it is a fruit.</choicehint></choice>
+                <choice correct="false">pumpkin <choicehint>A pumpkin is the fertilized ovary of a squash plant and
+                contains seeds, meaning it is a fruit.</choicehint></choice>
+                <choice correct="true">potato <choicehint>A potato is an edible part of a plant in tuber form and is a
+                vegetable.</choicehint></choice>
+                <choice correct="false">tomato <choicehint>Many people mistakenly think a tomato is a vegetable.
+                However, because a tomato is the fertilized ovary of a tomato plant and contains seeds, it is a fruit.
+                </choicehint></choice>
+              </choicegroup>
+            </multiplechoiceresponse>
+            <demandhint>
+              <hint>A fruit is the fertilized ovary from a flower.</hint>
+              <hint>A fruit contains seeds of the plant.</hint>
+            </demandhint>
+        </problem>
+    """)
+    sample_numerical_input_with_hints_and_feedback_problem_xml = textwrap.dedent("""
+        <problem>
+            <p>You can provide feedback for correct answers in numerical input problems. You cannot provide feedback
+            for incorrect answers.</p>
+            <p>Use feedback for the correct answer to reinforce the process for arriving at the numerical value.</p>
+            <p>You can also add hints for learners.</p>
+            <p>Be sure to select Settings to specify a Display Name and other values that apply.</p>
+            <p>Use the following example problem as a model.</p>
+            <p>What is the arithmetic mean for the following set of numbers? (1, 5, 6, 3, 5)</p>
+            <numericalresponse answer="4">
+              <formulaequationinput label="What is the arithmetic mean for the following set of numbers?
+              (1, 5, 6, 3, 5)" />
+              <correcthint>The mean for this set of numbers is 20 / 5, which equals 4.</correcthint>
+            </numericalresponse>
+            <solution>
+            <div class="detailed-solution">
+            <p>Explanation</p>
+            <p>The mean is calculated by summing the set of numbers and dividing by n. In this case:
+            (1 + 5 + 6 + 3 + 5) / 5 = 20 / 5 = 4.</p>
+            </div>
+            </solution>
+            <demandhint>
+              <hint>The mean is calculated by summing the set of numbers and dividing by n.</hint>
+              <hint>n is the count of items in the set.</hint>
+            </demandhint>
+        </problem>
+    """)
+    sample_text_input_with_hints_and_feedback_problem_xml = textwrap.dedent("""
+        <problem>
+            <p>You can provide feedback for the correct answer in text input problems, as well as for specific
+            incorrect answers.</p>
+            <p>Use feedback on expected incorrect answers to address common misconceptions and to provide guidance on
+            how to arrive at the correct answer.</p>
+            <p>Be sure to select Settings to specify a Display Name and other values that apply.</p>
+            <p>Use the following example problem as a model.</p>
+            <p>Which U.S. state has the largest land area?</p>
+            <stringresponse answer="Alaska" type="ci" >
+              <correcthint>Alaska is 576,400 square miles, more than double the land area of the second largest state,
+              Texas.</correcthint>
+              <stringequalhint answer="Texas">While many people think Texas is the largest state, it is actually the
+              second largest, with 261,797 square miles.</stringequalhint>
+              <stringequalhint answer="California">California is the third largest state, with 155,959 square miles.
+              </stringequalhint>
+              <textline label="Which U.S. state has the largest land area?" size="20"/>
+            </stringresponse>
+            <demandhint>
+              <hint>Consider the square miles, not population.</hint>
+              <hint>Consider all 50 states, not just the continental United States.</hint>
+            </demandhint>
+        </problem>
+    """)
    def _create_descriptor(self, xml, name=None):
        """ Creates a CapaDescriptor to run test against """
        descriptor = CapaDescriptor(get_test_system(), scope_ids=1)
@@ -1724,8 +2081,11 @@ class CapaDescriptorTest(unittest.TestCase):
        self.assertEquals(descriptor.problem_types, {response_tag})
        self.assertEquals(descriptor.index_dictionary(), {
            'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
-            'display_name': name,
+            'problem_types': [response_tag],
-            'problem_types': [response_tag]
+            'content': {
+                'display_name': name,
+                'capa_content': ''
+            }
        })
    def test_response_types_ignores_non_response_tags(self):
@@ -1748,8 +2108,11 @@ class CapaDescriptorTest(unittest.TestCase):
        self.assertEquals(descriptor.problem_types, {"multiplechoiceresponse"})
        self.assertEquals(descriptor.index_dictionary(), {
            'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
-            'display_name': name,
+            'problem_types': ["multiplechoiceresponse"],
-            'problem_types': ["multiplechoiceresponse"]
+            'content': {
+                'display_name': name,
+                'capa_content': ' Label Some comment Apple Banana Chocolate Donut '
+            }
        })
    def test_response_types_multiple_tags(self):
@@ -1778,8 +2141,336 @@ class CapaDescriptorTest(unittest.TestCase):
        self.assertEquals(
            descriptor.index_dictionary(), {
                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
-                'display_name': name,
+                'problem_types': ["optionresponse", "multiplechoiceresponse"],
-                'problem_types': ["optionresponse", "multiplechoiceresponse"]
+                'content': {
+                    'display_name': name,
+                    'capa_content': ' Label Some comment Donut Buggy '
+                }
+            }
+        )
+    def test_solutions_not_indexed(self):
+        xml = textwrap.dedent("""
+            <problem>
+                <solution>
+                <div class="detailed-solution">
+                <p>Explanation</p>
+                <p>This is what the 1st solution.</p>
+                </div>
+                </solution>
+                <solution>
+                <div class="detailed-solution">
+                <p>Explanation</p>
+                <p>This is the 2nd solution.</p>
+                </div>
+                </solution>
+            </problem>
+        """)
+        name = "Blank Common Capa Problem"
+        descriptor = self._create_descriptor(xml, name=name)
+        self.assertEquals(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': [],
+                'content': {
+                    'display_name': name,
+                    'capa_content': ' '
+                }
+            }
+        )
+    def test_indexing_checkboxes(self):
+        name = "Checkboxes"
+        descriptor = self._create_descriptor(self.sample_checkbox_problem_xml, name=name)
+        capa_content = textwrap.dedent("""
+            Title
+            Description
+            Example
+            The following languages are in the Indo-European family:
+            Urdu
+            Finnish
+            Marathi
+            French
+            Hungarian
+            Note: Make sure you select all of the correct options—there may be more than one!
+        """)
+        self.assertEquals(descriptor.problem_types, {"choiceresponse"})
+        self.assertEquals(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': ["choiceresponse"],
+                'content': {
+                    'display_name': name,
+                    'capa_content': capa_content.replace("\n", " ")
+                }
+            }
+        )
+    def test_indexing_dropdown(self):
+        name = "Dropdown"
+        descriptor = self._create_descriptor(self.sample_dropdown_problem_xml, name=name)
+        capa_content = textwrap.dedent("""
+            Dropdown problems allow learners to select only one option from a list of options.
+            Description
+            You can use the following example problem as a model.
+            Which of the following countries celebrates its independence on August 15?
+        """)
+        self.assertEquals(descriptor.problem_types, {"optionresponse"})
+        self.assertEquals(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': ["optionresponse"],
+                'content': {
+                    'display_name': name,
+                    'capa_content': capa_content.replace("\n", " ")
+                }
+            }
+        )
+    def test_indexing_multiple_choice(self):
+        name = "Multiple Choice"
+        descriptor = self._create_descriptor(self.sample_multichoice_problem_xml, name=name)
+        capa_content = textwrap.dedent("""
+            Multiple choice problems allow learners to select only one option.
+            When you add the problem, be sure to select Settings to specify a Display Name and other values.
+            You can use the following example problem as a model.
+            Which of the following countries has the largest population?
+            Brazil
+            Germany
+            Indonesia
+            Russia
+        """)
+        self.assertEquals(descriptor.problem_types, {"multiplechoiceresponse"})
+        self.assertEquals(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': ["multiplechoiceresponse"],
+                'content': {
+                    'display_name': name,
+                    'capa_content': capa_content.replace("\n", " ")
+                }
+            }
+        )
+    def test_indexing_numerical_input(self):
+        name = "Numerical Input"
+        descriptor = self._create_descriptor(self.sample_numerical_input_problem_xml, name=name)
+        capa_content = textwrap.dedent("""
+            In a numerical input problem, learners enter numbers or a specific and relatively simple mathematical
+            expression. Learners enter the response in plain text, and the system then converts the text to a symbolic
+            expression that learners can see below the response field.
+            The system can handle several types of characters, including basic operators, fractions, exponents, and
+            common constants such as "i". You can refer learners to "Entering Mathematical and Scientific Expressions"
+            in the edX Guide for Students for more information.
+            When you add the problem, be sure to select Settings to specify a Display Name and other values that
+            apply.
+            You can use the following example problems as models.
+            How many miles away from Earth is the sun? Use scientific notation to answer.
+            The square of what number is -100?
+        """)
+        self.assertEquals(descriptor.problem_types, {"numericalresponse"})
+        self.assertEquals(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': ["numericalresponse"],
+                'content': {
+                    'display_name': name,
+                    'capa_content': capa_content.replace("\n", " ")
+                }
+            }
+        )
+    def test_indexing_text_input(self):
+        name = "Text Input"
+        descriptor = self._create_descriptor(self.sample_text_input_problem_xml, name=name)
+        capa_content = textwrap.dedent("""
+            In text input problems, also known as "fill-in-the-blank" problems, learners enter text into a response
+            field. The text can include letters and characters such as punctuation marks. The text that the learner
+            enters must match your specified answer text exactly. You can specify more than one correct answer.
+            Learners must enter a response that matches one of the correct answers exactly.
+            When you add the problem, be sure to select Settings to specify a Display Name and other values that
+            apply.
+            You can use the following example problem as a model.
+            What was the first post-secondary school in China to allow both male and female students?
+        """)
+        self.assertEquals(descriptor.problem_types, {"stringresponse"})
+        self.assertEquals(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': ["stringresponse"],
+                'content': {
+                    'display_name': name,
+                    'capa_content': capa_content.replace("\n", " ")
+                }
+            }
+        )
+    def test_indexing_checkboxes_with_hints_and_feedback(self):
+        name = "Checkboxes with Hints and Feedback"
+        descriptor = self._create_descriptor(self.sample_checkboxes_with_hints_and_feedback_problem_xml, name=name)
+        capa_content = textwrap.dedent("""
+            You can provide feedback for each option in a checkbox problem, with distinct feedback depending on
+            whether or not the learner selects that option.
+            You can also provide compound feedback for a specific combination of answers. For example, if you have
+            three possible answers in the problem, you can configure specific feedback for when a learner selects each
+            combination of possible answers.
+            You can also add hints for learners.
+            Be sure to select Settings to specify a Display Name and other values that apply.
+            Use the following example problem as a model.
+            Which of the following is a fruit? Check all that apply.
+            apple
+            pumpkin
+            potato
+            tomato
+        """)
+        self.assertEquals(descriptor.problem_types, {"choiceresponse"})
+        self.assertEquals(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': ["choiceresponse"],
+                'content': {
+                    'display_name': name,
+                    'capa_content': capa_content.replace("\n", " ")
+                }
+            }
+        )
+    def test_indexing_dropdown_with_hints_and_feedback(self):
+        """
+        Check index_dictionary() output for the sample dropdown problem with hints and feedback.
+        """
+        name = "Dropdown with Hints and Feedback"
+        descriptor = self._create_descriptor(self.sample_dropdown_with_hints_and_feedback_problem_xml, name=name)
+        # Text expected in the index, one fragment per line; the newlines are
+        # turned into single spaces when building the expected dictionary below.
+        capa_content = textwrap.dedent("""
+            You can provide feedback for each available option in a dropdown problem.
+            You can also add hints for learners.
+            Be sure to select Settings to specify a Display Name and other values that apply.
+            Use the following example problem as a model.
+            A/an ________ is a vegetable.
+            apple
+            pumpkin
+            potato
+            tomato
+        """)
+        # The descriptor exposes problem_types as a set; the index dictionary carries a list.
+        self.assertEqual(descriptor.problem_types, {"optionresponse"})
+        self.assertEqual(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': ["optionresponse"],
+                'content': {
+                    'display_name': name,
+                    'capa_content': capa_content.replace("\n", " ")
+                }
+            }
+        )
+    def test_indexing_multiple_choice_with_hints_and_feedback(self):
+        """
+        Check index_dictionary() output for the sample multiple choice problem with hints and feedback.
+        """
+        name = "Multiple Choice with Hints and Feedback"
+        descriptor = self._create_descriptor(self.sample_multichoice_with_hints_and_feedback_problem_xml, name=name)
+        # Text expected in the index, one fragment per line; the newlines are
+        # turned into single spaces when building the expected dictionary below.
+        capa_content = textwrap.dedent("""
+            You can provide feedback for each option in a multiple choice problem.
+            You can also add hints for learners.
+            Be sure to select Settings to specify a Display Name and other values that apply.
+            Use the following example problem as a model.
+            Which of the following is a vegetable?
+            apple
+            pumpkin
+            potato
+            tomato
+        """)
+        # The descriptor exposes problem_types as a set; the index dictionary carries a list.
+        self.assertEqual(descriptor.problem_types, {"multiplechoiceresponse"})
+        self.assertEqual(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': ["multiplechoiceresponse"],
+                'content': {
+                    'display_name': name,
+                    'capa_content': capa_content.replace("\n", " ")
+                }
+            }
+        )
+    def test_indexing_numerical_input_with_hints_and_feedback(self):
+        """
+        Check index_dictionary() output for the sample numerical input problem with hints and feedback.
+        """
+        name = "Numerical Input with Hints and Feedback"
+        descriptor = self._create_descriptor(self.sample_numerical_input_with_hints_and_feedback_problem_xml, name=name)
+        # Text expected in the index, one fragment per line; the newlines are
+        # turned into single spaces when building the expected dictionary below.
+        capa_content = textwrap.dedent("""
+            You can provide feedback for correct answers in numerical input problems. You cannot provide feedback
+            for incorrect answers.
+            Use feedback for the correct answer to reinforce the process for arriving at the numerical value.
+            You can also add hints for learners.
+            Be sure to select Settings to specify a Display Name and other values that apply.
+            Use the following example problem as a model.
+            What is the arithmetic mean for the following set of numbers? (1, 5, 6, 3, 5)
+        """)
+        # The descriptor exposes problem_types as a set; the index dictionary carries a list.
+        self.assertEqual(descriptor.problem_types, {"numericalresponse"})
+        self.assertEqual(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': ["numericalresponse"],
+                'content': {
+                    'display_name': name,
+                    'capa_content': capa_content.replace("\n", " ")
+                }
+            }
+        )
+    def test_indexing_text_input_with_hints_and_feedback(self):
+        """
+        Check index_dictionary() output for the sample text input problem with hints and feedback.
+        """
+        name = "Text Input with Hints and Feedback"
+        descriptor = self._create_descriptor(self.sample_text_input_with_hints_and_feedback_problem_xml, name=name)
+        # Text expected in the index, one fragment per line; the newlines are
+        # turned into single spaces when building the expected dictionary below.
+        capa_content = textwrap.dedent("""
+            You can provide feedback for the correct answer in text input problems, as well as for specific
+            incorrect answers.
+            Use feedback on expected incorrect answers to address common misconceptions and to provide guidance on
+            how to arrive at the correct answer.
+            Be sure to select Settings to specify a Display Name and other values that apply.
+            Use the following example problem as a model.
+            Which U.S. state has the largest land area?
+        """)
+        # The descriptor exposes problem_types as a set; the index dictionary carries a list.
+        self.assertEqual(descriptor.problem_types, {"stringresponse"})
+        self.assertEqual(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': ["stringresponse"],
+                'content': {
+                    'display_name': name,
+                    'capa_content': capa_content.replace("\n", " ")
+                }
+            }
+        )
+    def test_indexing_problem_with_html_tags(self):
+        """
+        Check that style, script, HTML comments and CDATA from the problem markup
+        are absent from the indexed content, leaving only the paragraph text.
+        """
+        sample_problem_xml = textwrap.dedent("""
+            <problem>
+                <style>p {left: 10px;}</style>
+                <!-- Beginning of the html -->
+                <p>This has HTML comment in it.<!-- Commenting Content --></p>
+                <!-- Here comes CDATA -->
+                <![CDATA[This is just a CDATA!]]>
+                <p>HTML end.</p>
+                <!-- Script that makes everything alive! -->
+                <script>
+                    var alive;
+                </script>
+            </problem>
+        """)
+        name = "Mixed business"
+        descriptor = self._create_descriptor(sample_problem_xml, name=name)
+        # Only the text of the two <p> elements is expected to survive; the
+        # newlines are turned into single spaces in the expected dictionary below.
+        capa_content = textwrap.dedent("""
+            This has HTML comment in it.
+            HTML end.
+        """)
+        # The sample has no response element, hence the empty problem_types list.
+        self.assertEqual(
+            descriptor.index_dictionary(), {
+                'content_type': CapaDescriptor.INDEX_CONTENT_TYPE,
+                'problem_types': [],
+                'content': {
+                    'display_name': name,
+                    'capa_content': capa_content.replace("\n", " ")
+                }
            }
        )

--- a/common/lib/xmodule/xmodule/tests/test_html_module.py
+++ b/common/lib/xmodule/xmodule/tests/test_html_module.py
@@ -59,7 +59,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
    Make sure that HtmlDescriptor can format data for indexing as expected.
    """
-    def test_index_dictionary(self):
+    def test_index_dictionary_simple_html_module(self):
        sample_xml = '''
            <html>
                <p>Hello World!</p>
@@ -71,6 +71,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
            "content_type": "Text"
        })
+    def test_index_dictionary_cdata_html_module(self):
        sample_xml_cdata = '''
            <html>
                <p>This has CDATA in it.</p>
@@ -83,6 +84,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
            "content_type": "Text"
        })
+    def test_index_dictionary_multiple_spaces_html_module(self):
        sample_xml_tab_spaces = '''
            <html>
                <p>     Text has spaces :)  </p>
@@ -94,6 +96,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
            "content_type": "Text"
        })
+    def test_index_dictionary_html_module_with_comment(self):
        sample_xml_comment = '''
            <html>
                <p>This has HTML comment in it.</p>
@@ -106,6 +109,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
            "content_type": "Text"
        })
+    def test_index_dictionary_html_module_with_both_comments_and_cdata(self):
        sample_xml_mix_comment_cdata = '''
            <html>
                <!-- Beginning of the html -->
@@ -120,3 +124,23 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
            "content": {"html_content": " This has HTML comment in it. HTML end. ", "display_name": "Text"},
            "content_type": "Text"
        })
+    def test_index_dictionary_html_module_with_script_and_style_tags(self):
+        """
+        Index dictionary of an HTML module containing style, script, comments
+        and CDATA should carry only the visible paragraph text.
+        """
+        html_source = '''
+            <html>
+                <style>p {color: green;}</style>
+                <!-- Beginning of the html -->
+                <p>This has HTML comment in it.<!-- Commenting Content --></p>
+                <!-- Here comes CDATA -->
+                <![CDATA[This is just a CDATA!]]>
+                <p>HTML end.</p>
+                <script>
+                    var message = "Hello world!"
+                </script>
+            </html>
+        '''
+        expected_index = {
+            "content": {"html_content": " This has HTML comment in it. HTML end. ", "display_name": "Text"},
+            "content_type": "Text"
+        }
+        descriptor = instantiate_descriptor(data=html_source)
+        actual_index = descriptor.index_dictionary()
+        self.assertEqual(actual_index, expected_index)
--- a/common/lib/xmodule/xmodule/tests/test_utils_escape_html_characters.py
+++ b/common/lib/xmodule/xmodule/tests/test_utils_escape_html_characters.py
+"""Tests for methods defined in util/misc.py"""
+from xmodule.util.misc import escape_html_characters
+from unittest import TestCase
+class UtilHtmlEscapeTests(TestCase):
+    """
+    Exercise escape_html_characters from util/misc against comments, CDATA
+    sections and non-breaking space entities.
+    """
+    # Every input below is expected to reduce to this exact string.
+    final_content = " This is a paragraph. "
+    def test_escape_html_comments(self):
+        """An HTML comment is dropped and surrounding whitespace collapsed."""
+        raw = """
+            <!--This is a comment. Comments are not displayed in the browser-->
+            This is a paragraph.
+            """
+        escaped = escape_html_characters(raw)
+        self.assertEqual(escaped, self.final_content)
+    def test_escape_cdata_comments(self):
+        """A CDATA section, including the code inside it, is dropped."""
+        raw = """
+            <![CDATA[
+                function matchwo(a,b)
+                {
+                if (a < b && a < 0) then
+                  {
+                  return 1;
+                  }
+                else
+                  {
+                  return 0;
+                  }
+                }
+            ]]>
+            This is a paragraph.
+            """
+        escaped = escape_html_characters(raw)
+        self.assertEqual(escaped, self.final_content)
+    def test_escape_non_breaking_space(self):
+        """&nbsp; entities are folded into the collapsed whitespace."""
+        raw = """
+            &nbsp;&nbsp;
+            &nbsp;
+            <![CDATA[
+                function matchwo(a,b)
+                {
+                if (a < b && a < 0) then
+                  {
+                  return 1;
+                  }
+                else
+                  {
+                  return 0;
+                  }
+                }
+            ]]>
+            This is a paragraph.&nbsp;
+        """
+        escaped = escape_html_characters(raw)
+        self.assertEqual(escaped, self.final_content)
--- a/common/lib/xmodule/xmodule/util/misc.py
+++ b/common/lib/xmodule/xmodule/util/misc.py
 """
 Miscellaneous utility functions.
 """
+import re
+from xmodule.annotator_mixin import html_to_text
 def escape_invalid_characters(name, invalid_char_list, replace_with='_'):
@@ -24,3 +27,34 @@ def escape_invalid_characters(name, invalid_char_list, replace_with='_'):
        if char in name:
            name = name.replace(char, replace_with)
    return name
+def escape_html_characters(content):
+    """
+    Remove HTML characters that shouldn't be indexed using ElasticSearch indexer
+    This method is complementary to html_to_text method found in xmodule/annotator_mixin.py
+    The content is first passed through html_to_text, then three substitutions
+    are applied in order: whitespace collapsing, CDATA removal, comment removal.
+    Args:
+        content (str): variable to escape html characters from
+    Returns:
+        content (str): content ready to be index by ElasticSearch
+    """
+    # Collapse every run of whitespace, "&nbsp;" entities and "//" into one
+    # space. After this step the text contains no newlines, so "." in the
+    # patterns below can span the whole remaining text.
+    text = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(content))
+    # Removing HTML CDATA. Non-greedy, so each section is removed on its own
+    # and the text between two CDATA sections is preserved.
+    text = re.sub(r"<!\[CDATA\[.*?\]\]>", "", text)
+    # Removing HTML comments. Non-greedy for the same reason: a greedy match
+    # would run from the first "<!--" to the last "-->" and delete everything
+    # in between.
+    return re.sub(r"<!--.*?-->", "", text)
--- a/common/test/acceptance/tests/lms/test_lms_dashboard_search.py
+++ b/common/test/acceptance/tests/lms/test_lms_dashboard_search.py
@@ -80,7 +80,7 @@ class DashboardSearchTest(WebAppTest):
            course_fix.add_children(
                XBlockFixtureDesc('chapter', 'Section 1').add_children(
                    XBlockFixtureDesc('sequential', 'Subsection 1').add_children(
-                        XBlockFixtureDesc('problem', 'dashboard search')
+                        XBlockFixtureDesc('problem', 'Test Problem')
                    )
                )
            ).add_children(