Commit 3d49d80b by Ned Batchelder Committed by Ned Batchelder

Fix safe_lxml. SEC-338

The imports were sorted in May, which broke the monkeypatching in
safe_lxml.  I added two tests that the XML parsers are properly patched,
but they didn't pass until I added the monkeypatching to the start of
the test runs.  Once that was done, some tests failed because they
relied on specific details of how empty elements are represented.  Those
tests are now fixed.
parent 0380c5fc
...@@ -14,6 +14,11 @@ import contracts ...@@ -14,6 +14,11 @@ import contracts
import pytest import pytest
# Patch the xml libs before anything else.
from safe_lxml import defuse_xml_libs
defuse_xml_libs()
def pytest_configure(config): def pytest_configure(config):
""" """
Do core setup operations from manage.py before collecting tests. Do core setup operations from manage.py before collecting tests.
......
...@@ -291,9 +291,7 @@ class CapaHtmlRenderTest(unittest.TestCase): ...@@ -291,9 +291,7 @@ class CapaHtmlRenderTest(unittest.TestCase):
# Comments and processing instructions should be skipped. # Comments and processing instructions should be skipped.
xml_str = textwrap.dedent("""\ xml_str = textwrap.dedent("""\
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html [ <!DOCTYPE html []>
<!ENTITY % wacky "lxml.etree is wacky!">
]>
<problem> <problem>
<!-- A commment. --> <!-- A commment. -->
<?ignore this processing instruction. ?> <?ignore this processing instruction. ?>
...@@ -305,7 +303,7 @@ class CapaHtmlRenderTest(unittest.TestCase): ...@@ -305,7 +303,7 @@ class CapaHtmlRenderTest(unittest.TestCase):
# Render the HTML # Render the HTML
the_html = problem.get_html() the_html = problem.get_html()
self.assertRegexpMatches(the_html, r"<div/>") self.assertRegexpMatches(the_html, r"<div>\s*</div>")
def _create_test_file(self, path, content_str): def _create_test_file(self, path, content_str):
test_fp = self.capa_system.filestore.open(path, "w") test_fp = self.capa_system.filestore.open(path, "w")
......
...@@ -190,14 +190,14 @@ class CapaTargetedFeedbackTest(unittest.TestCase): ...@@ -190,14 +190,14 @@ class CapaTargetedFeedbackTest(unittest.TestCase):
problem.done = True problem.done = True
problem.student_answers = {'1_2_1': 'choice_0'} problem.student_answers = {'1_2_1': 'choice_0'}
the_html = problem.get_html() the_html = problem.get_html()
self.assertRegexpMatches(the_html, r"<targetedfeedbackset/>") self.assertRegexpMatches(the_html, r"<targetedfeedbackset>\s*</targetedfeedbackset>")
# New problem with same XML -- try the correct choice. # New problem with same XML -- try the correct choice.
problem = new_loncapa_problem(xml_str) problem = new_loncapa_problem(xml_str)
problem.done = True problem.done = True
problem.student_answers = {'1_2_1': 'choice_2'} # correct problem.student_answers = {'1_2_1': 'choice_2'} # correct
the_html = problem.get_html() the_html = problem.get_html()
self.assertRegexpMatches(the_html, r"<targetedfeedbackset/>") self.assertRegexpMatches(the_html, r"<targetedfeedbackset>\s*</targetedfeedbackset>")
def test_targeted_feedback_no_solution_element(self): def test_targeted_feedback_no_solution_element(self):
xml_str = textwrap.dedent(""" xml_str = textwrap.dedent("""
...@@ -581,7 +581,7 @@ class CapaTargetedFeedbackTest(unittest.TestCase): ...@@ -581,7 +581,7 @@ class CapaTargetedFeedbackTest(unittest.TestCase):
# Q1 and Q2 have no feedback # Q1 and Q2 have no feedback
self.assertRegexpMatches( self.assertRegexpMatches(
without_new_lines, without_new_lines,
r'<targetedfeedbackset.*?/>.*<targetedfeedbackset.*?/>' r'<targetedfeedbackset>\s*</targetedfeedbackset>.*<targetedfeedbackset>\s*</targetedfeedbackset>'
) )
def test_targeted_feedback_multiple_answer_1(self): def test_targeted_feedback_multiple_answer_1(self):
...@@ -594,7 +594,7 @@ class CapaTargetedFeedbackTest(unittest.TestCase): ...@@ -594,7 +594,7 @@ class CapaTargetedFeedbackTest(unittest.TestCase):
self.assertRegexpMatches( self.assertRegexpMatches(
without_new_lines, without_new_lines,
r'<targetedfeedbackset.*?>.*?explanation-id="feedback1".*?</targetedfeedbackset>.*' + r'<targetedfeedbackset.*?>.*?explanation-id="feedback1".*?</targetedfeedbackset>.*' +
r'<targetedfeedbackset.*?/>' r'<targetedfeedbackset>\s*</targetedfeedbackset>'
) )
def test_targeted_feedback_multiple_answer_2(self): def test_targeted_feedback_multiple_answer_2(self):
......
"""Code run by pylint before running any tests."""
# Patch the xml libs before anything else.
from safe_lxml import defuse_xml_libs
defuse_xml_libs()
...@@ -7,12 +7,17 @@ It also includes a safer XMLParser. ...@@ -7,12 +7,17 @@ It also includes a safer XMLParser.
For processing xml always prefer this over using lxml.etree directly. For processing xml always prefer this over using lxml.etree directly.
""" """
# This should be imported after lxml.etree so that it overrides the following attributes. # Names are imported into this module so that it can be a stand-in for
from defusedxml.lxml import XML, fromstring, parse # lxml.etree. The names are not used here, so disable the pylint warning.
# pylint: disable=unused-import, wildcard-import, unused-wildcard-import
from lxml.etree import XMLParser as _XMLParser from lxml.etree import XMLParser as _XMLParser
from lxml.etree import * # pylint: disable=wildcard-import, unused-wildcard-import; pylint: disable=unused-import from lxml.etree import *
from lxml.etree import _Element, _ElementTree from lxml.etree import _Element, _ElementTree
# This should be imported after lxml.etree so that it overrides the following attributes.
from defusedxml.lxml import XML, fromstring, parse
class XMLParser(_XMLParser): # pylint: disable=function-redefined class XMLParser(_XMLParser): # pylint: disable=function-redefined
""" """
......
"""Test that we have defused XML."""
import defusedxml
from lxml import etree
import pytest
@pytest.mark.parametrize("attr", ["XML", "fromstring", "parse"])
def test_etree_is_defused(attr):
func = getattr(etree, attr)
assert "defused" in func.__code__.co_filename
def test_entities_arent_resolved():
# Make sure we have disabled entity resolution.
xml = '<?xml version="1.0"?><!DOCTYPE mydoc [<!ENTITY hi "Hello">]> <root>&hi;</root>'
parser = etree.XMLParser()
with pytest.raises(defusedxml.EntitiesForbidden):
_ = etree.XML(xml, parser=parser)
"""Code run by pylint before running any tests."""
# Patch the xml libs before anything else.
from safe_lxml import defuse_xml_libs
defuse_xml_libs()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment