Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
79ce0432
Commit
79ce0432
authored
Oct 21, 2013
by
Jason Bau
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use bleach instead of lxml.html.clean for sanitize_html OEE
parent
33720a85
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
86 additions
and
35 deletions
+86
-35
common/lib/xmodule/xmodule/open_ended_grading_classes/openendedchild.py
+22
-35
common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
+62
-0
requirements/edx/base.txt
+2
-0
No files found.
common/lib/xmodule/xmodule/open_ended_grading_classes/openendedchild.py
View file @
79ce0432
import
json
import
logging
from
lxml.html.clean
import
Cleaner
,
autolink_html
import
re
import
bleach
from
xmodule.progress
import
Progress
import
capa.xqueue_interface
as
xqueue_interface
from
capa.util
import
*
...
...
@@ -50,24 +50,14 @@ def upload_to_s3(file_to_upload, keyname, s3_interface):
return
public_url
class WhiteListCleaner(Cleaner):
    """
    Cleaner variant that does not restrict embedded URLs to a whitelist.

    The stock lxml cleaner strips every link that is not in a defined
    whitelist. Here all links are allowed, and the peer grading flagging
    mechanic is relied upon to catch the "bad" ones, so no URL whitelist is
    defined at all.
    """

    def allow_embedded_url(self, el, url):
        """
        Decide whether the embedded content in element `el` may be kept.

        Replaces the Cleaner hook so that `url` itself is never checked; the
        decision is made purely on the element's tag.

        Returns True when no tag whitelist is configured or when `el.tag` is
        in it, and False otherwise (which tells the cleaner to strip the
        element).
        """
        permitted_tags = self.whitelist_tags
        # Reject only when a tag whitelist exists and this tag is missing
        # from it; every URL is otherwise accepted.
        return permitted_tags is None or el.tag in permitted_tags
# Attribute whitelist handed to bleach by sanitize_html.
# Keys are tag names and values are the attribute names kept on that tag.
# The '*' entry lists attributes kept regardless of tag.
ALLOWED_HTML_ATTRS = {
    '*': ['id', 'class', 'height', 'width', 'alt'],
    'a': ['href', 'title', 'rel'],
    'embed': ['src'],
    'iframe': ['src'],
    'img': ['src'],
}
class
OpenEndedChild
(
object
):
...
...
@@ -228,22 +218,19 @@ class OpenEndedChild(object):
answer - any string
return - a cleaned version of the string
"""
try
:
answer
=
autolink_html
(
answer
)
cleaner
=
WhiteListCleaner
(
style
=
True
,
links
=
True
,
add_nofollow
=
False
,
page_structure
=
True
,
safe_attrs_only
=
True
,
whitelist_tags
=
(
'embed'
,
'iframe'
,
'a'
,
'img'
,
'br'
,)
)
clean_html
=
cleaner
.
clean_html
(
answer
)
clean_html
=
re
.
sub
(
r'</p>$'
,
''
,
re
.
sub
(
r'^<p>'
,
''
,
clean_html
))
clean_html
=
re
.
sub
(
"
\n
"
,
"<br/>"
,
clean_html
)
except
Exception
:
clean_html
=
answer
return
clean_html
clean_html
=
bleach
.
clean
(
answer
,
tags
=
[
'embed'
,
'iframe'
,
'a'
,
'img'
,
'br'
],
attributes
=
ALLOWED_HTML_ATTRS
,
strip
=
True
)
return
OpenEndedChild
.
replace_newlines
(
clean_html
)
@staticmethod
def
replace_newlines
(
html
):
"""
Replaces "
\n
" newlines with <br/>
"""
retv
=
re
.
sub
(
r'</p>$'
,
''
,
re
.
sub
(
r'^<p>'
,
''
,
html
))
return
re
.
sub
(
"
\n
"
,
"<br/>"
,
retv
)
def
new_history_entry
(
self
,
answer
):
"""
...
...
common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
View file @
79ce0432
...
...
@@ -1001,3 +1001,65 @@ class OpenEndedModuleXmlImageUploadTest(unittest.TestCase, DummyModulestore):
self
.
assertTrue
(
response
[
'success'
])
self
.
assertIn
(
self
.
answer_link
,
response
[
'student_response'
])
self
.
assertIn
(
self
.
autolink_tag
,
response
[
'student_response'
])
class OpenEndedModuleUtilTest(unittest.TestCase):
    """
    Tests for the util functions of OpenEndedModule.  Currently just for the html_sanitizer and <br/> inserter
    """
    # Each *_dirty fixture carries an event-handler attribute that is not in
    # the attribute whitelist; the matching *_clean fixture is the same
    # markup without it.  Attributes are listed alphabetically.
    script_dirty = u'<script>alert("xss!")</script>'
    script_clean = u'alert("xss!")'
    img_dirty = u'<img alt="cats" height="200" onclick="eval()" src="http://example.com/lolcats.jpg" width="200">'
    img_clean = u'<img alt="cats" height="200" src="http://example.com/lolcats.jpg" width="200">'
    embed_dirty = u'<embed height="200" id="cats" onhover="eval()" src="http://example.com/lolcats.swf" width="200">'
    embed_clean = u'<embed height="200" id="cats" src="http://example.com/lolcats.swf" width="200">'
    # These fixtures must use a real <iframe> element; with <img> markup the
    # iframe test would only repeat the img test.
    # NOTE(review): iframe is not a void element, so the expected output is
    # assumed to carry an explicit closing tag -- confirm against the pinned
    # bleach/html5lib versions.
    iframe_dirty = (
        u'<iframe class="cats" height="200" onerror="eval()" '
        u'src="http://example.com/lolcats" width="200"></iframe>'
    )
    iframe_clean = u'<iframe class="cats" height="200" src="http://example.com/lolcats" width="200"></iframe>'

    text = u'I am a \u201c\xfcber student\u201d'
    # The sanitizer emits escaped markup, so bare '<' and '>' in text are
    # expected to come back as the &lt; / &gt; entities.
    text_lessthan_noencd = u'This used to be broken < by the other parser. 3>5'
    text_lessthan_encode = u'This used to be broken &lt; by the other parser. 3&gt;5'
    text_linebreaks = u"St\xfcdent submission:\nI like lamp."
    text_brs = u"St\xfcdent submission:<br/>I like lamp."

    def test_script(self):
        """
        Basic test for stripping <script>
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.script_dirty), self.script_clean)

    def test_img(self):
        """
        Basic test for passing through img, but stripping bad attr
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.img_dirty), self.img_clean)

    def test_embed(self):
        """
        Basic test for passing through embed, but stripping bad attr
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.embed_dirty), self.embed_clean)

    def test_iframe(self):
        """
        Basic test for passing through iframe, but stripping bad attr
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.iframe_dirty), self.iframe_clean)

    def test_text(self):
        """
        Test for passing through text unchanged, including unicode
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.text), self.text)

    def test_lessthan(self):
        """
        Tests that `<` in text context is handled properly (escaped, not treated as markup)
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.text_lessthan_noencd), self.text_lessthan_encode)

    def test_linebreaks(self):
        """
        tests the replace_newlines function
        """
        self.assertEqual(OpenEndedChild.replace_newlines(self.text_linebreaks), self.text_brs)
requirements/edx/base.txt
View file @
79ce0432
...
...
@@ -8,6 +8,8 @@
beautifulsoup4==4.1.3
beautifulsoup==3.2.1
bleach==1.2.2
html5lib==0.95
boto==2.6.0
celery==3.0.19
dealer==0.2.3
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment