Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
6e1b8749
Commit
6e1b8749
authored
Oct 22, 2013
by
Jason Bau
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1440 from edx/jbau/COE-sanitize-with-bleach
Jbau/coe sanitize with bleach
parents
9936fc0a
651ef209
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
118 additions
and
35 deletions
+118
-35
common/lib/xmodule/xmodule/open_ended_grading_classes/openendedchild.py
+27
-35
common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
+89
-0
requirements/edx/base.txt
+2
-0
No files found.
common/lib/xmodule/xmodule/open_ended_grading_classes/openendedchild.py
View file @
6e1b8749
import
json
import
logging
from
lxml.html.clean
import
Cleaner
,
autolink_html
import
re
import
bleach
from
html5lib.tokenizer
import
HTMLTokenizer
from
xmodule.progress
import
Progress
import
capa.xqueue_interface
as
xqueue_interface
from
capa.util
import
*
...
...
@@ -50,24 +51,14 @@ def upload_to_s3(file_to_upload, keyname, s3_interface):
return
public_url
class WhiteListCleaner(Cleaner):
    """
    Cleaner subclass that does not require embedded urls to be whitelisted.

    By default, the lxml cleaner strips out all links that are not in a defined
    whitelist. We want to allow all links, and rely on the peer grading flagging
    mechanic to catch the "bad" ones, so no url whitelist is defined at all.
    """

    def allow_embedded_url(self, el, url):
        """
        Override of Cleaner.allow_embedded_url without the url whitelist check.

        el - the element being examined; only its `tag` is read
        url - the embedded url; never inspected
        return - False when `self.whitelist_tags` is set and `el.tag` is not in
                 it (telling the cleaner to strip the element), True otherwise
        """
        permitted_tags = self.whitelist_tags
        # No tag whitelist configured: nothing to strip, allow every url.
        if permitted_tags is None:
            return True
        # Only elements whose tag is whitelisted are kept; the url itself is
        # deliberately not checked.
        return el.tag in permitted_tags
# Attribute whitelist used by sanitize_html (passed as `attributes=` to bleach.clean).
# Keys are tag names; '*' lists attributes accepted on every allowed tag.
ALLOWED_HTML_ATTRS = {
    '*': ['id', 'class', 'height', 'width', 'alt'],
    'a': ['href', 'title', 'rel', 'target'],
    'embed': ['src'],
    'iframe': ['src'],
    'img': ['src'],
}
class
OpenEndedChild
(
object
):
...
...
@@ -228,22 +219,23 @@ class OpenEndedChild(object):
answer - any string
return - a cleaned version of the string
"""
try
:
answer
=
autolink_html
(
answer
)
cleaner
=
WhiteListCleaner
(
style
=
True
,
links
=
True
,
add_nofollow
=
False
,
page_structure
=
True
,
safe_attrs_only
=
True
,
whitelist_tags
=
(
'embed'
,
'iframe'
,
'a'
,
'img'
,
'br'
,)
)
clean_html
=
cleaner
.
clean_html
(
answer
)
clean_html
=
re
.
sub
(
r'</p>$'
,
''
,
re
.
sub
(
r'^<p>'
,
''
,
clean_html
))
clean_html
=
re
.
sub
(
"
\n
"
,
"<br/>"
,
clean_html
)
except
Exception
:
clean_html
=
answer
return
clean_html
clean_html
=
bleach
.
clean
(
answer
,
tags
=
[
'embed'
,
'iframe'
,
'a'
,
'img'
,
'br'
],
attributes
=
ALLOWED_HTML_ATTRS
,
strip
=
True
)
autolinked
=
bleach
.
linkify
(
clean_html
,
callbacks
=
[
bleach
.
callbacks
.
target_blank
],
skip_pre
=
True
,
tokenizer
=
HTMLTokenizer
)
return
OpenEndedChild
.
replace_newlines
(
autolinked
)
@staticmethod
def replace_newlines(html):
    """
    Drops a leading <p> / trailing </p> and replaces newlines with <br/>

    html - any string
    return - the string with a "<p>" at the very start removed, a "</p>" at
             the very end removed, and every newline character turned into
             "<br/>"
    """
    # The two tags are stripped independently of each other; '^' and '$' are
    # used without re.MULTILINE, so only the outermost position can match.
    without_opening = re.sub(r'^<p>', '', html)
    unwrapped = re.sub(r'</p>$', '', without_opening)
    return unwrapped.replace("\n", "<br/>")
def
new_history_entry
(
self
,
answer
):
"""
...
...
common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
View file @
6e1b8749
...
...
@@ -1001,3 +1001,92 @@ class OpenEndedModuleXmlImageUploadTest(unittest.TestCase, DummyModulestore):
self
.
assertTrue
(
response
[
'success'
])
self
.
assertIn
(
self
.
answer_link
,
response
[
'student_response'
])
self
.
assertIn
(
self
.
autolink_tag
,
response
[
'student_response'
])
class OpenEndedModuleUtilTest(unittest.TestCase):
    """
    Tests for the util functions of OpenEndedModule.  Currently just for the html_sanitizer and <br/> inserter
    """
    # Each *_dirty fixture carries an event-handler attribute (or a disallowed
    # tag) that sanitize_html must remove; the matching *_clean fixture is the
    # exact serialization expected back.
    script_dirty = u'<script>alert("xss!")</script>'
    script_clean = u'alert("xss!")'
    img_dirty = u'<img alt="cats" height="200" onclick="eval()" src="http://example.com/lolcats.jpg" width="200">'
    img_clean = u'<img alt="cats" height="200" src="http://example.com/lolcats.jpg" width="200">'
    embed_dirty = u'<embed height="200" id="cats" onhover="eval()" src="http://example.com/lolcats.swf" width="200"/>'
    embed_clean = u'<embed height="200" id="cats" src="http://example.com/lolcats.swf" width="200">'
    iframe_dirty = u'<iframe class="cats" height="200" onerror="eval()" src="http://example.com/lolcats" width="200"/>'
    iframe_clean = u'<iframe class="cats" height="200" src="http://example.com/lolcats" width="200"></iframe>'

    text = u'I am a \u201c\xfcber student\u201d'
    # The sanitizer emits escaped HTML, so bare '<' and '>' in plain text come
    # back as entities.  The expected value must therefore differ from the input.
    text_lessthan_noencd = u'This used to be broken < by the other parser. 3>5'
    text_lessthan_encode = u'This used to be broken &lt; by the other parser. 3&gt;5'
    text_linebreaks = u"St\xfcdent submission:\nI like lamp."
    text_brs = u"St\xfcdent submission:<br/>I like lamp."

    link_text = u'I love going to www.lolcatz.com'
    link_atag = u'I love going to <a href="http://www.lolcatz.com" target="_blank">www.lolcatz.com</a>'

    def test_script(self):
        """
        Basic test for stripping <script>
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.script_dirty), self.script_clean)

    def test_img(self):
        """
        Basic test for passing through img, but stripping bad attr
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.img_dirty), self.img_clean)

    def test_embed(self):
        """
        Basic test for passing through embed, but stripping bad attr
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.embed_dirty), self.embed_clean)

    def test_iframe(self):
        """
        Basic test for passing through iframe, but stripping bad attr
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.iframe_dirty), self.iframe_clean)

    def test_text(self):
        """
        Test for passing through text unchanged, including unicode
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.text), self.text)

    def test_lessthan(self):
        """
        Tests that `<` in text context is handled properly
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.text_lessthan_noencd), self.text_lessthan_encode)

    def test_linebreaks(self):
        """
        tests the replace_newlines function
        """
        self.assertEqual(OpenEndedChild.replace_newlines(self.text_linebreaks), self.text_brs)

    def test_linkify(self):
        """
        tests that bare urls in text are turned into <a> tags with target="_blank"
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.link_text), self.link_atag)

    def test_combined(self):
        """
        tests a combination of inputs
        """
        test_input = u"{}\n{}\n{}\n\n{}{}\n{}".format(
            self.link_text,
            self.text,
            self.script_dirty,
            self.embed_dirty,
            self.text_lessthan_noencd,
            self.img_dirty,
        )
        test_output = u"{}<br/>{}<br/>{}<br/><br/>{}{}<br/>{}".format(
            self.link_atag,
            self.text,
            self.script_clean,
            self.embed_clean,
            self.text_lessthan_encode,
            self.img_clean,
        )
        self.assertEqual(OpenEndedChild.sanitize_html(test_input), test_output)
requirements/edx/base.txt
View file @
6e1b8749
...
...
@@ -8,6 +8,8 @@
beautifulsoup4==4.1.3
beautifulsoup==3.2.1
bleach==1.2.2
html5lib==0.95
boto==2.6.0
celery==3.0.19
dealer==0.2.3
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment