Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
6e1b8749
Commit
6e1b8749
authored
Oct 22, 2013
by
Jason Bau
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1440 from edx/jbau/COE-sanitize-with-bleach
Jbau/coe sanitize with bleach
parents
9936fc0a
651ef209
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
118 additions
and
35 deletions
+118
-35
common/lib/xmodule/xmodule/open_ended_grading_classes/openendedchild.py
+27
-35
common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
+89
-0
requirements/edx/base.txt
+2
-0
No files found.
common/lib/xmodule/xmodule/open_ended_grading_classes/openendedchild.py
View file @
6e1b8749
import
json
import
json
import
logging
import
logging
from
lxml.html.clean
import
Cleaner
,
autolink_html
import
re
import
re
import
bleach
from
html5lib.tokenizer
import
HTMLTokenizer
from
xmodule.progress
import
Progress
from
xmodule.progress
import
Progress
import
capa.xqueue_interface
as
xqueue_interface
import
capa.xqueue_interface
as
xqueue_interface
from
capa.util
import
*
from
capa.util
import
*
...
@@ -50,24 +51,14 @@ def upload_to_s3(file_to_upload, keyname, s3_interface):
...
@@ -50,24 +51,14 @@ def upload_to_s3(file_to_upload, keyname, s3_interface):
return
public_url
return
public_url
class
WhiteListCleaner
(
Cleaner
):
# Used by sanitize_html
"""
ALLOWED_HTML_ATTRS
=
{
By default, lxml cleaner strips out all links that are not in a defined whitelist.
'*'
:
[
'id'
,
'class'
,
'height'
,
'width'
,
'alt'
],
We want to allow all links, and rely on the peer grading flagging mechanic to catch
'a'
:
[
'href'
,
'title'
,
'rel'
,
'target'
],
the "bad" ones. So, don't define a whitelist at all.
'embed'
:
[
'src'
],
"""
'iframe'
:
[
'src'
],
def
allow_embedded_url
(
self
,
el
,
url
):
'img'
:
[
'src'
],
"""
}
Override the Cleaner allow_embedded_url method to remove the whitelist url requirement.
Ensure that any tags not in the whitelist are stripped beforehand.
"""
# Tell cleaner to strip any element with a tag that isn't whitelisted.
if
self
.
whitelist_tags
is
not
None
and
el
.
tag
not
in
self
.
whitelist_tags
:
return
False
# Tell cleaner to allow all urls.
return
True
class
OpenEndedChild
(
object
):
class
OpenEndedChild
(
object
):
...
@@ -228,22 +219,23 @@ class OpenEndedChild(object):
...
@@ -228,22 +219,23 @@ class OpenEndedChild(object):
answer - any string
answer - any string
return - a cleaned version of the string
return - a cleaned version of the string
"""
"""
try
:
clean_html
=
bleach
.
clean
(
answer
,
answer
=
autolink_html
(
answer
)
tags
=
[
'embed'
,
'iframe'
,
'a'
,
'img'
,
'br'
],
cleaner
=
WhiteListCleaner
(
attributes
=
ALLOWED_HTML_ATTRS
,
style
=
True
,
strip
=
True
)
links
=
True
,
autolinked
=
bleach
.
linkify
(
clean_html
,
add_nofollow
=
False
,
callbacks
=
[
bleach
.
callbacks
.
target_blank
],
page_structure
=
True
,
skip_pre
=
True
,
safe_attrs_only
=
True
,
tokenizer
=
HTMLTokenizer
)
whitelist_tags
=
(
'embed'
,
'iframe'
,
'a'
,
'img'
,
'br'
,)
return
OpenEndedChild
.
replace_newlines
(
autolinked
)
)
clean_html
=
cleaner
.
clean_html
(
answer
)
@staticmethod
clean_html
=
re
.
sub
(
r'</p>$'
,
''
,
re
.
sub
(
r'^<p>'
,
''
,
clean_html
))
def
replace_newlines
(
html
):
clean_html
=
re
.
sub
(
"
\n
"
,
"<br/>"
,
clean_html
)
"""
except
Exception
:
Replaces "
\n
" newlines with <br/>
clean_html
=
answer
"""
return
clean_html
retv
=
re
.
sub
(
r'</p>$'
,
''
,
re
.
sub
(
r'^<p>'
,
''
,
html
))
return
re
.
sub
(
"
\n
"
,
"<br/>"
,
retv
)
def
new_history_entry
(
self
,
answer
):
def
new_history_entry
(
self
,
answer
):
"""
"""
...
...
common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
View file @
6e1b8749
...
@@ -1001,3 +1001,92 @@ class OpenEndedModuleXmlImageUploadTest(unittest.TestCase, DummyModulestore):
...
@@ -1001,3 +1001,92 @@ class OpenEndedModuleXmlImageUploadTest(unittest.TestCase, DummyModulestore):
self
.
assertTrue
(
response
[
'success'
])
self
.
assertTrue
(
response
[
'success'
])
self
.
assertIn
(
self
.
answer_link
,
response
[
'student_response'
])
self
.
assertIn
(
self
.
answer_link
,
response
[
'student_response'
])
self
.
assertIn
(
self
.
autolink_tag
,
response
[
'student_response'
])
self
.
assertIn
(
self
.
autolink_tag
,
response
[
'student_response'
])
class OpenEndedModuleUtilTest(unittest.TestCase):
    """
    Tests for the util functions of OpenEndedModule.

    Covers the two static helpers called below: ``OpenEndedChild.sanitize_html``
    and ``OpenEndedChild.replace_newlines`` (the ``<br/>`` inserter).

    Each ``*_dirty`` fixture is an input and the matching ``*_clean`` fixture
    is the exact output expected from ``sanitize_html``.
    """
    # <script> is not an allowed tag: the tag is removed, its text is kept.
    script_dirty = u'<script>alert("xss!")</script>'
    script_clean = u'alert("xss!")'

    # Allowed tags keep their markup but lose event-handler attributes
    # (onclick / onhover / onerror).
    img_dirty = u'<img alt="cats" height="200" onclick="eval()" src="http://example.com/lolcats.jpg" width="200">'
    img_clean = u'<img alt="cats" height="200" src="http://example.com/lolcats.jpg" width="200">'
    embed_dirty = u'<embed height="200" id="cats" onhover="eval()" src="http://example.com/lolcats.swf" width="200"/>'
    embed_clean = u'<embed height="200" id="cats" src="http://example.com/lolcats.swf" width="200">'
    iframe_dirty = u'<iframe class="cats" height="200" onerror="eval()" src="http://example.com/lolcats" width="200"/>'
    iframe_clean = u'<iframe class="cats" height="200" src="http://example.com/lolcats" width="200"></iframe>'

    # Plain text containing non-ASCII characters must come back unchanged.
    text = u'I am a \u201c\xfcber student\u201d'

    # A bare `<` / `>` in text context is expected to be entity-escaped rather
    # than treated as markup, so the expected value must differ from the input.
    text_lessthan_noencd = u'This used to be broken < by the other parser. 3>5'
    text_lessthan_encode = u'This used to be broken &lt; by the other parser. 3&gt;5'

    # Newline handling for replace_newlines.
    text_linebreaks = u"St\xfcdent submission:\nI like lamp."
    text_brs = u"St\xfcdent submission:<br/>I like lamp."

    # A bare URL is expected to be wrapped in an anchor that opens in a new tab.
    link_text = u'I love going to www.lolcatz.com'
    link_atag = u'I love going to <a href="http://www.lolcatz.com" target="_blank">www.lolcatz.com</a>'

    def test_script(self):
        """
        Basic test for stripping <script>
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.script_dirty), self.script_clean)

    def test_img(self):
        """
        Basic test for passing through img, but stripping bad attr
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.img_dirty), self.img_clean)

    def test_embed(self):
        """
        Basic test for passing through embed, but stripping bad attr
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.embed_dirty), self.embed_clean)

    def test_iframe(self):
        """
        Basic test for passing through iframe, but stripping bad attr
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.iframe_dirty), self.iframe_clean)

    def test_text(self):
        """
        Test for passing through text unchanged, including unicode
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.text), self.text)

    def test_lessthan(self):
        """
        Tests that `<` in text context is handled properly
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.text_lessthan_noencd), self.text_lessthan_encode)

    def test_linebreaks(self):
        """
        tests the replace_newlines function
        """
        self.assertEqual(OpenEndedChild.replace_newlines(self.text_linebreaks), self.text_brs)

    def test_linkify(self):
        """
        tests that sanitize_html turns a bare URL in text into an <a> tag
        """
        self.assertEqual(OpenEndedChild.sanitize_html(self.link_text), self.link_atag)

    def test_combined(self):
        """
        tests a combination of inputs
        """
        # Input and expected output share the same layout; every "\n" in the
        # input corresponds to one "<br/>" in the output.
        test_input = u"{}\n{}\n{}\n\n{}{}\n{}".format(
            self.link_text,
            self.text,
            self.script_dirty,
            self.embed_dirty,
            self.text_lessthan_noencd,
            self.img_dirty,
        )
        test_output = u"{}<br/>{}<br/>{}<br/><br/>{}{}<br/>{}".format(
            self.link_atag,
            self.text,
            self.script_clean,
            self.embed_clean,
            self.text_lessthan_encode,
            self.img_clean,
        )
        self.assertEqual(OpenEndedChild.sanitize_html(test_input), test_output)
requirements/edx/base.txt
View file @
6e1b8749
...
@@ -8,6 +8,8 @@
...
@@ -8,6 +8,8 @@
beautifulsoup4==4.1.3
beautifulsoup4==4.1.3
beautifulsoup==3.2.1
beautifulsoup==3.2.1
bleach==1.2.2
html5lib==0.95
boto==2.6.0
boto==2.6.0
celery==3.0.19
celery==3.0.19
dealer==0.2.3
dealer==0.2.3
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment