Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
45832b98
Commit
45832b98
authored
Aug 24, 2015
by
Marko Jevtić
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #9283 from edx/mjevtic/SOL-1040
(SOL-1040) Indexing capa problems
parents
7b346f7d
8faff973
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
162 additions
and
18 deletions
+162
-18
common/lib/xmodule/xmodule/capa_module.py
+28
-9
common/lib/xmodule/xmodule/html_module.py
+14
-7
common/lib/xmodule/xmodule/tests/test_capa_module.py
+0
-0
common/lib/xmodule/xmodule/tests/test_html_module.py
+25
-1
common/lib/xmodule/xmodule/tests/test_utils_escape_html_characters.py
+60
-0
common/lib/xmodule/xmodule/util/misc.py
+34
-0
common/test/acceptance/tests/lms/test_lms_dashboard_search.py
+1
-1
No files found.
common/lib/xmodule/xmodule/capa_module.py
View file @
45832b98
...
...
@@ -2,6 +2,7 @@
import
json
import
logging
import
sys
import
re
from
lxml
import
etree
from
pkg_resources
import
resource_string
...
...
@@ -10,6 +11,7 @@ import dogstats_wrapper as dog_stats_api
from
.capa_base
import
CapaMixin
,
CapaFields
,
ComplexEncoder
from
capa
import
responsetypes
from
.progress
import
Progress
from
xmodule.util.misc
import
escape_html_characters
from
xmodule.x_module
import
XModule
,
module_attr
,
DEPRECATION_VSCOMPAT_EVENT
from
xmodule.raw_module
import
RawDescriptor
from
xmodule.exceptions
import
NotFoundError
,
ProcessingError
...
...
@@ -193,16 +195,33 @@ class CapaDescriptor(CapaFields, RawDescriptor):
"""
Return dictionary prepared with module content and type for indexing.
"""
result
=
super
(
CapaDescriptor
,
self
)
.
index_dictionary
()
if
not
result
:
result
=
{}
index
=
{
'content_type'
:
self
.
INDEX_CONTENT_TYPE
,
'problem_types'
:
list
(
self
.
problem_types
),
"display_name"
:
self
.
display_name
xblock_body
=
super
(
CapaDescriptor
,
self
)
.
index_dictionary
()
# Removing solutions and hints, as well as script and style
capa_content
=
re
.
sub
(
re
.
compile
(
r"""
<solution>.*?</solution> |
<script>.*?</script> |
<style>.*?</style> |
<[a-z]*hint.*?>.*?</[a-z]*hint>
"""
,
re
.
DOTALL
|
re
.
VERBOSE
),
""
,
self
.
data
)
capa_content
=
escape_html_characters
(
capa_content
)
capa_body
=
{
"capa_content"
:
capa_content
,
"display_name"
:
self
.
display_name
,
}
result
.
update
(
index
)
return
result
if
"content"
in
xblock_body
:
xblock_body
[
"content"
]
.
update
(
capa_body
)
else
:
xblock_body
[
"content"
]
=
capa_body
xblock_body
[
"content_type"
]
=
self
.
INDEX_CONTENT_TYPE
xblock_body
[
"problem_types"
]
=
list
(
self
.
problem_types
)
return
xblock_body
def
has_support
(
self
,
view
,
functionality
):
"""
...
...
common/lib/xmodule/xmodule/html_module.py
View file @
45832b98
...
...
@@ -10,7 +10,7 @@ from fs.errors import ResourceNotFoundError
from
pkg_resources
import
resource_string
import
dogstats_wrapper
as
dog_stats_api
from
xmodule.
annotator_mixin
import
html_to_text
from
xmodule.
util.misc
import
escape_html_characters
from
xmodule.contentstore.content
import
StaticContent
from
xmodule.editing_module
import
EditingDescriptor
from
xmodule.edxnotes_utils
import
edxnotes
...
...
@@ -275,12 +275,19 @@ class HtmlDescriptor(HtmlFields, XmlDescriptor, EditingDescriptor): # pylint: d
def
index_dictionary
(
self
):
xblock_body
=
super
(
HtmlDescriptor
,
self
)
.
index_dictionary
()
# Removing HTML-encoded non-breaking space characters
html_content
=
re
.
sub
(
r"(\s| |//)+"
,
" "
,
html_to_text
(
self
.
data
))
# Removing HTML CDATA
html_content
=
re
.
sub
(
r"<!\[CDATA\[.*\]\]>"
,
""
,
html_content
)
# Removing HTML comments
html_content
=
re
.
sub
(
r"<!--.*-->"
,
""
,
html_content
)
# Removing script and style
html_content
=
re
.
sub
(
re
.
compile
(
r"""
<script>.*?</script> |
<style>.*?</style>
"""
,
re
.
DOTALL
|
re
.
VERBOSE
),
""
,
self
.
data
)
html_content
=
escape_html_characters
(
html_content
)
html_body
=
{
"html_content"
:
html_content
,
"display_name"
:
self
.
display_name
,
...
...
common/lib/xmodule/xmodule/tests/test_capa_module.py
View file @
45832b98
This diff is collapsed.
Click to expand it.
common/lib/xmodule/xmodule/tests/test_html_module.py
View file @
45832b98
...
...
@@ -59,7 +59,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
Make sure that HtmlDescriptor can format data for indexing as expected.
"""
def
test_index_dictionary
(
self
):
def
test_index_dictionary
_simple_html_module
(
self
):
sample_xml
=
'''
<html>
<p>Hello World!</p>
...
...
@@ -71,6 +71,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
"content_type"
:
"Text"
})
def
test_index_dictionary_cdata_html_module
(
self
):
sample_xml_cdata
=
'''
<html>
<p>This has CDATA in it.</p>
...
...
@@ -83,6 +84,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
"content_type"
:
"Text"
})
def
test_index_dictionary_multiple_spaces_html_module
(
self
):
sample_xml_tab_spaces
=
'''
<html>
<p> Text has spaces :) </p>
...
...
@@ -94,6 +96,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
"content_type"
:
"Text"
})
def
test_index_dictionary_html_module_with_comment
(
self
):
sample_xml_comment
=
'''
<html>
<p>This has HTML comment in it.</p>
...
...
@@ -106,6 +109,7 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
"content_type"
:
"Text"
})
def
test_index_dictionary_html_module_with_both_comments_and_cdata
(
self
):
sample_xml_mix_comment_cdata
=
'''
<html>
<!-- Beginning of the html -->
...
...
@@ -120,3 +124,23 @@ class HtmlDescriptorIndexingTestCase(unittest.TestCase):
"content"
:
{
"html_content"
:
" This has HTML comment in it. HTML end. "
,
"display_name"
:
"Text"
},
"content_type"
:
"Text"
})
def test_index_dictionary_html_module_with_script_and_style_tags(self):
    """
    Index an HTML module that mixes <style>, <script>, comments and CDATA.

    Only the visible paragraph text is expected to end up in "html_content".
    """
    markup = '''
<html>
<style>p {color: green;}</style>
<!-- Beginning of the html -->
<p>This has HTML comment in it.<!-- Commenting Content --></p>
<!-- Here comes CDATA -->
<![CDATA[This is just a CDATA!]]>
<p>HTML end.</p>
<script>
var message = "Hello world!"
</script>
</html>
'''
    indexed = instantiate_descriptor(data=markup).index_dictionary()
    expected_index = {
        "content": {
            "html_content": " This has HTML comment in it. HTML end. ",
            "display_name": "Text",
        },
        "content_type": "Text",
    }
    self.assertEqual(indexed, expected_index)
common/lib/xmodule/xmodule/tests/test_utils_escape_html_characters.py
0 → 100644
View file @
45832b98
"""Tests for methods defined in util/misc.py"""
from
xmodule.util.misc
import
escape_html_characters
from
unittest
import
TestCase
class UtilHtmlEscapeTests(TestCase):
    """
    Checks for escape_html_characters from util/misc.

    Every case feeds a different kind of markup through the function and
    expects the same cleaned-up paragraph text back.
    """

    final_content = " This is a paragraph. "

    def test_escape_html_comments(self):
        """An HTML comment in front of the paragraph is dropped."""
        markup = """
<!--This is a comment. Comments are not displayed in the browser-->
This is a paragraph.
"""
        escaped = escape_html_characters(markup)
        self.assertEqual(escaped, self.final_content)

    def test_escape_cdata_comments(self):
        """A CDATA section in front of the paragraph is dropped."""
        markup = """
<![CDATA[
function matchwo(a,b)
{
if (a < b && a < 0) then
{
return 1;
}
else
{
return 0;
}
}
]]>
This is a paragraph.
"""
        escaped = escape_html_characters(markup)
        self.assertEqual(escaped, self.final_content)

    def test_escape_non_breaking_space(self):
        """Markup for the non-breaking-space case yields the plain paragraph."""
        # NOTE(review): as visible here this markup is identical to the CDATA
        # case above; presumably the original carried HTML-encoded
        # non-breaking spaces around the paragraph -- confirm against the
        # repository before relying on this test for that behaviour.
        markup = """
<![CDATA[
function matchwo(a,b)
{
if (a < b && a < 0) then
{
return 1;
}
else
{
return 0;
}
}
]]>
This is a paragraph.
"""
        escaped = escape_html_characters(markup)
        self.assertEqual(escaped, self.final_content)
common/lib/xmodule/xmodule/util/misc.py
View file @
45832b98
"""
Miscellaneous utility functions.
"""
import
re
from
xmodule.annotator_mixin
import
html_to_text
def
escape_invalid_characters
(
name
,
invalid_char_list
,
replace_with
=
'_'
):
...
...
@@ -24,3 +27,34 @@ def escape_invalid_characters(name, invalid_char_list, replace_with='_'):
if
char
in
name
:
name
=
name
.
replace
(
char
,
replace_with
)
return
name
def escape_html_characters(content):
    """
    Remove HTML characters that shouldn't be indexed using ElasticSearch indexer.

    This method is complementary to the html_to_text method found in
    xmodule/annotator_mixin.py: the content is passed through html_to_text
    first, and the result is then cleaned of whitespace runs, CDATA sections
    and HTML comments.

    Args:
        content (str): variable to escape html characters from

    Returns:
        content (str): content ready to be indexed by ElasticSearch
    """
    text = html_to_text(content)

    # Removing HTML-encoded non-breaking space characters: every run of
    # whitespace, "&nbsp;" entities and "//" collapses into a single space.
    # This has to run first -- it also removes all newlines, which is what
    # lets the "." in the patterns below span markup that was multi-line.
    text = re.sub(r"(\s|&nbsp;|//)+", " ", text)

    # Removing HTML CDATA. The match is non-greedy so that text sitting
    # between two separate CDATA sections is kept rather than swallowed by a
    # single match running from the first opener to the last closer.
    text = re.sub(r"<!\[CDATA\[.*?\]\]>", "", text)

    # Removing HTML comments (non-greedy for the same reason as above).
    return re.sub(r"<!--.*?-->", "", text)
common/test/acceptance/tests/lms/test_lms_dashboard_search.py
View file @
45832b98
...
...
@@ -80,7 +80,7 @@ class DashboardSearchTest(WebAppTest):
course_fix
.
add_children
(
XBlockFixtureDesc
(
'chapter'
,
'Section 1'
)
.
add_children
(
XBlockFixtureDesc
(
'sequential'
,
'Subsection 1'
)
.
add_children
(
XBlockFixtureDesc
(
'problem'
,
'
dashboard search
'
)
XBlockFixtureDesc
(
'problem'
,
'
Test Problem
'
)
)
)
)
.
add_children
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment