Commit a2c686cf by Robert Raposa

Merge pull request #12122 from edx/robrap/linter-mako

TNL-4324: Add check for strings that should be wrapped with HTML()
parents 56ca0e90 70a64aee
......@@ -242,6 +242,10 @@ class Rules(Enum):
'mako-html-alone',
"Only use HTML() alone with properly escaped HTML(), and make sure it is really alone."
)
mako_wrap_html = (
'mako-wrap-html',
"String containing HTML should be wrapped with call to HTML()."
)
underscore_not_escaped = (
'underscore-not-escaped',
'Expressions should be escaped using <%- expression %>.'
......@@ -491,6 +495,110 @@ class FileResults(object):
violation.print_results(out)
class ParseString(object):
"""
ParseString is the result of parsing a string out of a template.
A ParseString has the following attributes:
start_index: The index of the first quote, or -1 if none found
end_index: The index following the closing quote, or -1 if
unparseable
quote_length: The length of the quote. Could be 3 for a Python
triple quote. Or None if none found.
string: the text of the parsed string, or None if none found.
string_inner: the text inside the quotes of the parsed string, or None
if none found.
"""
def __init__(self, template, start_index, end_index):
"""
Init method.
Arguments:
template: The template to be searched.
start_index: The start index to search.
end_index: The end index to search before.
"""
self.end_index = -1
self.quote_length = None
self.string = None
self.string_inner = None
self.start_index = self._find_string_start(template, start_index, end_index)
if 0 <= self.start_index:
result = self._parse_string(template, self.start_index)
if result is not None:
self.end_index = result['end_index']
self.quote_length = result['quote_length']
self.string = result['string']
self.string_inner = result['string_inner']
def _find_string_start(self, template, start_index, end_index):
"""
Finds the index of the end of start of a string. In other words, the
first single or double quote.
Arguments:
template: The template to be searched.
start_index: The start index to search.
end_index: The end index to search before.
Returns:
The start index of the first single or double quote, or -1 if
no quote was found.
"""
quote_regex = re.compile(r"""['"]""")
start_match = quote_regex.search(template, start_index, end_index)
if start_match is None:
return -1
else:
return start_match.start()
def _parse_string(self, template, start_index):
"""
Finds the indices of a string inside a template.
Arguments:
template: The template to be searched.
start_index: The start index of the open quote.
Returns:
A dict containing the following, or None if not parseable:
end_index: The index following the closing quote
quote_length: The length of the quote. Could be 3 for a Python
triple quote.
string: the text of the parsed string
string_inner: the text inside the quotes of the parsed string
"""
quote = template[start_index]
if quote not in ["'", '"']:
raise ValueError("start_index must refer to a single or double quote.")
triple_quote = quote * 3
if template.startswith(triple_quote, start_index):
quote = triple_quote
next_start_index = start_index + len(quote)
while True:
quote_end_index = template.find(quote, next_start_index)
backslash_index = template.find("\\", next_start_index)
if quote_end_index < 0:
return None
if 0 <= backslash_index < quote_end_index:
next_start_index = backslash_index + 2
else:
end_index = quote_end_index + len(quote)
quote_length = len(quote)
string = template[start_index:end_index]
return {
'end_index': end_index,
'quote_length': quote_length,
'string': string,
'string_inner': string[quote_length:-quote_length],
}
class MakoTemplateLinter(object):
"""
The linter for Mako template files.
......@@ -675,12 +783,6 @@ class MakoTemplateLinter(object):
"""
Checks rules related to proper use of HTML() and Text().
Rule 1: If HTML() is called, the expression must begin with Text(), or
Rule 2: If HTML() is called alone, it must be the only call.
Rule 3: Both HTML() and Text() must be closed before any call to
format().
Rule 4: Using Text() without HTML() is unnecessary.
Arguments:
expression: A dict containing the start_index, end_index, and
expression (text) of the expression.
......@@ -689,10 +791,15 @@ class MakoTemplateLinter(object):
"""
# strip '${' and '}' and whitespace from ends
expression_inner = expression['expression'][2:-1].strip()
# find the template relative inner expression start index
# - find used to take into account above strip()
template_inner_start_index = expression['start_index'] + expression['expression'].find(expression_inner)
if 'HTML(' in expression_inner:
if expression_inner.startswith('HTML('):
close_paren_index = self._find_closing_char_index(None, "(", ")", expression_inner, len('HTML('), 0)
# check that the close paren is at the end of the expression.
close_paren_index = self._find_closing_char_index(
None, "(", ")", expression_inner, start_index=len('HTML('), num_open_chars=0, strings=[]
)['close_char_index']
# check that the close paren is at the end of the stripped expression.
if close_paren_index != len(expression_inner) - 1:
results.violations.append(ExpressionRuleViolation(
Rules.mako_html_alone, expression
......@@ -707,8 +814,13 @@ class MakoTemplateLinter(object):
Rules.mako_text_redundant, expression
))
for match in re.finditer("(HTML\(|Text\()", expression_inner):
close_paren_index = self._find_closing_char_index(None, "(", ")", expression_inner, match.end(), 0)
# strings to be checked for HTML
unwrapped_html_strings = expression['strings']
for match in re.finditer(r"(HTML\(|Text\()", expression_inner):
result = self._find_closing_char_index(
None, "(", ")", expression_inner, start_index=match.end(), num_open_chars=0, strings=[]
)
close_paren_index = result['close_char_index']
if 0 <= close_paren_index:
# the argument sent to HTML() or Text()
argument = expression_inner[match.end():close_paren_index]
......@@ -716,6 +828,21 @@ class MakoTemplateLinter(object):
results.violations.append(ExpressionRuleViolation(
Rules.mako_close_before_format, expression
))
if match.group() == "HTML(":
# remove expression strings wrapped in HTML()
for string in list(unwrapped_html_strings):
html_inner_start_index = template_inner_start_index + match.end()
html_inner_end_index = template_inner_start_index + close_paren_index
if html_inner_start_index <= string.start_index and string.end_index <= html_inner_end_index:
unwrapped_html_strings.remove(string)
# check strings not wrapped in HTML()
for string in unwrapped_html_strings:
if '<' in string.string_inner:
results.violations.append(ExpressionRuleViolation(
Rules.mako_wrap_html, expression
))
break
def _check_filters(self, mako_template, expression, context, has_page_default, results):
"""
......@@ -744,7 +871,10 @@ class MakoTemplateLinter(object):
return
filters = filters_match.group()[1:-1].replace(" ", "").split(",")
if context == 'html':
if (len(filters) == 2) and (filters[0] == 'n') and (filters[1] == 'unicode'):
# {x | n, unicode} is valid in any context
pass
elif context == 'html':
if (len(filters) == 1) and (filters[0] == 'h'):
if has_page_default:
# suppress this violation if the page default hasn't been set,
......@@ -849,9 +979,11 @@ class MakoTemplateLinter(object):
following:
start_index: The index of the start of the expression.
end_index: The index of the end of the expression, or -1 if
unparseable.
end_index: The index immediately following the expression, or -1
if unparseable.
expression: The text of the expression.
strings: a list of ParseStrings
"""
start_delim = '${'
start_index = 0
......@@ -861,26 +993,33 @@ class MakoTemplateLinter(object):
start_index = mako_template.find(start_delim, start_index)
if start_index < 0:
break
end_index = self._find_closing_char_index(start_delim, '{', '}', mako_template, start_index + len(start_delim), 0)
if end_index < 0:
result = self._find_closing_char_index(
start_delim, '{', '}', mako_template, start_index=start_index + len(start_delim),
num_open_chars=0, strings=[]
)
close_char_index = result['close_char_index']
if close_char_index < 0:
expression = None
else:
expression = mako_template[start_index:end_index + 1]
expression = mako_template[start_index:close_char_index + 1]
expression = {
'start_index': start_index,
'end_index': end_index,
'expression': expression
'end_index': close_char_index + 1,
'expression': expression,
'strings': result['strings'],
}
expressions.append(expression)
# end_index of -1 represents a parsing error and we may find others
start_index = max(start_index + len(start_delim), end_index)
start_index = max(start_index + len(start_delim), close_char_index)
return expressions
def _find_closing_char_index(self, start_delim, open_char, close_char, template, start_index, num_open_chars):
def _find_closing_char_index(
self, start_delim, open_char, close_char, template, start_index, num_open_chars, strings
):
"""
Finds the index of the closing char that matches the opening char.
......@@ -895,102 +1034,61 @@ class MakoTemplateLinter(object):
template: The template to be searched.
start_index: The start index of the last open char.
num_open_chars: The current number of open chars.
strings: A list of ParseStrings already parsed
Returns:
The index of the closing character, or -1 if unparseable.
A dict containing the following:
close_char_index: The index of the closing character, or -1 if
unparseable.
strings: a list of ParseStrings
"""
unparseable_result = {'close_char_index': -1, 'strings': []}
close_char_index = template.find(close_char, start_index)
if close_char_index < 0:
# if we can't find an end_char, let's just quit
return -1
return unparseable_result
open_char_index = template.find(open_char, start_index, close_char_index)
start_quote_index = self._find_string_start(template, start_index, close_char_index)
if 0 <= start_quote_index:
string_end_index = self._parse_string(template, start_quote_index)['end_index']
if string_end_index < 0:
return -1
parse_string = ParseString(template, start_index, close_char_index)
valid_index_list = [close_char_index]
if 0 <= open_char_index:
valid_index_list.append(open_char_index)
if 0 <= parse_string.start_index:
valid_index_list.append(parse_string.start_index)
min_valid_index = min(valid_index_list)
if parse_string.start_index == min_valid_index:
strings.append(parse_string)
if parse_string.end_index < 0:
return unparseable_result
else:
return self._find_closing_char_index(start_delim, open_char, close_char, template, string_end_index, num_open_chars)
return self._find_closing_char_index(
start_delim, open_char, close_char, template, start_index=parse_string.end_index,
num_open_chars=num_open_chars, strings=strings
)
if (open_char_index >= 0) and (open_char_index < close_char_index):
if open_char_index == min_valid_index:
if start_delim is not None:
# if we find another starting delim, consider this unparseable
start_delim_index = template.find(start_delim, start_index, close_char_index)
if start_delim_index < open_char_index:
return -1
return self._find_closing_char_index(start_delim, open_char, close_char, template, open_char_index + 1, num_open_chars + 1)
if 0 <= start_delim_index < open_char_index:
return unparseable_result
return self._find_closing_char_index(
start_delim, open_char, close_char, template, start_index=open_char_index + 1,
num_open_chars=num_open_chars + 1, strings=strings
)
if num_open_chars == 0:
return close_char_index
return {
'close_char_index': close_char_index,
'strings': strings,
}
else:
return self._find_closing_char_index(start_delim, open_char, close_char, template, close_char_index + 1, num_open_chars - 1)
def _find_string_start(self, template, start_index, end_index):
"""
Finds the index of the end of start of a string. In other words, the
first single or double quote.
Arguments:
template: The template to be searched.
start_index: The start index to search.
end_index: The end index to search before.
num_open_chars: The current number of open expressions.
Returns:
The start index of the first single or double quote, or -1 if
no quote was found.
"""
double_quote_index = template.find('"', start_index, end_index)
single_quote_index = template.find("'", start_index, end_index)
if 0 <= single_quote_index or 0 <= double_quote_index:
if 0 <= single_quote_index and 0 <= double_quote_index:
return min(single_quote_index, double_quote_index)
else:
return max(single_quote_index, double_quote_index)
return -1
def _parse_string(self, template, start_index):
"""
Finds the indices of a string inside a template.
Arguments:
template: The template to be searched.
start_index: The start index of the open quote.
Returns:
A dict containing the following:
start_index: The index of the first quote.
end_index: The index following the closing quote, or -1 if
unparseable
quote_length: The length of the quote. Could be 3 for a Python
triple quote.
"""
quote = template[start_index]
if quote not in ["'", '"']:
raise ValueError("start_index must refer to a single or double quote.")
triple_quote = quote * 3
if template.startswith(triple_quote, start_index):
quote = triple_quote
result = {
'start_index': start_index,
'end_index': -1,
'quote_length': len(quote),
}
start_index += len(quote)
while True:
quote_end_index = template.find(quote, start_index)
backslash_index = template.find("\\", start_index)
if quote_end_index < 0:
return result
if 0 <= backslash_index < quote_end_index:
start_index = backslash_index + 2
else:
result['end_index'] = quote_end_index + len(quote)
return result
return self._find_closing_char_index(
start_delim, open_char, close_char, template, start_index=close_char_index + 1,
num_open_chars=num_open_chars - 1, strings=strings
)
class UnderscoreTemplateLinter(object):
......
......@@ -9,7 +9,7 @@ import textwrap
from unittest import TestCase
from ..safe_template_linter import (
_process_os_walk, FileResults, MakoTemplateLinter, UnderscoreTemplateLinter, Rules
_process_os_walk, FileResults, MakoTemplateLinter, ParseString, UnderscoreTemplateLinter, Rules
)
......@@ -113,6 +113,7 @@ class TestMakoTemplateLinter(TestCase):
{'expression': '${x}', 'rule': None},
{'expression': '${{unbalanced}', 'rule': Rules.mako_unparseable_expression},
{'expression': '${x | n}', 'rule': Rules.mako_invalid_html_filter},
{'expression': '${x | n, unicode}', 'rule': None},
{'expression': '${x | h}', 'rule': Rules.mako_unwanted_html_filter},
{'expression': '${x | n, dump_js_escaped_json}', 'rule': Rules.mako_invalid_html_filter},
)
......@@ -203,15 +204,55 @@ class TestMakoTemplateLinter(TestCase):
'rule': Rules.mako_close_before_format
},
{
'expression': """${ Text("text") }""",
'expression':
textwrap.dedent("""
${"Mixed {span_start}text{span_end}".format(
span_start="<span>",
span_end="</span>",
)}
"""),
'rule': Rules.mako_wrap_html
},
{
'expression':
textwrap.dedent("""
${Text(_("String with multiple lines "
"{link_start}unenroll{link_end} "
"and final line")).format(
link_start=HTML(
'<a id="link__over_multiple_lines" '
'data-course-id="{course_id}" '
'href="#test-modal">'
).format(
course_id=course_overview.id
),
link_end=HTML('</a>'),
)}
"""),
'rule': None
},
{
'expression': "${'<span></span>'}",
'rule': Rules.mako_wrap_html
},
{
'expression': "${'Embedded HTML <strong></strong>'}",
'rule': Rules.mako_wrap_html
},
{
'expression': "${ Text('text') }",
'rule': Rules.mako_text_redundant
},
{
'expression': """${ HTML("<span></span>") }""",
'expression': "${ HTML('<span></span>') }",
'rule': None
},
{
'expression': """${ HTML("<span></span>") + "some other text" }""",
'expression': "${HTML(render_entry(map['entries'], child))}",
'rule': None
},
{
'expression': "${ HTML('<span></span>') + 'some other text' }",
'rule': Rules.mako_html_alone
},
)
......@@ -309,6 +350,7 @@ class TestMakoTemplateLinter(TestCase):
{'expression': '${x | n}', 'rule': Rules.mako_invalid_js_filter},
{'expression': '${x | h}', 'rule': Rules.mako_invalid_js_filter},
{'expression': '${x | n, dump_js_escaped_json}', 'rule': None},
{'expression': '${x | n, unicode}', 'rule': None},
)
def test_check_mako_expressions_in_javascript(self, data):
"""
......@@ -440,7 +482,7 @@ class TestMakoTemplateLinter(TestCase):
self.assertTrue(lines[violation.start_line - 1].startswith("${", violation.start_column - 1))
if data['parseable']:
self.assertTrue("}" in lines[violation.end_line - 1])
self.assertTrue(lines[violation.end_line - 1].startswith("}", violation.end_column - 1))
self.assertTrue(lines[violation.end_line - 1].startswith("}", violation.end_column - len("}") - 1))
else:
self.assertEqual(violation.start_line, violation.end_line)
self.assertEqual(violation.end_column, "?")
......@@ -468,14 +510,14 @@ class TestMakoTemplateLinter(TestCase):
self.assertEqual(len(expressions), 1)
start_index = expressions[0]['start_index']
end_index = expressions[0]['end_index']
self.assertEqual(data['template'][start_index:end_index + 1], data['template'].strip())
self.assertEqual(data['template'][start_index:end_index], data['template'].strip())
self.assertEqual(expressions[0]['expression'], data['template'].strip())
@data(
{'template': " ${{unparseable} ${}", 'start_index': 1},
{'template': " ${'unparseable} ${}", 'start_index': 1},
)
def test_find_mako_expressions(self, data):
def test_find_unparseable_mako_expressions(self, data):
"""
Test _find_mako_expressions for unparseable expressions
"""
......@@ -487,29 +529,15 @@ class TestMakoTemplateLinter(TestCase):
self.assertIsNone(expressions[0]['expression'])
@data(
{'template': """${""}""", 'start_index': 0, 'end_index': 5, 'expected_index': 2},
{'template': """${''}""", 'start_index': 0, 'end_index': 5, 'expected_index': 2},
{'template': """${"''"}""", 'start_index': 0, 'end_index': 7, 'expected_index': 2},
{'template': """${'""'}""", 'start_index': 0, 'end_index': 7, 'expected_index': 2},
{'template': """${'""'}""", 'start_index': 3, 'end_index': 7, 'expected_index': 3},
{'template': """${'""'}""", 'start_index': 0, 'end_index': 1, 'expected_index': -1},
)
def test_find_string_start(self, data):
"""
Test _find_string_start helper
"""
linter = MakoTemplateLinter()
string_start_index = linter._find_string_start(data['template'], data['start_index'], data['end_index'])
self.assertEqual(string_start_index, data['expected_index'])
@data(
{
'template': '${""}',
'result': {'start_index': 2, 'end_index': 4, 'quote_length': 1}
},
{
'template': "${''}",
'result': {'start_index': 2, 'end_index': 4, 'quote_length': 1}
},
{
'template': "${'Hello'}",
'result': {'start_index': 2, 'end_index': 9, 'quote_length': 1}
},
......@@ -528,9 +556,18 @@ class TestMakoTemplateLinter(TestCase):
"""
linter = MakoTemplateLinter()
result = linter._parse_string(data['template'], data['result']['start_index'])
parse_string = ParseString(data['template'], data['result']['start_index'], len(data['template']))
string_dict = {
'start_index': parse_string.start_index,
'end_index': parse_string.end_index,
'quote_length': parse_string.quote_length,
}
self.assertDictEqual(result, data['result'])
self.assertDictEqual(string_dict, data['result'])
self.assertEqual(data['template'][parse_string.start_index:parse_string.end_index], parse_string.string)
start_index = parse_string.start_index + parse_string.quote_length
end_index = parse_string.end_index - parse_string.quote_length
self.assertEqual(data['template'][start_index:end_index], parse_string.string_inner)
def _validate_data_rule(self, data, results):
if data['rule'] is None:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment