Commit fb13dc64 by Robert Raposa

Move MakoLinter.

parent a6b9ba7d
......@@ -1407,855 +1407,855 @@ class JavaScriptLinter(BaseLinter):
return False
class MakoTemplateLinter(BaseLinter):
class PythonLinter(BaseLinter):
"""
The linter for Mako template files.
The linter for Python files.
The current implementation of the linter does naive Python parsing. It does
not use the parser. One known issue is that parsing errors found inside a
docstring need to be disabled, rather than being automatically skipped.
Skipping docstrings is an enhancement that could be added.
"""
javaScriptLinter = JavaScriptLinter()
def __init__(self):
"""
Init method.
"""
super(PythonLinter, self).__init__()
self._skip_python_dirs = self._skip_dirs + ('tests', 'test/acceptance')
def process_file(self, directory, file_name):
"""
Process file to determine if it is a Mako template file and
Process file to determine if it is a Python file and
if it is safe.
Arguments:
directory (string): The directory of the file to be checked
file_name (string): A filename for a potential Mako file
file_name (string): A filename for a potential Python file
Returns:
The file results containing any violations.
"""
mako_file_full_path = os.path.normpath(directory + '/' + file_name)
results = FileResults(mako_file_full_path)
file_full_path = os.path.normpath(directory + '/' + file_name)
results = FileResults(file_full_path)
if not results.is_file:
return results
if not self._is_valid_directory(directory):
if file_name.lower().endswith('.py') is False:
return results
# TODO: When safe-by-default is turned on at the platform level, will we:
# 1. Turn it on for .html only, or
# 2. Turn it on for all files, and have different rulesets that have
# different rules of .xml, .html, .js, .txt Mako templates (e.g. use
# the n filter to turn off h for some of these)?
# For now, we only check .html and .xml files
if not (file_name.lower().endswith('.html') or file_name.lower().endswith('.xml')):
# skip this linter code (i.e. safe_template_linter.py)
if file_name == os.path.basename(__file__):
return results
return self._load_and_check_file_is_safe(mako_file_full_path, self._check_mako_file_is_safe, results)
def _is_valid_directory(self, directory):
"""
Determines if the provided directory is a directory that could contain
Mako template files that need to be linted.
Arguments:
directory: The directory to be linted.
Returns:
True if this directory should be linted for Mako template violations
and False otherwise.
"""
if self._is_skip_dir(self._skip_dirs, directory):
return False
# TODO: This is an imperfect guess concerning the Mako template
# directories. This needs to be reviewed before turning on safe by
# default at the platform level.
if ('/templates/' in directory) or directory.endswith('/templates'):
return True
if not self._is_valid_directory(self._skip_python_dirs, directory):
return results
return False
return self._load_and_check_file_is_safe(file_full_path, self.check_python_file_is_safe, results)
def _check_mako_file_is_safe(self, mako_template, results):
def check_python_file_is_safe(self, file_contents, results):
"""
Checks for violations in a Mako template.
Checks for violations in a Python file.
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file.
results: A file results object to which violations will be added.
"""
if self._is_django_template(mako_template):
return
has_page_default = self._has_page_default(mako_template, results)
self._check_mako_expressions(mako_template, has_page_default, results)
results.prepare_results(mako_template, line_comment_delim='##')
def _is_django_template(self, mako_template):
"""
Determines if the template is actually a Django template.
Arguments:
mako_template: The template code.
Returns:
True if this is really a Django template, and False otherwise.
"""
if re.search('({%.*%})|({{.*}})', mako_template) is not None:
return True
return False
def _get_page_tag_count(self, mako_template):
"""
Determines the number of page expressions in the Mako template. Ignores
page expressions that are commented out.
Arguments:
mako_template: The contents of the Mako template.
Returns:
The number of page expressions
"""
count = len(re.findall('<%page ', mako_template, re.IGNORECASE))
count_commented = len(re.findall(r'##\s+<%page ', mako_template, re.IGNORECASE))
return max(0, count - count_commented)
self._check_concat_with_html(file_contents, Rules.python_concat_html, results)
self._check_deprecated_display_name(file_contents, results)
self._check_custom_escape(file_contents, results)
self._check_html(file_contents, results)
results.prepare_results(file_contents, line_comment_delim='#')
def _has_page_default(self, mako_template, results):
def _check_deprecated_display_name(self, file_contents, results):
"""
Checks if the Mako template contains the page expression marking it as
safe by default.
Checks that the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem.
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
Side effect:
Adds violations regarding page default if necessary
Returns:
True if the template has the page default, and False otherwise.
"""
page_tag_count = self._get_page_tag_count(mako_template)
# check if there are too many page expressions
if 2 <= page_tag_count:
results.violations.append(RuleViolation(Rules.mako_multiple_page_tags))
return False
# make sure there is exactly 1 page expression, excluding commented out
# page expressions, before proceeding
elif page_tag_count != 1:
results.violations.append(RuleViolation(Rules.mako_missing_default))
return False
# check that safe by default (h filter) is turned on
page_h_filter_regex = re.compile('<%page[^>]*expression_filter=(?:"h"|\'h\')[^>]*/>')
page_match = page_h_filter_regex.search(mako_template)
if not page_match:
results.violations.append(RuleViolation(Rules.mako_missing_default))
return page_match
for match in re.finditer(r'\.display_name_with_default_escaped', file_contents):
expression = Expression(match.start(), match.end())
results.violations.append(ExpressionRuleViolation(
Rules.python_deprecated_display_name, expression
))
def _check_mako_expressions(self, mako_template, has_page_default, results):
def _check_custom_escape(self, file_contents, results):
"""
Searches for Mako expressions and then checks if they contain
violations, including checking JavaScript contexts for JavaScript
violations.
Checks for custom escaping calls, rather than using a standard escaping
method.
Arguments:
mako_template: The contents of the Mako template.
has_page_default: True if the page is marked as default, False
otherwise.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
"""
expressions = self._find_mako_expressions(mako_template)
contexts = self._get_contexts(mako_template)
self._check_javascript_contexts(mako_template, contexts, results)
for expression in expressions:
if expression.end_index is None:
for match in re.finditer("(<.*&lt;|&lt;.*<)", file_contents):
expression = Expression(match.start(), match.end())
results.violations.append(ExpressionRuleViolation(
Rules.mako_unparseable_expression, expression
Rules.python_custom_escape, expression
))
continue
context = self._get_context(contexts, expression.start_index)
self._check_filters(mako_template, expression, context, has_page_default, results)
self._check_deprecated_display_name(expression, results)
self._check_html_and_text(expression, has_page_default, results)
def _check_javascript_contexts(self, mako_template, contexts, results):
def _check_html(self, file_contents, results):
"""
Lint the JavaScript contexts for JavaScript violations inside a Mako
template.
Checks many rules related to HTML in a Python file.
Arguments:
mako_template: The contents of the Mako template.
contexts: A list of context dicts with 'type' and 'index'.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
Side effect:
Adds JavaScript violations to results.
"""
javascript_start_index = None
for context in contexts:
if context['type'] == 'javascript':
if javascript_start_index < 0:
javascript_start_index = context['index']
else:
if javascript_start_index is not None:
javascript_end_index = context['index']
javascript_code = mako_template[javascript_start_index:javascript_end_index]
self._check_javascript_context(javascript_code, javascript_start_index, results)
javascript_start_index = None
if javascript_start_index is not None:
javascript_code = mako_template[javascript_start_index:]
self._check_javascript_context(javascript_code, javascript_start_index, results)
# Text() Expressions keyed by its end index
text_calls_by_end_index = {}
# HTML() Expressions keyed by its end index
html_calls_by_end_index = {}
start_index = 0
while True:
def _check_javascript_context(self, javascript_code, start_offset, results):
"""
Lint a single JavaScript context for JavaScript violations inside a Mako
template.
# check HTML(), Text() and format() calls
result = self._check_html_text_format(
file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
)
next_start_index = result['next_start_index']
interpolate_end_index = result['interpolate_end_index']
Arguments:
javascript_code: The template contents of the JavaScript context.
start_offset: The offset of the JavaScript context inside the
original Mako template.
results: A list of results into which violations will be added.
# check for interpolation including HTML outside of function calls
self._check_interpolate_with_html(
file_contents, start_index, interpolate_end_index, results
)
Side effect:
Adds JavaScript violations to results.
# advance the search
start_index = next_start_index
"""
javascript_results = FileResults("")
self.javaScriptLinter.check_javascript_file_is_safe(javascript_code, javascript_results)
# translate the violations into the location within the original
# Mako template
for violation in javascript_results.violations:
expression = violation.expression
expression.start_index += start_offset
if expression.end_index is not None:
expression.end_index += start_offset
results.violations.append(ExpressionRuleViolation(violation.rule, expression))
# end if there is nothing left to search
if interpolate_end_index is None:
break
def _check_deprecated_display_name(self, expression, results):
def _check_html_text_format(
self, file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
):
"""
Checks that the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem.
Checks for HTML(), Text() and format() calls, and various rules related
to these calls.
Arguments:
expression: An Expression
file_contents: The contents of the Python file
start_index: The index at which to begin searching for a function
call.
text_calls_by_end_index: Text() Expressions keyed by its end index.
html_calls_by_end_index: HTML() Expressions keyed by its end index.
results: A list of results into which violations will be added.
"""
if '.display_name_with_default_escaped' in expression.expression:
results.violations.append(ExpressionRuleViolation(
Rules.python_deprecated_display_name, expression
))
def _check_html_and_text(self, expression, has_page_default, results):
"""
Checks rules related to proper use of HTML() and Text().
Arguments:
expression: A Mako Expression.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
Returns:
A dict with the following keys:
'next_start_index': The start index of the next search for a
function call.
'interpolate_end_index': The end index of the next search
for interpolation with html, or None if the end of file
should be used.
"""
expression_inner = expression.expression_inner
# use find to get the template relative inner expression start index
# due to possible skipped white space
template_inner_start_index = expression.start_index
template_inner_start_index += expression.expression.find(expression_inner)
if 'HTML(' in expression_inner:
if expression_inner.startswith('HTML('):
close_paren_index = self._find_closing_char_index(
None, "(", ")", expression_inner, start_index=len('HTML(')
)['close_char_index']
# check that the close paren is at the end of the stripped expression.
if close_paren_index != len(expression_inner) - 1:
results.violations.append(ExpressionRuleViolation(
Rules.mako_html_alone, expression
))
elif expression_inner.startswith('Text(') is False:
# used to find opening of .format(), Text() and HTML() calls
regex_function_open = re.compile(r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()")
interpolate_end_index = None
end_index = None
strings = None
html_calls = []
while True:
# first search for HTML(), Text(), or .format()
if end_index is None:
function_match = regex_function_open.search(file_contents, start_index)
else:
function_match = regex_function_open.search(file_contents, start_index, end_index)
if function_match is not None:
if interpolate_end_index is None:
interpolate_end_index = function_match.start()
function_close_result = self._find_closing_char_index(
None, '(', ')', file_contents, start_index=function_match.end(),
)
if function_close_result is None:
results.violations.append(ExpressionRuleViolation(
Rules.mako_html_requires_text, expression
Rules.python_parse_error, Expression(function_match.start())
))
else:
if 'Text(' in expression_inner:
expression = Expression(
function_match.start(), function_close_result['close_char_index'] + 1, file_contents,
start_delim=function_match.group(), end_delim=")"
)
# if this an outer most Text(), HTML(), or format() call
if end_index is None:
end_index = expression.end_index
interpolate_end_index = expression.start_index
strings = function_close_result['strings']
if function_match.group() == '.format(':
if 'HTML(' in expression.expression_inner or 'Text(' in expression.expression_inner:
is_wrapped_with_text = str(function_match.start()) in text_calls_by_end_index.keys()
is_wrapped_with_html = str(function_match.start()) in html_calls_by_end_index.keys()
if is_wrapped_with_text is False and is_wrapped_with_html is False:
results.violations.append(ExpressionRuleViolation(
Rules.mako_text_redundant, expression
Rules.python_requires_html_or_text, expression
))
# strings to be checked for HTML
unwrapped_html_strings = expression.strings
for match in re.finditer(r"(HTML\(|Text\()", expression_inner):
result = self._find_closing_char_index(None, "(", ")", expression_inner, start_index=match.end())
if result is not None:
close_paren_index = result['close_char_index']
# the argument sent to HTML() or Text()
argument = expression_inner[match.end():close_paren_index]
if ".format(" in argument:
else: # expression is 'HTML(' or 'Text('
# HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
# Generally, format() would be the issue if there is one.
if regex_function_open.search(expression.expression_inner) is not None:
results.violations.append(ExpressionRuleViolation(
Rules.python_close_before_format, expression
))
if match.group() == "HTML(":
# remove expression strings wrapped in HTML()
for string in list(unwrapped_html_strings):
html_inner_start_index = template_inner_start_index + match.end()
html_inner_end_index = template_inner_start_index + close_paren_index
if html_inner_start_index <= string.start_index and string.end_index <= html_inner_end_index:
unwrapped_html_strings.remove(string)
if function_match.group() == 'Text(':
text_calls_by_end_index[str(expression.end_index)] = expression
else: # function_match.group() == 'HTML(':
html_calls_by_end_index[str(expression.end_index)] = expression
html_calls.append(expression)
# check strings not wrapped in HTML() for '<'
for string in unwrapped_html_strings:
if '<' in string.string_inner:
results.violations.append(ExpressionRuleViolation(
Rules.python_wrap_html, expression
))
break
# check strings not wrapped in HTML() for HTML entities
if has_page_default:
for string in unwrapped_html_strings:
if re.search(r"&[#]?[a-zA-Z0-9]+;", string.string_inner):
results.violations.append(ExpressionRuleViolation(
Rules.mako_html_entities, expression
))
start_index = function_match.end()
else:
break
def _check_filters(self, mako_template, expression, context, has_page_default, results):
# checks strings in the outer most call to ensure they are properly
# wrapped with HTML()
self._check_format_html_strings_wrapped(strings, html_calls, results)
# compute where to continue the search
if function_match is None and end_index is None:
next_start_index = start_index
elif end_index is None:
next_start_index = function_match.end()
else:
next_start_index = end_index
return {
'next_start_index': next_start_index,
'interpolate_end_index': interpolate_end_index,
}
def _check_format_html_strings_wrapped(self, strings, html_calls, results):
"""
Checks that the filters used in the given Mako expression are valid
for the given context. Adds violation to results if there is a problem.
Checks that any string inside a format call that seems to contain HTML
is wrapped with a call to HTML().
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
context: The context of the page in which the expression was found
(e.g. javascript, html).
has_page_default: True if the page is marked as default, False
otherwise.
strings: A list of ParseStrings for each string inside the format()
call.
html_calls: A list of Expressions representing all of the HTML()
calls inside the format() call.
results: A list of results into which violations will be added.
"""
if context == 'unknown':
results.violations.append(ExpressionRuleViolation(
Rules.mako_unknown_context, expression
))
return
# Example: finds "| n, h}" when given "${x | n, h}"
filters_regex = re.compile(r'\|([.,\w\s]*)\}')
filters_match = filters_regex.search(expression.expression)
if filters_match is None:
if context == 'javascript':
results.violations.append(ExpressionRuleViolation(
Rules.mako_invalid_js_filter, expression
))
return
filters = filters_match.group(1).replace(" ", "").split(",")
if filters == ['n', 'decode.utf8']:
# {x | n, decode.utf8} is valid in any context
pass
elif context == 'html':
if filters == ['h']:
if has_page_default:
# suppress this violation if the page default hasn't been set,
# otherwise the template might get less safe
results.violations.append(ExpressionRuleViolation(
Rules.mako_unwanted_html_filter, expression
))
else:
results.violations.append(ExpressionRuleViolation(
Rules.mako_invalid_html_filter, expression
))
elif context == 'javascript':
self._check_js_expression_not_with_html(mako_template, expression, results)
if filters == ['n', 'dump_js_escaped_json']:
# {x | n, dump_js_escaped_json} is valid
pass
elif filters == ['n', 'js_escaped_string']:
# {x | n, js_escaped_string} is valid, if surrounded by quotes
self._check_js_string_expression_in_quotes(mako_template, expression, results)
else:
html_strings = []
html_wrapped_strings = []
if strings is not None:
# find all strings that contain HTML
for string in strings:
if '<' in string.string:
html_strings.append(string)
# check if HTML string is appropriately wrapped
for html_call in html_calls:
if html_call.start_index < string.start_index < string.end_index < html_call.end_index:
html_wrapped_strings.append(string)
break
# loop through all unwrapped strings
for unsafe_string in set(html_strings) - set(html_wrapped_strings):
unsafe_string_expression = Expression(unsafe_string.start_index)
results.violations.append(ExpressionRuleViolation(
Rules.mako_invalid_js_filter, expression
Rules.python_wrap_html, unsafe_string_expression
))
def _check_js_string_expression_in_quotes(self, mako_template, expression, results):
def _check_interpolate_with_html(self, file_contents, start_index, end_index, results):
"""
Checks that a Mako expression using js_escaped_string is surrounded by
quotes.
Find interpolations with html that fall outside of any calls to HTML(),
Text(), and .format().
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
file_contents: The contents of the Python file
start_index: The index to start the search, or None if nothing to
search
end_index: The index to end the search, or None if the end of file
should be used.
results: A list of results into which violations will be added.
"""
parse_string = self._find_string_wrapping_expression(mako_template, expression)
if parse_string is None:
# used to find interpolation with HTML
pattern_interpolate_html_inner = r'(<.*%s|%s.*<|<.*{\w*}|{\w*}.*<)'
regex_interpolate_html = re.compile(r"""(".*{}.*"|'.*{}.*')""".format(
pattern_interpolate_html_inner, pattern_interpolate_html_inner
))
if end_index is None:
interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index)
else:
interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index, end_index)
for match_html_string in interpolate_string_iter:
expression = Expression(match_html_string.start(), match_html_string.end())
results.violations.append(ExpressionRuleViolation(
Rules.mako_js_missing_quotes, expression
Rules.python_interpolate_html, expression
))
def _check_js_expression_not_with_html(self, mako_template, expression, results):
"""
Checks that a Mako expression in a JavaScript context does not appear in
a string that also contains HTML.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
class MakoTemplateLinter(BaseLinter):
"""
parse_string = self._find_string_wrapping_expression(mako_template, expression)
if parse_string is not None and re.search('[<>]', parse_string.string) is not None:
results.violations.append(ExpressionRuleViolation(
Rules.mako_js_html_string, expression
))
The linter for Mako template files.
"""
javaScriptLinter = JavaScriptLinter()
def _find_string_wrapping_expression(self, mako_template, expression):
def process_file(self, directory, file_name):
"""
Finds the string wrapping the Mako expression if there is one.
Process file to determine if it is a Mako template file and
if it is safe.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
directory (string): The directory of the file to be checked
file_name (string): A filename for a potential Mako file
Returns:
ParseString representing a scrubbed version of the wrapped string,
where the Mako expression was replaced with "${...}", if a wrapped
string was found. Otherwise, returns None if none found.
"""
lines = StringLines(mako_template)
start_index = lines.index_to_line_start_index(expression.start_index)
if expression.end_index is not None:
end_index = lines.index_to_line_end_index(expression.end_index)
else:
return None
# scrub out the actual expression so any code inside the expression
# doesn't interfere with rules applied to the surrounding code (i.e.
# checking JavaScript).
scrubbed_lines = "".join((
mako_template[start_index:expression.start_index],
"${...}",
mako_template[expression.end_index:end_index]
))
adjusted_start_index = expression.start_index - start_index
start_index = 0
while True:
parse_string = ParseString(scrubbed_lines, start_index, len(scrubbed_lines))
# check for validly parsed string
if 0 <= parse_string.start_index < parse_string.end_index:
# check if expression is contained in the given string
if parse_string.start_index < adjusted_start_index < parse_string.end_index:
return parse_string
else:
# move to check next string
start_index = parse_string.end_index
else:
break
return None
def _get_contexts(self, mako_template):
"""
Returns a data structure that represents the indices at which the
template changes from HTML context to JavaScript and back.
The file results containing any violations.
Return:
A list of dicts where each dict contains:
- index: the index of the context.
- type: the context type (e.g. 'html' or 'javascript').
"""
contexts_re = re.compile(
r"""
<script.*?> | # script tag start
</script> | # script tag end
<%static:require_module.*?> | # require js script tag start
</%static:require_module> | # require js script tag end
<%block[ ]*name=['"]requirejs['"]\w*> | # require js tag start
</%block> # require js tag end
""",
re.VERBOSE | re.IGNORECASE
)
media_type_re = re.compile(r"""type=['"].*?['"]""", re.IGNORECASE)
contexts = [{'index': 0, 'type': 'html'}]
javascript_types = [
'text/javascript', 'text/ecmascript', 'application/ecmascript', 'application/javascript',
'text/x-mathjax-config', 'json/xblock-args'
]
html_types = ['text/template']
for context in contexts_re.finditer(mako_template):
match_string = context.group().lower()
if match_string.startswith("<script"):
match_type = media_type_re.search(match_string)
context_type = 'javascript'
if match_type is not None:
# get media type (e.g. get text/javascript from
# type="text/javascript")
match_type = match_type.group()[6:-1].lower()
if match_type in html_types:
context_type = 'html'
elif match_type not in javascript_types:
context_type = 'unknown'
contexts.append({'index': context.end(), 'type': context_type})
elif match_string.startswith("</"):
contexts.append({'index': context.start(), 'type': 'html'})
else:
contexts.append({'index': context.end(), 'type': 'javascript'})
mako_file_full_path = os.path.normpath(directory + '/' + file_name)
results = FileResults(mako_file_full_path)
return contexts
if not results.is_file:
return results
def _get_context(self, contexts, index):
"""
Gets the context (e.g. javascript, html) of the template at the given
index.
if not self._is_valid_directory(directory):
return results
Arguments:
contexts: A list of dicts where each dict contains the 'index' of the context
and the context 'type' (e.g. 'html' or 'javascript').
index: The index for which we want the context.
# TODO: When safe-by-default is turned on at the platform level, will we:
# 1. Turn it on for .html only, or
# 2. Turn it on for all files, and have different rulesets that have
# different rules of .xml, .html, .js, .txt Mako templates (e.g. use
# the n filter to turn off h for some of these)?
# For now, we only check .html and .xml files
if not (file_name.lower().endswith('.html') or file_name.lower().endswith('.xml')):
return results
Returns:
The context (e.g. javascript or html) for the given index.
"""
current_context = contexts[0]['type']
for context in contexts:
if context['index'] <= index:
current_context = context['type']
else:
break
return current_context
return self._load_and_check_file_is_safe(mako_file_full_path, self._check_mako_file_is_safe, results)
def _find_mako_expressions(self, mako_template):
def _is_valid_directory(self, directory):
"""
Finds all the Mako expressions in a Mako template and creates a list
of dicts for each expression.
Determines if the provided directory is a directory that could contain
Mako template files that need to be linted.
Arguments:
mako_template: The content of the Mako template.
directory: The directory to be linted.
Returns:
A list of Expressions.
True if this directory should be linted for Mako template violations
and False otherwise.
"""
start_delim = '${'
start_index = 0
expressions = []
while True:
start_index = mako_template.find(start_delim, start_index)
if start_index < 0:
break
if self._is_skip_dir(self._skip_dirs, directory):
return False
result = self._find_closing_char_index(
start_delim, '{', '}', mako_template, start_index=start_index + len(start_delim)
)
if result is None:
expression = Expression(start_index)
# for parsing error, restart search right after the start of the
# current expression
start_index = start_index + len(start_delim)
else:
close_char_index = result['close_char_index']
expression = mako_template[start_index:close_char_index + 1]
expression = Expression(
start_index,
end_index=close_char_index + 1,
template=mako_template,
start_delim=start_delim,
end_delim='}',
strings=result['strings'],
)
# restart search after the current expression
start_index = expression.end_index
expressions.append(expression)
return expressions
# TODO: This is an imperfect guess concerning the Mako template
# directories. This needs to be reviewed before turning on safe by
# default at the platform level.
if ('/templates/' in directory) or directory.endswith('/templates'):
return True
return False
class PythonLinter(BaseLinter):
def _check_mako_file_is_safe(self, mako_template, results):
"""
The linter for Python files.
Checks for violations in a Mako template.
The current implementation of the linter does naive Python parsing. It does
not use the parser. One known issue is that parsing errors found inside a
docstring need to be disabled, rather than being automatically skipped.
Skipping docstrings is an enhancement that could be added.
"""
Arguments:
mako_template: The contents of the Mako template.
results: A file results object to which violations will be added.
def __init__(self):
"""
Init method.
"""
super(PythonLinter, self).__init__()
self._skip_python_dirs = self._skip_dirs + ('tests', 'test/acceptance')
if self._is_django_template(mako_template):
return
has_page_default = self._has_page_default(mako_template, results)
self._check_mako_expressions(mako_template, has_page_default, results)
results.prepare_results(mako_template, line_comment_delim='##')
def process_file(self, directory, file_name):
def _is_django_template(self, mako_template):
"""
Process file to determine if it is a Python file and
if it is safe.
Determines if the template is actually a Django template.
Arguments:
directory (string): The directory of the file to be checked
file_name (string): A filename for a potential Python file
mako_template: The template code.
Returns:
The file results containing any violations.
True if this is really a Django template, and False otherwise.
"""
file_full_path = os.path.normpath(directory + '/' + file_name)
results = FileResults(file_full_path)
if not results.is_file:
return results
if file_name.lower().endswith('.py') is False:
return results
# skip this linter code (i.e. safe_template_linter.py)
if file_name == os.path.basename(__file__):
return results
if not self._is_valid_directory(self._skip_python_dirs, directory):
return results
return self._load_and_check_file_is_safe(file_full_path, self.check_python_file_is_safe, results)
if re.search('({%.*%})|({{.*}})', mako_template) is not None:
return True
return False
def check_python_file_is_safe(self, file_contents, results):
def _get_page_tag_count(self, mako_template):
"""
Checks for violations in a Python file.
Determines the number of page expressions in the Mako template. Ignores
page expressions that are commented out.
Arguments:
file_contents: The contents of the Python file.
results: A file results object to which violations will be added.
mako_template: The contents of the Mako template.
Returns:
The number of page expressions
"""
self._check_concat_with_html(file_contents, Rules.python_concat_html, results)
self._check_deprecated_display_name(file_contents, results)
self._check_custom_escape(file_contents, results)
self._check_html(file_contents, results)
results.prepare_results(file_contents, line_comment_delim='#')
count = len(re.findall('<%page ', mako_template, re.IGNORECASE))
count_commented = len(re.findall(r'##\s+<%page ', mako_template, re.IGNORECASE))
return max(0, count - count_commented)
def _check_deprecated_display_name(self, file_contents, results):
def _has_page_default(self, mako_template, results):
"""
Checks that the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem.
Checks if the Mako template contains the page expression marking it as
safe by default.
Arguments:
file_contents: The contents of the Python file
mako_template: The contents of the Mako template.
results: A list of results into which violations will be added.
"""
for match in re.finditer(r'\.display_name_with_default_escaped', file_contents):
expression = Expression(match.start(), match.end())
results.violations.append(ExpressionRuleViolation(
Rules.python_deprecated_display_name, expression
))
Side effect:
Adds violations regarding page default if necessary
def _check_custom_escape(self, file_contents, results):
"""
Checks for custom escaping calls, rather than using a standard escaping
method.
Arguments:
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
Returns:
True if the template has the page default, and False otherwise.
"""
for match in re.finditer("(<.*&lt;|&lt;.*<)", file_contents):
expression = Expression(match.start(), match.end())
results.violations.append(ExpressionRuleViolation(
Rules.python_custom_escape, expression
))
page_tag_count = self._get_page_tag_count(mako_template)
# check if there are too many page expressions
if 2 <= page_tag_count:
results.violations.append(RuleViolation(Rules.mako_multiple_page_tags))
return False
# make sure there is exactly 1 page expression, excluding commented out
# page expressions, before proceeding
elif page_tag_count != 1:
results.violations.append(RuleViolation(Rules.mako_missing_default))
return False
# check that safe by default (h filter) is turned on
page_h_filter_regex = re.compile('<%page[^>]*expression_filter=(?:"h"|\'h\')[^>]*/>')
page_match = page_h_filter_regex.search(mako_template)
if not page_match:
results.violations.append(RuleViolation(Rules.mako_missing_default))
return page_match
def _check_html(self, file_contents, results):
def _check_mako_expressions(self, mako_template, has_page_default, results):
"""
Checks many rules related to HTML in a Python file.
Searches for Mako expressions and then checks if they contain
violations, including checking JavaScript contexts for JavaScript
violations.
Arguments:
file_contents: The contents of the Python file
mako_template: The contents of the Mako template.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
"""
# Text() Expressions keyed by its end index
text_calls_by_end_index = {}
# HTML() Expressions keyed by its end index
html_calls_by_end_index = {}
start_index = 0
while True:
# check HTML(), Text() and format() calls
result = self._check_html_text_format(
file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
)
next_start_index = result['next_start_index']
interpolate_end_index = result['interpolate_end_index']
# check for interpolation including HTML outside of function calls
self._check_interpolate_with_html(
file_contents, start_index, interpolate_end_index, results
)
# advance the search
start_index = next_start_index
expressions = self._find_mako_expressions(mako_template)
contexts = self._get_contexts(mako_template)
self._check_javascript_contexts(mako_template, contexts, results)
for expression in expressions:
if expression.end_index is None:
results.violations.append(ExpressionRuleViolation(
Rules.mako_unparseable_expression, expression
))
continue
# end if there is nothing left to search
if interpolate_end_index is None:
break
context = self._get_context(contexts, expression.start_index)
self._check_filters(mako_template, expression, context, has_page_default, results)
self._check_deprecated_display_name(expression, results)
self._check_html_and_text(expression, has_page_default, results)
def _check_html_text_format(
self, file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
):
def _check_javascript_contexts(self, mako_template, contexts, results):
"""
Checks for HTML(), Text() and format() calls, and various rules related
to these calls.
Lint the JavaScript contexts for JavaScript violations inside a Mako
template.
Arguments:
file_contents: The contents of the Python file
start_index: The index at which to begin searching for a function
call.
text_calls_by_end_index: Text() Expressions keyed by its end index.
html_calls_by_end_index: HTML() Expressions keyed by its end index.
mako_template: The contents of the Mako template.
contexts: A list of context dicts with 'type' and 'index'.
results: A list of results into which violations will be added.
Returns:
A dict with the following keys:
'next_start_index': The start index of the next search for a
function call.
'interpolate_end_index': The end index of the next next search
for interpolation with html, or None if the end of file
should be used.
Side effect:
Adds JavaScript violations to results.
"""
# used to find opening of .format(), Text() and HTML() calls
regex_function_open = re.compile(r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()")
interpolate_end_index = None
end_index = None
strings = None
html_calls = []
while True:
# first search for HTML(), Text(), or .format()
if end_index is None:
function_match = regex_function_open.search(file_contents, start_index)
else:
function_match = regex_function_open.search(file_contents, start_index, end_index)
if function_match is not None:
if interpolate_end_index is None:
interpolate_end_index = function_match.start()
function_close_result = self._find_closing_char_index(
None, '(', ')', file_contents, start_index=function_match.end(),
)
if function_close_result is None:
results.violations.append(ExpressionRuleViolation(
Rules.python_parse_error, Expression(function_match.start())
))
javascript_start_index = None
for context in contexts:
if context['type'] == 'javascript':
if javascript_start_index < 0:
javascript_start_index = context['index']
else:
expression = Expression(
function_match.start(), function_close_result['close_char_index'] + 1, file_contents,
start_delim=function_match.group(), end_delim=")"
)
# if this an outer most Text(), HTML(), or format() call
if end_index is None:
end_index = expression.end_index
interpolate_end_index = expression.start_index
strings = function_close_result['strings']
if function_match.group() == '.format(':
if 'HTML(' in expression.expression_inner or 'Text(' in expression.expression_inner:
is_wrapped_with_text = str(function_match.start()) in text_calls_by_end_index.keys()
is_wrapped_with_html = str(function_match.start()) in html_calls_by_end_index.keys()
if is_wrapped_with_text is False and is_wrapped_with_html is False:
results.violations.append(ExpressionRuleViolation(
Rules.python_requires_html_or_text, expression
))
else: # expression is 'HTML(' or 'Text('
# HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
# Generally, format() would be the issue if there is one.
if regex_function_open.search(expression.expression_inner) is not None:
results.violations.append(ExpressionRuleViolation(
Rules.python_close_before_format, expression
))
if function_match.group() == 'Text(':
text_calls_by_end_index[str(expression.end_index)] = expression
else: # function_match.group() == 'HTML(':
html_calls_by_end_index[str(expression.end_index)] = expression
html_calls.append(expression)
if javascript_start_index is not None:
javascript_end_index = context['index']
javascript_code = mako_template[javascript_start_index:javascript_end_index]
self._check_javascript_context(javascript_code, javascript_start_index, results)
javascript_start_index = None
if javascript_start_index is not None:
javascript_code = mako_template[javascript_start_index:]
self._check_javascript_context(javascript_code, javascript_start_index, results)
start_index = function_match.end()
else:
break
def _check_javascript_context(self, javascript_code, start_offset, results):
"""
Lint a single JavaScript context for JavaScript violations inside a Mako
template.
# checks strings in the outer most call to ensure they are properly
# wrapped with HTML()
self._check_format_html_strings_wrapped(strings, html_calls, results)
Arguments:
javascript_code: The template contents of the JavaScript context.
start_offset: The offset of the JavaScript context inside the
original Mako template.
results: A list of results into which violations will be added.
# compute where to continue the search
if function_match is None and end_index is None:
next_start_index = start_index
elif end_index is None:
next_start_index = function_match.end()
else:
next_start_index = end_index
Side effect:
Adds JavaScript violations to results.
return {
'next_start_index': next_start_index,
'interpolate_end_index': interpolate_end_index,
}
"""
javascript_results = FileResults("")
self.javaScriptLinter.check_javascript_file_is_safe(javascript_code, javascript_results)
# translate the violations into the location within the original
# Mako template
for violation in javascript_results.violations:
expression = violation.expression
expression.start_index += start_offset
if expression.end_index is not None:
expression.end_index += start_offset
results.violations.append(ExpressionRuleViolation(violation.rule, expression))
def _check_format_html_strings_wrapped(self, strings, html_calls, results):
def _check_deprecated_display_name(self, expression, results):
"""
Checks that any string inside a format call that seems to contain HTML
is wrapped with a call to HTML().
Checks that the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem.
Arguments:
strings: A list of ParseStrings for each string inside the format()
call.
html_calls: A list of Expressions representing all of the HTML()
calls inside the format() call.
expression: An Expression
results: A list of results into which violations will be added.
"""
html_strings = []
html_wrapped_strings = []
if strings is not None:
# find all strings that contain HTML
for string in strings:
if '<' in string.string:
html_strings.append(string)
# check if HTML string is appropriately wrapped
for html_call in html_calls:
if html_call.start_index < string.start_index < string.end_index < html_call.end_index:
html_wrapped_strings.append(string)
break
# loop through all unwrapped strings
for unsafe_string in set(html_strings) - set(html_wrapped_strings):
unsafe_string_expression = Expression(unsafe_string.start_index)
if '.display_name_with_default_escaped' in expression.expression:
results.violations.append(ExpressionRuleViolation(
Rules.python_wrap_html, unsafe_string_expression
Rules.python_deprecated_display_name, expression
))
def _check_interpolate_with_html(self, file_contents, start_index, end_index, results):
def _check_html_and_text(self, expression, has_page_default, results):
"""
Find interpolations with html that fall outside of any calls to HTML(),
Text(), and .format().
Checks rules related to proper use of HTML() and Text().
Arguments:
file_contents: The contents of the Python file
start_index: The index to start the search, or None if nothing to
search
end_index: The index to end the search, or None if the end of file
should be used.
expression: A Mako Expression.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
"""
# used to find interpolation with HTML
pattern_interpolate_html_inner = r'(<.*%s|%s.*<|<.*{\w*}|{\w*}.*<)'
regex_interpolate_html = re.compile(r"""(".*{}.*"|'.*{}.*')""".format(
pattern_interpolate_html_inner, pattern_interpolate_html_inner
))
if end_index is None:
interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index)
else:
interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index, end_index)
for match_html_string in interpolate_string_iter:
expression = Expression(match_html_string.start(), match_html_string.end())
expression_inner = expression.expression_inner
# use find to get the template relative inner expression start index
# due to possible skipped white space
template_inner_start_index = expression.start_index
template_inner_start_index += expression.expression.find(expression_inner)
if 'HTML(' in expression_inner:
if expression_inner.startswith('HTML('):
close_paren_index = self._find_closing_char_index(
None, "(", ")", expression_inner, start_index=len('HTML(')
)['close_char_index']
# check that the close paren is at the end of the stripped expression.
if close_paren_index != len(expression_inner) - 1:
results.violations.append(ExpressionRuleViolation(
Rules.python_interpolate_html, expression
Rules.mako_html_alone, expression
))
elif expression_inner.startswith('Text(') is False:
results.violations.append(ExpressionRuleViolation(
Rules.mako_html_requires_text, expression
))
else:
if 'Text(' in expression_inner:
results.violations.append(ExpressionRuleViolation(
Rules.mako_text_redundant, expression
))
# strings to be checked for HTML
unwrapped_html_strings = expression.strings
for match in re.finditer(r"(HTML\(|Text\()", expression_inner):
result = self._find_closing_char_index(None, "(", ")", expression_inner, start_index=match.end())
if result is not None:
close_paren_index = result['close_char_index']
# the argument sent to HTML() or Text()
argument = expression_inner[match.end():close_paren_index]
if ".format(" in argument:
results.violations.append(ExpressionRuleViolation(
Rules.python_close_before_format, expression
))
if match.group() == "HTML(":
# remove expression strings wrapped in HTML()
for string in list(unwrapped_html_strings):
html_inner_start_index = template_inner_start_index + match.end()
html_inner_end_index = template_inner_start_index + close_paren_index
if html_inner_start_index <= string.start_index and string.end_index <= html_inner_end_index:
unwrapped_html_strings.remove(string)
# check strings not wrapped in HTML() for '<'
for string in unwrapped_html_strings:
if '<' in string.string_inner:
results.violations.append(ExpressionRuleViolation(
Rules.python_wrap_html, expression
))
break
# check strings not wrapped in HTML() for HTML entities
if has_page_default:
for string in unwrapped_html_strings:
if re.search(r"&[#]?[a-zA-Z0-9]+;", string.string_inner):
results.violations.append(ExpressionRuleViolation(
Rules.mako_html_entities, expression
))
break
def _check_filters(self, mako_template, expression, context, has_page_default, results):
"""
Checks that the filters used in the given Mako expression are valid
for the given context. Adds violation to results if there is a problem.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
context: The context of the page in which the expression was found
(e.g. javascript, html).
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
"""
if context == 'unknown':
results.violations.append(ExpressionRuleViolation(
Rules.mako_unknown_context, expression
))
return
# Example: finds "| n, h}" when given "${x | n, h}"
filters_regex = re.compile(r'\|([.,\w\s]*)\}')
filters_match = filters_regex.search(expression.expression)
if filters_match is None:
if context == 'javascript':
results.violations.append(ExpressionRuleViolation(
Rules.mako_invalid_js_filter, expression
))
return
filters = filters_match.group(1).replace(" ", "").split(",")
if filters == ['n', 'decode.utf8']:
# {x | n, decode.utf8} is valid in any context
pass
elif context == 'html':
if filters == ['h']:
if has_page_default:
# suppress this violation if the page default hasn't been set,
# otherwise the template might get less safe
results.violations.append(ExpressionRuleViolation(
Rules.mako_unwanted_html_filter, expression
))
else:
results.violations.append(ExpressionRuleViolation(
Rules.mako_invalid_html_filter, expression
))
elif context == 'javascript':
self._check_js_expression_not_with_html(mako_template, expression, results)
if filters == ['n', 'dump_js_escaped_json']:
# {x | n, dump_js_escaped_json} is valid
pass
elif filters == ['n', 'js_escaped_string']:
# {x | n, js_escaped_string} is valid, if surrounded by quotes
self._check_js_string_expression_in_quotes(mako_template, expression, results)
else:
results.violations.append(ExpressionRuleViolation(
Rules.mako_invalid_js_filter, expression
))
def _check_js_string_expression_in_quotes(self, mako_template, expression, results):
"""
Checks that a Mako expression using js_escaped_string is surrounded by
quotes.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
"""
parse_string = self._find_string_wrapping_expression(mako_template, expression)
if parse_string is None:
results.violations.append(ExpressionRuleViolation(
Rules.mako_js_missing_quotes, expression
))
def _check_js_expression_not_with_html(self, mako_template, expression, results):
"""
Checks that a Mako expression in a JavaScript context does not appear in
a string that also contains HTML.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
"""
parse_string = self._find_string_wrapping_expression(mako_template, expression)
if parse_string is not None and re.search('[<>]', parse_string.string) is not None:
results.violations.append(ExpressionRuleViolation(
Rules.mako_js_html_string, expression
))
def _find_string_wrapping_expression(self, mako_template, expression):
"""
Finds the string wrapping the Mako expression if there is one.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
Returns:
ParseString representing a scrubbed version of the wrapped string,
where the Mako expression was replaced with "${...}", if a wrapped
string was found. Otherwise, returns None if none found.
"""
lines = StringLines(mako_template)
start_index = lines.index_to_line_start_index(expression.start_index)
if expression.end_index is not None:
end_index = lines.index_to_line_end_index(expression.end_index)
else:
return None
# scrub out the actual expression so any code inside the expression
# doesn't interfere with rules applied to the surrounding code (i.e.
# checking JavaScript).
scrubbed_lines = "".join((
mako_template[start_index:expression.start_index],
"${...}",
mako_template[expression.end_index:end_index]
))
adjusted_start_index = expression.start_index - start_index
start_index = 0
while True:
parse_string = ParseString(scrubbed_lines, start_index, len(scrubbed_lines))
# check for validly parsed string
if 0 <= parse_string.start_index < parse_string.end_index:
# check if expression is contained in the given string
if parse_string.start_index < adjusted_start_index < parse_string.end_index:
return parse_string
else:
# move to check next string
start_index = parse_string.end_index
else:
break
return None
def _get_contexts(self, mako_template):
"""
Returns a data structure that represents the indices at which the
template changes from HTML context to JavaScript and back.
Return:
A list of dicts where each dict contains:
- index: the index of the context.
- type: the context type (e.g. 'html' or 'javascript').
"""
contexts_re = re.compile(
r"""
<script.*?> | # script tag start
</script> | # script tag end
<%static:require_module.*?> | # require js script tag start
</%static:require_module> | # require js script tag end
<%block[ ]*name=['"]requirejs['"]\w*> | # require js tag start
</%block> # require js tag end
""",
re.VERBOSE | re.IGNORECASE
)
media_type_re = re.compile(r"""type=['"].*?['"]""", re.IGNORECASE)
contexts = [{'index': 0, 'type': 'html'}]
javascript_types = [
'text/javascript', 'text/ecmascript', 'application/ecmascript', 'application/javascript',
'text/x-mathjax-config', 'json/xblock-args'
]
html_types = ['text/template']
for context in contexts_re.finditer(mako_template):
match_string = context.group().lower()
if match_string.startswith("<script"):
match_type = media_type_re.search(match_string)
context_type = 'javascript'
if match_type is not None:
# get media type (e.g. get text/javascript from
# type="text/javascript")
match_type = match_type.group()[6:-1].lower()
if match_type in html_types:
context_type = 'html'
elif match_type not in javascript_types:
context_type = 'unknown'
contexts.append({'index': context.end(), 'type': context_type})
elif match_string.startswith("</"):
contexts.append({'index': context.start(), 'type': 'html'})
else:
contexts.append({'index': context.end(), 'type': 'javascript'})
return contexts
def _get_context(self, contexts, index):
"""
Gets the context (e.g. javascript, html) of the template at the given
index.
Arguments:
contexts: A list of dicts where each dict contains the 'index' of the context
and the context 'type' (e.g. 'html' or 'javascript').
index: The index for which we want the context.
Returns:
The context (e.g. javascript or html) for the given index.
"""
current_context = contexts[0]['type']
for context in contexts:
if context['index'] <= index:
current_context = context['type']
else:
break
return current_context
def _find_mako_expressions(self, mako_template):
"""
Finds all the Mako expressions in a Mako template and creates a list
of dicts for each expression.
Arguments:
mako_template: The content of the Mako template.
Returns:
A list of Expressions.
"""
start_delim = '${'
start_index = 0
expressions = []
while True:
start_index = mako_template.find(start_delim, start_index)
if start_index < 0:
break
result = self._find_closing_char_index(
start_delim, '{', '}', mako_template, start_index=start_index + len(start_delim)
)
if result is None:
expression = Expression(start_index)
# for parsing error, restart search right after the start of the
# current expression
start_index = start_index + len(start_delim)
else:
close_char_index = result['close_char_index']
expression = mako_template[start_index:close_char_index + 1]
expression = Expression(
start_index,
end_index=close_char_index + 1,
template=mako_template,
start_delim=start_delim,
end_delim='}',
strings=result['strings'],
)
# restart search after the current expression
start_index = expression.end_index
expressions.append(expression)
return expressions
def _process_file(full_path, template_linters, options, out):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment