Commit 24ad0059 by Robert Raposa

Merge pull request #12352 from edx/robrap/linter-false-positives-3

Fix Python false positives with AST.
parents e1400459 60d11bf2
...@@ -26,7 +26,7 @@ def HTML(html): # pylint: disable=invalid-name ...@@ -26,7 +26,7 @@ def HTML(html): # pylint: disable=invalid-name
from openedx.core.djangolib.markup import Text, HTML from openedx.core.djangolib.markup import Text, HTML
%> %>
${Text(_("Write & send {start}email{end}")).format( ${Text(_("Write & send {start}email{end}")).format(
start=HTML("<a href='mailto:{}'>".format(user.email), start=HTML("<a href='mailto:{}'>").format(user.email),
end=HTML("</a>"), end=HTML("</a>"),
)} )}
......
...@@ -18,6 +18,10 @@ show_help() { ...@@ -18,6 +18,10 @@ show_help() {
echo " -m, --main-branch=COMMIT Run against files changed between the" echo " -m, --main-branch=COMMIT Run against files changed between the"
echo " current branch and this commit." echo " current branch and this commit."
echo " Defaults to origin/master." echo " Defaults to origin/master."
echo ""
echo "For additional help:"
echo " http://edx.readthedocs.org/projects/edx-developer-guide/en/latest/conventions/safe_templates.html#safe-template-linter"
} }
for i in "$@"; do for i in "$@"; do
......
...@@ -4,10 +4,12 @@ A linting tool to check if templates are safe ...@@ -4,10 +4,12 @@ A linting tool to check if templates are safe
""" """
from __future__ import print_function from __future__ import print_function
import argparse import argparse
import ast
from enum import Enum from enum import Enum
import os import os
import re import re
import sys import sys
import textwrap
class StringLines(object): class StringLines(object):
...@@ -219,18 +221,6 @@ class Rules(Enum): ...@@ -219,18 +221,6 @@ class Rules(Enum):
'mako-js-html-string', 'mako-js-html-string',
'A JavaScript string containing HTML should not have an embedded Mako expression.' 'A JavaScript string containing HTML should not have an embedded Mako expression.'
) )
mako_html_requires_text = (
'mako-html-requires-text',
'You must begin with Text() if you use HTML() during interpolation.'
)
mako_text_redundant = (
'mako-text-redundant',
'Using Text() function without HTML() is unnecessary.'
)
mako_html_alone = (
'mako-html-alone',
"Only use HTML() alone with properly escaped HTML(), and make sure it is really alone."
)
mako_html_entities = ( mako_html_entities = (
'mako-html-entities', 'mako-html-entities',
"HTML entities should be plain text or wrapped with HTML()." "HTML entities should be plain text or wrapped with HTML()."
...@@ -420,7 +410,7 @@ class RuleViolation(object): ...@@ -420,7 +410,7 @@ class RuleViolation(object):
""" """
Returns a key that can be sorted on Returns a key that can be sorted on
""" """
return 0 return (0, 0, self.rule.rule_id)
def first_line(self): def first_line(self):
""" """
...@@ -441,11 +431,12 @@ class RuleViolation(object): ...@@ -441,11 +431,12 @@ class RuleViolation(object):
self.full_path = full_path self.full_path = full_path
self._mark_disabled(string_lines.get_string()) self._mark_disabled(string_lines.get_string())
def print_results(self, out): def print_results(self, _options, out):
""" """
Prints the results represented by this rule violation. Prints the results represented by this rule violation.
Arguments: Arguments:
_options: ignored
out: output file out: output file
""" """
print("{}: {}".format(self.full_path, self.rule.rule_id), file=out) print("{}: {}".format(self.full_path, self.rule.rule_id), file=out)
...@@ -520,7 +511,7 @@ class ExpressionRuleViolation(RuleViolation): ...@@ -520,7 +511,7 @@ class ExpressionRuleViolation(RuleViolation):
""" """
Returns a key that can be sorted on Returns a key that can be sorted on
""" """
return (self.start_line, self.start_column) return (self.start_line, self.start_column, self.rule.rule_id)
def first_line(self): def first_line(self):
""" """
...@@ -553,27 +544,36 @@ class ExpressionRuleViolation(RuleViolation): ...@@ -553,27 +544,36 @@ class ExpressionRuleViolation(RuleViolation):
self.lines.append(string_lines.line_number_to_line(line_number)) self.lines.append(string_lines.line_number_to_line(line_number))
self._mark_expression_disabled(string_lines) self._mark_expression_disabled(string_lines)
def print_results(self, out): def print_results(self, options, out):
""" """
Prints the results represented by this rule violation. Prints the results represented by this rule violation.
Arguments: Arguments:
options: A list of the following options:
list_files: True to print only file names, and False to print
all violations.
verbose: True for multiple lines of context, False single line.
out: output file out: output file
""" """
for line_number in range(self.start_line, self.end_line + 1): if options['verbose']:
end_line = self.end_line + 1
else:
end_line = self.start_line + 1
for line_number in range(self.start_line, end_line):
if line_number == self.start_line: if line_number == self.start_line:
column = self.start_column column = self.start_column
rule_id = self.rule.rule_id + ":" rule_id = self.rule.rule_id + ":"
else: else:
column = 1 column = 1
rule_id = " " * (len(self.rule.rule_id) + 1) rule_id = " " * (len(self.rule.rule_id) + 1)
line = self.lines[line_number - self.start_line].encode(encoding='utf-8')
print("{}: {}:{}: {} {}".format( print("{}: {}:{}: {} {}".format(
self.full_path, self.full_path,
line_number, line_number,
column, column,
rule_id, rule_id,
self.lines[line_number - self.start_line].encode(encoding='utf-8') line
), file=out) ), file=out)
...@@ -619,6 +619,7 @@ class FileResults(object): ...@@ -619,6 +619,7 @@ class FileResults(object):
options: A list of the following options: options: A list of the following options:
list_files: True to print only file names, and False to print list_files: True to print only file names, and False to print
all violations. all violations.
verbose: True for multiple lines of context, False single line.
out: output file out: output file
Returns: Returns:
...@@ -636,7 +637,7 @@ class FileResults(object): ...@@ -636,7 +637,7 @@ class FileResults(object):
for violation in self.violations: for violation in self.violations:
if not violation.is_disabled: if not violation.is_disabled:
num_violations += 1 num_violations += 1
violation.print_results(out) violation.print_results(options, out)
return num_violations return num_violations
def _filter_commented_code(self, line_comment_delim): def _filter_commented_code(self, line_comment_delim):
...@@ -663,6 +664,11 @@ class FileResults(object): ...@@ -663,6 +664,11 @@ class FileResults(object):
True if the first line of the violation is actually commented out, True if the first line of the violation is actually commented out,
False otherwise. False otherwise.
""" """
if 'parse' in violation.rule.rule_id:
# For parse rules, don't filter them because the comment could be a
# part of the parse issue to begin with.
return False
else:
return violation.first_line().lstrip().startswith(line_comment_delim) return violation.first_line().lstrip().startswith(line_comment_delim)
...@@ -775,6 +781,9 @@ class BaseLinter(object): ...@@ -775,6 +781,9 @@ class BaseLinter(object):
BaseLinter provides some helper functions that are used by multiple linters. BaseLinter provides some helper functions that are used by multiple linters.
""" """
LINE_COMMENT_DELIM = None
def __init__(self): def __init__(self):
""" """
Init method. Init method.
...@@ -888,9 +897,14 @@ class BaseLinter(object): ...@@ -888,9 +897,14 @@ class BaseLinter(object):
""" """
strings = [] if strings is None else strings strings = [] if strings is None else strings
# Find start index of an uncommented line.
start_index = self._uncommented_start_index(template, start_index)
# loop until we found something useful on an uncommented out line
while start_index is not None:
close_char_index = template.find(close_char, start_index) close_char_index = template.find(close_char, start_index)
if close_char_index < 0: if close_char_index < 0:
# if we can't find a close char, let's just quit # If we can't find a close char, let's just quit.
return None return None
open_char_index = template.find(open_char, start_index, close_char_index) open_char_index = template.find(open_char, start_index, close_char_index)
parse_string = ParseString(template, start_index, close_char_index) parse_string = ParseString(template, start_index, close_char_index)
...@@ -902,6 +916,14 @@ class BaseLinter(object): ...@@ -902,6 +916,14 @@ class BaseLinter(object):
valid_index_list.append(parse_string.start_index) valid_index_list.append(parse_string.start_index)
min_valid_index = min(valid_index_list) min_valid_index = min(valid_index_list)
start_index = self._uncommented_start_index(template, min_valid_index)
if start_index == min_valid_index:
break
if start_index is None:
# No uncommented code to search.
return None
if parse_string.start_index == min_valid_index: if parse_string.start_index == min_valid_index:
strings.append(parse_string) strings.append(parse_string)
if parse_string.end_index is None: if parse_string.end_index is None:
...@@ -934,47 +956,36 @@ class BaseLinter(object): ...@@ -934,47 +956,36 @@ class BaseLinter(object):
num_open_chars=num_open_chars - 1, strings=strings num_open_chars=num_open_chars - 1, strings=strings
) )
def _check_concat_with_html(self, file_contents, rule, results): def _uncommented_start_index(self, template, start_index):
""" """
Checks that strings with HTML are not concatenated Finds the first start_index that is on an uncommented line.
Arguments: Arguments:
file_contents: The contents of the JavaScript file. template: The template to be searched.
rule: The rule that was violated if this fails. start_index: The start index of the last open char.
results: A file results objects to which violations will be added.
""" Returns:
lines = StringLines(file_contents) If start_index is on an uncommented out line, returns start_index.
last_expression = None Otherwise, returns the start_index of the first line that is
# attempt to match a string that starts with '<' or ends with '>' uncommented, if there is one. Otherwise, returns None.
regex_string_with_html = r"""["'](?:\s*<.*|.*>\s*)["']""" """
regex_concat_with_html = r"(\+\s*{}|{}\s*\+)".format(regex_string_with_html, regex_string_with_html) if self.LINE_COMMENT_DELIM is not None:
for match in re.finditer(regex_concat_with_html, file_contents): line_start_index = StringLines(template).index_to_line_start_index(start_index)
found_new_violation = False uncommented_line_start_index_regex = re.compile("^(?!\s*{})".format(self.LINE_COMMENT_DELIM), re.MULTILINE)
if last_expression is not None: # Finds the line start index of the first uncommented line, including the current line.
last_line = lines.index_to_line_number(last_expression.start_index) match = uncommented_line_start_index_regex.search(template, line_start_index)
# check if violation should be expanded to more of the same line if match is None:
if last_line == lines.index_to_line_number(match.start()): # No uncommented lines.
last_expression = Expression( return None
last_expression.start_index, match.end(), template=file_contents elif match.start() < start_index:
) # Current line is uncommented, so return original start_index.
return start_index
else: else:
results.violations.append(ExpressionRuleViolation( # Return start of first uncommented line.
rule, last_expression return match.start()
))
found_new_violation = True
else: else:
found_new_violation = True # No line comment delimeter, so this acts as a no-op.
if found_new_violation: return start_index
last_expression = Expression(
match.start(), match.end(), template=file_contents
)
# add final expression
if last_expression is not None:
results.violations.append(ExpressionRuleViolation(
rule, last_expression
))
class UnderscoreTemplateLinter(BaseLinter): class UnderscoreTemplateLinter(BaseLinter):
...@@ -1093,7 +1104,8 @@ class JavaScriptLinter(BaseLinter): ...@@ -1093,7 +1104,8 @@ class JavaScriptLinter(BaseLinter):
""" """
The linter for JavaScript and CoffeeScript files. The linter for JavaScript and CoffeeScript files.
""" """
underScoreLinter = UnderscoreTemplateLinter()
LINE_COMMENT_DELIM = "//"
def __init__(self): def __init__(self):
""" """
...@@ -1102,6 +1114,7 @@ class JavaScriptLinter(BaseLinter): ...@@ -1102,6 +1114,7 @@ class JavaScriptLinter(BaseLinter):
super(JavaScriptLinter, self).__init__() super(JavaScriptLinter, self).__init__()
self._skip_javascript_dirs = self._skip_dirs + ('i18n', 'static/coffee') self._skip_javascript_dirs = self._skip_dirs + ('i18n', 'static/coffee')
self._skip_coffeescript_dirs = self._skip_dirs self._skip_coffeescript_dirs = self._skip_dirs
self.underscore_linter = UnderscoreTemplateLinter()
def process_file(self, directory, file_name): def process_file(self, directory, file_name):
""" """
...@@ -1168,8 +1181,8 @@ class JavaScriptLinter(BaseLinter): ...@@ -1168,8 +1181,8 @@ class JavaScriptLinter(BaseLinter):
self._check_javascript_interpolate(file_contents, results) self._check_javascript_interpolate(file_contents, results)
self._check_javascript_escape(file_contents, results) self._check_javascript_escape(file_contents, results)
self._check_concat_with_html(file_contents, Rules.javascript_concat_html, results) self._check_concat_with_html(file_contents, Rules.javascript_concat_html, results)
self.underScoreLinter.check_underscore_file_is_safe(file_contents, results) self.underscore_linter.check_underscore_file_is_safe(file_contents, results)
results.prepare_results(file_contents, line_comment_delim='//') results.prepare_results(file_contents, line_comment_delim=self.LINE_COMMENT_DELIM)
def _get_expression_for_function(self, file_contents, function_start_match): def _get_expression_for_function(self, file_contents, function_start_match):
""" """
...@@ -1406,6 +1419,370 @@ class JavaScriptLinter(BaseLinter): ...@@ -1406,6 +1419,370 @@ class JavaScriptLinter(BaseLinter):
return True return True
return False return False
def _check_concat_with_html(self, file_contents, rule, results):
"""
Checks that strings with HTML are not concatenated
Arguments:
file_contents: The contents of the JavaScript file.
rule: The rule that was violated if this fails.
results: A file results objects to which violations will be added.
"""
lines = StringLines(file_contents)
last_expression = None
# attempt to match a string that starts with '<' or ends with '>'
regex_string_with_html = r"""["'](?:\s*<.*|.*>\s*)["']"""
regex_concat_with_html = r"(\+\s*{}|{}\s*\+)".format(regex_string_with_html, regex_string_with_html)
for match in re.finditer(regex_concat_with_html, file_contents):
found_new_violation = False
if last_expression is not None:
last_line = lines.index_to_line_number(last_expression.start_index)
# check if violation should be expanded to more of the same line
if last_line == lines.index_to_line_number(match.start()):
last_expression = Expression(
last_expression.start_index, match.end(), template=file_contents
)
else:
results.violations.append(ExpressionRuleViolation(
rule, last_expression
))
found_new_violation = True
else:
found_new_violation = True
if found_new_violation:
last_expression = Expression(
match.start(), match.end(), template=file_contents
)
# add final expression
if last_expression is not None:
results.violations.append(ExpressionRuleViolation(
rule, last_expression
))
class BaseVisitor(ast.NodeVisitor):
"""
Base class for AST NodeVisitor used for Python safe linting.
Important: This base visitor skips all __repr__ function definitions.
"""
def __init__(self, file_contents, results):
"""
Init method.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
"""
super(BaseVisitor, self).__init__()
self.file_contents = file_contents
self.lines = StringLines(self.file_contents)
self.results = results
def node_to_expression(self, node):
"""
Takes a node and translates it to an expression to be used with
violations.
Arguments:
node: An AST node.
"""
line_start_index = self.lines.line_number_to_start_index(node.lineno)
start_index = line_start_index + node.col_offset
if isinstance(node, ast.Str):
# Triple quotes give col_offset of -1 on the last line of the string.
if node.col_offset == -1:
triple_quote_regex = re.compile("""['"]{3}""")
end_triple_quote_match = triple_quote_regex.search(self.file_contents, line_start_index)
open_quote_index = self.file_contents.rfind(end_triple_quote_match.group(), 0, end_triple_quote_match.start())
if open_quote_index > 0:
start_index = open_quote_index
else:
# If we can't find a starting quote, let's assume that what
# we considered the end quote is really the start quote.
start_index = end_triple_quote_match.start()
string = ParseString(self.file_contents, start_index, len(self.file_contents))
return Expression(string.start_index, string.end_index)
else:
return Expression(start_index)
def visit_FunctionDef(self, node):
"""
Skips processing of __repr__ functions, since these sometimes use '<'
for non-HTML purposes.
Arguments:
node: An AST node.
"""
if node.name != '__repr__':
self.generic_visit(node)
class HtmlStringVisitor(BaseVisitor):
"""
Checks for strings that contain HTML. Assumes any string with < or > is
considered potential HTML.
To be used only with strings in context of format or concat.
"""
def __init__(self, file_contents, results, skip_wrapped_html=False):
"""
Init function.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
skip_wrapped_html: True if visitor should skip strings wrapped with
HTML() or Text(), and False otherwise.
"""
super(HtmlStringVisitor, self).__init__(file_contents, results)
self.skip_wrapped_html = skip_wrapped_html
self.unsafe_html_string_nodes = []
self.over_escaped_entity_string_nodes = []
self.has_text_or_html_call = False
def visit_Str(self, node):
"""
When strings are visited, checks if it contains HTML.
Arguments:
node: An AST node.
"""
# Skips '<' (and '>') in regex named groups. For example, "(?P<group>)".
if re.search('[(][?]P<', node.s) is None and re.search('[<>]', node.s) is not None:
self.unsafe_html_string_nodes.append(node)
if re.search(r"&[#]?[a-zA-Z0-9]+;", node.s):
self.over_escaped_entity_string_nodes.append(node)
def visit_Call(self, node):
"""
Skips processing of string contained inside HTML() and Text() calls when
skip_wrapped_html is True.
Arguments:
node: An AST node.
"""
is_html_or_text_call = isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']
if self.skip_wrapped_html and is_html_or_text_call:
self.has_text_or_html_call = True
else:
self.generic_visit(node)
class ContainsFormatVisitor(BaseVisitor):
"""
Checks if there are any nested format() calls.
This visitor is meant to be called on HTML() and Text() ast.Call nodes to
search for any illegal nested format() calls.
"""
def __init__(self, file_contents, results):
"""
Init function.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
"""
super(ContainsFormatVisitor, self).__init__(file_contents, results)
self.contains_format_call = False
def visit_Attribute(self, node):
"""
Simple check for format calls (attribute).
Arguments:
node: An AST node.
"""
# Attribute(expr value, identifier attr, expr_context ctx)
if node.attr == 'format':
self.contains_format_call = True
else:
self.generic_visit(node)
class FormatInterpolateVisitor(BaseVisitor):
"""
Checks if format() interpolates any HTML() or Text() calls. In other words,
are Text() or HTML() calls nested inside the call to format().
This visitor is meant to be called on a format() attribute node.
"""
def __init__(self, file_contents, results):
"""
Init function.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
"""
super(FormatInterpolateVisitor, self).__init__(file_contents, results)
self.interpolates_text_or_html = False
self.format_caller_node = None
def visit_Call(self, node):
"""
Checks all calls. Remembers the caller of the initial format() call, or
in other words, the left-hand side of the call. Also tracks if HTML()
or Text() calls were seen.
Arguments:
node: The AST root node.
"""
if isinstance(node.func, ast.Attribute) and node.func.attr is 'format':
if self.format_caller_node is None:
# Store the caller, or left-hand-side node of the initial
# format() call.
self.format_caller_node = node.func.value
elif isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']:
# found Text() or HTML() call in arguments passed to format()
self.interpolates_text_or_html = True
self.generic_visit(node)
def generic_visit(self, node):
"""
Determines whether or not to continue to visit nodes according to the
following rules:
- Once a Text() or HTML() call has been found, stop visiting more nodes.
- Skip the caller of the outer-most format() call, or in other words,
the left-hand side of the call.
Arguments:
node: The AST root node.
"""
if self.interpolates_text_or_html is False:
if self.format_caller_node is not node:
super(FormatInterpolateVisitor, self).generic_visit(node)
class OuterFormatVisitor(BaseVisitor):
"""
Only visits outer most Python format() calls. These checks are not repeated
for any nested format() calls.
This visitor is meant to be used once from the root.
"""
def visit_Call(self, node):
"""
Checks that format() calls which contain HTML() or Text() use HTML() or
Text() as the caller. In other words, Text() or HTML() must be used
before format() for any arguments to format() that contain HTML() or
Text().
Arguments:
node: An AST node.
"""
if isinstance(node.func, ast.Attribute) and node.func.attr == 'format':
visitor = HtmlStringVisitor(self.file_contents, self.results, True)
visitor.visit(node)
for unsafe_html_string_node in visitor.unsafe_html_string_nodes:
self.results.violations.append(ExpressionRuleViolation(
Rules.python_wrap_html, self.node_to_expression(unsafe_html_string_node)
))
# Do not continue processing child nodes of this format() node.
else:
self.generic_visit(node)
class AllNodeVisitor(BaseVisitor):
"""
Visits all nodes and does not interfere with calls to generic_visit(). This
is used in conjunction with other visitors to check for a variety of
violations.
This visitor is meant to be used once from the root.
"""
def visit_Attribute(self, node):
"""
Checks for uses of deprecated `display_name_with_default_escaped`.
Arguments:
node: An AST node.
"""
if node.attr == 'display_name_with_default_escaped':
self.results.violations.append(ExpressionRuleViolation(
Rules.python_deprecated_display_name, self.node_to_expression(node)
))
self.generic_visit(node)
def visit_Call(self, node):
"""
Checks for a variety of violations:
- Checks that format() calls with nested HTML() or Text() calls use
HTML() or Text() on the left-hand side.
- For each HTML() and Text() call, calls into separate visitor to check
for inner format() calls.
Arguments:
node: An AST node.
"""
if isinstance(node.func, ast.Attribute) and node.func.attr == 'format':
visitor = FormatInterpolateVisitor(self.file_contents, self.results)
visitor.visit(node)
if visitor.interpolates_text_or_html:
format_caller = node.func.value
is_caller_html_or_text = isinstance(format_caller, ast.Call) and \
isinstance(format_caller.func, ast.Name) and \
format_caller.func.id in ['Text', 'HTML']
# If format call has nested Text() or HTML(), then the caller,
# or left-hand-side of the format() call, must be a call to
# Text() or HTML().
if is_caller_html_or_text is False:
self.results.violations.append(ExpressionRuleViolation(
Rules.python_requires_html_or_text, self.node_to_expression(node.func)
))
elif isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']:
visitor = ContainsFormatVisitor(self.file_contents, self.results)
visitor.visit(node)
if visitor.contains_format_call:
self.results.violations.append(ExpressionRuleViolation(
Rules.python_close_before_format, self.node_to_expression(node.func)
))
self.generic_visit(node)
def visit_BinOp(self, node):
"""
Checks for concat using '+' and interpolation using '%' with strings
containing HTML.
"""
rule = None
if isinstance(node.op, ast.Mod):
rule = Rules.python_interpolate_html
elif isinstance(node.op, ast.Add):
rule = Rules.python_concat_html
if rule is not None:
visitor = HtmlStringVisitor(self.file_contents, self.results)
visitor.visit(node.left)
has_illegal_html_string = len(visitor.unsafe_html_string_nodes) > 0
# Create new visitor to clear state.
visitor = HtmlStringVisitor(self.file_contents, self.results)
visitor.visit(node.right)
has_illegal_html_string = has_illegal_html_string or len(visitor.unsafe_html_string_nodes) > 0
if has_illegal_html_string:
self.results.violations.append(ExpressionRuleViolation(
rule, self.node_to_expression(node)
))
self.generic_visit(node)
class PythonLinter(BaseLinter): class PythonLinter(BaseLinter):
""" """
...@@ -1417,6 +1794,8 @@ class PythonLinter(BaseLinter): ...@@ -1417,6 +1794,8 @@ class PythonLinter(BaseLinter):
Skipping docstrings is an enhancement that could be added. Skipping docstrings is an enhancement that could be added.
""" """
LINE_COMMENT_DELIM = "#"
def __init__(self): def __init__(self):
""" """
Init method. Init method.
...@@ -1446,6 +1825,11 @@ class PythonLinter(BaseLinter): ...@@ -1446,6 +1825,11 @@ class PythonLinter(BaseLinter):
if file_name.lower().endswith('.py') is False: if file_name.lower().endswith('.py') is False:
return results return results
# skip tests.py files
# TODO: Add configuration for files and paths
if file_name.lower().endswith('tests.py'):
return results
# skip this linter code (i.e. safe_template_linter.py) # skip this linter code (i.e. safe_template_linter.py)
if file_name == os.path.basename(__file__): if file_name == os.path.basename(__file__):
return results return results
...@@ -1464,236 +1848,104 @@ class PythonLinter(BaseLinter): ...@@ -1464,236 +1848,104 @@ class PythonLinter(BaseLinter):
results: A file results objects to which violations will be added. results: A file results objects to which violations will be added.
""" """
self._check_concat_with_html(file_contents, Rules.python_concat_html, results) root_node = self.parse_python_code(file_contents, results)
self._check_deprecated_display_name(file_contents, results) self.check_python_code_is_safe(file_contents, root_node, results)
self._check_custom_escape(file_contents, results) # Check rules specific to .py files only
self._check_html(file_contents, results) # Note that in template files, the scope is different, so you can make
results.prepare_results(file_contents, line_comment_delim='#') # different assumptions.
if root_node is not None:
# check format() rules that can be run on outer-most format() calls
visitor = OuterFormatVisitor(file_contents, results)
visitor.visit(root_node)
results.prepare_results(file_contents, line_comment_delim=self.LINE_COMMENT_DELIM)
def _check_deprecated_display_name(self, file_contents, results): def check_python_code_is_safe(self, python_code, root_node, results):
""" """
Checks that the deprecated display_name_with_default_escaped is not Checks for violations in Python code snippet. This can also be used for
used. Adds violation to results if there is a problem. Python that appears in files other than .py files, like in templates.
Arguments: Arguments:
file_contents: The contents of the Python file python_code: The contents of the Python code.
results: A list of results into which violations will be added. root_node: The root node of the Python code parsed by AST.
results: A file results objects to which violations will be added.
""" """
for match in re.finditer(r'\.display_name_with_default_escaped', file_contents): if root_node is not None:
expression = Expression(match.start(), match.end()) # check illegal concatenation and interpolation
results.violations.append(ExpressionRuleViolation( visitor = AllNodeVisitor(python_code, results)
Rules.python_deprecated_display_name, expression visitor.visit(root_node)
)) # check rules parse with regex
self._check_custom_escape(python_code, results)
def _check_custom_escape(self, file_contents, results): def parse_python_code(self, python_code, results):
""" """
Checks for custom escaping calls, rather than using a standard escaping Parses Python code.
method.
Arguments: Arguments:
file_contents: The contents of the Python file python_code: The Python code to be parsed.
results: A list of results into which violations will be added.
"""
for match in re.finditer("(<.*&lt;|&lt;.*<)", file_contents):
expression = Expression(match.start(), match.end())
results.violations.append(ExpressionRuleViolation(
Rules.python_custom_escape, expression
))
def _check_html(self, file_contents, results):
"""
Checks many rules related to HTML in a Python file.
Arguments: Returns:
file_contents: The contents of the Python file The root node that was parsed, or None for SyntaxError.
results: A list of results into which violations will be added.
"""
# Text() Expressions keyed by its end index
text_calls_by_end_index = {}
# HTML() Expressions keyed by its end index
html_calls_by_end_index = {}
start_index = 0
while True:
# check HTML(), Text() and format() calls
result = self._check_html_text_format(
file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
)
next_start_index = result['next_start_index']
interpolate_end_index = result['interpolate_end_index']
# check for interpolation including HTML outside of function calls
self._check_interpolate_with_html(
file_contents, start_index, interpolate_end_index, results
)
# advance the search
start_index = next_start_index
# end if there is nothing left to search
if interpolate_end_index is None:
break
def _check_html_text_format(
self, file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
):
""" """
Checks for HTML(), Text() and format() calls, and various rules related python_code = self._strip_file_encoding(python_code)
to these calls. try:
return ast.parse(python_code)
Arguments: except SyntaxError as e:
file_contents: The contents of the Python file if e.offset is None:
start_index: The index at which to begin searching for a function expression = Expression(0)
call.
text_calls_by_end_index: Text() Expressions keyed by its end index.
html_calls_by_end_index: HTML() Expressions keyed by its end index.
results: A list of results into which violations will be added.
Returns:
A dict with the following keys:
'next_start_index': The start index of the next search for a
function call.
'interpolate_end_index': The end index of the next next search
for interpolation with html, or None if the end of file
should be used.
"""
# used to find opening of .format(), Text() and HTML() calls
regex_function_open = re.compile(r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()")
interpolate_end_index = None
end_index = None
strings = None
html_calls = []
while True:
# first search for HTML(), Text(), or .format()
if end_index is None:
function_match = regex_function_open.search(file_contents, start_index)
else: else:
function_match = regex_function_open.search(file_contents, start_index, end_index) lines = StringLines(python_code)
if function_match is not None: line_start_index = lines.line_number_to_start_index(e.lineno)
if interpolate_end_index is None: expression = Expression(line_start_index + e.offset)
interpolate_end_index = function_match.start()
function_close_result = self._find_closing_char_index(
None, '(', ')', file_contents, start_index=function_match.end(),
)
if function_close_result is None:
results.violations.append(ExpressionRuleViolation(
Rules.python_parse_error, Expression(function_match.start())
))
else:
expression = Expression(
function_match.start(), function_close_result['close_char_index'] + 1, file_contents,
start_delim=function_match.group(), end_delim=")"
)
# if this an outer most Text(), HTML(), or format() call
if end_index is None:
end_index = expression.end_index
interpolate_end_index = expression.start_index
strings = function_close_result['strings']
if function_match.group() == '.format(':
if 'HTML(' in expression.expression_inner or 'Text(' in expression.expression_inner:
is_wrapped_with_text = str(function_match.start()) in text_calls_by_end_index.keys()
is_wrapped_with_html = str(function_match.start()) in html_calls_by_end_index.keys()
if is_wrapped_with_text is False and is_wrapped_with_html is False:
results.violations.append(ExpressionRuleViolation( results.violations.append(ExpressionRuleViolation(
Rules.python_requires_html_or_text, expression Rules.python_parse_error, expression
)) ))
else: # expression is 'HTML(' or 'Text(' return None
# HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
# Generally, format() would be the issue if there is one.
if regex_function_open.search(expression.expression_inner) is not None:
results.violations.append(ExpressionRuleViolation(
Rules.python_close_before_format, expression
))
if function_match.group() == 'Text(':
text_calls_by_end_index[str(expression.end_index)] = expression
else: # function_match.group() == 'HTML(':
html_calls_by_end_index[str(expression.end_index)] = expression
html_calls.append(expression)
start_index = function_match.end()
else:
break
# checks strings in the outer most call to ensure they are properly
# wrapped with HTML()
self._check_format_html_strings_wrapped(strings, html_calls, results)
# compute where to continue the search
if function_match is None and end_index is None:
next_start_index = start_index
elif end_index is None:
next_start_index = function_match.end()
else:
next_start_index = end_index
return {
'next_start_index': next_start_index,
'interpolate_end_index': interpolate_end_index,
}
def _check_format_html_strings_wrapped(self, strings, html_calls, results): def _strip_file_encoding(self, file_contents):
""" """
Checks that any string inside a format call that seems to contain HTML Removes file encoding from file_contents because the file was already
is wrapped with a call to HTML(). read into Unicode, and the AST parser complains.
Arguments: Arguments:
strings: A list of ParseStrings for each string inside the format() file_contents: The Python file contents.
call.
html_calls: A list of Expressions representing all of the HTML()
calls inside the format() call.
results: A list of results into which violations will be added.
""" Returns:
html_strings = [] The Python file contents with the encoding stripped.
html_wrapped_strings = [] """
if strings is not None: # PEP-263 Provides Regex for Declaring Encoding
# find all strings that contain HTML # Example: -*- coding: <encoding name> -*-
for string in strings: # This is only allowed on the first two lines, and it must be stripped
if '<' in string.string: # before parsing, because we have already read into Unicode and the
html_strings.append(string) # AST parser complains.
# check if HTML string is appropriately wrapped encoding_regex = re.compile(r"^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
for html_call in html_calls: encoding_match = encoding_regex.search(file_contents)
if html_call.start_index < string.start_index < string.end_index < html_call.end_index: # If encoding comment not found on first line, search second line.
html_wrapped_strings.append(string) if encoding_match is None:
break lines = StringLines(file_contents)
# loop through all unwrapped strings if lines.line_count() >= 2:
for unsafe_string in set(html_strings) - set(html_wrapped_strings): encoding_match = encoding_regex.search(lines.line_number_to_line(2))
unsafe_string_expression = Expression(unsafe_string.start_index) # If encoding was found, strip it
results.violations.append(ExpressionRuleViolation( if encoding_match is not None:
Rules.python_wrap_html, unsafe_string_expression file_contents = file_contents.replace(encoding_match.group(), '#', 1)
)) return file_contents
def _check_interpolate_with_html(self, file_contents, start_index, end_index, results): def _check_custom_escape(self, file_contents, results):
""" """
Find interpolations with html that fall outside of any calls to HTML(), Checks for custom escaping calls, rather than using a standard escaping
Text(), and .format(). method.
Arguments: Arguments:
file_contents: The contents of the Python file file_contents: The contents of the Python file
start_index: The index to start the search, or None if nothing to
search
end_index: The index to end the search, or None if the end of file
should be used.
results: A list of results into which violations will be added. results: A list of results into which violations will be added.
""" """
# used to find interpolation with HTML for match in re.finditer("(<.*&lt;|&lt;.*<)", file_contents):
pattern_interpolate_html_inner = r'(<.*%s|%s.*<|<.*{\w*}|{\w*}.*<)' expression = Expression(match.start(), match.end())
regex_interpolate_html = re.compile(r"""(".*{}.*"|'.*{}.*')""".format(
pattern_interpolate_html_inner, pattern_interpolate_html_inner
))
if end_index is None:
interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index)
else:
interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index, end_index)
for match_html_string in interpolate_string_iter:
expression = Expression(match_html_string.start(), match_html_string.end())
results.violations.append(ExpressionRuleViolation( results.violations.append(ExpressionRuleViolation(
Rules.python_interpolate_html, expression Rules.python_custom_escape, expression
)) ))
...@@ -1701,7 +1953,15 @@ class MakoTemplateLinter(BaseLinter): ...@@ -1701,7 +1953,15 @@ class MakoTemplateLinter(BaseLinter):
""" """
The linter for Mako template files. The linter for Mako template files.
""" """
javaScriptLinter = JavaScriptLinter() LINE_COMMENT_DELIM = "##"
def __init__(self):
"""
Init method.
"""
super(MakoTemplateLinter, self).__init__()
self.javascript_linter = JavaScriptLinter()
self.python_linter = PythonLinter()
def process_file(self, directory, file_name): def process_file(self, directory, file_name):
""" """
...@@ -1772,7 +2032,7 @@ class MakoTemplateLinter(BaseLinter): ...@@ -1772,7 +2032,7 @@ class MakoTemplateLinter(BaseLinter):
return return
has_page_default = self._has_page_default(mako_template, results) has_page_default = self._has_page_default(mako_template, results)
self._check_mako_expressions(mako_template, has_page_default, results) self._check_mako_expressions(mako_template, has_page_default, results)
results.prepare_results(mako_template, line_comment_delim='##') results.prepare_results(mako_template, line_comment_delim=self.LINE_COMMENT_DELIM)
def _is_django_template(self, mako_template): def _is_django_template(self, mako_template):
""" """
...@@ -1861,9 +2121,7 @@ class MakoTemplateLinter(BaseLinter): ...@@ -1861,9 +2121,7 @@ class MakoTemplateLinter(BaseLinter):
continue continue
context = self._get_context(contexts, expression.start_index) context = self._get_context(contexts, expression.start_index)
self._check_filters(mako_template, expression, context, has_page_default, results) self._check_expression_and_filters(mako_template, expression, context, has_page_default, results)
self._check_deprecated_display_name(expression, results)
self._check_html_and_text(expression, has_page_default, results)
def _check_javascript_contexts(self, mako_template, contexts, results): def _check_javascript_contexts(self, mako_template, contexts, results):
""" """
...@@ -1909,104 +2167,79 @@ class MakoTemplateLinter(BaseLinter): ...@@ -1909,104 +2167,79 @@ class MakoTemplateLinter(BaseLinter):
""" """
javascript_results = FileResults("") javascript_results = FileResults("")
self.javaScriptLinter.check_javascript_file_is_safe(javascript_code, javascript_results) self.javascript_linter.check_javascript_file_is_safe(javascript_code, javascript_results)
# translate the violations into the location within the original self._shift_and_add_violations(javascript_results, start_offset, results)
# Mako template
for violation in javascript_results.violations:
expression = violation.expression
expression.start_index += start_offset
if expression.end_index is not None:
expression.end_index += start_offset
results.violations.append(ExpressionRuleViolation(violation.rule, expression))
def _check_deprecated_display_name(self, expression, results): def _check_expression_python(self, python_code, start_offset, has_page_default, results):
""" """
Checks that the deprecated display_name_with_default_escaped is not Lint the Python inside a single Python expression in a Mako template.
used. Adds violation to results if there is a problem.
Arguments: Arguments:
expression: An Expression python_code: The Python contents of an expression.
start_offset: The offset of the Python content inside the original
Mako template.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added. results: A list of results into which violations will be added.
""" Side effect:
if '.display_name_with_default_escaped' in expression.expression: Adds Python violations to results.
results.violations.append(ExpressionRuleViolation(
Rules.python_deprecated_display_name, expression """
python_results = FileResults("")
# Dedent expression internals so it is parseable.
# Note that the final columns reported could be off somewhat.
adjusted_python_code = textwrap.dedent(python_code)
first_letter_match = re.search('\w', python_code)
adjusted_first_letter_match = re.search('\w', adjusted_python_code)
if first_letter_match is not None and adjusted_first_letter_match is not None:
start_offset += (first_letter_match.start() - adjusted_first_letter_match.start())
python_code = adjusted_python_code
root_node = self.python_linter.parse_python_code(python_code, python_results)
self.python_linter.check_python_code_is_safe(python_code, root_node, python_results)
# Check mako expression specific Python rules.
if root_node is not None:
visitor = HtmlStringVisitor(python_code, python_results, True)
visitor.visit(root_node)
for unsafe_html_string_node in visitor.unsafe_html_string_nodes:
python_results.violations.append(ExpressionRuleViolation(
Rules.python_wrap_html, visitor.node_to_expression(unsafe_html_string_node)
))
if has_page_default:
for over_escaped_entity_string_node in visitor.over_escaped_entity_string_nodes:
python_results.violations.append(ExpressionRuleViolation(
Rules.mako_html_entities, visitor.node_to_expression(over_escaped_entity_string_node)
)) ))
python_results.prepare_results(python_code, line_comment_delim=self.LINE_COMMENT_DELIM)
self._shift_and_add_violations(python_results, start_offset, results)
def _check_html_and_text(self, expression, has_page_default, results): def _shift_and_add_violations(self, other_linter_results, start_offset, results):
""" """
Checks rules related to proper use of HTML() and Text(). Adds results from a different linter to the Mako results, after shifting
the offset into the original Mako template.
Arguments: Arguments:
expression: A Mako Expression. other_linter_results: Results from another linter.
has_page_default: True if the page is marked as default, False start_offset: The offset of the linted code, a part of the template,
otherwise. inside the original Mako template.
results: A list of results into which violations will be added. results: A list of results into which violations will be added.
""" Side effect:
expression_inner = expression.expression_inner Adds violations to results.
# use find to get the template relative inner expression start index
# due to possible skipped white space
template_inner_start_index = expression.start_index
template_inner_start_index += expression.expression.find(expression_inner)
if 'HTML(' in expression_inner:
if expression_inner.startswith('HTML('):
close_paren_index = self._find_closing_char_index(
None, "(", ")", expression_inner, start_index=len('HTML(')
)['close_char_index']
# check that the close paren is at the end of the stripped expression.
if close_paren_index != len(expression_inner) - 1:
results.violations.append(ExpressionRuleViolation(
Rules.mako_html_alone, expression
))
elif expression_inner.startswith('Text(') is False:
results.violations.append(ExpressionRuleViolation(
Rules.mako_html_requires_text, expression
))
else:
if 'Text(' in expression_inner:
results.violations.append(ExpressionRuleViolation(
Rules.mako_text_redundant, expression
))
# strings to be checked for HTML """
unwrapped_html_strings = expression.strings # translate the violations into the proper location within the original
for match in re.finditer(r"(HTML\(|Text\()", expression_inner): # Mako template
result = self._find_closing_char_index(None, "(", ")", expression_inner, start_index=match.end()) for violation in other_linter_results.violations:
if result is not None: expression = violation.expression
close_paren_index = result['close_char_index'] expression.start_index += start_offset
# the argument sent to HTML() or Text() if expression.end_index is not None:
argument = expression_inner[match.end():close_paren_index] expression.end_index += start_offset
if ".format(" in argument: results.violations.append(ExpressionRuleViolation(violation.rule, expression))
results.violations.append(ExpressionRuleViolation(
Rules.python_close_before_format, expression
))
if match.group() == "HTML(":
# remove expression strings wrapped in HTML()
for string in list(unwrapped_html_strings):
html_inner_start_index = template_inner_start_index + match.end()
html_inner_end_index = template_inner_start_index + close_paren_index
if html_inner_start_index <= string.start_index and string.end_index <= html_inner_end_index:
unwrapped_html_strings.remove(string)
# check strings not wrapped in HTML() for '<'
for string in unwrapped_html_strings:
if '<' in string.string_inner:
results.violations.append(ExpressionRuleViolation(
Rules.python_wrap_html, expression
))
break
# check strings not wrapped in HTML() for HTML entities
if has_page_default:
for string in unwrapped_html_strings:
if re.search(r"&[#]?[a-zA-Z0-9]+;", string.string_inner):
results.violations.append(ExpressionRuleViolation(
Rules.mako_html_entities, expression
))
break
def _check_filters(self, mako_template, expression, context, has_page_default, results): def _check_expression_and_filters(self, mako_template, expression, context, has_page_default, results):
""" """
Checks that the filters used in the given Mako expression are valid Checks that the filters used in the given Mako expression are valid
for the given context. Adds violation to results if there is a problem. for the given context. Adds violation to results if there is a problem.
...@@ -2030,13 +2263,21 @@ class MakoTemplateLinter(BaseLinter): ...@@ -2030,13 +2263,21 @@ class MakoTemplateLinter(BaseLinter):
# Example: finds "| n, h}" when given "${x | n, h}" # Example: finds "| n, h}" when given "${x | n, h}"
filters_regex = re.compile(r'\|([.,\w\s]*)\}') filters_regex = re.compile(r'\|([.,\w\s]*)\}')
filters_match = filters_regex.search(expression.expression) filters_match = filters_regex.search(expression.expression)
# Check Python code inside expression.
if filters_match is None:
python_code = expression.expression[2:-1]
else:
python_code = expression.expression[2:filters_match.start()]
self._check_expression_python(python_code, expression.start_index + 2, has_page_default, results)
# Check filters.
if filters_match is None: if filters_match is None:
if context == 'javascript': if context == 'javascript':
results.violations.append(ExpressionRuleViolation( results.violations.append(ExpressionRuleViolation(
Rules.mako_invalid_js_filter, expression Rules.mako_invalid_js_filter, expression
)) ))
return return
filters = filters_match.group(1).replace(" ", "").split(",") filters = filters_match.group(1).replace(" ", "").split(",")
if filters == ['n', 'decode.utf8']: if filters == ['n', 'decode.utf8']:
# {x | n, decode.utf8} is valid in any context # {x | n, decode.utf8} is valid in any context
...@@ -2233,6 +2474,12 @@ class MakoTemplateLinter(BaseLinter): ...@@ -2233,6 +2474,12 @@ class MakoTemplateLinter(BaseLinter):
if start_index < 0: if start_index < 0:
break break
# If start of mako expression is commented out, skip it.
uncommented_start_index = self._uncommented_start_index(mako_template, start_index)
if uncommented_start_index != start_index:
start_index = uncommented_start_index
continue
result = self._find_closing_char_index( result = self._find_closing_char_index(
start_delim, '{', '}', mako_template, start_index=start_index + len(start_delim) start_delim, '{', '}', mako_template, start_index=start_index + len(start_delim)
) )
...@@ -2349,12 +2596,17 @@ def main(): ...@@ -2349,12 +2596,17 @@ def main():
'--list-files', dest='list_files', action='store_true', '--list-files', dest='list_files', action='store_true',
help='Only display the filenames that contain violations.' help='Only display the filenames that contain violations.'
) )
parser.add_argument(
'--verbose', dest='verbose', action='store_true',
help='Print multiple lines where possible for additional context of violations.'
)
parser.add_argument('path', nargs="?", default=None, help='A file to lint or directory to recursively lint.') parser.add_argument('path', nargs="?", default=None, help='A file to lint or directory to recursively lint.')
args = parser.parse_args() args = parser.parse_args()
options = { options = {
'list_files': args.list_files, 'list_files': args.list_files,
'verbose': args.verbose
} }
template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()] template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()]
......
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Testing encoding on second line does not cause violation
message = "<script>alert('XSS');</script>" message = "<script>alert('XSS');</script>"
x = "<string>{}</strong>".format(message) x = "<string>{}</strong>".format(message)
...@@ -73,6 +73,7 @@ class TestLinter(TestCase): ...@@ -73,6 +73,7 @@ class TestLinter(TestCase):
elif data['rule'] is not None: elif data['rule'] is not None:
rules.append(data['rule']) rules.append(data['rule'])
self.assertEqual(len(results.violations), len(rules)) self.assertEqual(len(results.violations), len(rules))
results.violations.sort(key=lambda violation: violation.sort_key())
for violation, rule in zip(results.violations, rules): for violation, rule in zip(results.violations, rules):
self.assertEqual(violation.rule, rule) self.assertEqual(violation.rule, rule)
...@@ -104,6 +105,7 @@ class TestSafeTemplateLinter(TestCase): ...@@ -104,6 +105,7 @@ class TestSafeTemplateLinter(TestCase):
options = { options = {
'list_files': False, 'list_files': False,
'verbose': False,
} }
template_linters = [MakoTemplateLinter(), JavaScriptLinter(), UnderscoreTemplateLinter(), PythonLinter()] template_linters = [MakoTemplateLinter(), JavaScriptLinter(), UnderscoreTemplateLinter(), PythonLinter()]
...@@ -117,13 +119,14 @@ class TestSafeTemplateLinter(TestCase): ...@@ -117,13 +119,14 @@ class TestSafeTemplateLinter(TestCase):
output = out.getvalue() output = out.getvalue()
self.assertEqual(num_violations, 7) self.assertEqual(num_violations, 7)
self.assertIsNotNone(re.search('test\.html.*mako-missing-default', output)) self.assertIsNotNone(re.search('test\.html.*{}'.format(Rules.mako_missing_default.rule_id), output))
self.assertIsNotNone(re.search('test\.coffee.*javascript-concat-html', output)) self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.javascript_concat_html.rule_id), output))
self.assertIsNotNone(re.search('test\.coffee.*underscore-not-escaped', output)) self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.underscore_not_escaped.rule_id), output))
self.assertIsNotNone(re.search('test\.js.*javascript-concat-html', output)) self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.javascript_concat_html.rule_id), output))
self.assertIsNotNone(re.search('test\.js.*underscore-not-escaped', output)) self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.underscore_not_escaped.rule_id), output))
self.assertIsNotNone(re.search('test\.underscore.*underscore-not-escaped', output)) self.assertIsNotNone(re.search('test\.underscore.*{}'.format(Rules.underscore_not_escaped.rule_id), output))
self.assertIsNotNone(re.search('test\.py.*python-interpolate-html', output)) self.assertIsNone(re.search('test\.py.*{}'.format(Rules.python_parse_error.rule_id), output))
self.assertIsNotNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output))
@ddt @ddt
...@@ -254,7 +257,7 @@ class TestMakoTemplateLinter(TestLinter): ...@@ -254,7 +257,7 @@ class TestMakoTemplateLinter(TestLinter):
span_end=HTML("</span>"), span_end=HTML("</span>"),
)} )}
"""), """),
'rule': Rules.mako_html_requires_text 'rule': Rules.python_requires_html_or_text
}, },
{ {
'expression': 'expression':
...@@ -275,7 +278,7 @@ class TestMakoTemplateLinter(TestLinter): ...@@ -275,7 +278,7 @@ class TestMakoTemplateLinter(TestLinter):
span_end=HTML("</span>"), span_end=HTML("</span>"),
)} )}
"""), """),
'rule': Rules.mako_html_requires_text 'rule': Rules.python_requires_html_or_text
}, },
{ {
'expression': 'expression':
...@@ -285,7 +288,7 @@ class TestMakoTemplateLinter(TestLinter): ...@@ -285,7 +288,7 @@ class TestMakoTemplateLinter(TestLinter):
link_end=HTML("</a>"), link_end=HTML("</a>"),
))} ))}
"""), """),
'rule': Rules.python_close_before_format 'rule': [Rules.python_close_before_format, Rules.python_requires_html_or_text]
}, },
{ {
'expression': 'expression':
...@@ -305,7 +308,7 @@ class TestMakoTemplateLinter(TestLinter): ...@@ -305,7 +308,7 @@ class TestMakoTemplateLinter(TestLinter):
span_end="</span>", span_end="</span>",
)} )}
"""), """),
'rule': Rules.python_wrap_html 'rule': [Rules.python_wrap_html, Rules.python_wrap_html]
}, },
{ {
'expression': 'expression':
...@@ -334,10 +337,6 @@ class TestMakoTemplateLinter(TestLinter): ...@@ -334,10 +337,6 @@ class TestMakoTemplateLinter(TestLinter):
'rule': Rules.python_wrap_html 'rule': Rules.python_wrap_html
}, },
{ {
'expression': "${ Text('text') }",
'rule': Rules.mako_text_redundant
},
{
'expression': "${ HTML('<span></span>') }", 'expression': "${ HTML('<span></span>') }",
'rule': None 'rule': None
}, },
...@@ -346,8 +345,12 @@ class TestMakoTemplateLinter(TestLinter): ...@@ -346,8 +345,12 @@ class TestMakoTemplateLinter(TestLinter):
'rule': None 'rule': None
}, },
{ {
'expression': "${ HTML('<span></span>') + 'some other text' }", 'expression': "${ '<span></span>' + 'some other text' }",
'rule': Rules.mako_html_alone 'rule': [Rules.python_concat_html, Rules.python_wrap_html]
},
{
'expression': "${ HTML('<span>missing closing parentheses.</span>' }",
'rule': Rules.python_parse_error
}, },
{ {
'expression': "${'Rock &amp; Roll'}", 'expression': "${'Rock &amp; Roll'}",
...@@ -374,6 +377,21 @@ class TestMakoTemplateLinter(TestLinter): ...@@ -374,6 +377,21 @@ class TestMakoTemplateLinter(TestLinter):
self._validate_data_rules(data, results) self._validate_data_rules(data, results)
def test_check_mako_entity_with_no_default(self):
"""
Test _check_mako_file_is_safe does not fail on entities when
safe-by-default is not set.
"""
linter = MakoTemplateLinter()
results = FileResults('')
mako_template = "${'Rock &#38; Roll'}"
linter._check_mako_file_is_safe(mako_template, results)
self.assertEqual(len(results.violations), 1)
self.assertEqual(results.violations[0].rule, Rules.mako_missing_default)
def test_check_mako_expression_default_disabled(self): def test_check_mako_expression_default_disabled(self):
""" """
Test _check_mako_file_is_safe with disable pragma for safe-by-default Test _check_mako_file_is_safe with disable pragma for safe-by-default
...@@ -1126,10 +1144,10 @@ class TestPythonLinter(TestLinter): ...@@ -1126,10 +1144,10 @@ class TestPythonLinter(TestLinter):
@data( @data(
{'template': 'm = "Plain text " + message + "plain text"', 'rule': None}, {'template': 'm = "Plain text " + message + "plain text"', 'rule': None},
{'template': 'm = "檌檒濦 " + message + "plain text"', 'rule': None}, {'template': 'm = "檌檒濦 " + message + "plain text"', 'rule': None},
{'template': 'm = "<p>" + message + "</p>"', 'rule': Rules.python_concat_html},
{'template': ' # m = "<p>" + commentedOutMessage + "</p>"', 'rule': None}, {'template': ' # m = "<p>" + commentedOutMessage + "</p>"', 'rule': None},
{'template': 'm = " <p> " + message + " </p> "', 'rule': Rules.python_concat_html}, {'template': 'm = "<p>" + message + "</p>"', 'rule': [Rules.python_concat_html, Rules.python_concat_html]},
{'template': 'm = " <p> " + message + " broken string', 'rule': Rules.python_concat_html}, {'template': 'm = " <p> " + message + " </p> "', 'rule': [Rules.python_concat_html, Rules.python_concat_html]},
{'template': 'm = " <p> " + message + " broken string', 'rule': Rules.python_parse_error},
) )
def test_concat_with_html(self, data): def test_concat_with_html(self, data):
""" """
...@@ -1139,6 +1157,7 @@ class TestPythonLinter(TestLinter): ...@@ -1139,6 +1157,7 @@ class TestPythonLinter(TestLinter):
results = FileResults('') results = FileResults('')
linter.check_python_file_is_safe(data['template'], results) linter.check_python_file_is_safe(data['template'], results)
self._validate_data_rules(data, results) self._validate_data_rules(data, results)
def test_check_python_expression_display_name(self): def test_check_python_expression_display_name(self):
...@@ -1180,22 +1199,22 @@ class TestPythonLinter(TestLinter): ...@@ -1180,22 +1199,22 @@ class TestPythonLinter(TestLinter):
{ {
'python': 'python':
textwrap.dedent(""" textwrap.dedent("""
msg = "Mixed {span_start}text{span_end}".format( msg = Text("Mixed {span_start}text{span_end}").format(
span_start=HTML("<span>"), span_start=HTML("<span>"),
span_end=HTML("</span>"), span_end=HTML("</span>"),
) )
"""), """),
'rule': Rules.python_requires_html_or_text 'rule': None
}, },
{ {
'python': 'python':
textwrap.dedent(""" textwrap.dedent("""
msg = Text("Mixed {span_start}text{span_end}").format( msg = "Mixed {span_start}text{span_end}".format(
span_start=HTML("<span>"), span_start=HTML("<span>"),
span_end=HTML("</span>"), span_end=HTML("</span>"),
) )
"""), """),
'rule': None 'rule': Rules.python_requires_html_or_text
}, },
{ {
'python': 'python':
...@@ -1231,6 +1250,22 @@ class TestPythonLinter(TestLinter): ...@@ -1231,6 +1250,22 @@ class TestPythonLinter(TestLinter):
{ {
'python': 'python':
textwrap.dedent(""" textwrap.dedent("""
msg = Text("Mixed {link_start}text{link_end}".format(
link_start=HTML("<a href='{}'>".format(HTML('<b>'))),
link_end=HTML("</a>"),
))
"""),
'rule':
[
Rules.python_close_before_format,
Rules.python_requires_html_or_text,
Rules.python_close_before_format,
Rules.python_requires_html_or_text
]
},
{
'python':
textwrap.dedent("""
msg = "Mixed {span_start}text{span_end}".format( msg = "Mixed {span_start}text{span_end}".format(
span_start="<span>", span_start="<span>",
span_end="</span>", span_end="</span>",
...@@ -1249,6 +1284,7 @@ class TestPythonLinter(TestLinter): ...@@ -1249,6 +1284,7 @@ class TestPythonLinter(TestLinter):
'data-course-id="{course_id}" ' 'data-course-id="{course_id}" '
'href="#test-modal">' 'href="#test-modal">'
).format( ).format(
# Line comment with ' to throw off parser
course_id=course_overview.id course_id=course_overview.id
), ),
link_end=HTML('</a>'), link_end=HTML('</a>'),
...@@ -1266,17 +1302,30 @@ class TestPythonLinter(TestLinter): ...@@ -1266,17 +1302,30 @@ class TestPythonLinter(TestLinter):
}, },
{ {
'python': r"""msg = '<a href="{}"'.format(url)""", 'python': r"""msg = '<a href="{}"'.format(url)""",
'rule': Rules.python_interpolate_html 'rule': Rules.python_wrap_html
}, },
{ {
'python': r"""msg = '{}</p>'.format(message)""", 'python': r"""msg = '{}</p>'.format(message)""",
'rule': Rules.python_interpolate_html 'rule': Rules.python_wrap_html
},
{
'python': r"""r'regex with {} and named group(?P<id>\d+)?$'.format(test)""",
'rule': None
}, },
{ {
'python': r"""msg = '<a href="%s"' % url""", 'python': r"""msg = '<a href="%s"' % url""",
'rule': Rules.python_interpolate_html 'rule': Rules.python_interpolate_html
}, },
{ {
'python':
textwrap.dedent("""
def __repr__(self):
# Assume repr implementations are safe, and not HTML
return '<CCXCon {}>'.format(self.title)
"""),
'rule': None
},
{
'python': r"""msg = '%s</p>' % message""", 'python': r"""msg = '%s</p>' % message""",
'rule': Rules.python_interpolate_html 'rule': Rules.python_interpolate_html
}, },
...@@ -1311,38 +1360,94 @@ class TestPythonLinter(TestLinter): ...@@ -1311,38 +1360,94 @@ class TestPythonLinter(TestLinter):
file_content = textwrap.dedent(""" file_content = textwrap.dedent("""
msg1 = '<a href="{}"'.format(url) msg1 = '<a href="{}"'.format(url)
msg2 = Text("Mixed {link_start}text{link_end}").format( msg2 = "Mixed {link_start}text{link_end}".format(
link_start=HTML("<a href='{}'>".format(url)), link_start=HTML("<a href='{}'>".format(url)),
link_end=HTML("</a>"), link_end="</a>",
) )
msg3 = HTML('<span></span>' msg3 = '<a href="%s"' % url
msg4 = "Mixed {span_start}text{span_end}".format(
span_start="<span>",
span_end="</span>",
)
msg5 = '{}</p>'.format(message)
msg6 = Text(_("String with multiple lines "
"{link_start}unenroll{link_end} "
"and final line")).format(
link_start=HTML(
'<a id="link__over_multiple_lines" '
'data-course-id="{course_id}" '
'href="#test-modal">'
).format(
course_id=course_overview.id
),
link_end=HTML('</a>'),
)
msg7 = '<a href="%s"' % url
""") """)
linter.check_python_file_is_safe(file_content, results) linter.check_python_file_is_safe(file_content, results)
self.assertEqual(len(results.violations), 7) results.violations.sort(key=lambda violation: violation.sort_key())
self.assertEqual(results.violations[0].rule, Rules.python_interpolate_html)
self.assertEqual(results.violations[1].rule, Rules.python_close_before_format) self.assertEqual(len(results.violations), 5)
self.assertEqual(results.violations[2].rule, Rules.python_parse_error) self.assertEqual(results.violations[0].rule, Rules.python_wrap_html)
self.assertEqual(results.violations[1].rule, Rules.python_requires_html_or_text)
self.assertEqual(results.violations[2].rule, Rules.python_close_before_format)
self.assertEqual(results.violations[3].rule, Rules.python_wrap_html) self.assertEqual(results.violations[3].rule, Rules.python_wrap_html)
self.assertEqual(results.violations[4].rule, Rules.python_wrap_html) self.assertEqual(results.violations[4].rule, Rules.python_interpolate_html)
self.assertEqual(results.violations[5].rule, Rules.python_interpolate_html)
self.assertEqual(results.violations[6].rule, Rules.python_interpolate_html) @data(
{
'python':
"""
response_str = textwrap.dedent('''
<div>
<h3 class="result">{response}</h3>
</div>
''').format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 2,
},
{
'python':
"""
def function(self):
'''
Function comment.
'''
response_str = textwrap.dedent('''
<div>
<h3 class="result">{response}</h3>
</div>
''').format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 6,
},
{
'python':
"""
def function(self):
'''
Function comment.
'''
response_str = '''<h3 class="result">{response}</h3>'''.format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 6,
},
{
'python':
"""
def function(self):
'''
Function comment.
'''
response_str = textwrap.dedent('''
<div>
\"\"\" Do we care about a nested triple quote? \"\"\"
<h3 class="result">{response}</h3>
</div>
''').format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 6,
},
)
def test_check_python_with_triple_quotes(self, data):
"""
Test _check_python_file_is_safe with triple quotes.
"""
linter = PythonLinter()
results = FileResults('')
file_content = textwrap.dedent(data['python'])
linter.check_python_file_is_safe(file_content, results)
self._validate_data_rules(data, results)
self.assertEqual(results.violations[0].start_line, data['start_line'])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment