Enhance safe template linter

- Check includes for Mako templates - Check display_name_with_default_escaped uses - Add exceptions for Underscore and <%= - Skip templates that are Django and not Mako - Add pragma to disable errors - Enhance unit tests - Remove violation mako-js-string-missing-quotes - Refactor line processing into StringLines

Enhance safe template linter
- Check includes for Mako templates - Check display_name_with_default_escaped uses - Add exceptions for Underscore and <%= - Skip templates that are Django and not Mako - Add pragma to disable errors - Enhance unit tests - Remove violation mako-js-string-missing-quotes - Refactor line processing into StringLines
916723fc · Robert Raposa · f730936d · 916723fc · 916723fc · 916723fc
Commit 916723fc authored Mar 29, 2016 by Robert Raposa
5 changed files
--- a/scripts/safe_template_linter.py
+++ b/scripts/safe_template_linter.py
@@ -2,22 +2,22 @@
 """
 A linting tool to check if templates are safe
 """
+from __future__ import print_function
 from enum import Enum
 import os
 import re
 import sys

 _skip_dirs = (
-    '/node_modules',
-    '/vendor',
-    '/spec',
-    '/.pycharm_helpers',
-    '/test_root',
-    '/reports/diff_quality',
-    '/common/static/xmodule/modules',
+    '.pycharm_helpers',
+    'common/static/xmodule/modules',
+    'node_modules',
+    'reports/diff_quality',
+    'spec',
+    'scripts/tests/templates',
+    'test_root',
+    'vendor',
 )
-_skip_mako_dirs = _skip_dirs
-_skip_underscore_dirs = _skip_dirs + ('/test',)


 def _is_skip_dir(skip_dirs, directory):
@@ -33,8 +33,8 @@ def _is_skip_dir(skip_dirs, directory):

    """
    for skip_dir in skip_dirs:
-        dir_contains_skip_dir = (directory.find(skip_dir + '/') >= 0)
-        if dir_contains_skip_dir or directory.endswith(skip_dir):
+        dir_contains_skip_dir = '/' + skip_dir + '/' in directory
+        if dir_contains_skip_dir or directory.startswith(skip_dir) or directory.endswith(skip_dir):
            return True
    return False

@@ -48,117 +48,192 @@ def _load_file(self, file_full_path):

    Returns:
        A string containing the files contents.
+
    """
    with open(file_full_path, 'r') as input_file:
        file_contents = input_file.read()
        return file_contents.decode(encoding='utf-8')


-def _get_line_breaks(self, string):
+class StringLines(object):
+    """
+    StringLines provides utility methods to work with a string in terms of
+    lines.  As an example, it can convert an index into a line number or column
+    number (i.e. index into the line).
    """
-    Creates a list, where each entry represents the index into the string where
-    the next line break was found.

-    Arguments:
-        string: The string in which to find line breaks.
+    def __init__(self, string):
+        """
+        Init method.

-    Returns:
-         A list of indices into the string at which each line break can be
-         found.
+        Arguments:
+            string: The string to work with.

-    """
-    line_breaks = [0]
-    index = 0
-    while True:
-        index = string.find('\n', index)
-        if index < 0:
-            break
-        index += 1
-        line_breaks.append(index)
-    return line_breaks
-
-
-def _get_line_number(self, line_breaks, index):
-    """
-    Given the list of line break indices, and an index, determines the line of
-    the index.
+        """
+        self._string = string
+        self._line_breaks = self._process_line_breaks(string)

-    Arguments:
-        line_breaks: A list of indices into a string at which each line break
-            was found.
-        index: The index into the original string for which we want to know the
-            line number
+    def _process_line_breaks(self, string):
+        """
+        Creates a list, where each entry represents the index into the string
+        where the next line break was found.

-    Returns:
-        The line number of the provided index.
+        Arguments:
+            string: The string in which to find line breaks.

-    """
-    current_line_number = 0
-    for line_break_index in line_breaks:
-        if line_break_index <= index:
-            current_line_number += 1
-        else:
-            break
-    return current_line_number
+        Returns:
+             A list of indices into the string at which each line break can be
+             found.

+        """
+        line_breaks = [0]
+        index = 0
+        while True:
+            index = string.find('\n', index)
+            if index < 0:
+                break
+            index += 1
+            line_breaks.append(index)
+        return line_breaks

-def _get_line(self, string, line_breaks, line_number):
-    """
-    Gets the line of text designated by the provided line number.
+    def get_string(self):
+        """
+        Get the original string.
+        """
+        return self._string

-    Arguments:
-        string: The string of content with line breaks.
-        line_breaks: A list of indices into a string at which each line break
-            was found.
-        line_number: The line number of the line we want to find.
+    def index_to_line_number(self, index):
+        """
+        Given an index, determines the line of the index.

-    Returns:
-        The line of text designated by the provided line number.
+        Arguments:
+            index: The index into the original string for which we want to know
+                the line number

-    """
-    start_index = line_breaks[line_number - 1]
-    if len(line_breaks) == line_number:
-        line = string[start_index:]
-    else:
-        end_index = line_breaks[line_number]
-        line = string[start_index:end_index - 1]
-    return line.encode(encoding='utf-8')
+        Returns:
+            The line number of the provided index.

+        """
+        current_line_number = 0
+        for line_break_index in self._line_breaks:
+            if line_break_index <= index:
+                current_line_number += 1
+            else:
+                break
+        return current_line_number

-def _get_column_number(self, line_breaks, line_number, index):
-    """
-    Gets the column (i.e. index into the line) for the given index into the
-    original string.
+    def index_to_column_number(self, index):
+        """
+        Gets the column (i.e. index into the line) for the given index into the
+        original string.

-    Arguments:
-        line_breaks: A list of indices into a string at which each line break
-            was found.
-        line_number: The line number of the line we want to find.
-        index: The index into the original string.
+        Arguments:
+            index: The index into the original string.

-    Returns:
-        The column (i.e. index into the line) for the given index into the
-        original string.
+        Returns:
+            The column (i.e. index into the line) for the given index into the
+            original string.

-    """
-    start_index = line_breaks[line_number - 1]
-    column = index - start_index + 1
-    return column
+        """
+        start_index = self.index_to_line_start_index(index)
+        column = index - start_index + 1
+        return column
+
+    def index_to_line_start_index(self, index):
+        """
+        Gets the index of the start of the line of the given index.
+
+        Arguments:
+            index: The index into the original string.
+
+        Returns:
+            The index of the start of the line of the given index.
+
+        """
+        line_number = self.index_to_line_number(index)
+        return self.line_number_to_start_index(line_number)
+
+    def line_number_to_start_index(self, line_number):
+        """
+        Gets the starting index for the provided line number.
+
+        Arguments:
+            line_number: The line number of the line for which we want to find
+                the start index.
+
+        Returns:
+            The starting index for the provided line number.
+
+        """
+        return self._line_breaks[line_number - 1]
+
+    def line_number_to_line(self, line_number):
+        """
+        Gets the line of text designated by the provided line number.
+
+        Arguments:
+            line_number: The line number of the line we want to find.
+
+        Returns:
+            The line of text designated by the provided line number.
+
+        """
+        start_index = self._line_breaks[line_number - 1]
+        if len(self._line_breaks) == line_number:
+            line = self._string[start_index:]
+        else:
+            end_index = self._line_breaks[line_number]
+            line = self._string[start_index:end_index - 1]
+        return line
+
+    def line_count(self):
+        """
+        Gets the number of lines in the string.
+        """
+        return len(self._line_breaks)


 class Rules(Enum):
    """
    An Enum of each rule which the linter will check.
    """
-    mako_missing_default = ('mako-missing-default', 'Missing default <%page expression_filter="h"/>.')
-    mako_multiple_page_tags = ('mako-multiple-page-tags', 'A Mako template can only have one <%page> tag.')
-    mako_unparsable_expression = ('mako-unparsable-expression', 'The expression could not be properly parsed.')
-    mako_unwanted_html_filter = ('mako-unwanted-html-filter', 'Remove explicit h filters when it is provided by the page directive.')
-    mako_invalid_html_filter = ('mako-invalid-html-filter', 'The expression is using an invalid filter in an HTML context.')
-    mako_invalid_js_filter = ('mako-invalid-js-filter', 'The expression is using an invalid filter in a JavaScript context.')
-    mako_js_string_missing_quotes = ('mako-js-string-missing-quotes', 'An expression using the js_escape_string filter must have surrounding quotes.')
-
-    underscore_not_escaped = ('underscore-not-escaped', 'Expressions should be escaped using <%- expression %>.')
+    mako_missing_default = (
+        'mako-missing-default',
+        'Missing default <%page expression_filter="h"/>.'
+    )
+    mako_multiple_page_tags = (
+        'mako-multiple-page-tags',
+        'A Mako template can only have one <%page> tag.'
+    )
+    mako_include_with_violations = (
+        'mako-include-with-violations',
+        'Must fix violations in included templates first.'
+    )
+    mako_unparsable_expression = (
+        'mako-unparsable-expression',
+        'The expression could not be properly parsed.'
+    )
+    mako_unwanted_html_filter = (
+        'mako-unwanted-html-filter',
+        'Remove explicit h filters when it is provided by the page directive.'
+    )
+    mako_invalid_html_filter = (
+        'mako-invalid-html-filter',
+        'The expression is using an invalid filter in an HTML context.'
+    )
+    mako_deprecated_display_name = (
+        'mako-deprecated-display-name',
+        'Replace deprecated display_name_with_default_escaped with display_name_with_default.'
+    )
+    mako_invalid_js_filter = (
+        'mako-invalid-js-filter',
+        'The expression is using an invalid filter in a JavaScript context.'
+    )
+
+    underscore_not_escaped = (
+        'underscore-not-escaped',
+        'Expressions should be escaped using <%- expression %>.'
+    )

    def __init__(self, rule_id, rule_summary):
        self.rule_id = rule_id
@@ -180,25 +255,64 @@ class RuleViolation(object):
        """
        self.rule = rule
        self.full_path = ''
+        self.is_disabled = False

-    def prepare_results(self, full_path, file_string, line_breaks):
+    def _mark_disabled(self, string, scope_start_string=False):
+        """
+        Performs the disable pragma search and marks the rule as disabled if a
+        matching pragma is found.
+
+        Pragma format::
+
+            safe-lint: disable=violation-name,other-violation-name
+
+        Arguments:
+            string: The string of code in which to search for the pragma.
+            scope_start_string: True if the pragma must be at the start of the
+                string, False otherwise. The pragma is considered at the start
+                of the string if it has a maximum of 5 non-whitespace characters
+                preceding it.
+
+        Side Effect:
+            Sets self.is_disabled as appropriate based on whether the pragma is
+            found.
+
+        """
+        pragma_match = re.search(r'safe-lint:\s*disable=([a-zA-Z,-]+)', string)
+        if pragma_match is None:
+            return
+        if scope_start_string:
+            spaces_count = string.count(' ', 0, pragma_match.start())
+            non_space_count = pragma_match.start() - spaces_count
+            if non_space_count > 5:
+                return
+
+        for disabled_rule in pragma_match.group(1).split(','):
+            if disabled_rule == self.rule.rule_id:
+                self.is_disabled = True
+                return
+
+    def prepare_results(self, full_path, string_lines):
        """
        Preps this instance for results reporting.

        Arguments:
            full_path: Path of the file in violation.
-            file_string: The contents of the file in violation.
-            line_breaks: A list of indices into file_string at which each line
-                break was found.
+            string_lines: A StringLines containing the contents of the file in
+                violation.

        """
        self.full_path = full_path
+        self._mark_disabled(string_lines.get_string())

-    def print_results(self):
+    def print_results(self, out):
        """
        Prints the results represented by this rule violation.
+
+        Arguments:
+            out: output file
        """
-        print "{}: {}".format(self.full_path, self.rule.rule_id)
+        print("{}: {}".format(self.full_path, self.rule.rule_id), file=out)


 class ExpressionRuleViolation(RuleViolation):
@@ -225,50 +339,94 @@ class ExpressionRuleViolation(RuleViolation):
        self.end_line = 0
        self.end_column = 0
        self.lines = []
+        self.is_disabled = False

-    def prepare_results(self, full_path, file_string, line_breaks):
+    def _mark_expression_disabled(self, string_lines):
+        """
+        Marks the expression violation as disabled if it finds the disable
+        pragma anywhere on the first line of the violation, or at the start of
+        the line preceding the violation.
+
+        Pragma format::
+
+            safe-lint: disable=violation-name,other-violation-name
+
+        Examples::
+
+            <% // safe-lint: disable=underscore-not-escaped %>
+            <%= gettext('Single Line') %>
+
+            <%= gettext('Single Line') %><% // safe-lint: disable=underscore-not-escaped %>
+
+        Arguments:
+            string_lines: A StringLines containing the contents of the file in
+                violation.
+
+        Side Effect:
+            Sets self.is_disabled as appropriate based on whether the pragma is
+            found.
+
+        """
+        # disable pragma can be at the start of the preceding line
+        has_previous_line = self.start_line > 1
+        if has_previous_line:
+            line_to_check = string_lines.line_number_to_line(self.start_line - 1)
+            self._mark_disabled(line_to_check, scope_start_string=True)
+            if self.is_disabled:
+                return
+
+        # TODO: this should work at end of any line of the violation
+        # disable pragma can be anywhere on the first line of the violation
+        line_to_check = string_lines.line_number_to_line(self.start_line)
+        self._mark_disabled(line_to_check, scope_start_string=False)
+
+    def prepare_results(self, full_path, string_lines):
        """
        Preps this instance for results reporting.

        Arguments:
            full_path: Path of the file in violation.
-            file_string: The contents of the file in violation.
-            line_breaks: A list of indices into file_string at which each line
-                break was found.
+            string_lines: A StringLines containing the contents of the file in
+                violation.

        """
        self.full_path = full_path
        start_index = self.expression['start_index']
-        self.start_line = _get_line_number(self, line_breaks, start_index)
-        self.start_column = _get_column_number(self, line_breaks, self.start_line, start_index)
+        self.start_line = string_lines.index_to_line_number(start_index)
+        self.start_column = string_lines.index_to_column_number(start_index)
        end_index = self.expression['end_index']
        if end_index > 0:
-            self.end_line = _get_line_number(self, line_breaks, end_index)
-            self.end_column = _get_column_number(self, line_breaks, self.end_line, end_index)
+            self.end_line = string_lines.index_to_line_number(end_index)
+            self.end_column = string_lines.index_to_column_number(end_index)
        else:
            self.end_line = self.start_line
            self.end_column = '?'
        for line_number in range(self.start_line, self.end_line + 1):
-            self.lines.append(_get_line(self, file_string, line_breaks, line_number))
+            self.lines.append(string_lines.line_number_to_line(line_number))
+        self._mark_expression_disabled(string_lines)

-    def print_results(self):
+    def print_results(self, out):
        """
        Prints the results represented by this rule violation.
+
+        Arguments:
+            out: output file
+
        """
        for line_number in range(self.start_line, self.end_line + 1):
-            if (line_number == self.start_line):
+            if line_number == self.start_line:
                column = self.start_column
                rule_id = self.rule.rule_id + ":"
            else:
                column = 1
                rule_id = " " * (len(self.rule.rule_id) + 1)
-            print "{}: {}:{}: {} {}".format(
+            print("{}: {}:{}: {} {}".format(
                self.full_path,
                line_number,
                column,
                rule_id,
-                self.lines[line_number - self.start_line - 1]
-            )
+                self.lines[line_number - self.start_line - 1].encode(encoding='utf-8')
+            ), file=out)


 class FileResults(object):
@@ -285,7 +443,45 @@ class FileResults(object):

        """
        self.full_path = full_path
+        self.directory = os.path.dirname(full_path)
+        self.is_file = os.path.isfile(full_path)
        self.violations = []
+        self.includes = []
+
+    def resolve_include(self, include, include_results):
+        """
+        Resolves potential include violations and determines if they are real or
+        not. For real violations, adds the violations into the violation list.
+
+        If the include_results is not a file, it will be considered a violation
+        and will require a disable pragma.
+
+        Arguments:
+            include: The include with the potential violation to be resolved.
+            include_results: The results of processing the include file.
+
+        """
+        include_has_violations = (not include_results.is_file) or (len(include_results.violations) > 0)
+        if include_has_violations:
+            self.violations.append(include['potential_violation'])
+
+    def add_include(self, include_file, potential_violation):
+        """
+        Adds an include which also must have no violations.
+
+        Arguments:
+            include_file: The include file as provided in an include.
+            potential_violation: Represents the violation of the include, if the
+                included file has any violations.
+
+        """
+        include_full_path = os.path.normpath(self.directory + '/' + include_file)
+        self.includes.append({
+            'directory': os.path.dirname(include_full_path),
+            'file_name': os.path.split(include_full_path)[1],
+            'full_path': include_full_path,
+            'potential_violation': potential_violation,
+        })

    def prepare_results(self, file_string):
        """
@@ -295,11 +491,13 @@ class FileResults(object):
            file_string: The string of content for this file.

        """
-        line_breaks = _get_line_breaks(self, file_string)
+        string_lines = StringLines(file_string)
        for violation in self.violations:
-            violation.prepare_results(self.full_path, file_string, line_breaks)
+            violation.prepare_results(self.full_path, string_lines)
+        for include in self.includes:
+            include['potential_violation'].prepare_results(self.full_path, string_lines)

-    def print_results(self, options):
+    def print_results(self, options, out):
        """
        Prints the results (i.e. violations) in this file.

@@ -307,13 +505,16 @@ class FileResults(object):
            options: A list of the following options:
                is_quiet: True to print only file names, and False to print
                    all violations.
+            out: output file
+

        """
        if options['is_quiet']:
-            print self.full_path
+            print(self.full_path, file=out)
        else:
            for violation in self.violations:
-                violation.print_results()
+                if not violation.is_disabled:
+                    violation.print_results(out)


 class MakoTemplateLinter(object):
@@ -323,6 +524,18 @@ class MakoTemplateLinter(object):

    _skip_mako_dirs = _skip_dirs

+    def __init__(self):
+        """
+        Init method.
+        """
+        self.results = {}
+
+    def supports_includes(self):
+        """
+        Mako template linter supports linting includes.
+        """
+        return True
+
    def process_file(self, directory, file_name):
        """
        Process file to determine if it is a Mako template file and
@@ -333,12 +546,24 @@ class MakoTemplateLinter(object):
            file_name (string): A filename for a potential Mako file

        Returns:
-            The file results containing any violations, or None if the file is
-            never checked.
+            The file results containing any violations.

        """
-        if not self._is_mako_directory(directory):
-            return None
+        mako_file_full_path = os.path.normpath(directory + '/' + file_name)
+        results = FileResults(mako_file_full_path)
+
+        # don't process the same file twice. this could happen when we process
+        # files included by another file
+        if mako_file_full_path in self.results:
+            return self.results[mako_file_full_path]
+
+        self.results[mako_file_full_path] = results
+
+        if not results.is_file:
+            return results
+
+        if not self._is_valid_directory(directory):
+            return results

        # TODO: When safe-by-default is turned on at the platform level, will we:
        # 1. Turn it on for .html only, or
@@ -347,11 +572,11 @@ class MakoTemplateLinter(object):
        #    the n filter to turn off h for some of these)?
        # For now, we only check .html and .xml files
        if not (file_name.lower().endswith('.html') or file_name.lower().endswith('.xml')):
-            return None
+            return results

-        return self._load_and_check_mako_file_is_safe(directory + '/' + file_name)
+        return self._load_and_check_mako_file_is_safe(mako_file_full_path, results)

-    def _is_mako_directory(self, directory):
+    def _is_valid_directory(self, directory):
        """
        Determines if the provided directory is a directory that could contain
        Mako template files that need to be linted.
@@ -366,12 +591,15 @@ class MakoTemplateLinter(object):
        if _is_skip_dir(self._skip_mako_dirs, directory):
            return False

-        if (directory.find('/templates/') >= 0) or directory.endswith('/templates'):
+        # TODO: This is an imperfect guess concerning the Mako template
+        # directories. This needs to be reviewed before turning on safe by
+        # default at the platform level.
+        if ('/templates/' in directory) or directory.endswith('/templates'):
            return True

        return False

-    def _load_and_check_mako_file_is_safe(self, mako_file_full_path):
+    def _load_and_check_mako_file_is_safe(self, mako_file_full_path, results):
        """
        Loads the Mako template file and checks if it is in violation.

@@ -379,16 +607,12 @@ class MakoTemplateLinter(object):
            mako_file_full_path: The file to be loaded and linted.

        Returns:
-            The file results containing any violations, or None if none found.
+            The file results containing any violations.

        """
        mako_template = _load_file(self, mako_file_full_path)
-        results = FileResults(mako_file_full_path)
        self._check_mako_file_is_safe(mako_template, results)
-        if len(results.violations) > 0:
-            return results
-        else:
-            return None
+        return results

    def _check_mako_file_is_safe(self, mako_template, results):
        """
@@ -399,6 +623,8 @@ class MakoTemplateLinter(object):
            results: A file results objects to which violations will be added.

        """
+        if self._is_django_template(mako_template):
+            return
        has_page_default = False
        if self._has_multiple_page_tags(mako_template):
            results.violations.append(RuleViolation(Rules.mako_multiple_page_tags))
@@ -407,8 +633,24 @@ class MakoTemplateLinter(object):
            if not has_page_default:
                results.violations.append(RuleViolation(Rules.mako_missing_default))
        self._check_mako_expressions(mako_template, has_page_default, results)
+        self._check_include_files(mako_template, results)
        results.prepare_results(mako_template)

+    def _is_django_template(self, mako_template):
+        """
+            Determines if the template is actually a Django template.
+
+        Arguments:
+            mako_template: The template code.
+
+        Returns:
+            True if this is really a Django template, and False otherwise.
+
+        """
+        if re.search('({%.*%})|({{.*}})', mako_template) is not None:
+            return True
+        return False
+
    def _has_multiple_page_tags(self, mako_template):
        """
        Checks if the Mako template contains more than one page expression.
@@ -433,6 +675,30 @@ class MakoTemplateLinter(object):
        page_match = page_h_filter_regex.search(mako_template)
        return page_match

+    def _check_include_files(self, mako_template, results):
+        """
+        Checks if the Mako template includes other template files. If so, sets
+        up potential violations that will be checked later.
+
+        Arguments:
+            mako_template: The contents of the Mako template.
+
+        """
+        regex = re.compile('<%include[^>]*file=(?:"|\')(.+?)(?:"|\')[^>]*/>')
+        for match in regex.finditer(mako_template):
+            include_file = match.group(1)
+            expression = {
+                'start_index': match.start(),
+                'end_index': match.end(),
+                'expression': match.group()
+            }
+            results.add_include(
+                include_file,
+                ExpressionRuleViolation(
+                    Rules.mako_include_with_violations, expression
+                )
+            )
+
    def _check_mako_expressions(self, mako_template, has_page_default, results):
        """
        Searches for Mako expressions and then checks if they contain
@@ -456,11 +722,28 @@ class MakoTemplateLinter(object):

            context = self._get_context(contexts, expression['start_index'])
            self._check_filters(mako_template, expression, context, has_page_default, results)
+            self._check_deprecated_display_name(expression, results)
+
+    def _check_deprecated_display_name(self, expression, results):
+        """
+        Checks that the deprecated display_name_with_default_escaped is not
+        used. Adds violation to results if there is a problem.
+
+        Arguments:
+            expression: A dict containing the start_index, end_index, and
+                expression (text) of the expression.
+            results: A list of results into which violations will be added.
+
+        """
+        if '.display_name_with_default_escaped' in expression['expression']:
+            results.violations.append(ExpressionRuleViolation(
+                Rules.mako_deprecated_display_name, expression
+            ))

    def _check_filters(self, mako_template, expression, context, has_page_default, results):
        """
        Checks that the filters used in the given Mako expression are valid
-        for the given context.
+        for the given context. Adds violation to results if there is a problem.

        Arguments:
            mako_template: The contents of the Mako template.
@@ -506,14 +789,7 @@ class MakoTemplateLinter(object):
                pass
            elif (len(filters) == 2) and (filters[0] == 'n') and (filters[1] == 'js_escaped_string'):
                # {x | n, js_escaped_string} is valid, if surrounded by quotes
-                prior_character = mako_template[expression['start_index'] - 1]
-                next_character = mako_template[expression['end_index'] + 1]
-                has_surrounding_quotes = (prior_character == '\'' and next_character == '\'') or \
-                    (prior_character == '"' and next_character == '"')
-                if not has_surrounding_quotes:
-                    results.violations.append(ExpressionRuleViolation(
-                        Rules.mako_js_string_missing_quotes, expression
-                    ))
+                pass
            else:
                results.violations.append(ExpressionRuleViolation(
                    Rules.mako_invalid_js_filter, expression
@@ -670,7 +946,13 @@ class UnderscoreTemplateLinter(object):
    The linter for Underscore.js template files.
    """

-    _skip_underscore_dirs = _skip_dirs
+    _skip_underscore_dirs = _skip_dirs + ('test',)
+
+    def supports_includes(self):
+        """
+        Underscore template linter does not lint includes.
+        """
+        return False

    def process_file(self, directory, file_name):
        """
@@ -682,32 +964,21 @@ class UnderscoreTemplateLinter(object):
            file_name (string): A filename for a potential underscore file

        Returns:
-            The file results containing any violations, or None if the file is
-            never checked.
+            The file results containing any violations.

        """
-        if not self._is_underscore_directory(directory):
-            return
+        full_path = os.path.normpath(directory + '/' + file_name)
+        results = FileResults(full_path)

-        if not file_name.lower().endswith('.underscore'):
-            return
-
-        full_path = directory + '/' + file_name
-        return self._load_and_check_underscore_file_is_safe(full_path)
-
-    def print_results(self, options):
-        """
-        Prints all results (i.e. violations) for all files that failed this
-        linter.
+        if not self._is_valid_directory(directory):
+            return results

-        Arguments:
-            options: A list of the options.
+        if not file_name.lower().endswith('.underscore'):
+            return results

-        """
-        for result in self._results:
-            result.print_results(options)
+        return self._load_and_check_underscore_file_is_safe(full_path, results)

-    def _is_underscore_directory(self, directory):
+    def _is_valid_directory(self, directory):
        """
        Determines if the provided directory is a directory that could contain
        Underscore.js template files that need to be linted.
@@ -724,7 +995,7 @@ class UnderscoreTemplateLinter(object):

        return True

-    def _load_and_check_underscore_file_is_safe(self, file_full_path):
+    def _load_and_check_underscore_file_is_safe(self, file_full_path, results):
        """
        Loads the Underscore.js template file and checks if it is in violation.

@@ -732,17 +1003,12 @@ class UnderscoreTemplateLinter(object):
            file_full_path: The file to be loaded and linted

        Returns:
-            The file results containing any violations, or None if the file is
-            never checked.
+            The file results containing any violations.

        """
        underscore_template = _load_file(self, file_full_path)
-        results = FileResults(file_full_path)
        self._check_underscore_file_is_safe(underscore_template, results)
-        if len(results.violations) > 0:
-            return results
-        else:
-            return None
+        return results

    def _check_underscore_file_is_safe(self, underscore_template, results):
        """
@@ -767,9 +1033,36 @@ class UnderscoreTemplateLinter(object):
        """
        expressions = self._find_unescaped_expressions(underscore_template)
        for expression in expressions:
-            results.violations.append(ExpressionRuleViolation(
-                Rules.underscore_not_escaped, expression
-            ))
+            if not self._is_safe_unescaped_expression(expression):
+                results.violations.append(ExpressionRuleViolation(
+                    Rules.underscore_not_escaped, expression
+                ))
+
+    def _is_safe_unescaped_expression(self, expression):
+        """
+        Determines whether an expression is safely escaped, even though it is
+        using the expression syntax that doesn't itself escape (i.e. <%= ).
+
+        In some cases it is ok to not use the Underscore.js template escape
+        (i.e. <%- ) because the escaping is happening inside the expression.
+
+        Safe examples::
+
+            <%= HtmlUtils.ensureHtml(message) %>
+            <%= _.escape(message) %>
+
+        Arguments:
+            expression: The expression being checked.
+
+        Returns:
+            True if the expression has been safely escaped, and False otherwise.
+
+        """
+        if expression['expression_inner'].startswith('HtmlUtils.'):
+            return True
+        if expression['expression_inner'].startswith('_.escape('):
+            return True
+        return False

    def _find_unescaped_expressions(self, underscore_template):
        """
@@ -788,7 +1081,7 @@ class UnderscoreTemplateLinter(object):
                end_index: The index of the end of the expression.
                expression: The text of the expression.
        """
-        unescaped_expression_regex = re.compile("<%=.*?%>", re.DOTALL)
+        unescaped_expression_regex = re.compile("<%=(.*?)%>", re.DOTALL)

        expressions = []
        for match in unescaped_expression_regex.finditer(underscore_template):
@@ -796,13 +1089,14 @@ class UnderscoreTemplateLinter(object):
                'start_index': match.start(),
                'end_index': match.end(),
                'expression': match.group(),
+                'expression_inner': match.group(1).strip()
            }
            expressions.append(expression)

        return expressions


-def _process_current_walk(current_walk, template_linters, options):
+def _process_current_walk(current_walk, template_linters, options, out):
    """
    For each linter, lints all the files in the current os walk.  This means
    finding and printing violations.
@@ -811,18 +1105,26 @@ def _process_current_walk(current_walk, template_linters, options):
        current_walk: A walk returned by os.walk().
        template_linters: A list of linting objects.
        options: A list of the options.
+        out: output file

    """
-    walk_directory = current_walk[0]
+    walk_directory = os.path.normpath(current_walk[0])
    walk_files = current_walk[2]
    for walk_file in walk_files:
+        walk_file = os.path.normpath(walk_file)
        for template_linter in template_linters:
            results = template_linter.process_file(walk_directory, walk_file)
-            if results is not None:
-                results.print_results(options)
+            if template_linter.supports_includes():
+                for include in results.includes:
+                    include_results = template_linter.process_file(
+                        include['directory'],
+                        include['file_name']
+                    )
+                    results.resolve_include(include, include_results)
+            results.print_results(options, out)


-def _process_os_walk(starting_dir, template_linters, options):
+def _process_os_walk(starting_dir, template_linters, options, out):
    """
    For each linter, lints all the directories in the starting directory.

@@ -830,10 +1132,11 @@ def _process_os_walk(starting_dir, template_linters, options):
        starting_dir: The initial directory to begin the walk.
        template_linters: A list of linting objects.
        options: A list of the options.
+        out: output file

    """
    for current_walk in os.walk(starting_dir):
-        _process_current_walk(current_walk, template_linters, options)
+        _process_current_walk(current_walk, template_linters, options, out)


 def main():
@@ -842,25 +1145,27 @@ def main():

    Prints all of the violations.
    """
+
    #TODO: Use click
    if '--help' in sys.argv:
-        print "Check that templates are safe."
-        print "Options:"
-        print "   --quiet    Just display the filenames with violations."
-        print
-        print "Rules:"
+        print("Check that templates are safe.")
+        print("Options:")
+        print("   --quiet    Just display the filenames that have violations.")
+        print("")
+        print("Rules:")
        for rule in Rules.__members__.values():
-            print "  {0[0]}: {0[1]}".format(rule.value)
+            print("  {0[0]}: {0[1]}".format(rule.value))
        return

    is_quiet = '--quiet' in sys.argv
+    # TODO --file=...

    options = {
        'is_quiet': is_quiet,
    }

    template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter()]
-    _process_os_walk('.', template_linters, options)
+    _process_os_walk('.', template_linters, options, out=sys.stdout)


 if __name__ == "__main__":

--- a/scripts/tests/templates/includes/include_safe.html
+++ b/scripts/tests/templates/includes/include_safe.html
+## Mako template with safe by default page expression
+<%page expression_filter="h"/>
--- a/scripts/tests/templates/includes/include_unsafe.html
+++ b/scripts/tests/templates/includes/include_unsafe.html
+## Mako template with no safe by default page expression
--- a/scripts/tests/templates/test.html
+++ b/scripts/tests/templates/test.html
+<%include file="./includes/include_safe.html" args="message" />
+
+<%include file="./includes/include_unsafe.html" args="message" />
+
+<%include file="bad_file_name.html" />
+
+## safe-lint: disable=mako-include-with-violations
+<%include file="bad_file_name.html" />
--- a/scripts/tests/test_safe_template_linter.py
+++ b/scripts/tests/test_safe_template_linter.py
@@ -2,14 +2,45 @@
 Tests for safe_template_linter.py
 """
 from ddt import ddt, data
+import mock
+import re
+from StringIO import StringIO
 import textwrap
 from unittest import TestCase

 from ..safe_template_linter import (
-    FileResults, MakoTemplateLinter, Rules
+    _process_os_walk, FileResults, MakoTemplateLinter, UnderscoreTemplateLinter, Rules
 )


+class TestSafeTemplateLinter(TestCase):
+    """
+    Test some top-level linter functions
+    """
+
+    def test_process_os_walk_with_includes(self):
+        """
+        Tests the top-level processing of template files, including Mako
+        includes.
+        """
+        out = StringIO()
+
+        options = {
+            'is_quiet': False,
+        }
+
+        template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter()]
+
+        with mock.patch.object(MakoTemplateLinter, '_is_valid_directory', return_value=True) as mock_is_valid_directory:
+            _process_os_walk('scripts/tests/templates', template_linters, options, out)
+
+        output = out.getvalue()
+        self.assertIsNotNone(re.search('test\.html.*mako-missing-default', out.getvalue()))
+        self.assertIsNotNone(re.search('test\.html.*mako-include-with-violations.*include_unsafe\.html', out.getvalue()))
+        self.assertIsNotNone(re.search('test\.html.*mako-include-with-violations.*bad_file_name\.html', out.getvalue()))
+        self.assertIsNotNone(re.search('include_unsafe\.html.*mako-missing-default', out.getvalue()))
+
+
 @ddt
 class TestMakoTemplateLinter(TestCase):
    """
@@ -20,16 +51,17 @@ class TestMakoTemplateLinter(TestCase):
        {'directory': 'lms/templates', 'expected': True},
        {'directory': 'lms/templates/support', 'expected': True},
        {'directory': 'lms/templates/support', 'expected': True},
+        {'directory': 'test_root/staticfiles/templates', 'expected': False},
        {'directory': './test_root/staticfiles/templates', 'expected': False},
        {'directory': './some/random/path', 'expected': False},
    )
-    def test_is_mako_directory(self, data):
+    def test_is_valid_directory(self, data):
        """
-        Test _is_mako_directory correctly determines mako directories
+        Test _is_valid_directory correctly determines mako directories
        """
        linter = MakoTemplateLinter()

-        self.assertEqual(linter._is_mako_directory(data['directory']), data['expected'])
+        self.assertEqual(linter._is_valid_directory(data['directory']), data['expected'])

    @data(
        {
@@ -110,6 +142,79 @@ class TestMakoTemplateLinter(TestCase):
        self.assertEqual(results.violations[3].rule, Rules.mako_invalid_html_filter)
        self.assertEqual(results.violations[3].expression['expression'], "${x | n, dump_js_escaped_json}")

+    def test_check_mako_expression_display_name(self):
+        """
+        Test _check_mako_file_is_safe with display_name_with_default_escaped
+        fails.
+        """
+        linter = MakoTemplateLinter()
+        results = FileResults('')
+
+        mako_template = textwrap.dedent("""
+            <%page expression_filter="h"/>
+            ${course.display_name_with_default_escaped}
+        """)
+
+        linter._check_mako_file_is_safe(mako_template, results)
+
+        self.assertEqual(len(results.violations), 1)
+        self.assertEqual(results.violations[0].rule, Rules.mako_deprecated_display_name)
+
+    def test_check_mako_expression_default_disabled(self):
+        """
+        Test _check_mako_file_is_safe with disable pragma for safe-by-default
+        works to designate that this is not a Mako file
+        """
+        linter = MakoTemplateLinter()
+        results = FileResults('')
+
+        mako_template = textwrap.dedent("""
+            # This is anything but a Mako file.
+
+            # pragma can appear anywhere in file
+            # safe-lint: disable=mako-missing-default
+        """)
+
+        linter._check_mako_file_is_safe(mako_template, results)
+
+        self.assertEqual(len(results.violations), 1)
+        self.assertTrue(results.violations[0].is_disabled)
+
+    def test_check_mako_expression_disabled(self):
+        """
+        Test _check_mako_file_is_safe with disable pragma results in no
+        violation
+        """
+        linter = MakoTemplateLinter()
+        results = FileResults('')
+
+        mako_template = textwrap.dedent("""
+            <%page expression_filter="h"/>
+            ## safe-lint: disable=mako-unwanted-html-filter
+            ${x | h}
+        """)
+
+        linter._check_mako_file_is_safe(mako_template, results)
+
+        self.assertEqual(len(results.violations), 1)
+        self.assertTrue(results.violations[0].is_disabled)
+
+    @data(
+        {'template': '{% extends "wiki/base.html" %}'},
+        {'template': '{{ message }}'},
+    )
+    def test_check_mako_on_django_template(self, data):
+        """
+        Test _check_mako_file_is_safe with disable pragma results in no
+        violation
+        """
+        linter = MakoTemplateLinter()
+        results = FileResults('')
+
+        linter._check_mako_file_is_safe(data['template'], results)
+
+        self.assertEqual(len(results.violations), 0)
+
    def test_check_mako_expressions_in_html_without_default(self):
        """
        Test _check_mako_file_is_safe in html context without the page level
@@ -145,14 +250,12 @@ class TestMakoTemplateLinter(TestCase):
                ${x | n, dump_html_escaped_json}
                ${x | n, dump_js_escaped_json}
                "${x-with-quotes | n, js_escaped_string}"
-                '${x-with-quotes | n, js_escaped_string}'
-                ${x-missing-quotes | n, js_escaped_string}
            </script>
        """)

        linter._check_mako_file_is_safe(mako_template, results)

-        self.assertEqual(len(results.violations), 6)
+        self.assertEqual(len(results.violations), 5)
        self.assertEqual(results.violations[0].rule, Rules.mako_invalid_js_filter)
        self.assertEqual(results.violations[0].expression['expression'], "${x}")
        self.assertEqual(results.violations[1].rule, Rules.mako_unparsable_expression)
@@ -164,8 +267,6 @@ class TestMakoTemplateLinter(TestCase):
        self.assertEqual(results.violations[3].expression['expression'], "${x | h}")
        self.assertEqual(results.violations[4].rule, Rules.mako_invalid_js_filter)
        self.assertEqual(results.violations[4].expression['expression'], "${x | n, dump_html_escaped_json}")
-        self.assertEqual(results.violations[5].rule, Rules.mako_js_string_missing_quotes)
-        self.assertEqual(results.violations[5].expression['expression'], "${x-missing-quotes | n, js_escaped_string}")

    def test_check_mako_expressions_in_require_js(self):
        """
@@ -185,11 +286,9 @@ class TestMakoTemplateLinter(TestCase):

        linter._check_mako_file_is_safe(mako_template, results)

-        self.assertEqual(len(results.violations), 2)
+        self.assertEqual(len(results.violations), 1)
        self.assertEqual(results.violations[0].rule, Rules.mako_invalid_js_filter)
        self.assertEqual(results.violations[0].expression['expression'], "${x}")
-        self.assertEqual(results.violations[1].rule, Rules.mako_js_string_missing_quotes)
-        self.assertEqual(results.violations[1].expression['expression'], "${x | n, js_escaped_string}")

    @data(
        {'media_type': 'text/javascript', 'expected_violations': 0},
@@ -335,3 +434,150 @@ class TestMakoTemplateLinter(TestCase):
        end_index = expression['end_index']
        self.assertEqual(template_string[start_index:end_index + 1], expected_expression)
        self.assertEqual(expression['expression'], expected_expression)
+
+
+@ddt
+class TestUnderscoreTemplateLinter(TestCase):
+    """
+    Test UnderscoreTemplateLinter
+    """
+
+    def test_check_underscore_file_is_safe(self):
+        """
+        Test _check_underscore_file_is_safe with safe template
+        """
+        linter = UnderscoreTemplateLinter()
+        results = FileResults('')
+
+        template = textwrap.dedent("""
+            <%- gettext('Single Line') %>
+
+            <%-
+                gettext('Multiple Lines')
+            %>
+        """)
+
+        linter._check_underscore_file_is_safe(template, results)
+
+        self.assertEqual(len(results.violations), 0)
+
+    def test_check_underscore_file_is_not_safe(self):
+        """
+        Test _check_underscore_file_is_safe with unsafe template
+        """
+        linter = UnderscoreTemplateLinter()
+        results = FileResults('')
+
+        template = textwrap.dedent("""
+            <%= gettext('Single Line') %>
+
+            <%=
+                gettext('Multiple Lines')
+            %>
+        """)
+
+        linter._check_underscore_file_is_safe(template, results)
+
+        self.assertEqual(len(results.violations), 2)
+        self.assertEqual(results.violations[0].rule, Rules.underscore_not_escaped)
+        self.assertEqual(results.violations[1].rule, Rules.underscore_not_escaped)
+
+    @data(
+        {
+            'template':
+                '<% // safe-lint:   disable=underscore-not-escaped   %>\n'
+                '<%= message %>',
+            'is_disabled': [True],
+        },
+        {
+            'template':
+                '<% // safe-lint: disable=another-rule,underscore-not-escaped %>\n'
+                '<%= message %>',
+            'is_disabled': [True],
+        },
+        {
+            'template':
+                '<% // safe-lint: disable=another-rule %>\n'
+                '<%= message %>',
+            'is_disabled': [False],
+        },
+        {
+            'template':
+                '<% // safe-lint: disable=underscore-not-escaped %>\n'
+                '<%= message %>\n'
+                '<%= message %>',
+            'is_disabled': [True, False],
+        },
+        {
+            'template':
+                '// This test does not use proper Underscore.js Template syntax\n'
+                '// But, it is just testing that a maximum of 5 non-whitespace\n'
+                '// are used to designate start of line for disabling the next line.\n'
+                ' 1 2 3 4 5 safe-lint: disable=underscore-not-escaped %>\n'
+                '<%= message %>\n'
+                ' 1 2 3 4 5 6 safe-lint: disable=underscore-not-escaped %>\n'
+                '<%= message %>',
+            'is_disabled': [True, False],
+        },
+        {
+            'template':
+                '<%= message %><% // safe-lint: disable=underscore-not-escaped %>\n'
+                '<%= message %>',
+            'is_disabled': [True, False],
+        },
+        {
+            'template':
+                '<%= message %>\n'
+                '<% // safe-lint: disable=underscore-not-escaped %>',
+            'is_disabled': [False],
+        },
+    )
+    def test_check_underscore_file_disable_rule(self, data):
+        """
+        Test _check_underscore_file_is_safe with various disabled pragmas
+        """
+        linter = UnderscoreTemplateLinter()
+        results = FileResults('')
+
+        linter._check_underscore_file_is_safe(data['template'], results)
+
+        violation_count = len(data['is_disabled'])
+        self.assertEqual(len(results.violations), violation_count)
+        for index in range(0, violation_count - 1):
+            self.assertEqual(results.violations[index].is_disabled, data['is_disabled'][index])
+
+    def test_check_underscore_file_disables_one_violation(self):
+        """
+        Test _check_underscore_file_is_safe with disabled before a line only
+        disables for the violation following
+        """
+        linter = UnderscoreTemplateLinter()
+        results = FileResults('')
+
+        template = textwrap.dedent("""
+            <% // safe-lint: disable=underscore-not-escaped %>
+            <%= message %>
+            <%= message %>
+        """)
+
+        linter._check_underscore_file_is_safe(template, results)
+
+        self.assertEqual(len(results.violations), 2)
+        self.assertEqual(results.violations[0].is_disabled, True)
+        self.assertEqual(results.violations[1].is_disabled, False)
+
+    @data(
+        {'template': '<%= HtmlUtils.ensureHtml(message) %>'},
+        {'template': '<%= _.escape(message) %>'},
+    )
+    def test_check_underscore_no_escape_allowed(self, data):
+        """
+        Test _check_underscore_file_is_safe with expressions that are allowed
+        without escaping because the internal calls properly escape.
+        """
+        linter = UnderscoreTemplateLinter()
+        results = FileResults('')
+
+        linter._check_underscore_file_is_safe(data['template'], results)
+
+        self.assertEqual(len(results.violations), 0)