#!/usr/bin/env python
"""
A linting tool to check if templates are safe.
"""
from __future__ import print_function
import argparse
import ast
import bisect
from enum import Enum
import os
import re
import sys
import textwrap
13

14

15 16 17 18 19
class StringLines(object):
    """
    StringLines provides utility methods to work with a string in terms of
    lines.  As an example, it can convert an index into a line number or column
    number (i.e. index into the line).

    Indexes into the string are 0-based.  Line numbers and column numbers are
    1-based.

    """

    def __init__(self, string):
        """
        Init method.

        Arguments:
            string: The string to work with.

        """
        self._string = string
        self._line_start_indexes = self._process_line_breaks(string)
        # this is an exclusive index used in the case that the template doesn't
        # end with a new line
        self.eof_index = len(string)

    def _process_line_breaks(self, string):
        """
        Creates a sorted list, where each entry represents the index into the
        string at which a line begins: 0 for the first line, and the index
        immediately following each '\\n' for every subsequent line.

        Arguments:
            string: The string in which to find line breaks.

        Returns:
             A list of indices into the string at which each line begins.

        """
        line_start_indexes = [0]
        index = string.find('\n')
        while index >= 0:
            # the next line starts right after the line break
            index += 1
            line_start_indexes.append(index)
            index = string.find('\n', index)
        return line_start_indexes

    def get_string(self):
        """
        Get the original string.
        """
        return self._string

    def index_to_line_number(self, index):
        """
        Given an index, determines the line of the index.

        Arguments:
            index: The index into the original string for which we want to know
                the line number

        Returns:
            The (1-based) line number of the provided index.  A line break
            character belongs to the line that it terminates.  An index that
            precedes the first line start (i.e. a negative index) yields 0.

        """
        # The line number equals the count of line starts that are <= index.
        # The list of line starts is sorted, so a binary search suffices.
        return bisect.bisect_right(self._line_start_indexes, index)

    def index_to_column_number(self, index):
        """
        Gets the column (i.e. index into the line) for the given index into the
        original string.

        Arguments:
            index: The index into the original string.

        Returns:
            The (1-based) column (i.e. index into the line) for the given index
            into the original string.

        """
        start_index = self.index_to_line_start_index(index)
        return index - start_index + 1

    def index_to_line_start_index(self, index):
        """
        Gets the index of the start of the line of the given index.

        Arguments:
            index: The index into the original string.

        Returns:
            The index of the start of the line of the given index.

        """
        line_number = self.index_to_line_number(index)
        return self.line_number_to_start_index(line_number)

    def index_to_line_end_index(self, index):
        """
        Gets the index of the end of the line of the given index.

        Arguments:
            index: The index into the original string.

        Returns:
            The index of the end of the line of the given index (see
            line_number_to_end_index for its exact meaning).

        """
        line_number = self.index_to_line_number(index)
        return self.line_number_to_end_index(line_number)

    def line_number_to_start_index(self, line_number):
        """
        Gets the starting index for the provided line number.

        Arguments:
            line_number: The (1-based) line number of the line for which we
                want to find the start index.

        Returns:
            The starting index for the provided line number.

        """
        return self._line_start_indexes[line_number - 1]

    def line_number_to_end_index(self, line_number):
        """
        Gets the ending index for the provided line number.

        Arguments:
            line_number: The (1-based) line number of the line for which we
                want to find the end index.

        Returns:
            The exclusive ending index for the provided line number.  For any
            line but the last this is the start index of the following line;
            for the last line it is eof_index.

        """
        if line_number < len(self._line_start_indexes):
            return self._line_start_indexes[line_number]
        # an exclusive index in the case that the file didn't end with a
        # newline.
        return self.eof_index

    def line_number_to_line(self, line_number):
        """
        Gets the line of text designated by the provided line number.

        Arguments:
            line_number: The (1-based) line number of the line we want to find.

        Returns:
            The line of text designated by the provided line number, without
            its terminating '\\n'.

        """
        start_index = self._line_start_indexes[line_number - 1]
        if line_number == len(self._line_start_indexes):
            # the last line runs to the end of the string
            return self._string[start_index:]
        # stop just short of the line break that precedes the next line start
        next_line_start_index = self._line_start_indexes[line_number]
        return self._string[start_index:next_line_start_index - 1]

    def line_count(self):
        """
        Gets the number of lines in the string.
        """
        return len(self._line_start_indexes)
186 187 188


class Rules(Enum):
    """
    An Enum of each rule which the linter will check.

    Each member's value is the rule's id string, which is also exposed as the
    ``rule_id`` attribute of the member.
    """
    # IMPORTANT: Do not edit without also updating the docs:
    # - http://edx.readthedocs.io/projects/edx-developer-guide/en/latest/conventions/safe_templates.html#safe-template-linter
    mako_missing_default = 'mako-missing-default'
    mako_multiple_page_tags = 'mako-multiple-page-tags'
    mako_unparseable_expression = 'mako-unparseable-expression'
    mako_unwanted_html_filter = 'mako-unwanted-html-filter'
    mako_invalid_html_filter = 'mako-invalid-html-filter'
    mako_invalid_js_filter = 'mako-invalid-js-filter'
    mako_js_missing_quotes = 'mako-js-missing-quotes'
    mako_js_html_string = 'mako-js-html-string'
    mako_html_entities = 'mako-html-entities'
    mako_unknown_context = 'mako-unknown-context'
    underscore_not_escaped = 'underscore-not-escaped'
    javascript_jquery_append = 'javascript-jquery-append'
    javascript_jquery_prepend = 'javascript-jquery-prepend'
    javascript_jquery_insertion = 'javascript-jquery-insertion'
    javascript_jquery_insert_into_target = 'javascript-jquery-insert-into-target'
    javascript_jquery_html = 'javascript-jquery-html'
    javascript_concat_html = 'javascript-concat-html'
    javascript_escape = 'javascript-escape'
    javascript_interpolate = 'javascript-interpolate'
    python_concat_html = 'python-concat-html'
    python_custom_escape = 'python-custom-escape'
    python_deprecated_display_name = 'python-deprecated-display-name'
    python_requires_html_or_text = 'python-requires-html-or-text'
    python_close_before_format = 'python-close-before-format'
    python_wrap_html = 'python-wrap-html'
    python_interpolate_html = 'python-interpolate-html'
    python_parse_error = 'python-parse-error'

    def __init__(self, rule_id):
        # Enum passes each member's value to __init__, so rule_id is the
        # string assigned to the member above.
        self.rule_id = rule_id


Robert Raposa committed
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
class Expression(object):
    """
    Represents an arbitrary expression.

    An expression can be any type of code snippet. It will sometimes have a
    starting and ending delimiter, but not always.

    Here are some example expressions::

        ${x | n, decode.utf8}
        <%= x %>
        function(x)
        "<p>" + message + "</p>"

    Other details of note:
    - Only a start_index is required for a valid expression.
    - If end_index is None, it means we couldn't parse the rest of the
    expression.
    - All other details of the expression are optional, and are only added if
    and when supplied and needed for additional checks.  They are not necessary
    for the final results output.

    """

    def __init__(self, start_index, end_index=None, template=None, start_delim="", end_delim="", strings=None):
        """
        Init method.

        Arguments:
            start_index: the starting index of the expression
            end_index: the index immediately following the expression, or None
                if the expression was unparseable
            template: optional template code in which the expression was found
            start_delim: optional starting delimiter of the expression
            end_delim: optional ending delimiter of the expression
            strings: optional list of ParseStrings

        Attributes set in addition to the arguments above:
            expression: the text of the expression including its delimiters,
                or None if template or end_index was not supplied.
            expression_inner: the text of the expression with the delimiters
                and surrounding whitespace stripped, or None if template or
                end_index was not supplied.

        """
        self.start_index = start_index
        self.end_index = end_index
        self.start_delim = start_delim
        self.end_delim = end_delim
        self.strings = strings
        if template is not None and self.end_index is not None:
            self.expression = template[start_index:end_index]
            inner = self.expression[len(start_delim):]
            if end_delim:
                # Only slice when there is something to remove: a slice end of
                # -0 is 0, which would wrongly produce an empty string.
                inner = inner[:-len(end_delim)]
            self.expression_inner = inner.strip()
        else:
            self.expression = None
            self.expression_inner = None


Robert Raposa committed
277 278 279 280
class RuleViolation(object):
    """
    Base class representing a rule violation which can be used for reporting.
    """

    def __init__(self, rule):
        """
        Init method.

        Arguments:
            rule: The Rule which was violated.

        """
        self.rule = rule
        self.full_path = ''
        self.is_disabled = False

    def _mark_disabled(self, string, scope_start_string=False):
        """
        Performs the disable pragma search and marks the rule as disabled if a
        matching pragma is found.

        Pragma format::

            safe-lint: disable=violation-name,other-violation-name

        Arguments:
            string: The string of code in which to search for the pragma.
            scope_start_string: True if the pragma must be at the start of the
                string, False otherwise. The pragma is considered at the start
                of the string if it has a maximum of 5 non-whitespace characters
                preceding it.

        Side Effect:
            Sets self.is_disabled to True if a pragma naming this violation's
            rule is found.  Never resets it to False.

        """
        pragma_match = re.search(r'safe-lint:\s*disable=([a-zA-Z,-]+)', string)
        if pragma_match is None:
            return
        if scope_start_string:
            # Count every kind of whitespace (tabs included), not only spaces,
            # so that tab-indented pragma lines are treated like space-indented
            # ones.
            preceding = string[:pragma_match.start()]
            non_whitespace_count = sum(1 for char in preceding if not char.isspace())
            if non_whitespace_count > 5:
                return

        disabled_rules = pragma_match.group(1).split(',')
        if self.rule.rule_id in disabled_rules:
            self.is_disabled = True

    def sort_key(self):
        """
        Returns a key that can be sorted on
        """
        # file level violations have no line or column, so use zeros
        return (0, 0, self.rule.rule_id)

    def first_line(self):
        """
        Since a file level rule has no first line, returns empty string.
        """
        return ''

    def prepare_results(self, full_path, string_lines):
        """
        Preps this instance for results reporting.

        Arguments:
            full_path: Path of the file in violation.
            string_lines: A StringLines containing the contents of the file in
                violation.

        """
        self.full_path = full_path
        # a file level violation can be disabled by a pragma anywhere in the file
        self._mark_disabled(string_lines.get_string())

    def print_results(self, _options, out):
        """
        Prints the results represented by this rule violation.

        Arguments:
            _options: ignored
            out: output file
        """
        print("{}: {}".format(self.full_path, self.rule.rule_id), file=out)
363 364


Robert Raposa committed
365 366 367 368 369 370 371
class ExpressionRuleViolation(RuleViolation):
    """
    A class representing a particular rule violation for expressions which
    contain more specific details of the location of the violation for reporting
    purposes.

    """

    def __init__(self, rule, expression):
        """
        Init method.

        Arguments:
            rule: The Rule which was violated.
            expression: The Expression that was in violation.

        """
        super(ExpressionRuleViolation, self).__init__(rule)
        self.expression = expression
        # location details are filled in by prepare_results
        self.start_line = 0
        self.start_column = 0
        self.end_line = 0
        self.end_column = 0
        self.lines = []
        self.is_disabled = False

    def _mark_expression_disabled(self, string_lines):
        """
        Marks the expression violation as disabled if it finds the disable
        pragma anywhere on the first line of the violation, or at the start of
        the line preceding the violation.

        Pragma format::

            safe-lint: disable=violation-name,other-violation-name

        Examples::

            <% // safe-lint: disable=underscore-not-escaped %>
            <%= gettext('Single Line') %>

            <%= gettext('Single Line') %><% // safe-lint: disable=underscore-not-escaped %>

        Arguments:
            string_lines: A StringLines containing the contents of the file in
                violation.

        Side Effect:
            Sets self.is_disabled as appropriate based on whether the pragma is
            found.

        """
        # disable pragma can be at the start of the preceding line
        has_previous_line = self.start_line > 1
        if has_previous_line:
            line_to_check = string_lines.line_number_to_line(self.start_line - 1)
            self._mark_disabled(line_to_check, scope_start_string=True)
            if self.is_disabled:
                return

        # TODO: this should work at end of any line of the violation
        # disable pragma can be anywhere on the first line of the violation
        line_to_check = string_lines.line_number_to_line(self.start_line)
        self._mark_disabled(line_to_check, scope_start_string=False)

    def sort_key(self):
        """
        Returns a key that can be sorted on
        """
        return (self.start_line, self.start_column, self.rule.rule_id)

    def first_line(self):
        """
        Returns the initial line of code of the violation.

        Only valid after prepare_results has populated self.lines.
        """
        return self.lines[0]

    def prepare_results(self, full_path, string_lines):
        """
        Preps this instance for results reporting.

        Computes the start/end line and column of the expression, collects the
        lines of code that it spans, and checks for a disable pragma.

        Arguments:
            full_path: Path of the file in violation.
            string_lines: A StringLines containing the contents of the file in
                violation.

        """
        self.full_path = full_path
        start_index = self.expression.start_index
        self.start_line = string_lines.index_to_line_number(start_index)
        self.start_column = string_lines.index_to_column_number(start_index)
        end_index = self.expression.end_index
        if end_index is not None:
            self.end_line = string_lines.index_to_line_number(end_index)
            self.end_column = string_lines.index_to_column_number(end_index)
        else:
            # the expression was unparseable, so only its first line is known
            self.end_line = self.start_line
            self.end_column = '?'
        # Rebuild (rather than append to) the list so that preparing the same
        # violation more than once does not duplicate its lines.
        self.lines = [
            string_lines.line_number_to_line(line_number)
            for line_number in range(self.start_line, self.end_line + 1)
        ]
        self._mark_expression_disabled(string_lines)

    def print_results(self, options, out):
        """
        Prints the results represented by this rule violation.

        Arguments:
            options: A list of the following options:
                list_files: True to print only file names, and False to print
                    all violations.
                verbose: True for multiple lines of context, False single line.
            out: output file

        """
        if options['verbose']:
            end_line = self.end_line + 1
        else:
            end_line = self.start_line + 1
        for line_number in range(self.start_line, end_line):
            if line_number == self.start_line:
                column = self.start_column
                rule_id = self.rule.rule_id + ":"
            else:
                # continuation lines are aligned under the first line's code
                column = 1
                rule_id = " " * (len(self.rule.rule_id) + 1)
            line = self.lines[line_number - self.start_line]
            if not isinstance(line, str):
                # Python 2 unicode text must be encoded before being formatted
                # into a byte string.  Native str (including all Python 3 text)
                # is left alone so that it isn't printed as a b'...' literal.
                line = line.encode('utf-8')
            print("{}: {}:{}: {} {}".format(
                self.full_path,
                line_number,
                column,
                rule_id,
                line
            ), file=out)
498 499


500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545
class SummaryResults(object):
    """
    Contains the summary results for all violations.
    """

    def __init__(self):
        """
        Init method.

        Starts with zero total violations and a zero count for every rule id
        in Rules.
        """
        self.total_violations = 0
        self.totals_by_rule = dict.fromkeys(
            [rule.rule_id for rule in Rules.__members__.values()], 0
        )

    def add_violation(self, violation):
        """
        Adds a violation to the summary details.

        Arguments:
            violation: The violation to add to the summary.

        """
        self.total_violations += 1
        self.totals_by_rule[violation.rule.rule_id] += 1

    def print_results(self, options, out):
        """
        Prints the summary of all violations.  Nothing is printed when only
        file names are being listed.

        Arguments:
            options: A list of the following options:
                list_files: True to print only file names, and False to print
                    all violations.
                rule_totals: If True include totals by rule.
            out: output file

        """
        if options['list_files']:
            return

        if options['rule_totals']:
            # pad the rule ids so that the counts line up in a column
            max_rule_id_len = max([len(rule_id) for rule_id in self.totals_by_rule] or [0])
            print("", file=out)
            for rule_id in sorted(self.totals_by_rule):
                padding = " " * (max_rule_id_len - len(rule_id))
                print("{}: {}{} violations".format(rule_id, padding, self.totals_by_rule[rule_id]), file=out)
            print("", file=out)

        # matches output of eslint for simplicity
        print("", file=out)
        print("{} violations total".format(self.total_violations), file=out)


551
class FileResults(object):
    """
    Contains the results, or violations, for a file.
    """

    def __init__(self, full_path):
        """
        Init method.

        Arguments:
            full_path: The full path for this file.

        """
        self.full_path = full_path
        self.directory = os.path.dirname(full_path)
        self.is_file = os.path.isfile(full_path)
        self.violations = []

    def prepare_results(self, file_string, line_comment_delim=None):
        """
        Prepares the results for output for this file.

        Arguments:
            file_string: The string of content for this file.
            line_comment_delim: A string representing the start of a line
                comment. For example "##" for Mako and "//" for JavaScript.
                If supplied, violations found in commented out code are
                removed.

        """
        string_lines = StringLines(file_string)
        for violation in self.violations:
            violation.prepare_results(self.full_path, string_lines)
        # filtering relies on each violation's first line, which is only
        # available once the violation has been prepared above
        if line_comment_delim is not None:
            self._filter_commented_code(line_comment_delim)

    def print_results(self, options, summary_results, out):
        """
        Prints the results (i.e. violations) in this file.

        Arguments:
            options: A list of the following options:
                list_files: True to print only file names, and False to print
                    all violations.
                verbose: True for multiple lines of context, False single line.
            summary_results: A SummaryResults with a summary of the violations.
            out: output file

        Side effect:
            Updates the passed SummaryResults (only when list_files is not
            set), and sorts self.violations in place.

        """
        if options['list_files']:
            # NOTE(review): the file is listed even if all of its violations
            # are disabled by pragma - confirm whether that is intended.
            if self.violations:
                print(self.full_path, file=out)
            return

        self.violations.sort(key=lambda violation: violation.sort_key())
        for violation in self.violations:
            if violation.is_disabled:
                continue
            violation.print_results(options, out)
            summary_results.add_violation(violation)

    def _filter_commented_code(self, line_comment_delim):
        """
        Remove any violations that were found in commented out code.

        Arguments:
            line_comment_delim: A string representing the start of a line
                comment. For example "##" for Mako and "//" for JavaScript.

        """
        self.violations = [v for v in self.violations if not self._is_commented(v, line_comment_delim)]

    def _is_commented(self, violation, line_comment_delim):
        """
        Checks if violation line is commented out.

        Arguments:
            violation: The violation to check
            line_comment_delim: A string representing the start of a line
                comment. For example "##" for Mako and "//" for JavaScript.

        Returns:
            True if the first line of the violation is actually commented out,
            False otherwise.
        """
        if 'parse' in violation.rule.rule_id:
            # For parse rules, don't filter them because the comment could be a
            # part of the parse issue to begin with.
            return False
        return violation.first_line().lstrip().startswith(line_comment_delim)
641 642


643 644 645 646 647
class ParseString(object):
    """
    ParseString is the result of parsing a string out of a template.

    A ParseString has the following attributes:
        start_index: The index of the first quote, or None if none found
        end_index: The index following the closing quote, or None if
            unparseable
        quote_length: The length of the quote.  Could be 3 for a Python
            triple quote.  Or None if none found.
        string: the text of the parsed string, or None if none found.
        string_inner: the text inside the quotes of the parsed string, or None
            if none found.

    """

    # matches the single or double quote that opens a string
    _QUOTE_REGEX = re.compile(r"""['"]""")

    def __init__(self, template, start_index, end_index):
        """
        Init method.

        Arguments:
            template: The template to be searched.
            start_index: The start index to search.
            end_index: The end index to search before.

        """
        self.end_index = None
        self.quote_length = None
        self.string = None
        self.string_inner = None
        self.start_index = self._find_string_start(template, start_index, end_index)
        if self.start_index is not None:
            result = self._parse_string(template, self.start_index)
            # result is None when no closing quote could be found, in which
            # case the attributes above keep their None defaults
            if result is not None:
                self.end_index = result['end_index']
                self.quote_length = result['quote_length']
                self.string = result['string']
                self.string_inner = result['string_inner']

    def _find_string_start(self, template, start_index, end_index):
        """
        Finds the index of the start of a string.  In other words, the
        first single or double quote.

        Arguments:
            template: The template to be searched.
            start_index: The start index to search.
            end_index: The end index to search before.

        Returns:
            The start index of the first single or double quote, or None if no
            quote was found.
        """
        start_match = self._QUOTE_REGEX.search(template, start_index, end_index)
        if start_match is None:
            return None
        return start_match.start()

    def _parse_string(self, template, start_index):
        """
        Finds the indices of a string inside a template.

        Arguments:
            template: The template to be searched.
            start_index: The start index of the open quote.

        Returns:
            A dict containing the following, or None if not parseable:
                end_index: The index following the closing quote
                quote_length: The length of the quote.  Could be 3 for a Python
                    triple quote.
                string: the text of the parsed string
                string_inner: the text inside the quotes of the parsed string

        Raises:
            ValueError: if start_index does not refer to a single or double
                quote.

        """
        quote = template[start_index]
        if quote not in ["'", '"']:
            raise ValueError("start_index must refer to a single or double quote.")
        triple_quote = quote * 3
        if template.startswith(triple_quote, start_index):
            quote = triple_quote
        quote_length = len(quote)

        next_start_index = start_index + quote_length
        while True:
            quote_end_index = template.find(quote, next_start_index)
            if quote_end_index < 0:
                # no closing quote, so the string is unparseable
                return None
            backslash_index = template.find("\\", next_start_index)
            if 0 <= backslash_index < quote_end_index:
                # skip over the backslash and the character that it escapes,
                # which might be the very quote that was just found
                next_start_index = backslash_index + 2
                continue
            end_index = quote_end_index + quote_length
            string = template[start_index:end_index]
            return {
                'end_index': end_index,
                'quote_length': quote_length,
                'string': string,
                'string_inner': string[quote_length:-quote_length],
            }


Robert Raposa committed
747
class BaseLinter(object):
Robert Raposa committed
748
    """
Robert Raposa committed
749
    BaseLinter provides some helper functions that are used by multiple linters.
750

Robert Raposa committed
751
    """
752 753 754

    LINE_COMMENT_DELIM = None

Robert Raposa committed
755
    def _is_valid_directory(self, skip_dirs, directory):
        """
        Reports whether files in the given directory are candidates for
        linting.

        Arguments:
            skip_dirs: The directories to be skipped.
            directory: The directory to be linted.

        Returns:
            False if is_skip_dir() reports the directory as skipped, and True
            otherwise.

        """
        return not is_skip_dir(skip_dirs, directory)
772

Robert Raposa committed
773
    def _load_file(self, file_full_path):
Robert Raposa committed
774
        """
Robert Raposa committed
775
        Loads a file into a string.
Robert Raposa committed
776 777

        Arguments:
Robert Raposa committed
778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798
            file_full_path: The full path of the file to be loaded.

        Returns:
            A string containing the files contents.

        """
        with open(file_full_path, 'r') as input_file:
            file_contents = input_file.read()
            return file_contents.decode(encoding='utf-8')

    def _load_and_check_file_is_safe(self, file_full_path, lint_function, results):
        """
        Loads the Python file and checks if it is in violation.

        Arguments:
            file_full_path: The file to be loaded and linted.
            lint_function: A function that will lint for violations. It must
                take two arguments:
                1) string contents of the file
                2) results object
            results: A FileResults to be used for this file
Robert Raposa committed
799

800
        Returns:
801
            The file results containing any violations.
Robert Raposa committed
802 803

        """
Robert Raposa committed
804 805
        file_contents = self._load_file(file_full_path)
        lint_function(file_contents, results)
806
        return results
807

Robert Raposa committed
808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834
    def _find_closing_char_index(
            self, start_delim, open_char, close_char, template, start_index, num_open_chars=0, strings=None
    ):
        """
        Finds the index of the closing char that matches the opening char.

        For example, this could be used to find the end of a Mako expression,
        where the open and close characters would be '{' and '}'.

        The search recurses each time it steps over a quoted string, descends
        into a nested open char, or climbs back out of a nested close char.

        Arguments:
            start_delim: If provided (e.g. '${' for Mako expressions), the
                closing character must be found before the next start_delim.
            open_char: The opening character to be matched (e.g '{')
            close_char: The closing character to be matched (e.g '}')
            template: The template to be searched.
            start_index: The start index of the last open char.
            num_open_chars: The current number of open chars.
            strings: A list of ParseStrings already parsed

        Returns:
            A dict containing the following, or None if unparseable:
                close_char_index: The index of the closing character
                strings: a list of ParseStrings

        """
        strings = [] if strings is None else strings

        # Find start index of an uncommented line.
        search_index = self._uncommented_start_index(template, start_index)
        # Loop until the nearest interesting index is on an uncommented line.
        while search_index is not None:
            # Remember where this pass started scanning.  The start_delim
            # check below must look at this whole window; searching from the
            # open char itself could never find a delimiter that precedes it.
            window_start_index = search_index
            close_char_index = template.find(close_char, search_index)
            if close_char_index < 0:
                # If we can't find a close char, let's just quit.
                return None
            open_char_index = template.find(open_char, search_index, close_char_index)
            parse_string = ParseString(template, search_index, close_char_index)

            # The nearest of: the close char, a nested open char, a string start.
            valid_index_list = [close_char_index]
            if 0 <= open_char_index:
                valid_index_list.append(open_char_index)
            if parse_string.start_index is not None:
                valid_index_list.append(parse_string.start_index)
            min_valid_index = min(valid_index_list)

            search_index = self._uncommented_start_index(template, min_valid_index)
            if search_index == min_valid_index:
                break

        if search_index is None:
            # No uncommented code to search.
            return None

        if parse_string.start_index == min_valid_index:
            # A string comes first: record it and continue after its end.
            strings.append(parse_string)
            if parse_string.end_index is None:
                return None
            return self._find_closing_char_index(
                start_delim, open_char, close_char, template, start_index=parse_string.end_index,
                num_open_chars=num_open_chars, strings=strings
            )

        if open_char_index == min_valid_index:
            if start_delim is not None:
                # if we find another starting delim, consider this unparseable
                start_delim_index = template.find(start_delim, window_start_index, close_char_index)
                if 0 <= start_delim_index < open_char_index:
                    return None
            # A nested open char comes first: go one level deeper.
            return self._find_closing_char_index(
                start_delim, open_char, close_char, template, start_index=open_char_index + 1,
                num_open_chars=num_open_chars + 1, strings=strings
            )

        if num_open_chars == 0:
            return {
                'close_char_index': close_char_index,
                'strings': strings,
            }
        # This close char belongs to a nested open char: pop one level.
        return self._find_closing_char_index(
            start_delim, open_char, close_char, template, start_index=close_char_index + 1,
            num_open_chars=num_open_chars - 1, strings=strings
        )

893
    def _uncommented_start_index(self, template, start_index):
Robert Raposa committed
894
        """
895
        Finds the first start_index that is on an uncommented line.
Robert Raposa committed
896 897

        Arguments:
898 899
            template: The template to be searched.
            start_index: The start index of the last open char.
Robert Raposa committed
900

901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916
        Returns:
            If start_index is on an uncommented out line, returns start_index.
            Otherwise, returns the start_index of the first line that is
            uncommented, if there is one. Otherwise, returns None.
        """
        if self.LINE_COMMENT_DELIM is not None:
            line_start_index = StringLines(template).index_to_line_start_index(start_index)
            uncommented_line_start_index_regex = re.compile("^(?!\s*{})".format(self.LINE_COMMENT_DELIM), re.MULTILINE)
            # Finds the line start index of the first uncommented line, including the current line.
            match = uncommented_line_start_index_regex.search(template, line_start_index)
            if match is None:
                # No uncommented lines.
                return None
            elif match.start() < start_index:
                # Current line is uncommented, so return original start_index.
                return start_index
Robert Raposa committed
917
            else:
918 919 920 921 922
                # Return start of first uncommented line.
                return match.start()
        else:
            # No line comment delimeter, so this acts as a no-op.
            return start_index
Robert Raposa committed
923 924 925 926 927 928 929 930 931 932 933


class UnderscoreTemplateLinter(BaseLinter):
    """
    The linter for Underscore.js template files.
    """
    def __init__(self):
        """
        Sets up the directories to skip when linting Underscore.js templates:
        the shared SKIP_DIRS plus 'test'.
        """
        super(UnderscoreTemplateLinter, self).__init__()
        self._skip_underscore_dirs = SKIP_DIRS + ('test',)
Robert Raposa committed
935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959

    def process_file(self, directory, file_name):
        """
        Lints the file if it is an Underscore.js template (by its
        '.underscore' extension) in a directory that is not skipped.

        Arguments:
            directory (string): The directory of the file to be checked
            file_name (string): A filename for a potential underscore file

        Returns:
            The file results containing any violations.  The results are
            returned without linting when the file does not qualify.

        """
        full_path = os.path.normpath(directory + '/' + file_name)
        results = FileResults(full_path)

        in_lintable_directory = self._is_valid_directory(self._skip_underscore_dirs, directory)
        if in_lintable_directory and file_name.lower().endswith('.underscore'):
            return self._load_and_check_file_is_safe(full_path, self.check_underscore_file_is_safe, results)
        return results

Robert Raposa committed
960
    def check_underscore_file_is_safe(self, underscore_template, results):
Robert Raposa committed
961
        """
Robert Raposa committed
962
        Checks for violations in an Underscore.js template.
Robert Raposa committed
963 964

        Arguments:
Robert Raposa committed
965
            underscore_template: The contents of the Underscore.js template.
966
            results: A file results objects to which violations will be added.
Robert Raposa committed
967 968

        """
Robert Raposa committed
969 970
        self._check_underscore_expressions(underscore_template, results)
        results.prepare_results(underscore_template)
971

Robert Raposa committed
972
    def _check_underscore_expressions(self, underscore_template, results):
973
        """
Robert Raposa committed
974
        Searches for Underscore.js expressions that contain violations.
975 976

        Arguments:
Robert Raposa committed
977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002
            underscore_template: The contents of the Underscore.js template.
            results: A list of results into which violations will be added.

        """
        expressions = self._find_unescaped_expressions(underscore_template)
        for expression in expressions:
            if not self._is_safe_unescaped_expression(expression):
                results.violations.append(ExpressionRuleViolation(
                    Rules.underscore_not_escaped, expression
                ))

    def _is_safe_unescaped_expression(self, expression):
        """
        Determines whether an expression is safely escaped, even though it is
        using the expression syntax that doesn't itself escape (i.e. <%= ).

        In some cases it is ok to not use the Underscore.js template escape
        (i.e. <%- ) because the escaping is happening inside the expression.

        Safe examples::

            <%= HtmlUtils.ensureHtml(message) %>
            <%= _.escape(message) %>

        Arguments:
            expression: The Expression being checked.
1003 1004

        Returns:
Robert Raposa committed
1005
            True if the Expression has been safely escaped, and False otherwise.
1006 1007

        """
Robert Raposa committed
1008 1009 1010
        if expression.expression_inner.startswith('HtmlUtils.'):
            return True
        if expression.expression_inner.startswith('_.escape('):
1011 1012 1013
            return True
        return False

Robert Raposa committed
1014
    def _find_unescaped_expressions(self, underscore_template):
1015
        """
Robert Raposa committed
1016 1017 1018
        Returns a list of unsafe expressions.

        At this time all expressions that are unescaped are considered unsafe.
1019 1020

        Arguments:
Robert Raposa committed
1021
            underscore_template: The contents of the Underscore.js template.
1022

Robert Raposa committed
1023 1024
        Returns:
            A list of Expressions.
1025
        """
Robert Raposa committed
1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036
        unescaped_expression_regex = re.compile("<%=.*?%>", re.DOTALL)

        expressions = []
        for match in unescaped_expression_regex.finditer(underscore_template):
            expression = Expression(
                match.start(), match.end(), template=underscore_template, start_delim="<%=", end_delim="%>"
            )
            expressions.append(expression)
        return expressions


Robert Raposa committed
1037
class JavaScriptLinter(BaseLinter):
Robert Raposa committed
1038 1039 1040
    """
    The linter for JavaScript and CoffeeScript files.
    """
1041 1042

    LINE_COMMENT_DELIM = "//"
Robert Raposa committed
1043

Robert Raposa committed
1044 1045 1046 1047 1048
    def __init__(self):
        """
        Sets up the directories to skip for JavaScript and CoffeeScript files,
        and the Underscore.js linter that is also run over file contents.
        """
        super(JavaScriptLinter, self).__init__()
        self.underscore_linter = UnderscoreTemplateLinter()
        self._skip_coffeescript_dirs = SKIP_DIRS
        self._skip_javascript_dirs = SKIP_DIRS + ('i18n', 'static/coffee')
Robert Raposa committed
1052

Robert Raposa committed
1053
    def process_file(self, directory, file_name):
        """
        Lints the file if it is a JavaScript ('.js', but not '.min.js') or
        CoffeeScript ('.coffee') file in a directory that is not skipped.

        Arguments:
            directory (string): The directory of the file to be checked
            file_name (string): A filename for a potential JavaScript file

        Returns:
            The file results containing any violations.  The results are
            returned without linting when the file does not qualify.

        """
        file_full_path = os.path.normpath(directory + '/' + file_name)
        results = FileResults(file_full_path)
        if not results.is_file:
            return results

        # Each file type has its own set of directories to skip.
        lowered_name = file_name.lower()
        if lowered_name.endswith('.coffee'):
            skip_dirs = self._skip_coffeescript_dirs
        elif lowered_name.endswith('.js') and not lowered_name.endswith('.min.js'):
            skip_dirs = self._skip_javascript_dirs
        else:
            return results

        if self._is_valid_directory(skip_dirs, directory):
            return self._load_and_check_file_is_safe(file_full_path, self.check_javascript_file_is_safe, results)
        return results
1083

Robert Raposa committed
1084
    def check_javascript_file_is_safe(self, file_contents, results):
        """
        Runs every JavaScript check against the file contents and then
        prepares the results.

        Arguments:
            file_contents: The contents of the JavaScript file.
            results: A file results object to which violations will be added.

        """
        # Each entry is (function names, rule, caller check, argument check).
        # Exactly one of the two checks is supplied per entry.
        jquery_checks = (
            ("append", Rules.javascript_jquery_append, None, self._is_jquery_argument_safe),
            ("prepend", Rules.javascript_jquery_prepend, None, self._is_jquery_argument_safe),
            (
                "unwrap|wrap|wrapAll|wrapInner|after|before|replaceAll|replaceWith",
                Rules.javascript_jquery_insertion, None, self._is_jquery_argument_safe,
            ),
            (
                "appendTo|prependTo|insertAfter|insertBefore",
                Rules.javascript_jquery_insert_into_target, self._is_jquery_insert_caller_safe, None,
            ),
            ("html", Rules.javascript_jquery_html, None, self._is_jquery_html_argument_safe),
        )
        for function_names, rule, caller_check, argument_check in jquery_checks:
            self._check_jquery_function(
                file_contents, function_names, rule, caller_check, argument_check, results
            )

        self._check_javascript_interpolate(file_contents, results)
        self._check_javascript_escape(file_contents, results)
        self._check_concat_with_html(file_contents, Rules.javascript_concat_html, results)
        self.underscore_linter.check_underscore_file_is_safe(file_contents, results)
        results.prepare_results(file_contents, line_comment_delim=self.LINE_COMMENT_DELIM)
1120

Robert Raposa committed
1121
    def _get_expression_for_function(self, file_contents, function_start_match):
        """
        Builds the expression for the function call opened with
        function_start_match, running through its matching close paren.

        Arguments:
            file_contents: The contents of the JavaScript file.
            function_start_match: A regex match representing the start of the function
                call (e.g. ".escape(").

        Returns:
            An Expression spanning the whole call when the closing paren can
            be found; otherwise an Expression with only a start index.

        """
        start_index = function_start_match.start()
        result = self._find_closing_char_index(
            None, "(", ")", file_contents, start_index=function_start_match.end()
        )
        if result is None:
            return Expression(start_index)
        # The expression's end index is exclusive, so step past the paren.
        return Expression(
            start_index, result['close_char_index'] + 1, template=file_contents,
            start_delim=function_start_match.group(), end_delim=")"
        )
1148

Robert Raposa committed
1149
    def _check_javascript_interpolate(self, file_contents, results):
1150
        """
Robert Raposa committed
1151
        Checks that interpolate() calls are safe.
1152

Robert Raposa committed
1153 1154
        Only use of StringUtils.interpolate() or HtmlUtils.interpolateText()
        are safe.
Robert Raposa committed
1155 1156

        Arguments:
Robert Raposa committed
1157 1158
            file_contents: The contents of the JavaScript file.
            results: A file results objects to which violations will be added.
Robert Raposa committed
1159 1160

        """
Robert Raposa committed
1161 1162 1163 1164 1165
        # Ignores calls starting with "StringUtils.", because those are safe
        regex = re.compile(r"(?<!StringUtils).interpolate\(")
        for function_match in regex.finditer(file_contents):
            expression = self._get_expression_for_function(file_contents, function_match)
            results.violations.append(ExpressionRuleViolation(Rules.javascript_interpolate, expression))
1166

Robert Raposa committed
1167
    def _check_javascript_escape(self, file_contents, results):
Robert Raposa committed
1168
        """
Robert Raposa committed
1169
        Checks that only necessary escape() are used.
Robert Raposa committed
1170

Robert Raposa committed
1171
        Allows for _.escape(), although this shouldn't be the recommendation.
Robert Raposa committed
1172

Robert Raposa committed
1173 1174 1175
        Arguments:
            file_contents: The contents of the JavaScript file.
            results: A file results objects to which violations will be added.
1176

Robert Raposa committed
1177
        """
Robert Raposa committed
1178 1179 1180 1181 1182
        # Ignores calls starting with "_.", because those are safe
        regex = regex = re.compile(r"(?<!_).escape\(")
        for function_match in regex.finditer(file_contents):
            expression = self._get_expression_for_function(file_contents, function_match)
            results.violations.append(ExpressionRuleViolation(Rules.javascript_escape, expression))
1183

Robert Raposa committed
1184
    def _check_jquery_function(self, file_contents, function_names, rule, is_caller_safe, is_argument_safe, results):
1185
        """
Robert Raposa committed
1186 1187
        Checks that the JQuery function_names (e.g. append(), prepend()) calls
        are safe.
1188 1189

        Arguments:
Robert Raposa committed
1190 1191 1192 1193 1194 1195 1196 1197 1198 1199
            file_contents: The contents of the JavaScript file.
            function_names: A pipe delimited list of names of the functions
                (e.g. "wrap|after|before").
            rule: The name of the rule to use for validation errors (e.g.
                Rules.javascript_jquery_append).
            is_caller_safe: A function to test if caller of the JQuery function
                is safe.
            is_argument_safe: A function to test if the argument passed to the
                JQuery function is safe.
            results: A file results objects to which violations will be added.
1200 1201

        """
Robert Raposa committed
1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221
        # Ignores calls starting with "HtmlUtils.", because those are safe
        regex = re.compile(r"(?<!HtmlUtils).(?:{})\(".format(function_names))
        for function_match in regex.finditer(file_contents):
            is_violation = True
            expression = self._get_expression_for_function(file_contents, function_match)
            if expression.end_index is not None:
                start_index = expression.start_index
                inner_start_index = function_match.end()
                close_paren_index = expression.end_index - 1
                function_argument = file_contents[inner_start_index:close_paren_index].strip()
                if is_argument_safe is not None and is_caller_safe is None:
                    is_violation = is_argument_safe(function_argument) is False
                elif is_caller_safe is not None and is_argument_safe is None:
                    line_start_index = StringLines(file_contents).index_to_line_start_index(start_index)
                    caller_line_start = file_contents[line_start_index:start_index]
                    is_violation = is_caller_safe(caller_line_start) is False
                else:
                    raise ValueError("Must supply either is_argument_safe, or is_caller_safe, but not both.")
            if is_violation:
                results.violations.append(ExpressionRuleViolation(rule, expression))
1222

Robert Raposa committed
1223
    def _is_jquery_argument_safe_html_utils_call(self, argument):
Robert Raposa committed
1224
        """
Robert Raposa committed
1225 1226 1227 1228 1229 1230
        Checks that the argument sent to a jQuery DOM insertion function is a
        safe call to HtmlUtils.

        A safe argument is of the form:
        - HtmlUtils.xxx(anything).toString()
        - edx.HtmlUtils.xxx(anything).toString()
Robert Raposa committed
1231 1232

        Arguments:
Robert Raposa committed
1233 1234
            argument: The argument sent to the jQuery function (e.g.
            append(argument)).
Robert Raposa committed
1235 1236

        Returns:
Robert Raposa committed
1237
            True if the argument is safe, and False otherwise.
1238

Robert Raposa committed
1239 1240 1241 1242
        """
        # match on HtmlUtils.xxx().toString() or edx.HtmlUtils
        match = re.search(r"(?:edx\.)?HtmlUtils\.[a-zA-Z0-9]+\(.*\)\.toString\(\)", argument)
        return match is not None and match.group() == argument
1243

Robert Raposa committed
1244
    def _is_jquery_argument_safe(self, argument):
Robert Raposa committed
1245
        """
Robert Raposa committed
1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257
        Check the argument sent to a jQuery DOM insertion function (e.g.
        append()) to check if it is safe.

        Safe arguments include:
        - the argument can end with ".el", ".$el" (with no concatenation)
        - the argument can be a single variable ending in "El" or starting with
            "$". For example, "testEl" or "$test".
        - the argument can be a single string literal with no HTML tags
        - the argument can be a call to $() with the first argument a string
            literal with a single HTML tag.  For example, ".append($('<br/>'))"
            or ".append($('<br/>'))".
        - the argument can be a call to HtmlUtils.xxx(html).toString()
Robert Raposa committed
1258 1259

        Arguments:
Robert Raposa committed
1260 1261
            argument: The argument sent to the jQuery function (e.g.
            append(argument)).
Robert Raposa committed
1262

1263
        Returns:
Robert Raposa committed
1264
            True if the argument is safe, and False otherwise.
Robert Raposa committed
1265 1266

        """
Robert Raposa committed
1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293
        match_variable_name = re.search("[_$a-zA-Z]+[_$a-zA-Z0-9]*", argument)
        if match_variable_name is not None and match_variable_name.group() == argument:
            if argument.endswith('El') or argument.startswith('$'):
                return True
        elif argument.startswith('"') or argument.startswith("'"):
            # a single literal string with no HTML is ok
            # 1. it gets rid of false negatives for non-jquery calls (e.g. graph.append("g"))
            # 2. JQuery will treat this as a plain text string and will escape any & if needed.
            string = ParseString(argument, 0, len(argument))
            if string.string == argument and "<" not in argument:
                return True
        elif argument.startswith('$('):
            # match on JQuery calls with single string and single HTML tag
            # Examples:
            #    $("<span>")
            #    $("<div/>")
            #    $("<div/>", {...})
            match = re.search(r"""\$\(\s*['"]<[a-zA-Z0-9]+\s*[/]?>['"]\s*[,)]""", argument)
            if match is not None:
                return True
        elif self._is_jquery_argument_safe_html_utils_call(argument):
            return True
        # check rules that shouldn't use concatenation
        elif "+" not in argument:
            if argument.endswith('.el') or argument.endswith('.$el'):
                return True
        return False
1294

Robert Raposa committed
1295
    def _is_jquery_html_argument_safe(self, argument):
Robert Raposa committed
1296
        """
Robert Raposa committed
1297 1298 1299 1300 1301 1302 1303
        Check the argument sent to the jQuery html() function to check if it is
        safe.

        Safe arguments to html():
        - no argument (i.e. getter rather than setter)
        - empty string is safe
        - the argument can be a call to HtmlUtils.xxx(html).toString()
Robert Raposa committed
1304 1305

        Arguments:
Robert Raposa committed
1306
            argument: The argument sent to html() in code (i.e. html(argument)).
Robert Raposa committed
1307

Robert Raposa committed
1308 1309
        Returns:
            True if the argument is safe, and False otherwise.
1310

Robert Raposa committed
1311
        """
Robert Raposa committed
1312 1313 1314 1315 1316
        if argument == "" or argument == "''" or argument == '""':
            return True
        elif self._is_jquery_argument_safe_html_utils_call(argument):
            return True
        return False
Robert Raposa committed
1317

Robert Raposa committed
1318
    def _is_jquery_insert_caller_safe(self, caller_line_start):
Robert Raposa committed
1319
        """
Robert Raposa committed
1320 1321
        Check that the caller of a jQuery DOM insertion function that takes a
        target is safe (e.g. thisEl.appendTo(target)).
1322

Robert Raposa committed
1323
        If original line was::
1324

Robert Raposa committed
1325
            draggableObj.iconEl.appendTo(draggableObj.containerEl);
1326

Robert Raposa committed
1327
        Parameter caller_line_start would be:
1328

Robert Raposa committed
1329 1330 1331 1332 1333 1334
            draggableObj.iconEl

        Safe callers include:
        - the caller can be ".el", ".$el"
        - the caller can be a single variable ending in "El" or starting with
            "$". For example, "testEl" or "$test".
1335 1336

        Arguments:
Robert Raposa committed
1337
            caller_line_start: The line leading up to the jQuery function call.
1338 1339

        Returns:
Robert Raposa committed
1340
            True if the caller is safe, and False otherwise.
1341 1342

        """
Robert Raposa committed
1343 1344 1345 1346 1347 1348 1349 1350
        # matches end of line for caller, which can't itself be a function
        caller_match = re.search(r"(?:\s*|[.])([_$a-zA-Z]+[_$a-zA-Z0-9])*$", caller_line_start)
        if caller_match is None:
            return False
        caller = caller_match.group(1)
        if caller is None:
            return False
        elif caller.endswith('El') or caller.startswith('$'):
1351
            return True
Robert Raposa committed
1352
        elif caller == 'el' or caller == 'parentNode':
1353 1354
            return True
        return False
1355

1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367
    def _check_concat_with_html(self, file_contents, rule, results):
        """
        Checks that strings with HTML are not concatenated

        Matches that begin on the same line are merged into a single
        violation that runs from the first match to the last.

        Arguments:
            file_contents: The contents of the JavaScript file.
            rule: The rule that was violated if this fails.
            results: A file results object to which violations will be added.

        """
        lines = StringLines(file_contents)
        # Match quoted strings that starts with '<' or ends with '>'.
        html_string_template = r"""
            {quote}                             # Opening quote.
                (
                   \s*<                         # Starts with '<' (ignoring spaces)
                   ([^{quote}]|[\\]{quote})*    # followed by anything but a closing quote.
                |                               # Or,
                   ([^{quote}]|[\\]{quote})*    # Anything but a closing quote
                   >\s*                         # ending with '>' (ignoring spaces)
                )
            {quote}                             # Closing quote.
        """
        # Match single or double quote.
        html_string_pattern = "({}|{})".format(
            html_string_template.format(quote="'"),
            html_string_template.format(quote='"'),
        )
        # Match quoted HTML strings next to a '+'.
        concat_with_html_regex = re.compile(
            r"(\+\s*{string_with_html}|{string_with_html}\s*\+)".format(
                string_with_html=html_string_pattern,
            ),
            re.VERBOSE
        )

        # The violation currently being built up; it is only recorded once a
        # match on a different line (or the end of the file) is reached.
        pending_expression = None
        for match in concat_with_html_regex.finditer(file_contents):
            if pending_expression is not None:
                pending_line = lines.index_to_line_number(pending_expression.start_index)
                if pending_line == lines.index_to_line_number(match.start()):
                    # Same line: expand the pending violation to cover this match.
                    pending_expression = Expression(
                        pending_expression.start_index, match.end(), template=file_contents
                    )
                    continue
                results.violations.append(ExpressionRuleViolation(rule, pending_expression))
            pending_expression = Expression(match.start(), match.end(), template=file_contents)

        # add final expression
        if pending_expression is not None:
            results.violations.append(ExpressionRuleViolation(rule, pending_expression))


class BaseVisitor(ast.NodeVisitor):
    """
    Base class for AST NodeVisitor used for Python safe linting.

    Important: This base visitor skips all __repr__ function definitions.
    """
    def __init__(self, file_contents, results):
        """
        Init method.

        Arguments:
            file_contents: The contents of the Python file.
            results: A file results objects to which violations will be added.

        """
        super(BaseVisitor, self).__init__()
        self.file_contents = file_contents
        self.lines = StringLines(self.file_contents)
        self.results = results

    def node_to_expression(self, node):
        """
        Takes a node and translates it to an expression to be used with
        violations.

        For string nodes the expression spans the string as located by
        ParseString; for every other node only the start index is set.

        Arguments:
            node: An AST node.

        Returns:
            An Expression positioned at the node.

        """
        line_start_index = self.lines.line_number_to_start_index(node.lineno)
        start_index = line_start_index + node.col_offset
        if not isinstance(node, ast.Str):
            return Expression(start_index)

        # Triple quotes give col_offset of -1 on the last line of the string.
        if node.col_offset == -1:
            triple_quote_regex = re.compile("""['"]{3}""")
            end_triple_quote_match = triple_quote_regex.search(self.file_contents, line_start_index)
            if end_triple_quote_match is None:
                # No triple quote found from this line onward; fall back to
                # the start of the line rather than failing on None.
                start_index = line_start_index
            else:
                open_quote_index = self.file_contents.rfind(
                    end_triple_quote_match.group(), 0, end_triple_quote_match.start()
                )
                # rfind() returns -1 when nothing is found, so index 0 is a
                # legitimate position for the opening quote.
                if open_quote_index >= 0:
                    start_index = open_quote_index
                else:
                    # If we can't find a starting quote, let's assume that what
                    # we considered the end quote is really the start quote.
                    start_index = end_triple_quote_match.start()
        string = ParseString(self.file_contents, start_index, len(self.file_contents))
        return Expression(string.start_index, string.end_index)

    def visit_FunctionDef(self, node):
        """
        Skips processing of __repr__ functions, since these sometimes use '<'
        for non-HTML purposes.

        Arguments:
            node: An AST node.
        """
        if node.name != '__repr__':
            self.generic_visit(node)


class HtmlStringVisitor(BaseVisitor):
    """
    Checks for strings that contain HTML. Any string containing '<' is
    considered potential HTML, unless it also contains a regex named group
    such as "(?P<group>)".

    Strings containing something that looks like an HTML entity (for example
    "&amp;") are collected separately.

    To be used only with strings in context of format or concat.

    """
    def __init__(self, file_contents, results, skip_wrapped_html=False):
        """
        Init function.

        Arguments:
            file_contents: The contents of the Python file.
            results: A file results objects to which violations will be added.
            skip_wrapped_html: True if visitor should skip strings wrapped with
                HTML() or Text(), and False otherwise.
        """
        super(HtmlStringVisitor, self).__init__(file_contents, results)
        self.skip_wrapped_html = skip_wrapped_html
        # String nodes that contain '<' outside of a regex named group.
        self.unsafe_html_string_nodes = []
        # String nodes that contain an HTML entity such as "&lt;".
        self.over_escaped_entity_string_nodes = []
        # Set when an HTML() or Text() call was skipped during the visit.
        self.has_text_or_html_call = False

    def visit_Str(self, node):
        """
        When strings are visited, checks if it contains HTML.

        Arguments:
            node: An AST node.
        """
        # Skips '<' (and '>') in regex named groups. For example, "(?P<group>)".
        if re.search('[(][?]P<', node.s) is None and '<' in node.s:
            self.unsafe_html_string_nodes.append(node)
        if re.search(r"&[#]?[a-zA-Z0-9]+;", node.s):
            self.over_escaped_entity_string_nodes.append(node)

    def visit_Call(self, node):
        """
        Skips processing of string contained inside HTML() and Text() calls when
        skip_wrapped_html is True.

        Arguments:
            node: An AST node.

        """
        is_html_or_text_call = isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']
        if self.skip_wrapped_html and is_html_or_text_call:
            # Do not descend into the wrapped call; just remember it was seen.
            self.has_text_or_html_call = True
        else:
            self.generic_visit(node)


class ContainsFormatVisitor(BaseVisitor):
    """
    Detects whether a subtree contains any `.format` attribute access.

    Intended to be run on the ast.Call node of an HTML() or Text() call, so
    that a format() call nested inside it can be reported as illegal.

    """
    def __init__(self, file_contents, results):
        """
        Init function.

        Arguments:
            file_contents: The contents of the Python file.
            results: A file results objects to which violations will be added.

        """
        super(ContainsFormatVisitor, self).__init__(file_contents, results)
        # Flipped to True as soon as a `.format` attribute is encountered.
        self.contains_format_call = False

    def visit_Attribute(self, node):
        """
        Records a `.format` attribute; any other attribute is traversed.

        Arguments:
            node: An AST node.

        """
        # Attribute(expr value, identifier attr, expr_context ctx)
        if node.attr != 'format':
            self.generic_visit(node)
            return
        # The children of a `.format` attribute are not visited.
        self.contains_format_call = True


class FormatInterpolateVisitor(BaseVisitor):
    """
    Checks if format() interpolates any HTML() or Text() calls. In other words,
    are Text() or HTML() calls nested inside the call to format().

    This visitor is meant to be called on a format() attribute node.

    """
    def __init__(self, file_contents, results):
        """
        Init function.

        Arguments:
            file_contents: The contents of the Python file.
            results: A file results objects to which violations will be added.

        """
        super(FormatInterpolateVisitor, self).__init__(file_contents, results)
        # True once an HTML() or Text() call has been visited.
        self.interpolates_text_or_html = False
        # The left-hand side of the first format() call that was visited.
        self.format_caller_node = None

    def visit_Call(self, node):
        """
        Checks all calls. Remembers the caller of the initial format() call, or
        in other words, the left-hand side of the call. Also tracks if HTML()
        or Text() calls were seen.

        Arguments:
            node: The AST root node.

        """
        # Compare the attribute name by value; an identity check against a
        # string literal only works when the interpreter happens to intern
        # both strings.
        if isinstance(node.func, ast.Attribute) and node.func.attr == 'format':
            if self.format_caller_node is None:
                # Store the caller, or left-hand-side node of the initial
                # format() call.
                self.format_caller_node = node.func.value
        elif isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']:
            # found Text() or HTML() call in arguments passed to format()
            self.interpolates_text_or_html = True
        self.generic_visit(node)

    def generic_visit(self, node):
        """
        Determines whether or not to continue to visit nodes according to the
        following rules:
        - Once a Text() or HTML() call has been found, stop visiting more nodes.
        - Skip the caller of the outer-most format() call, or in other words,
        the left-hand side of the call.

        Arguments:
            node: The AST root node.

        """
        if self.interpolates_text_or_html is False:
            if self.format_caller_node is not node:
                super(FormatInterpolateVisitor, self).generic_visit(node)


class OuterFormatVisitor(BaseVisitor):
    """
    Runs checks on the outer-most format() calls only; once a format() call
    is handled, its children are not traversed, so nested format() calls are
    not checked again.

    This visitor is meant to be used once from the root.

    """
    def visit_Call(self, node):
        """
        Reports every HTML-looking string found under a format() call that is
        not wrapped in HTML() or Text().

        Calls that are not format() calls are simply traversed.

        Arguments:
             node: An AST node.
        """
        func = node.func
        is_format_call = isinstance(func, ast.Attribute) and func.attr == 'format'
        if not is_format_call:
            self.generic_visit(node)
            return

        # The third argument asks the visitor to skip strings already wrapped
        # in HTML() or Text().
        html_visitor = HtmlStringVisitor(self.file_contents, self.results, True)
        html_visitor.visit(node)
        for string_node in html_visitor.unsafe_html_string_nodes:
            expression = self.node_to_expression(string_node)
            violation = ExpressionRuleViolation(Rules.python_wrap_html, expression)
            self.results.violations.append(violation)
        # Child nodes of this format() node are intentionally not processed.


class AllNodeVisitor(BaseVisitor):
    """
    Walks every node, always continuing into children via generic_visit(),
    and delegates to the other visitors to detect a variety of violations.

    This visitor is meant to be used once from the root.

    """

    def visit_Attribute(self, node):
        """
        Reports any use of the deprecated `display_name_with_default_escaped`
        attribute.

        Arguments:
             node: An AST node.
        """
        uses_deprecated_name = node.attr == 'display_name_with_default_escaped'
        if uses_deprecated_name:
            expression = self.node_to_expression(node)
            self.results.violations.append(
                ExpressionRuleViolation(Rules.python_deprecated_display_name, expression)
            )
        self.generic_visit(node)

    def visit_Call(self, node):
        """
        Checks two kinds of calls:
        - A format() call with HTML() or Text() nested inside it must itself
        be called on an HTML() or Text() call.
        - An HTML() or Text() call must not contain a format() call.

        Arguments:
             node: An AST node.

        """
        func = node.func
        if isinstance(func, ast.Attribute) and func.attr == 'format':
            interpolate_visitor = FormatInterpolateVisitor(self.file_contents, self.results)
            interpolate_visitor.visit(node)
            if interpolate_visitor.interpolates_text_or_html:
                caller = func.value
                # The left-hand side of format() must be Text(...) or
                # HTML(...) whenever Text() or HTML() is nested inside.
                caller_is_wrapped = (
                    isinstance(caller, ast.Call) and
                    isinstance(caller.func, ast.Name) and
                    caller.func.id in ['Text', 'HTML']
                )
                if not caller_is_wrapped:
                    expression = self.node_to_expression(func)
                    self.results.violations.append(
                        ExpressionRuleViolation(Rules.python_requires_html_or_text, expression)
                    )
        elif isinstance(func, ast.Name) and func.id in ['HTML', 'Text']:
            format_visitor = ContainsFormatVisitor(self.file_contents, self.results)
            format_visitor.visit(node)
            if format_visitor.contains_format_call:
                expression = self.node_to_expression(func)
                self.results.violations.append(
                    ExpressionRuleViolation(Rules.python_close_before_format, expression)
                )

        self.generic_visit(node)

    def visit_BinOp(self, node):
        """
        Reports '%' interpolation and '+' concatenation when either operand
        contains a string with HTML.

        Arguments:
             node: An AST node.

        """
        if isinstance(node.op, ast.Mod):
            rule = Rules.python_interpolate_html
        elif isinstance(node.op, ast.Add):
            rule = Rules.python_concat_html
        else:
            rule = None

        if rule is not None:
            found_html_string = False
            # Each operand gets its own visitor so no state is shared.
            for operand in (node.left, node.right):
                operand_visitor = HtmlStringVisitor(self.file_contents, self.results)
                operand_visitor.visit(operand)
                if operand_visitor.unsafe_html_string_nodes:
                    found_html_string = True
            if found_html_string:
                self.results.violations.append(
                    ExpressionRuleViolation(rule, self.node_to_expression(node))
                )
        self.generic_visit(node)

Robert Raposa committed
1741

Robert Raposa committed
1742
class PythonLinter(BaseLinter):
    """
    The linter for Python files.

    Python code is parsed with the ast module and most rules are checked with
    AST visitors. The custom escape rule is still checked with a regular
    expression over the raw text, so a match inside a docstring or comment is
    reported like any other match rather than being skipped automatically.
    """

    LINE_COMMENT_DELIM = "#"

    def __init__(self):
        """
        Init method.
        """
        super(PythonLinter, self).__init__()
        self._skip_python_dirs = SKIP_DIRS + ('tests', 'test/acceptance')

    def process_file(self, directory, file_name):
        """
        Process file to determine if it is a Python file and
        if it is safe.

        Arguments:
            directory (string): The directory of the file to be checked
            file_name (string): A filename for a potential Python file

        Returns:
            The file results containing any violations.

        """
        file_full_path = os.path.normpath(directory + '/' + file_name)
        results = FileResults(file_full_path)

        if not results.is_file:
            return results

        if not file_name.lower().endswith('.py'):
            return results

        # skip tests.py files
        # TODO: Add configuration for files and paths
        if file_name.lower().endswith('tests.py'):
            return results

        # skip this linter code (i.e. safe_template_linter.py)
        if file_name == os.path.basename(__file__):
            return results

        if not self._is_valid_directory(self._skip_python_dirs, directory):
            return results

        return self._load_and_check_file_is_safe(file_full_path, self.check_python_file_is_safe, results)

    def check_python_file_is_safe(self, file_contents, results):
        """
        Checks for violations in a Python file.

        Arguments:
            file_contents: The contents of the Python file.
            results: A file results objects to which violations will be added.

        """
        root_node = self.parse_python_code(file_contents, results)
        self.check_python_code_is_safe(file_contents, root_node, results)
        # Check rules specific to .py files only
        # Note that in template files, the scope is different, so you can make
        # different assumptions.
        if root_node is not None:
            # check format() rules that can be run on outer-most format() calls
            visitor = OuterFormatVisitor(file_contents, results)
            visitor.visit(root_node)
        results.prepare_results(file_contents, line_comment_delim=self.LINE_COMMENT_DELIM)

    def check_python_code_is_safe(self, python_code, root_node, results):
        """
        Checks for violations in Python code snippet. This can also be used for
        Python that appears in files other than .py files, like in templates.

        Arguments:
            python_code: The contents of the Python code.
            root_node: The root node of the Python code parsed by AST, or None
                if the code could not be parsed. When None, only the
                regex-based checks are run.
            results: A file results objects to which violations will be added.

        """
        if root_node is not None:
            # check illegal concatenation and interpolation
            visitor = AllNodeVisitor(python_code, results)
            visitor.visit(root_node)
        # check rules parse with regex
        self._check_custom_escape(python_code, results)

    def parse_python_code(self, python_code, results):
        """
        Parses Python code.

        Arguments:
            python_code: The Python code to be parsed.
            results: A file results objects to which a parse error violation
                is added if the code has a SyntaxError.

        Returns:
            The root node that was parsed, or None for SyntaxError.

        """
        python_code = self._strip_file_encoding(python_code)
        try:
            return ast.parse(python_code)

        except SyntaxError as e:
            if e.offset is None:
                # No column information is available; point at the start.
                expression = Expression(0)
            else:
                lines = StringLines(python_code)
                line_start_index = lines.line_number_to_start_index(e.lineno)
                expression = Expression(line_start_index + e.offset)
            results.violations.append(ExpressionRuleViolation(
                Rules.python_parse_error, expression
            ))
            return None

    def _strip_file_encoding(self, file_contents):
        """
        Removes file encoding from file_contents because the file was already
        read into Unicode, and the AST parser complains.

        Arguments:
            file_contents: The Python file contents.

        Returns:
            The Python file contents with the encoding stripped.
        """
        # PEP-263 Provides Regex for Declaring Encoding
        # Example: -*- coding: <encoding name> -*-
        # This is only allowed on the first two lines, and it must be stripped
        # before parsing, because we have already read into Unicode and the
        # AST parser complains.
        encoding_regex = re.compile(r"^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
        encoding_match = encoding_regex.search(file_contents)
        # If encoding comment not found on first line, search second line.
        if encoding_match is None:
            lines = StringLines(file_contents)
            if lines.line_count() >= 2:
                encoding_match = encoding_regex.search(lines.line_number_to_line(2))
        # If encoding was found, strip it
        if encoding_match is not None:
            # Only the first occurrence of the matched text is replaced, and
            # a bare '#' is left in its place so line numbers do not shift.
            file_contents = file_contents.replace(encoding_match.group(), '#', 1)
        return file_contents

    def _check_custom_escape(self, file_contents, results):
        """
        Checks for custom escaping calls, rather than using a standard escaping
        method.

        A violation is added for every line span that contains both a literal
        '<' and the entity '&lt;', in either order.

        Arguments:
            file_contents: The contents of the Python file
            results: A list of results into which violations will be added.

        """
        for match in re.finditer("(<.*&lt;|&lt;.*<)", file_contents):
            expression = Expression(match.start(), match.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_custom_escape, expression
            ))

1906

Robert Raposa committed
1907 1908 1909 1910
class MakoTemplateLinter(BaseLinter):
    """
    The linter for Mako template files.
    """
1911 1912 1913 1914 1915 1916 1917 1918 1919
    LINE_COMMENT_DELIM = "##"

    def __init__(self):
        """
        Init method.

        Creates the JavaScript and Python linters that are kept on this
        instance as `javascript_linter` and `python_linter`.
        """
        super(MakoTemplateLinter, self).__init__()
        self.javascript_linter = JavaScriptLinter()
        self.python_linter = PythonLinter()
1920

Robert Raposa committed
1921
    def process_file(self, directory, file_name):
        """
        Process file to determine if it is a Mako template file and
        if it is safe.

        Arguments:
            directory (string): The directory of the file to be checked
            file_name (string): A filename for a potential Mako file

        Returns:
            The file results containing any violations.

        """
        mako_file_full_path = os.path.normpath(directory + '/' + file_name)
        results = FileResults(mako_file_full_path)

        if not results.is_file:
            return results

        if not self._is_valid_directory(directory):
            return results

        # TODO: When safe-by-default is turned on at the platform level, will we:
        # 1. Turn it on for .html only, or
        # 2. Turn it on for all files, and have different rulesets that have
        #    different rules of .xml, .html, .js, .txt Mako templates (e.g. use
        #    the n filter to turn off h for some of these)?
        # For now, we only check .html and .xml files
        if not file_name.lower().endswith(('.html', '.xml')):
            return results

        return self._load_and_check_file_is_safe(mako_file_full_path, self._check_mako_file_is_safe, results)

    def _is_valid_directory(self, directory):
        """
        Determines if the provided directory is a directory that could contain
        Mako template files that need to be linted.

        Arguments:
            directory: The directory to be linted.

        Returns:
            True if this directory should be linted for Mako template violations
            and False otherwise.
        """
        if is_skip_dir(SKIP_DIRS, directory):
            return False

        # TODO: This is an imperfect guess concerning the Mako template
        # directories. This needs to be reviewed before turning on safe by
        # default at the platform level.
        return ('/templates/' in directory) or directory.endswith('/templates')

    def _check_mako_file_is_safe(self, mako_template, results):
        """
        Checks for violations in a Mako template.

        Templates that look like Django templates are not checked at all.

        Arguments:
            mako_template: The contents of the Mako template.
            results: A file results objects to which violations will be added.

        """
        if self._is_django_template(mako_template):
            return
        has_page_default = self._has_page_default(mako_template, results)
        self._check_mako_expressions(mako_template, has_page_default, results)
        self._check_mako_python_blocks(mako_template, has_page_default, results)
        results.prepare_results(mako_template, line_comment_delim=self.LINE_COMMENT_DELIM)
1992

Robert Raposa committed
1993
    def _is_django_template(self, mako_template):
Robert Raposa committed
1994
        """
Robert Raposa committed
1995
            Determines if the template is actually a Django template.
1996 1997

        Arguments:
Robert Raposa committed
1998
            mako_template: The template code.
1999 2000

        Returns:
Robert Raposa committed
2001 2002
            True if this is really a Django template, and False otherwise.

2003
        """
Robert Raposa committed
2004 2005 2006
        if re.search('({%.*%})|({{.*}})', mako_template) is not None:
            return True
        return False
2007

Robert Raposa committed
2008
    def _get_page_tag_count(self, mako_template):
2009
        """
Robert Raposa committed
2010 2011
        Determines the number of page expressions in the Mako template. Ignores
        page expressions that are commented out.
2012 2013

        Arguments:
Robert Raposa committed
2014
            mako_template: The contents of the Mako template.
Robert Raposa committed
2015 2016

        Returns:
Robert Raposa committed
2017
            The number of page expressions
2018
        """
Robert Raposa committed
2019 2020 2021
        count = len(re.findall('<%page ', mako_template, re.IGNORECASE))
        count_commented = len(re.findall(r'##\s+<%page ', mako_template, re.IGNORECASE))
        return max(0, count - count_commented)
Robert Raposa committed
2022

Robert Raposa committed
2023 2024 2025 2026
    def _has_page_default(self, mako_template, results):
        """
        Checks if the Mako template contains the page expression marking it as
        safe by default.
2027

Robert Raposa committed
2028 2029 2030
        Arguments:
            mako_template: The contents of the Mako template.
            results: A list of results into which violations will be added.
2031

Robert Raposa committed
2032 2033
        Side effect:
            Adds violations regarding page default if necessary
Robert Raposa committed
2034

Robert Raposa committed
2035 2036
        Returns:
            True if the template has the page default, and False otherwise.
Robert Raposa committed
2037 2038

        """
Robert Raposa committed
2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054
        page_tag_count = self._get_page_tag_count(mako_template)
        # check if there are too many page expressions
        if 2 <= page_tag_count:
            results.violations.append(RuleViolation(Rules.mako_multiple_page_tags))
            return False
        # make sure there is exactly 1 page expression, excluding commented out
        # page expressions, before proceeding
        elif page_tag_count != 1:
            results.violations.append(RuleViolation(Rules.mako_missing_default))
            return False
        # check that safe by default (h filter) is turned on
        page_h_filter_regex = re.compile('<%page[^>]*expression_filter=(?:"h"|\'h\')[^>]*/>')
        page_match = page_h_filter_regex.search(mako_template)
        if not page_match:
            results.violations.append(RuleViolation(Rules.mako_missing_default))
        return page_match
Robert Raposa committed
2055

Robert Raposa committed
2056
    def _check_mako_expressions(self, mako_template, has_page_default, results):
        """
        Searches for Mako expressions and then checks if they contain
        violations, including checking JavaScript contexts for JavaScript
        violations.

        Arguments:
            mako_template: The contents of the Mako template.
            has_page_default: True if the page is marked as default, False
                otherwise.
            results: A list of results into which violations will be added.

        """
        expressions = self._find_mako_expressions(mako_template)
        contexts = self._get_contexts(mako_template)
        self._check_javascript_contexts(mako_template, contexts, results)
        for expression in expressions:
            # An expression without an end index is reported as unparseable
            # and gets no further checks.
            if expression.end_index is None:
                results.violations.append(ExpressionRuleViolation(
                    Rules.mako_unparseable_expression, expression
                ))
                continue

            context = self._get_context(contexts, expression.start_index)
            self._check_expression_and_filters(mako_template, expression, context, has_page_default, results)
Robert Raposa committed
2081

Robert Raposa committed
2082 2083 2084 2085
    def _check_javascript_contexts(self, mako_template, contexts, results):
        """
        Lint the JavaScript contexts for JavaScript violations inside a Mako
        template.
Robert Raposa committed
2086

Robert Raposa committed
2087 2088 2089 2090
        Arguments:
            mako_template: The contents of the Mako template.
            contexts: A list of context dicts with 'type' and 'index'.
            results: A list of results into which violations will be added.
Robert Raposa committed
2091

Robert Raposa committed
2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108
        Side effect:
            Adds JavaScript violations to results.
        """
        javascript_start_index = None
        for context in contexts:
            if context['type'] == 'javascript':
                if javascript_start_index < 0:
                    javascript_start_index = context['index']
            else:
                if javascript_start_index is not None:
                    javascript_end_index = context['index']
                    javascript_code = mako_template[javascript_start_index:javascript_end_index]
                    self._check_javascript_context(javascript_code, javascript_start_index, results)
                    javascript_start_index = None
        if javascript_start_index is not None:
            javascript_code = mako_template[javascript_start_index:]
            self._check_javascript_context(javascript_code, javascript_start_index, results)
Robert Raposa committed
2109

Robert Raposa committed
2110
    def _check_javascript_context(self, javascript_code, start_offset, results):
        """
        Lint a single JavaScript context for JavaScript violations inside a Mako
        template.

        Arguments:
            javascript_code: The template contents of the JavaScript context.
            start_offset: The offset of the JavaScript context inside the
                original Mako template.
            results: A list of results into which violations will be added.

        Side effect:
            Adds JavaScript violations to results.

        """
        # Lint the snippet into a scratch results object first, then hand the
        # violations over together with the snippet's offset in the template.
        javascript_results = FileResults("")
        self.javascript_linter.check_javascript_file_is_safe(javascript_code, javascript_results)
        self._shift_and_add_violations(javascript_results, start_offset, results)
Robert Raposa committed
2128

2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152
    def _check_mako_python_blocks(self, mako_template, has_page_default, results):
        """
        Finds every Mako python block in the template and lints the Python
        code it contains.

        Arguments:
            mako_template: The contents of the Mako template.
            has_page_default: True if the page is marked as default, False
                otherwise.
            results: A list of results into which violations will be added.

        """
        # Matches Python blocks such as <% ... %>.  The whitespace required
        # right after "<%" keeps other Mako start tags such as <%def> and
        # <%page> from matching.
        block_pattern = re.compile(r'<%\s(?P<code>.*?)%>', re.DOTALL)
        opener_length = len('<% ')

        for block_match in block_pattern.finditer(mako_template):
            # The code starts right after the three-character opener.
            code_offset = block_match.start() + opener_length
            self._check_expression_python(
                block_match.group('code'), code_offset, has_page_default, results
            )

2153
    def _check_expression_python(self, python_code, start_offset, has_page_default, results):
        """
        Lint the Python inside a single Python expression in a Mako template.

        Arguments:
            python_code: The Python contents of an expression.
            start_offset: The offset of the Python content inside the original
                Mako template.
            has_page_default: True if the page is marked as default, False
                otherwise.
            results: A list of results into which violations will be added.

        Side effect:
            Adds Python violations to results.

        """
        python_results = FileResults("")

        # Dedent expression internals so it is parseable.
        # Note that the final columns reported could be off somewhat.
        adjusted_python_code = textwrap.dedent(python_code)
        # Dedenting can move the first word character to a smaller index; add
        # that difference to the offset so reported positions still line up
        # with the original template.  The patterns are raw strings so that
        # "\w" is not read as an (invalid) string escape sequence.
        first_letter_match = re.search(r'\w', python_code)
        adjusted_first_letter_match = re.search(r'\w', adjusted_python_code)
        if first_letter_match is not None and adjusted_first_letter_match is not None:
            start_offset += (first_letter_match.start() - adjusted_first_letter_match.start())
        python_code = adjusted_python_code

        root_node = self.python_linter.parse_python_code(python_code, python_results)
        self.python_linter.check_python_code_is_safe(python_code, root_node, python_results)
        # Check mako expression specific Python rules.  These need the parsed
        # tree, so they are skipped when parsing produced no root node.
        if root_node is not None:
            visitor = HtmlStringVisitor(python_code, python_results, True)
            visitor.visit(root_node)
            for unsafe_html_string_node in visitor.unsafe_html_string_nodes:
                python_results.violations.append(ExpressionRuleViolation(
                    Rules.python_wrap_html, visitor.node_to_expression(unsafe_html_string_node)
                ))
            # Over-escaped entities are only reported for pages marked as
            # default.
            if has_page_default:
                for over_escaped_entity_string_node in visitor.over_escaped_entity_string_nodes:
                    python_results.violations.append(ExpressionRuleViolation(
                        Rules.mako_html_entities, visitor.node_to_expression(over_escaped_entity_string_node)
                    ))
        python_results.prepare_results(python_code, line_comment_delim=self.LINE_COMMENT_DELIM)
        # The violations were located relative to python_code; move them to
        # their position in the template before adding them to results.
        self._shift_and_add_violations(python_results, start_offset, results)
Robert Raposa committed
2197

2198
    def _shift_and_add_violations(self, other_linter_results, start_offset, results):
        """
        Copies the violations found by another linter into the Mako results,
        moving each one to its position in the original Mako template.

        Arguments:
            other_linter_results: Results from another linter.
            start_offset: The offset of the linted code, a part of the template,
                inside the original Mako template.
            results: A list of results into which violations will be added.

        Side effect:
            Adds violations to results.  The expression of each incoming
            violation is shifted in place.

        """
        for found in other_linter_results.violations:
            shifted = found.expression
            # The other linter's indexes are relative to the snippet it was
            # given; adding start_offset makes them template positions.
            shifted.start_index += start_offset
            # An expression may have no end index; leave that as is.
            if shifted.end_index is not None:
                shifted.end_index += start_offset
            relocated = ExpressionRuleViolation(found.rule, shifted)
            results.violations.append(relocated)
Robert Raposa committed
2221

2222
    def _check_expression_and_filters(self, mako_template, expression, context, has_page_default, results):
        """
        Checks that the filters used in the given Mako expression are valid
        for the given context. Adds violation to results if there is a problem.

        The Python code inside the expression is linted as well, unless the
        context is unknown.

        Arguments:
            mako_template: The contents of the Mako template.
            expression: A Mako Expression.
            context: The context of the page in which the expression was found
                (e.g. javascript, html).
            has_page_default: True if the page is marked as default, False
                otherwise.
            results: A list of results into which violations will be added.

        """
        if context == 'unknown':
            results.violations.append(ExpressionRuleViolation(
                Rules.mako_unknown_context, expression
            ))
            return

        # Example: finds "| n, h}" when given "${x | n, h}"
        filters_regex = re.compile(r'\|([.,\w\s]*)\}')
        filters_match = filters_regex.search(expression.expression)

        # Check Python code inside expression.  The slices drop the leading
        # "${" and either the closing "}" or the whole filter section.
        if filters_match is None:
            python_code = expression.expression[2:-1]
        else:
            python_code = expression.expression[2:filters_match.start()]
        self._check_expression_python(python_code, expression.start_index + 2, has_page_default, results)

        # Check filters.
        if filters_match is None:
            # Without filters only a JavaScript context is a violation.
            if context == 'javascript':
                results.violations.append(ExpressionRuleViolation(
                    Rules.mako_invalid_js_filter, expression
                ))
            return
        # The regex above accepts any whitespace between filters (tabs and
        # line breaks included), so remove all of it, not just spaces, before
        # comparing against the known filter lists.
        filters = re.sub(r'\s+', '', filters_match.group(1)).split(",")
        if filters == ['n', 'decode.utf8']:
            # {x | n, decode.utf8} is valid in any context
            pass
        elif context == 'html':
            if filters == ['h']:
                if has_page_default:
                    # suppress this violation if the page default hasn't been set,
                    # otherwise the template might get less safe
                    results.violations.append(ExpressionRuleViolation(
                        Rules.mako_unwanted_html_filter, expression
                    ))
            else:
                results.violations.append(ExpressionRuleViolation(
                    Rules.mako_invalid_html_filter, expression
                ))
        elif context == 'javascript':
            self._check_js_expression_not_with_html(mako_template, expression, results)
            if filters == ['n', 'dump_js_escaped_json']:
                # {x | n, dump_js_escaped_json} is valid
                pass
            elif filters == ['n', 'js_escaped_string']:
                # {x | n, js_escaped_string} is valid, if surrounded by quotes
                self._check_js_string_expression_in_quotes(mako_template, expression, results)
            else:
                results.violations.append(ExpressionRuleViolation(
                    Rules.mako_invalid_js_filter, expression
                ))
Robert Raposa committed
2289

Robert Raposa committed
2290
    def _check_js_string_expression_in_quotes(self, mako_template, expression, results):
        """
        Reports a Mako expression that uses js_escaped_string without being
        wrapped in a quoted string.

        Arguments:
            mako_template: The contents of the Mako template.
            expression: A Mako Expression.
            results: A list of results into which violations will be added.
        """
        wrapping_string = self._find_string_wrapping_expression(mako_template, expression)
        if wrapping_string is not None:
            # A wrapping string was found, so the expression is quoted.
            return
        results.violations.append(
            ExpressionRuleViolation(Rules.mako_js_missing_quotes, expression)
        )
Robert Raposa committed
2305

Robert Raposa committed
2306 2307 2308 2309
    def _check_js_expression_not_with_html(self, mako_template, expression, results):
        """
        Reports a Mako expression in a JavaScript context that sits inside a
        string which also contains HTML.

        Arguments:
            mako_template: The contents of the Mako template.
            expression: A Mako Expression.
            results: A list of results into which violations will be added.
        """
        wrapping_string = self._find_string_wrapping_expression(mako_template, expression)
        if wrapping_string is None:
            # Not inside a string at all, so there is nothing to check.
            return
        # Any angle bracket in the wrapping string is treated as HTML.
        if re.search('[<>]', wrapping_string.string) is None:
            return
        results.violations.append(
            ExpressionRuleViolation(Rules.mako_js_html_string, expression)
        )

    def _find_string_wrapping_expression(self, mako_template, expression):
        """
        Looks for a string that wraps the Mako expression, searching only the
        template lines that the expression spans.

        Arguments:
            mako_template: The contents of the Mako template.
            expression: A Mako Expression.

        Returns:
            ParseString representing a scrubbed version of the wrapped string,
            where the Mako expression was replaced with "${...}", if a wrapped
            string was found.  Otherwise, returns None if none found.
        """
        template_lines = StringLines(mako_template)
        line_start = template_lines.index_to_line_start_index(expression.start_index)
        if expression.end_index is None:
            # Without an end index there is no span to search.
            return None
        line_end = template_lines.index_to_line_end_index(expression.end_index)

        # scrub out the actual expression so any code inside the expression
        # doesn't interfere with rules applied to the surrounding code (i.e.
        # checking JavaScript).
        scrubbed = (
            mako_template[line_start:expression.start_index]
            + "${...}"
            + mako_template[expression.end_index:line_end]
        )
        scrubbed_length = len(scrubbed)
        # Position of the expression relative to the start of scrubbed.
        expression_offset = expression.start_index - line_start

        search_from = 0
        while True:
            candidate = ParseString(scrubbed, search_from, scrubbed_length)
            parsed_ok = 0 <= candidate.start_index < candidate.end_index
            if not parsed_ok:
                # No further validly parsed string in the scrubbed lines.
                return None
            if candidate.start_index < expression_offset < candidate.end_index:
                # The expression starts inside this string.
                return candidate
            # Otherwise carry on after the string that was just parsed.
            search_from = candidate.end_index
Robert Raposa committed
2364

Robert Raposa committed
2365 2366 2367 2368
    def _get_contexts(self, mako_template):
        """
        Builds the list of points at which the template switches between
        HTML context and JavaScript context.

        Arguments:
            mako_template: The contents of the Mako template.

        Return:
            A list of dicts where each dict contains:
                - index: the index of the context.
                - type: the context type ('html', 'javascript' or 'unknown').
            The list always starts with an 'html' context at index 0.
        """
        contexts_re = re.compile(
            r"""
                <script.*?> |  # script tag start
                </script> |  # script tag end
                <%static:require_module(_async)?.*?> |  # require js script tag start (optionally the _async version)
                </%static:require_module(_async)?> | # require js script tag end (optionally the _async version)
                <%block[ ]*name=['"]requirejs['"]\w*> |  # require js tag start
                </%block>  # require js tag end
            """,
            re.VERBOSE | re.IGNORECASE
        )
        media_type_re = re.compile(r"""type=['"].*?['"]""", re.IGNORECASE)

        javascript_types = (
            'text/javascript', 'text/ecmascript', 'application/ecmascript', 'application/javascript',
            'text/x-mathjax-config', 'json/xblock-args',
        )
        html_types = ('text/template',)

        contexts = [{'index': 0, 'type': 'html'}]
        for tag_match in contexts_re.finditer(mako_template):
            tag = tag_match.group().lower()
            if tag.startswith("<script"):
                # A script tag is JavaScript unless its type attribute says
                # otherwise.
                context_type = 'javascript'
                type_attribute = media_type_re.search(tag)
                if type_attribute is not None:
                    # get media type (e.g. get text/javascript from
                    # type="text/javascript")
                    media_type = type_attribute.group()[6:-1].lower()
                    if media_type in html_types:
                        context_type = 'html'
                    elif media_type not in javascript_types:
                        context_type = 'unknown'
                # The new context begins right after the opening tag.
                context_index = tag_match.end()
            elif tag.startswith("</"):
                # A closing tag returns to HTML, starting at the tag itself.
                context_type = 'html'
                context_index = tag_match.start()
            else:
                # The remaining patterns are the require js start tags.
                context_type = 'javascript'
                context_index = tag_match.end()
            contexts.append({'index': context_index, 'type': context_type})

        return contexts

    def _get_context(self, contexts, index):
        """
        Looks up which context (e.g. javascript, html) is in effect at the
        given index of the template.

        Arguments:
            contexts: A list of dicts where each dict contains the 'index' of the context
                and the context 'type' (e.g. 'html' or 'javascript').  The
                scan stops at the first entry that starts after index.
            index: The index for which we want the context.

        Returns:
             The context type of the last scanned entry that starts at or
             before index; the type of the first entry if none does.
        """
        context_type = contexts[0]['type']
        for entry in contexts:
            if entry['index'] > index:
                # Stop at the first context that begins after the index.
                break
            context_type = entry['type']
        return context_type
Robert Raposa committed
2435

Robert Raposa committed
2436
    def _find_mako_expressions(self, mako_template):
        """
        Finds all the Mako expressions in a Mako template and creates a list
        of Expressions, one for each "${" that is not commented out.

        Arguments:
            mako_template: The content of the Mako template.

        Returns:
            A list of Expressions.  When no closing "}" is found for an
            expression, its Expression is created from the start index only.

        """
        start_delim = '${'
        start_index = 0
        expressions = []

        while True:
            start_index = mako_template.find(start_delim, start_index)
            if start_index < 0:
                break

            # If start of mako expression is commented out, skip it.
            uncommented_start_index = self._uncommented_start_index(mako_template, start_index)
            if uncommented_start_index != start_index:
                start_index = uncommented_start_index
                continue

            result = self._find_closing_char_index(
                start_delim, '{', '}', mako_template, start_index=start_index + len(start_delim)
            )
            if result is None:
                expression = Expression(start_index)
                # for parsing error, restart search right after the start of the
                # current expression
                start_index = start_index + len(start_delim)
            else:
                close_char_index = result['close_char_index']
                # end_index is exclusive, hence one past the closing "}".
                expression = Expression(
                    start_index,
                    end_index=close_char_index + 1,
                    template=mako_template,
                    start_delim=start_delim,
                    end_delim='}',
                    strings=result['strings'],
                )
                # restart search after the current expression
                start_index = expression.end_index
            expressions.append(expression)
        return expressions
Robert Raposa committed
2486 2487


2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521
# Directories that are never linted.  Each entry is a directory name or a
# '/'-separated relative path; _process_os_dirs() passes this tuple to
# is_skip_dir() for every directory visited by its walk.
SKIP_DIRS = (
    '.git',
    '.pycharm_helpers',
    'common/static/xmodule/modules',
    'perf_tests',
    'node_modules',
    'reports/diff_quality',
    'scripts/tests/templates',
    'spec',
    'test_root',
    'vendor',
)


def is_skip_dir(skip_dirs, directory):
    """
    Determines whether a directory should be skipped or linted.

    A directory is skipped when one of the skip directories occurs in its
    path as a run of whole path components.  For example, 'spec' skips
    'lms/spec' and 'lms/spec/js', but not 'lms/special'.

    Arguments:
        skip_dirs: The configured directories to be skipped.
        directory: The current directory to be tested.

    Returns:
         True if the directory should be skipped, and False otherwise.

    """
    # Skip directories are written with forward slashes, so test against a
    # forward-slash form of the path (a no-op where os.sep is already '/').
    normalized_directory = directory.replace(os.sep, '/')
    for skip_dir in skip_dirs:
        # Anchor on a path-component boundary at both ends.  re.match() only
        # anchors the start of the string, so a pattern without an explicit
        # end boundary also accepts names that merely begin with the skip
        # directory.  search() with these boundaries also avoids the heavy
        # backtracking of nested "(.*/)*" groups.
        skip_dir_regex = re.compile(
            r"(?:^|/){}(?:/|$)".format(re.escape(skip_dir)))
        if skip_dir_regex.search(normalized_directory) is not None:
            return True
    return False


2522
def _process_file(full_path, template_linters, options, summary_results, out):
    """
    For each linter, lints the provided file.  This means finding and printing
    violations.

    Arguments:
        full_path: The full path of the file to lint.
        template_linters: A list of linting objects.
        options: A list of the options.
        summary_results: A SummaryResults with a summary of the violations.
        out: output file

    """
    # Each linter takes the directory and the file name separately.
    directory = os.path.dirname(full_path)
    file_name = os.path.basename(full_path)
    for template_linter in template_linters:
        results = template_linter.process_file(directory, file_name)
        results.print_results(options, summary_results, out)
2541 2542


2543
def _process_os_dir(directory, files, template_linters, options, summary_results, out):
    """
    Lints every file of one directory, in case-insensitive name order.

    Arguments:
        directory: Directory being linted.
        files: All files in the directory to be linted.
        template_linters: A list of linting objects.
        options: A list of the options.
        summary_results: A SummaryResults with a summary of the violations.
        out: output file

    """
    ordered_names = sorted(files, key=lambda name: name.lower())
    # Paths are built lazily, one per file, just before that file is linted.
    full_paths = (os.path.join(directory, name) for name in ordered_names)
    for full_path in full_paths:
        _process_file(full_path, template_linters, options, summary_results, out)
2559 2560


2561
def _process_os_dirs(starting_dir, template_linters, options, summary_results, out):
    """
    For each linter, lints all the directories in the starting directory.

    Directories for which is_skip_dir() is True are neither linted nor
    descended into.

    Arguments:
        starting_dir: The initial directory to begin the walk.
        template_linters: A list of linting objects.
        options: A list of the options.
        summary_results: A SummaryResults with a summary of the violations.
        out: output file

    """
    for root, dirs, files in os.walk(starting_dir):
        if is_skip_dir(SKIP_DIRS, root):
            # Empty the list in place: os.walk() only prunes its descent when
            # the list object it yielded is modified.  "del dirs" would just
            # unbind the local name and the skipped tree would still be
            # walked.
            del dirs[:]
            continue
        # Sorting in place makes os.walk() visit the subdirectories in
        # case-insensitive name order.
        dirs.sort(key=lambda s: s.lower())
        _process_os_dir(root, files, template_linters, options, summary_results, out)
2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602


def _lint(file_or_dir, template_linters, options, summary_results, out):
    """
    Lints a single file, or every file below a directory, with each linter
    and then prints the summary.

    Arguments:
        file_or_dir: The file or initial directory to lint.  When None, the
            current directory (".") is linted.
        template_linters: A list of linting objects.
        options: A list of the options.
        summary_results: A SummaryResults with a summary of the violations.
        out: output file

    Raises:
        ValueError: If file_or_dir is given but does not exist.

    """
    if file_or_dir is None:
        # No path given: walk the current directory.
        _process_os_dirs(".", template_linters, options, summary_results, out)
    elif os.path.isfile(file_or_dir):
        _process_file(file_or_dir, template_linters, options, summary_results, out)
    elif os.path.exists(file_or_dir):
        # Exists but is not a regular file: walk it as a directory.
        _process_os_dirs(file_or_dir, template_linters, options, summary_results, out)
    else:
        raise ValueError("Path [{}] is not a valid file or directory.".format(file_or_dir))

    summary_results.print_results(options, out)
2606 2607


2608
def main():
    """
    Command line entry point of the linter. Use --help option for help.

    Parses the command line, lints the given path with every linter and
    prints all violations to standard output.
    """
    epilog = (
        "For more help using the safe template linter, including details on how\n"
        "to understand and fix any violations, read the docs here:\n"
        "\n"
        # pylint: disable=line-too-long
        "  http://edx.readthedocs.org/projects/edx-developer-guide/en/latest/conventions/safe_templates.html#safe-template-linter\n"
    )

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Checks that templates are safe.',
        epilog=epilog,
    )

    # (flag, destination, help text) of each on/off switch.
    switches = (
        ('--list-files', 'list_files', 'Only display the filenames that contain violations.'),
        ('--rule-totals', 'rule_totals', 'Display the totals for each rule.'),
        ('--verbose', 'verbose', 'Print multiple lines where possible for additional context of violations.'),
    )
    for flag, destination, help_text in switches:
        parser.add_argument(flag, dest=destination, action='store_true', help=help_text)
    parser.add_argument('path', nargs="?", default=None, help='A file to lint or directory to recursively lint.')

    args = parser.parse_args()

    # The options are keyed by the destination name of each switch.
    options = {}
    for _, destination, _ in switches:
        options[destination] = getattr(args, destination)

    template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()]
    summary_results = SummaryResults()

    _lint(args.path, template_linters, options, summary_results, out=sys.stdout)
2650 2651 2652 2653


# Run the linter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()