Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
fb13dc64
Commit
fb13dc64
authored
May 02, 2016
by
Robert Raposa
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Move MakoLinter.
parent
a6b9ba7d
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
667 additions
and
667 deletions
+667
-667
scripts/safe_template_linter.py
+667
-667
No files found.
scripts/safe_template_linter.py
View file @
fb13dc64
...
@@ -1407,855 +1407,855 @@ class JavaScriptLinter(BaseLinter):
...
@@ -1407,855 +1407,855 @@ class JavaScriptLinter(BaseLinter):
return
False
return
False
class
MakoTemplate
Linter
(
BaseLinter
):
class
Python
Linter
(
BaseLinter
):
"""
"""
The linter for Mako template files.
The linter for Python files.
The current implementation of the linter does naive Python parsing. It does
not use the parser. One known issue is that parsing errors found inside a
docstring need to be disabled, rather than being automatically skipped.
Skipping docstrings is an enhancement that could be added.
"""
"""
javaScriptLinter
=
JavaScriptLinter
()
def
__init__
(
self
):
"""
Init method.
"""
super
(
PythonLinter
,
self
)
.
__init__
()
self
.
_skip_python_dirs
=
self
.
_skip_dirs
+
(
'tests'
,
'test/acceptance'
)
def
process_file
(
self
,
directory
,
file_name
):
def
process_file
(
self
,
directory
,
file_name
):
"""
"""
Process file to determine if it is a
Mako template
file and
Process file to determine if it is a
Python
file and
if it is safe.
if it is safe.
Arguments:
Arguments:
directory (string): The directory of the file to be checked
directory (string): The directory of the file to be checked
file_name (string): A filename for a potential
Mako
file
file_name (string): A filename for a potential
Python
file
Returns:
Returns:
The file results containing any violations.
The file results containing any violations.
"""
"""
mako_
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
results
=
FileResults
(
mako_
file_full_path
)
results
=
FileResults
(
file_full_path
)
if
not
results
.
is_file
:
if
not
results
.
is_file
:
return
results
return
results
if
not
self
.
_is_valid_directory
(
directory
)
:
if
file_name
.
lower
()
.
endswith
(
'.py'
)
is
False
:
return
results
return
results
# TODO: When safe-by-default is turned on at the platform level, will we:
# skip this linter code (i.e. safe_template_linter.py)
# 1. Turn it on for .html only, or
if
file_name
==
os
.
path
.
basename
(
__file__
):
# 2. Turn it on for all files, and have different rulesets that have
# different rules of .xml, .html, .js, .txt Mako templates (e.g. use
# the n filter to turn off h for some of these)?
# For now, we only check .html and .xml files
if
not
(
file_name
.
lower
()
.
endswith
(
'.html'
)
or
file_name
.
lower
()
.
endswith
(
'.xml'
)):
return
results
return
results
return
self
.
_load_and_check_file_is_safe
(
mako_file_full_path
,
self
.
_check_mako_file_is_safe
,
results
)
if
not
self
.
_is_valid_directory
(
self
.
_skip_python_dirs
,
directory
):
return
results
def
_is_valid_directory
(
self
,
directory
):
"""
Determines if the provided directory is a directory that could contain
Mako template files that need to be linted.
Arguments:
directory: The directory to be linted.
Returns:
True if this directory should be linted for Mako template violations
and False otherwise.
"""
if
self
.
_is_skip_dir
(
self
.
_skip_dirs
,
directory
):
return
False
# TODO: This is an imperfect guess concerning the Mako template
# directories. This needs to be reviewed before turning on safe by
# default at the platform level.
if
(
'/templates/'
in
directory
)
or
directory
.
endswith
(
'/templates'
):
return
True
return
False
return
self
.
_load_and_check_file_is_safe
(
file_full_path
,
self
.
check_python_file_is_safe
,
results
)
def
_check_mako_file_is_safe
(
self
,
mako_template
,
results
):
def
check_python_file_is_safe
(
self
,
file_contents
,
results
):
"""
"""
Checks for violations in a
Mako templat
e.
Checks for violations in a
Python fil
e.
Arguments:
Arguments:
mako_template: The contents of the Mako templat
e.
file_contents: The contents of the Python fil
e.
results: A file results objects to which violations will be added.
results: A file results objects to which violations will be added.
"""
"""
if
self
.
_is_django_template
(
mako_template
):
self
.
_check_concat_with_html
(
file_contents
,
Rules
.
python_concat_html
,
results
)
return
self
.
_check_deprecated_display_name
(
file_contents
,
results
)
has_page_default
=
self
.
_has_page_default
(
mako_template
,
results
)
self
.
_check_custom_escape
(
file_contents
,
results
)
self
.
_check_mako_expressions
(
mako_template
,
has_page_default
,
results
)
self
.
_check_html
(
file_contents
,
results
)
results
.
prepare_results
(
mako_template
,
line_comment_delim
=
'##'
)
results
.
prepare_results
(
file_contents
,
line_comment_delim
=
'#'
)
def
_is_django_template
(
self
,
mako_template
):
"""
Determines if the template is actually a Django template.
Arguments:
mako_template: The template code.
Returns:
True if this is really a Django template, and False otherwise.
"""
if
re
.
search
(
'({
%.*%
})|({{.*}})'
,
mako_template
)
is
not
None
:
return
True
return
False
def
_get_page_tag_count
(
self
,
mako_template
):
"""
Determines the number of page expressions in the Mako template. Ignores
page expressions that are commented out.
Arguments:
mako_template: The contents of the Mako template.
Returns:
The number of page expressions
"""
count
=
len
(
re
.
findall
(
'<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
count_commented
=
len
(
re
.
findall
(
r'##\s+<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
return
max
(
0
,
count
-
count_commented
)
def
_
has_page_default
(
self
,
mako_template
,
results
):
def
_
check_deprecated_display_name
(
self
,
file_contents
,
results
):
"""
"""
Checks
if the Mako template contains the page expression marking it as
Checks
that the deprecated display_name_with_default_escaped is not
safe by default
.
used. Adds violation to results if there is a problem
.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
Side effect:
Adds violations regarding page default if necessary
Returns:
True if the template has the page default, and False otherwise.
"""
"""
page_tag_count
=
self
.
_get_page_tag_count
(
mako_template
)
for
match
in
re
.
finditer
(
r'\.display_name_with_default_escaped'
,
file_contents
):
# check if there are too many page expressions
expression
=
Expression
(
match
.
start
(),
match
.
end
())
if
2
<=
page_tag_count
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_multiple_page_tags
))
Rules
.
python_deprecated_display_name
,
expression
return
False
))
# make sure there is exactly 1 page expression, excluding commented out
# page expressions, before proceeding
elif
page_tag_count
!=
1
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
False
# check that safe by default (h filter) is turned on
page_h_filter_regex
=
re
.
compile
(
'<
%
page[^>]*expression_filter=(?:"h"|
\'
h
\'
)[^>]*/>'
)
page_match
=
page_h_filter_regex
.
search
(
mako_template
)
if
not
page_match
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
page_match
def
_check_
mako_expressions
(
self
,
mako_template
,
has_page_default
,
results
):
def
_check_
custom_escape
(
self
,
file_contents
,
results
):
"""
"""
Searches for Mako expressions and then checks if they contain
Checks for custom escaping calls, rather than using a standard escaping
violations, including checking JavaScript contexts for JavaScript
method.
violations.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
expressions
=
self
.
_find_mako_expressions
(
mako_template
)
for
match
in
re
.
finditer
(
"(<.*<|<.*<)"
,
file_contents
):
contexts
=
self
.
_get_contexts
(
mako_template
)
expression
=
Expression
(
match
.
start
(),
match
.
end
())
self
.
_check_javascript_contexts
(
mako_template
,
contexts
,
results
)
for
expression
in
expressions
:
if
expression
.
end_index
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unparseable_expression
,
expression
Rules
.
python_custom_escape
,
expression
))
))
continue
context
=
self
.
_get_context
(
contexts
,
expression
.
start_index
)
self
.
_check_filters
(
mako_template
,
expression
,
context
,
has_page_default
,
results
)
self
.
_check_deprecated_display_name
(
expression
,
results
)
self
.
_check_html_and_text
(
expression
,
has_page_default
,
results
)
def
_check_
javascript_contexts
(
self
,
mako_template
,
contex
ts
,
results
):
def
_check_
html
(
self
,
file_conten
ts
,
results
):
"""
"""
Lint the JavaScript contexts for JavaScript violations inside a Mako
Checks many rules related to HTML in a Python file.
template.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
contexts: A list of context dicts with 'type' and 'index'.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
Side effect:
Adds JavaScript violations to results.
"""
"""
javascript_start_index
=
None
# Text() Expressions keyed by its end index
for
context
in
contexts
:
text_calls_by_end_index
=
{}
if
context
[
'type'
]
==
'javascript'
:
# HTML() Expressions keyed by its end index
if
javascript_start_index
<
0
:
html_calls_by_end_index
=
{}
javascript_start_index
=
context
[
'index'
]
start_index
=
0
else
:
while
True
:
if
javascript_start_index
is
not
None
:
javascript_end_index
=
context
[
'index'
]
javascript_code
=
mako_template
[
javascript_start_index
:
javascript_end_index
]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
javascript_start_index
=
None
if
javascript_start_index
is
not
None
:
javascript_code
=
mako_template
[
javascript_start_index
:]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
def
_check_javascript_context
(
self
,
javascript_code
,
start_offset
,
results
):
# check HTML(), Text() and format() calls
"""
result
=
self
.
_check_html_text_format
(
Lint a single JavaScript context for JavaScript violations inside a Mako
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
template.
)
next_start_index
=
result
[
'next_start_index'
]
interpolate_end_index
=
result
[
'interpolate_end_index'
]
Arguments:
# check for interpolation including HTML outside of function calls
javascript_code: The template contents of the JavaScript context.
self
.
_check_interpolate_with_html
(
start_offset: The offset of the JavaScript context inside the
file_contents
,
start_index
,
interpolate_end_index
,
results
original Mako template.
)
results: A list of results into which violations will be added.
Side effect:
# advance the search
Adds JavaScript violations to results.
start_index
=
next_start_index
"""
# end if there is nothing left to search
javascript_results
=
FileResults
(
""
)
if
interpolate_end_index
is
None
:
self
.
javaScriptLinter
.
check_javascript_file_is_safe
(
javascript_code
,
javascript_results
)
break
# translate the violations into the location within the original
# Mako template
for
violation
in
javascript_results
.
violations
:
expression
=
violation
.
expression
expression
.
start_index
+=
start_offset
if
expression
.
end_index
is
not
None
:
expression
.
end_index
+=
start_offset
results
.
violations
.
append
(
ExpressionRuleViolation
(
violation
.
rule
,
expression
))
def
_check_deprecated_display_name
(
self
,
expression
,
results
):
def
_check_html_text_format
(
self
,
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
):
"""
"""
Checks
that the deprecated display_name_with_default_escaped is not
Checks
for HTML(), Text() and format() calls, and various rules related
used. Adds violation to results if there is a problem
.
to these calls
.
Arguments:
Arguments:
expression: An Expression
file_contents: The contents of the Python file
start_index: The index at which to begin searching for a function
call.
text_calls_by_end_index: Text() Expressions keyed by its end index.
html_calls_by_end_index: HTML() Expressions keyed by its end index.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
Returns:
if
'.display_name_with_default_escaped'
in
expression
.
expression
:
A dict with the following keys:
results
.
violations
.
append
(
ExpressionRuleViolation
(
'next_start_index': The start index of the next search for a
Rules
.
python_deprecated_display_name
,
expression
function call.
))
'interpolate_end_index': The end index of the next next search
for interpolation with html, or None if the end of file
def
_check_html_and_text
(
self
,
expression
,
has_page_default
,
results
):
should be used.
"""
Checks rules related to proper use of HTML() and Text().
Arguments:
expression: A Mako Expression.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
"""
"""
expression_inner
=
expression
.
expression_inner
# used to find opening of .format(), Text() and HTML() calls
# use find to get the template relative inner expression start index
regex_function_open
=
re
.
compile
(
r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()"
)
# due to possible skipped white space
interpolate_end_index
=
None
template_inner_start_index
=
expression
.
start_index
end_index
=
None
template_inner_start_index
+=
expression
.
expression
.
find
(
expression_inner
)
strings
=
None
if
'HTML('
in
expression_inner
:
html_calls
=
[]
if
expression_inner
.
startswith
(
'HTML('
):
while
True
:
close_paren_index
=
self
.
_find_closing_char_index
(
# first search for HTML(), Text(), or .format()
None
,
"("
,
")"
,
expression_inner
,
start_index
=
len
(
'HTML('
)
if
end_index
is
None
:
)[
'close_char_index'
]
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
)
# check that the close paren is at the end of the stripped expression.
else
:
if
close_paren_index
!=
len
(
expression_inner
)
-
1
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
,
end_index
)
results
.
violations
.
append
(
ExpressionRuleViolation
(
if
function_match
is
not
None
:
Rules
.
mako_html_alone
,
expression
if
interpolate_end_index
is
None
:
))
interpolate_end_index
=
function_match
.
start
()
elif
expression_inner
.
startswith
(
'Text('
)
is
False
:
function_close_result
=
self
.
_find_closing_char_index
(
None
,
'('
,
')'
,
file_contents
,
start_index
=
function_match
.
end
(),
)
if
function_close_result
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_requires_text
,
expression
Rules
.
python_parse_error
,
Expression
(
function_match
.
start
())
))
))
else
:
else
:
if
'Text('
in
expression_inner
:
expression
=
Expression
(
function_match
.
start
(),
function_close_result
[
'close_char_index'
]
+
1
,
file_contents
,
start_delim
=
function_match
.
group
(),
end_delim
=
")"
)
# if this an outer most Text(), HTML(), or format() call
if
end_index
is
None
:
end_index
=
expression
.
end_index
interpolate_end_index
=
expression
.
start_index
strings
=
function_close_result
[
'strings'
]
if
function_match
.
group
()
==
'.format('
:
if
'HTML('
in
expression
.
expression_inner
or
'Text('
in
expression
.
expression_inner
:
is_wrapped_with_text
=
str
(
function_match
.
start
())
in
text_calls_by_end_index
.
keys
()
is_wrapped_with_html
=
str
(
function_match
.
start
())
in
html_calls_by_end_index
.
keys
()
if
is_wrapped_with_text
is
False
and
is_wrapped_with_html
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_text_redundan
t
,
expression
Rules
.
python_requires_html_or_tex
t
,
expression
))
))
else
:
# expression is 'HTML(' or 'Text('
# strings to be checked for HTML
# HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
unwrapped_html_strings
=
expression
.
strings
# Generally, format() would be the issue if there is one.
for
match
in
re
.
finditer
(
r"(HTML\(|Text\()"
,
expression_inner
):
if
regex_function_open
.
search
(
expression
.
expression_inner
)
is
not
None
:
result
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
match
.
end
())
if
result
is
not
None
:
close_paren_index
=
result
[
'close_char_index'
]
# the argument sent to HTML() or Text()
argument
=
expression_inner
[
match
.
end
():
close_paren_index
]
if
".format("
in
argument
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
Rules
.
python_close_before_format
,
expression
))
))
if
match
.
group
()
==
"HTML("
:
if
function_match
.
group
()
==
'Text('
:
# remove expression strings wrapped in HTML()
text_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
for
string
in
list
(
unwrapped_html_strings
):
else
:
# function_match.group() == 'HTML(':
html_inner_start_index
=
template_inner_start_index
+
match
.
end
()
html_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
html_inner_end_index
=
template_inner_start_index
+
close_paren_index
html_calls
.
append
(
expression
)
if
html_inner_start_index
<=
string
.
start_index
and
string
.
end_index
<=
html_inner_end_index
:
unwrapped_html_strings
.
remove
(
string
)
# check strings not wrapped in HTML() for '<'
start_index
=
function_match
.
end
()
for
string
in
unwrapped_html_strings
:
else
:
if
'<'
in
string
.
string_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
expression
))
break
# check strings not wrapped in HTML() for HTML entities
if
has_page_default
:
for
string
in
unwrapped_html_strings
:
if
re
.
search
(
r"&[#]?[a-zA-Z0-9]+;"
,
string
.
string_inner
):
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_entities
,
expression
))
break
break
def
_check_filters
(
self
,
mako_template
,
expression
,
context
,
has_page_default
,
results
):
# checks strings in the outer most call to ensure they are properly
# wrapped with HTML()
self
.
_check_format_html_strings_wrapped
(
strings
,
html_calls
,
results
)
# compute where to continue the search
if
function_match
is
None
and
end_index
is
None
:
next_start_index
=
start_index
elif
end_index
is
None
:
next_start_index
=
function_match
.
end
()
else
:
next_start_index
=
end_index
return
{
'next_start_index'
:
next_start_index
,
'interpolate_end_index'
:
interpolate_end_index
,
}
def
_check_format_html_strings_wrapped
(
self
,
strings
,
html_calls
,
results
):
"""
"""
Checks that
the filters used in the given Mako expression are valid
Checks that
any string inside a format call that seems to contain HTML
for the given context. Adds violation to results if there is a problem
.
is wrapped with a call to HTML()
.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
strings: A list of ParseStrings for each string inside the format()
expression: A Mako Expression.
call.
context: The context of the page in which the expression was found
html_calls: A list of Expressions representing all of the HTML()
(e.g. javascript, html).
calls inside the format() call.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
if
context
==
'unknown'
:
html_strings
=
[]
results
.
violations
.
append
(
ExpressionRuleViolation
(
html_wrapped_strings
=
[]
Rules
.
mako_unknown_context
,
expression
if
strings
is
not
None
:
))
# find all strings that contain HTML
return
for
string
in
strings
:
if
'<'
in
string
.
string
:
# Example: finds "| n, h}" when given "${x | n, h}"
html_strings
.
append
(
string
)
filters_regex
=
re
.
compile
(
r'\|([.,\w\s]*)\}'
)
# check if HTML string is appropriately wrapped
filters_match
=
filters_regex
.
search
(
expression
.
expression
)
for
html_call
in
html_calls
:
if
filters_match
is
None
:
if
html_call
.
start_index
<
string
.
start_index
<
string
.
end_index
<
html_call
.
end_index
:
if
context
==
'javascript'
:
html_wrapped_strings
.
append
(
string
)
results
.
violations
.
append
(
ExpressionRuleViolation
(
break
Rules
.
mako_invalid_js_filter
,
expression
# loop through all unwrapped strings
))
for
unsafe_string
in
set
(
html_strings
)
-
set
(
html_wrapped_strings
):
return
unsafe_string_expression
=
Expression
(
unsafe_string
.
start_index
)
filters
=
filters_match
.
group
(
1
)
.
replace
(
" "
,
""
)
.
split
(
","
)
if
filters
==
[
'n'
,
'decode.utf8'
]:
# {x | n, decode.utf8} is valid in any context
pass
elif
context
==
'html'
:
if
filters
==
[
'h'
]:
if
has_page_default
:
# suppress this violation if the page default hasn't been set,
# otherwise the template might get less safe
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unwanted_html_filter
,
expression
))
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_html_filter
,
expression
))
elif
context
==
'javascript'
:
self
.
_check_js_expression_not_with_html
(
mako_template
,
expression
,
results
)
if
filters
==
[
'n'
,
'dump_js_escaped_json'
]:
# {x | n, dump_js_escaped_json} is valid
pass
elif
filters
==
[
'n'
,
'js_escaped_string'
]:
# {x | n, js_escaped_string} is valid, if surrounded by quotes
self
.
_check_js_string_expression_in_quotes
(
mako_template
,
expression
,
results
)
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
Rules
.
python_wrap_html
,
unsafe_string_
expression
))
))
def
_check_
js_string_expression_in_quotes
(
self
,
mako_template
,
expression
,
results
):
def
_check_
interpolate_with_html
(
self
,
file_contents
,
start_index
,
end_index
,
results
):
"""
"""
Checks that a Mako expression using js_escaped_string is surrounded by
Find interpolations with html that fall outside of any calls to HTML(),
quotes
.
Text(), and .format()
.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
expression: A Mako Expression.
start_index: The index to start the search, or None if nothing to
search
end_index: The index to end the search, or None if the end of file
should be used.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
# used to find interpolation with HTML
if
parse_string
is
None
:
pattern_interpolate_html_inner
=
r'(<.*
%
s|
%
s.*<|<.*{\w*}|{\w*}.*<)'
regex_interpolate_html
=
re
.
compile
(
r"""(".*{}.*"|'.*{}.*')"""
.
format
(
pattern_interpolate_html_inner
,
pattern_interpolate_html_inner
))
if
end_index
is
None
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
)
else
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
,
end_index
)
for
match_html_string
in
interpolate_string_iter
:
expression
=
Expression
(
match_html_string
.
start
(),
match_html_string
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_missing_quotes
,
expression
Rules
.
python_interpolate_html
,
expression
))
))
def
_check_js_expression_not_with_html
(
self
,
mako_template
,
expression
,
results
):
"""
Checks that a Mako expression in a JavaScript context does not appear in
a string that also contains HTML.
Arguments:
class
MakoTemplateLinter
(
BaseLinter
):
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
"""
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
The linter for Mako template files.
if
parse_string
is
not
None
and
re
.
search
(
'[<>]'
,
parse_string
.
string
)
is
not
None
:
"""
results
.
violations
.
append
(
ExpressionRuleViolation
(
javaScriptLinter
=
JavaScriptLinter
()
Rules
.
mako_js_html_string
,
expression
))
def
_find_string_wrapping_expression
(
self
,
mako_template
,
expression
):
def
process_file
(
self
,
directory
,
file_name
):
"""
"""
Finds the string wrapping the Mako expression if there is one.
Process file to determine if it is a Mako template file and
if it is safe.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
directory (string): The directory of the file to be checked
expression: A Mako Expression.
file_name (string): A filename for a potential Mako file
Returns:
Returns:
ParseString representing a scrubbed version of the wrapped string,
The file results containing any violations.
where the Mako expression was replaced with "${...}", if a wrapped
string was found. Otherwise, returns None if none found.
"""
lines
=
StringLines
(
mako_template
)
start_index
=
lines
.
index_to_line_start_index
(
expression
.
start_index
)
if
expression
.
end_index
is
not
None
:
end_index
=
lines
.
index_to_line_end_index
(
expression
.
end_index
)
else
:
return
None
# scrub out the actual expression so any code inside the expression
# doesn't interfere with rules applied to the surrounding code (i.e.
# checking JavaScript).
scrubbed_lines
=
""
.
join
((
mako_template
[
start_index
:
expression
.
start_index
],
"${...}"
,
mako_template
[
expression
.
end_index
:
end_index
]
))
adjusted_start_index
=
expression
.
start_index
-
start_index
start_index
=
0
while
True
:
parse_string
=
ParseString
(
scrubbed_lines
,
start_index
,
len
(
scrubbed_lines
))
# check for validly parsed string
if
0
<=
parse_string
.
start_index
<
parse_string
.
end_index
:
# check if expression is contained in the given string
if
parse_string
.
start_index
<
adjusted_start_index
<
parse_string
.
end_index
:
return
parse_string
else
:
# move to check next string
start_index
=
parse_string
.
end_index
else
:
break
return
None
def
_get_contexts
(
self
,
mako_template
):
"""
Returns a data structure that represents the indices at which the
template changes from HTML context to JavaScript and back.
Return:
A list of dicts where each dict contains:
- index: the index of the context.
- type: the context type (e.g. 'html' or 'javascript').
"""
"""
contexts_re
=
re
.
compile
(
mako_file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
r"""
results
=
FileResults
(
mako_file_full_path
)
<script.*?> | # script tag start
</script> | # script tag end
<
%
static:require_module.*?> | # require js script tag start
</
%
static:require_module> | # require js script tag end
<
%
block[ ]*name=['"]requirejs['"]\w*> | # require js tag start
</
%
block> # require js tag end
"""
,
re
.
VERBOSE
|
re
.
IGNORECASE
)
media_type_re
=
re
.
compile
(
r"""type=['"].*?['"]"""
,
re
.
IGNORECASE
)
contexts
=
[{
'index'
:
0
,
'type'
:
'html'
}]
javascript_types
=
[
'text/javascript'
,
'text/ecmascript'
,
'application/ecmascript'
,
'application/javascript'
,
'text/x-mathjax-config'
,
'json/xblock-args'
]
html_types
=
[
'text/template'
]
for
context
in
contexts_re
.
finditer
(
mako_template
):
match_string
=
context
.
group
()
.
lower
()
if
match_string
.
startswith
(
"<script"
):
match_type
=
media_type_re
.
search
(
match_string
)
context_type
=
'javascript'
if
match_type
is
not
None
:
# get media type (e.g. get text/javascript from
# type="text/javascript")
match_type
=
match_type
.
group
()[
6
:
-
1
]
.
lower
()
if
match_type
in
html_types
:
context_type
=
'html'
elif
match_type
not
in
javascript_types
:
context_type
=
'unknown'
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
context_type
})
elif
match_string
.
startswith
(
"</"
):
contexts
.
append
({
'index'
:
context
.
start
(),
'type'
:
'html'
})
else
:
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
'javascript'
})
return
contexts
if
not
results
.
is_file
:
return
results
def
_get_context
(
self
,
contexts
,
index
):
if
not
self
.
_is_valid_directory
(
directory
):
"""
return
results
Gets the context (e.g. javascript, html) of the template at the given
index.
Arguments:
# TODO: When safe-by-default is turned on at the platform level, will we:
contexts: A list of dicts where each dict contains the 'index' of the context
# 1. Turn it on for .html only, or
and the context 'type' (e.g. 'html' or 'javascript').
# 2. Turn it on for all files, and have different rulesets that have
index: The index for which we want the context.
# different rules of .xml, .html, .js, .txt Mako templates (e.g. use
# the n filter to turn off h for some of these)?
# For now, we only check .html and .xml files
if
not
(
file_name
.
lower
()
.
endswith
(
'.html'
)
or
file_name
.
lower
()
.
endswith
(
'.xml'
)):
return
results
Returns:
return
self
.
_load_and_check_file_is_safe
(
mako_file_full_path
,
self
.
_check_mako_file_is_safe
,
results
)
The context (e.g. javascript or html) for the given index.
"""
current_context
=
contexts
[
0
][
'type'
]
for
context
in
contexts
:
if
context
[
'index'
]
<=
index
:
current_context
=
context
[
'type'
]
else
:
break
return
current_context
def
_
find_mako_expressions
(
self
,
mako_template
):
def
_
is_valid_directory
(
self
,
directory
):
"""
"""
Finds all the Mako expressions in a Mako template and creates a list
Determines if the provided directory is a directory that could contain
of dicts for each expression
.
Mako template files that need to be linted
.
Arguments:
Arguments:
mako_template: The content of the Mako template
.
directory: The directory to be linted
.
Returns:
Returns:
A list of Expressions.
True if this directory should be linted for Mako template violations
and False otherwise.
"""
"""
start_delim
=
'${'
if
self
.
_is_skip_dir
(
self
.
_skip_dirs
,
directory
):
start_index
=
0
return
False
expressions
=
[]
while
True
:
start_index
=
mako_template
.
find
(
start_delim
,
start_index
)
if
start_index
<
0
:
break
result
=
self
.
_find_closing_char_index
(
# TODO: This is an imperfect guess concerning the Mako template
start_delim
,
'{'
,
'}'
,
mako_template
,
start_index
=
start_index
+
len
(
start_delim
)
# directories. This needs to be reviewed before turning on safe by
)
# default at the platform level.
if
result
is
None
:
if
(
'/templates/'
in
directory
)
or
directory
.
endswith
(
'/templates'
):
expression
=
Expression
(
start_index
)
return
True
# for parsing error, restart search right after the start of the
# current expression
start_index
=
start_index
+
len
(
start_delim
)
else
:
close_char_index
=
result
[
'close_char_index'
]
expression
=
mako_template
[
start_index
:
close_char_index
+
1
]
expression
=
Expression
(
start_index
,
end_index
=
close_char_index
+
1
,
template
=
mako_template
,
start_delim
=
start_delim
,
end_delim
=
'}'
,
strings
=
result
[
'strings'
],
)
# restart search after the current expression
start_index
=
expression
.
end_index
expressions
.
append
(
expression
)
return
expressions
return
False
class
PythonLinter
(
BaseLinter
):
def
_check_mako_file_is_safe
(
self
,
mako_template
,
results
):
"""
"""
The linter for Python files
.
Checks for violations in a Mako template
.
The current implementation of the linter does naive Python parsing. It does
Arguments:
not use the parser. One known issue is that parsing errors found inside a
mako_template: The contents of the Mako template.
docstring need to be disabled, rather than being automatically skipped.
results: A file results objects to which violations will be added.
Skipping docstrings is an enhancement that could be added.
"""
def
__init__
(
self
):
"""
"""
Init method.
if
self
.
_is_django_template
(
mako_template
):
"""
return
super
(
PythonLinter
,
self
)
.
__init__
()
has_page_default
=
self
.
_has_page_default
(
mako_template
,
results
)
self
.
_skip_python_dirs
=
self
.
_skip_dirs
+
(
'tests'
,
'test/acceptance'
)
self
.
_check_mako_expressions
(
mako_template
,
has_page_default
,
results
)
results
.
prepare_results
(
mako_template
,
line_comment_delim
=
'##'
)
def
process_file
(
self
,
directory
,
file_nam
e
):
def
_is_django_template
(
self
,
mako_templat
e
):
"""
"""
Process file to determine if it is a Python file and
Determines if the template is actually a Django template.
if it is safe.
Arguments:
Arguments:
directory (string): The directory of the file to be checked
mako_template: The template code.
file_name (string): A filename for a potential Python file
Returns:
Returns:
T
he file results containing any violations
.
T
rue if this is really a Django template, and False otherwise
.
"""
"""
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
if
re
.
search
(
'({
%.*%
})|({{.*}})'
,
mako_template
)
is
not
None
:
results
=
FileResults
(
file_full_path
)
return
True
return
False
if
not
results
.
is_file
:
return
results
if
file_name
.
lower
()
.
endswith
(
'.py'
)
is
False
:
return
results
# skip this linter code (i.e. safe_template_linter.py)
if
file_name
==
os
.
path
.
basename
(
__file__
):
return
results
if
not
self
.
_is_valid_directory
(
self
.
_skip_python_dirs
,
directory
):
return
results
return
self
.
_load_and_check_file_is_safe
(
file_full_path
,
self
.
check_python_file_is_safe
,
results
)
def
check_python_file_is_safe
(
self
,
file_contents
,
results
):
def
_get_page_tag_count
(
self
,
mako_template
):
"""
"""
Checks for violations in a Python file.
Determines the number of page expressions in the Mako template. Ignores
page expressions that are commented out.
Arguments:
Arguments:
file_contents: The contents of the Python file.
mako_template: The contents of the Mako template.
results: A file results objects to which violations will be added.
Returns:
The number of page expressions
"""
"""
self
.
_check_concat_with_html
(
file_contents
,
Rules
.
python_concat_html
,
results
)
count
=
len
(
re
.
findall
(
'<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
self
.
_check_deprecated_display_name
(
file_contents
,
results
)
count_commented
=
len
(
re
.
findall
(
r'##\s+<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
self
.
_check_custom_escape
(
file_contents
,
results
)
return
max
(
0
,
count
-
count_commented
)
self
.
_check_html
(
file_contents
,
results
)
results
.
prepare_results
(
file_contents
,
line_comment_delim
=
'#'
)
def
_
check_deprecated_display_name
(
self
,
file_contents
,
results
):
def
_
has_page_default
(
self
,
mako_template
,
results
):
"""
"""
Checks
that the deprecated display_name_with_default_escaped is not
Checks
if the Mako template contains the page expression marking it as
used. Adds violation to results if there is a problem
.
safe by default
.
Arguments:
Arguments:
file_contents: The contents of the Python file
mako_template: The contents of the Mako template.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
Side effect:
for
match
in
re
.
finditer
(
r'\.display_name_with_default_escaped'
,
file_contents
):
Adds violations regarding page default if necessary
expression
=
Expression
(
match
.
start
(),
match
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_deprecated_display_name
,
expression
))
def
_check_custom_escape
(
self
,
file_contents
,
results
):
Returns:
"""
True if the template has the page default, and False otherwise.
Checks for custom escaping calls, rather than using a standard escaping
method.
Arguments:
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
"""
"""
for
match
in
re
.
finditer
(
"(<.*<|<.*<)"
,
file_contents
):
page_tag_count
=
self
.
_get_page_tag_count
(
mako_template
)
expression
=
Expression
(
match
.
start
(),
match
.
end
())
# check if there are too many page expressions
results
.
violations
.
append
(
ExpressionRuleViolation
(
if
2
<=
page_tag_count
:
Rules
.
python_custom_escape
,
expression
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_multiple_page_tags
))
))
return
False
# make sure there is exactly 1 page expression, excluding commented out
# page expressions, before proceeding
elif
page_tag_count
!=
1
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
False
# check that safe by default (h filter) is turned on
page_h_filter_regex
=
re
.
compile
(
'<
%
page[^>]*expression_filter=(?:"h"|
\'
h
\'
)[^>]*/>'
)
page_match
=
page_h_filter_regex
.
search
(
mako_template
)
if
not
page_match
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
page_match
def
_check_
html
(
self
,
file_contents
,
results
):
def
_check_
mako_expressions
(
self
,
mako_template
,
has_page_default
,
results
):
"""
"""
Checks many rules related to HTML in a Python file.
Searches for Mako expressions and then checks if they contain
violations, including checking JavaScript contexts for JavaScript
violations.
Arguments:
Arguments:
file_contents: The contents of the Python file
mako_template: The contents of the Mako template.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
# Text() Expressions keyed by its end index
expressions
=
self
.
_find_mako_expressions
(
mako_template
)
text_calls_by_end_index
=
{}
contexts
=
self
.
_get_contexts
(
mako_template
)
# HTML() Expressions keyed by its end index
self
.
_check_javascript_contexts
(
mako_template
,
contexts
,
results
)
html_calls_by_end_index
=
{}
for
expression
in
expressions
:
start_index
=
0
if
expression
.
end_index
is
None
:
while
True
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unparseable_expression
,
expression
# check HTML(), Text() and format() calls
))
result
=
self
.
_check_html_text_format
(
continue
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
)
next_start_index
=
result
[
'next_start_index'
]
interpolate_end_index
=
result
[
'interpolate_end_index'
]
# check for interpolation including HTML outside of function calls
self
.
_check_interpolate_with_html
(
file_contents
,
start_index
,
interpolate_end_index
,
results
)
# advance the search
start_index
=
next_start_index
# end if there is nothing left to search
context
=
self
.
_get_context
(
contexts
,
expression
.
start_index
)
if
interpolate_end_index
is
None
:
self
.
_check_filters
(
mako_template
,
expression
,
context
,
has_page_default
,
results
)
break
self
.
_check_deprecated_display_name
(
expression
,
results
)
self
.
_check_html_and_text
(
expression
,
has_page_default
,
results
)
def
_check_html_text_format
(
def
_check_javascript_contexts
(
self
,
mako_template
,
contexts
,
results
):
self
,
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
):
"""
"""
Checks for HTML(), Text() and format() calls, and various rules related
Lint the JavaScript contexts for JavaScript violations inside a Mako
t
o these calls
.
t
emplate
.
Arguments:
Arguments:
file_contents: The contents of the Python file
mako_template: The contents of the Mako template.
start_index: The index at which to begin searching for a function
contexts: A list of context dicts with 'type' and 'index'.
call.
text_calls_by_end_index: Text() Expressions keyed by its end index.
html_calls_by_end_index: HTML() Expressions keyed by its end index.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
Returns:
Side effect:
A dict with the following keys:
Adds JavaScript violations to results.
'next_start_index': The start index of the next search for a
function call.
'interpolate_end_index': The end index of the next next search
for interpolation with html, or None if the end of file
should be used.
"""
"""
# used to find opening of .format(), Text() and HTML() calls
javascript_start_index
=
None
regex_function_open
=
re
.
compile
(
r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()"
)
for
context
in
contexts
:
interpolate_end_index
=
None
if
context
[
'type'
]
==
'javascript'
:
end_index
=
None
if
javascript_start_index
<
0
:
strings
=
None
javascript_start_index
=
context
[
'index'
]
html_calls
=
[]
while
True
:
# first search for HTML(), Text(), or .format()
if
end_index
is
None
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
)
else
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
,
end_index
)
if
function_match
is
not
None
:
if
interpolate_end_index
is
None
:
interpolate_end_index
=
function_match
.
start
()
function_close_result
=
self
.
_find_closing_char_index
(
None
,
'('
,
')'
,
file_contents
,
start_index
=
function_match
.
end
(),
)
if
function_close_result
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_parse_error
,
Expression
(
function_match
.
start
())
))
else
:
else
:
expression
=
Expression
(
if
javascript_start_index
is
not
None
:
function_match
.
start
(),
function_close_result
[
'close_char_index'
]
+
1
,
file_contents
,
javascript_end_index
=
context
[
'index'
]
start_delim
=
function_match
.
group
(),
end_delim
=
")"
javascript_code
=
mako_template
[
javascript_start_index
:
javascript_end_index
]
)
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
# if this an outer most Text(), HTML(), or format() call
javascript_start_index
=
None
if
end_index
is
None
:
if
javascript_start_index
is
not
None
:
end_index
=
expression
.
end_index
javascript_code
=
mako_template
[
javascript_start_index
:]
interpolate_end_index
=
expression
.
start_index
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
strings
=
function_close_result
[
'strings'
]
if
function_match
.
group
()
==
'.format('
:
if
'HTML('
in
expression
.
expression_inner
or
'Text('
in
expression
.
expression_inner
:
is_wrapped_with_text
=
str
(
function_match
.
start
())
in
text_calls_by_end_index
.
keys
()
is_wrapped_with_html
=
str
(
function_match
.
start
())
in
html_calls_by_end_index
.
keys
()
if
is_wrapped_with_text
is
False
and
is_wrapped_with_html
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_requires_html_or_text
,
expression
))
else
:
# expression is 'HTML(' or 'Text('
# HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
# Generally, format() would be the issue if there is one.
if
regex_function_open
.
search
(
expression
.
expression_inner
)
is
not
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
function_match
.
group
()
==
'Text('
:
text_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
else
:
# function_match.group() == 'HTML(':
html_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
html_calls
.
append
(
expression
)
start_index
=
function_match
.
end
()
def
_check_javascript_context
(
self
,
javascript_code
,
start_offset
,
results
):
else
:
"""
break
Lint a single JavaScript context for JavaScript violations inside a Mako
template.
# checks strings in the outer most call to ensure they are properly
Arguments:
# wrapped with HTML()
javascript_code: The template contents of the JavaScript context.
self
.
_check_format_html_strings_wrapped
(
strings
,
html_calls
,
results
)
start_offset: The offset of the JavaScript context inside the
original Mako template.
results: A list of results into which violations will be added.
# compute where to continue the search
Side effect:
if
function_match
is
None
and
end_index
is
None
:
Adds JavaScript violations to results.
next_start_index
=
start_index
elif
end_index
is
None
:
next_start_index
=
function_match
.
end
()
else
:
next_start_index
=
end_index
return
{
"""
'next_start_index'
:
next_start_index
,
javascript_results
=
FileResults
(
""
)
'interpolate_end_index'
:
interpolate_end_index
,
self
.
javaScriptLinter
.
check_javascript_file_is_safe
(
javascript_code
,
javascript_results
)
}
# translate the violations into the location within the original
# Mako template
for
violation
in
javascript_results
.
violations
:
expression
=
violation
.
expression
expression
.
start_index
+=
start_offset
if
expression
.
end_index
is
not
None
:
expression
.
end_index
+=
start_offset
results
.
violations
.
append
(
ExpressionRuleViolation
(
violation
.
rule
,
expression
))
def
_check_
format_html_strings_wrapped
(
self
,
strings
,
html_calls
,
results
):
def
_check_
deprecated_display_name
(
self
,
expression
,
results
):
"""
"""
Checks that
any string inside a format call that seems to contain HTML
Checks that
the deprecated display_name_with_default_escaped is not
is wrapped with a call to HTML()
.
used. Adds violation to results if there is a problem
.
Arguments:
Arguments:
strings: A list of ParseStrings for each string inside the format()
expression: An Expression
call.
html_calls: A list of Expressions representing all of the HTML()
calls inside the format() call.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
html_strings
=
[]
if
'.display_name_with_default_escaped'
in
expression
.
expression
:
html_wrapped_strings
=
[]
if
strings
is
not
None
:
# find all strings that contain HTML
for
string
in
strings
:
if
'<'
in
string
.
string
:
html_strings
.
append
(
string
)
# check if HTML string is appropriately wrapped
for
html_call
in
html_calls
:
if
html_call
.
start_index
<
string
.
start_index
<
string
.
end_index
<
html_call
.
end_index
:
html_wrapped_strings
.
append
(
string
)
break
# loop through all unwrapped strings
for
unsafe_string
in
set
(
html_strings
)
-
set
(
html_wrapped_strings
):
unsafe_string_expression
=
Expression
(
unsafe_string
.
start_index
)
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
unsafe_string_
expression
Rules
.
python_deprecated_display_name
,
expression
))
))
def
_check_
interpolate_with_html
(
self
,
file_contents
,
start_index
,
end_index
,
results
):
def
_check_
html_and_text
(
self
,
expression
,
has_page_default
,
results
):
"""
"""
Find interpolations with html that fall outside of any calls to HTML(),
Checks rules related to proper use of HTML() and Text().
Text(), and .format().
Arguments:
Arguments:
file_contents: The contents of the Python file
expression: A Mako Expression.
start_index: The index to start the search, or None if nothing to
has_page_default: True if the page is marked as default, False
search
otherwise.
end_index: The index to end the search, or None if the end of file
should be used.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
# used to find interpolation with HTML
expression_inner
=
expression
.
expression_inner
pattern_interpolate_html_inner
=
r'(<.*
%
s|
%
s.*<|<.*{\w*}|{\w*}.*<)'
# use find to get the template relative inner expression start index
regex_interpolate_html
=
re
.
compile
(
r"""(".*{}.*"|'.*{}.*')"""
.
format
(
# due to possible skipped white space
pattern_interpolate_html_inner
,
pattern_interpolate_html_inner
template_inner_start_index
=
expression
.
start_index
))
template_inner_start_index
+=
expression
.
expression
.
find
(
expression_inner
)
if
end_index
is
None
:
if
'HTML('
in
expression_inner
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
)
if
expression_inner
.
startswith
(
'HTML('
):
else
:
close_paren_index
=
self
.
_find_closing_char_index
(
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
,
end_index
)
None
,
"("
,
")"
,
expression_inner
,
start_index
=
len
(
'HTML('
)
for
match_html_string
in
interpolate_string_iter
:
)[
'close_char_index'
]
expression
=
Expression
(
match_html_string
.
start
(),
match_html_string
.
end
())
# check that the close paren is at the end of the stripped expression.
if
close_paren_index
!=
len
(
expression_inner
)
-
1
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_interpolate_html
,
expression
Rules
.
mako_html_alone
,
expression
))
elif
expression_inner
.
startswith
(
'Text('
)
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_requires_text
,
expression
))
else
:
if
'Text('
in
expression_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_text_redundant
,
expression
))
# strings to be checked for HTML
unwrapped_html_strings
=
expression
.
strings
for
match
in
re
.
finditer
(
r"(HTML\(|Text\()"
,
expression_inner
):
result
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
match
.
end
())
if
result
is
not
None
:
close_paren_index
=
result
[
'close_char_index'
]
# the argument sent to HTML() or Text()
argument
=
expression_inner
[
match
.
end
():
close_paren_index
]
if
".format("
in
argument
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
match
.
group
()
==
"HTML("
:
# remove expression strings wrapped in HTML()
for
string
in
list
(
unwrapped_html_strings
):
html_inner_start_index
=
template_inner_start_index
+
match
.
end
()
html_inner_end_index
=
template_inner_start_index
+
close_paren_index
if
html_inner_start_index
<=
string
.
start_index
and
string
.
end_index
<=
html_inner_end_index
:
unwrapped_html_strings
.
remove
(
string
)
# check strings not wrapped in HTML() for '<'
for
string
in
unwrapped_html_strings
:
if
'<'
in
string
.
string_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
expression
))
break
# check strings not wrapped in HTML() for HTML entities
if
has_page_default
:
for
string
in
unwrapped_html_strings
:
if
re
.
search
(
r"&[#]?[a-zA-Z0-9]+;"
,
string
.
string_inner
):
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_entities
,
expression
))
break
def
_check_filters
(
self
,
mako_template
,
expression
,
context
,
has_page_default
,
results
):
"""
Checks that the filters used in the given Mako expression are valid
for the given context. Adds violation to results if there is a problem.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
context: The context of the page in which the expression was found
(e.g. javascript, html).
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
"""
if
context
==
'unknown'
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unknown_context
,
expression
))
return
# Example: finds "| n, h}" when given "${x | n, h}"
filters_regex
=
re
.
compile
(
r'\|([.,\w\s]*)\}'
)
filters_match
=
filters_regex
.
search
(
expression
.
expression
)
if
filters_match
is
None
:
if
context
==
'javascript'
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
return
filters
=
filters_match
.
group
(
1
)
.
replace
(
" "
,
""
)
.
split
(
","
)
if
filters
==
[
'n'
,
'decode.utf8'
]:
# {x | n, decode.utf8} is valid in any context
pass
elif
context
==
'html'
:
if
filters
==
[
'h'
]:
if
has_page_default
:
# suppress this violation if the page default hasn't been set,
# otherwise the template might get less safe
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unwanted_html_filter
,
expression
))
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_html_filter
,
expression
))
elif
context
==
'javascript'
:
self
.
_check_js_expression_not_with_html
(
mako_template
,
expression
,
results
)
if
filters
==
[
'n'
,
'dump_js_escaped_json'
]:
# {x | n, dump_js_escaped_json} is valid
pass
elif
filters
==
[
'n'
,
'js_escaped_string'
]:
# {x | n, js_escaped_string} is valid, if surrounded by quotes
self
.
_check_js_string_expression_in_quotes
(
mako_template
,
expression
,
results
)
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
def
_check_js_string_expression_in_quotes
(
self
,
mako_template
,
expression
,
results
):
"""
Checks that a Mako expression using js_escaped_string is surrounded by
quotes.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
if
parse_string
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_missing_quotes
,
expression
))
def
_check_js_expression_not_with_html
(
self
,
mako_template
,
expression
,
results
):
"""
Checks that a Mako expression in a JavaScript context does not appear in
a string that also contains HTML.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
if
parse_string
is
not
None
and
re
.
search
(
'[<>]'
,
parse_string
.
string
)
is
not
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_html_string
,
expression
))
def _find_string_wrapping_expression(self, mako_template, expression):
    """
    Locates the string, if any, that surrounds the given Mako expression.

    Arguments:
        mako_template: The contents of the Mako template.
        expression: A Mako Expression.

    Returns:
        ParseString representing a scrubbed version of the wrapped string,
        where the Mako expression was replaced with "${...}", if a wrapped
        string was found. Otherwise, returns None if none found.

    """
    template_lines = StringLines(mako_template)
    line_start = template_lines.index_to_line_start_index(expression.start_index)
    if expression.end_index is None:
        # the expression could not be parsed, so it has no known extent
        return None
    line_end = template_lines.index_to_line_end_index(expression.end_index)

    # scrub out the actual expression so any code inside the expression
    # doesn't interfere with rules applied to the surrounding code (i.e.
    # checking JavaScript).
    before_expression = mako_template[line_start:expression.start_index]
    after_expression = mako_template[expression.end_index:line_end]
    scrubbed = before_expression + "${...}" + after_expression

    # position of the expression relative to the scrubbed text
    expression_offset = expression.start_index - line_start

    search_from = 0
    while True:
        candidate = ParseString(scrubbed, search_from, len(scrubbed))
        # stop as soon as no validly parsed string remains
        if not 0 <= candidate.start_index < candidate.end_index:
            return None
        # check if expression is contained in the given string
        if candidate.start_index < expression_offset < candidate.end_index:
            return candidate
        # move to check next string
        search_from = candidate.end_index
def
_get_contexts
(
self
,
mako_template
):
"""
Returns a data structure that represents the indices at which the
template changes from HTML context to JavaScript and back.
Return:
A list of dicts where each dict contains:
- index: the index of the context.
- type: the context type (e.g. 'html' or 'javascript').
"""
contexts_re
=
re
.
compile
(
r"""
<script.*?> | # script tag start
</script> | # script tag end
<
%
static:require_module.*?> | # require js script tag start
</
%
static:require_module> | # require js script tag end
<
%
block[ ]*name=['"]requirejs['"]\w*> | # require js tag start
</
%
block> # require js tag end
"""
,
re
.
VERBOSE
|
re
.
IGNORECASE
)
media_type_re
=
re
.
compile
(
r"""type=['"].*?['"]"""
,
re
.
IGNORECASE
)
contexts
=
[{
'index'
:
0
,
'type'
:
'html'
}]
javascript_types
=
[
'text/javascript'
,
'text/ecmascript'
,
'application/ecmascript'
,
'application/javascript'
,
'text/x-mathjax-config'
,
'json/xblock-args'
]
html_types
=
[
'text/template'
]
for
context
in
contexts_re
.
finditer
(
mako_template
):
match_string
=
context
.
group
()
.
lower
()
if
match_string
.
startswith
(
"<script"
):
match_type
=
media_type_re
.
search
(
match_string
)
context_type
=
'javascript'
if
match_type
is
not
None
:
# get media type (e.g. get text/javascript from
# type="text/javascript")
match_type
=
match_type
.
group
()[
6
:
-
1
]
.
lower
()
if
match_type
in
html_types
:
context_type
=
'html'
elif
match_type
not
in
javascript_types
:
context_type
=
'unknown'
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
context_type
})
elif
match_string
.
startswith
(
"</"
):
contexts
.
append
({
'index'
:
context
.
start
(),
'type'
:
'html'
})
else
:
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
'javascript'
})
return
contexts
def
_get_context
(
self
,
contexts
,
index
):
"""
Gets the context (e.g. javascript, html) of the template at the given
index.
Arguments:
contexts: A list of dicts where each dict contains the 'index' of the context
and the context 'type' (e.g. 'html' or 'javascript').
index: The index for which we want the context.
Returns:
The context (e.g. javascript or html) for the given index.
"""
current_context
=
contexts
[
0
][
'type'
]
for
context
in
contexts
:
if
context
[
'index'
]
<=
index
:
current_context
=
context
[
'type'
]
else
:
break
return
current_context
def
_find_mako_expressions
(
self
,
mako_template
):
"""
Finds all the Mako expressions in a Mako template and creates a list
of dicts for each expression.
Arguments:
mako_template: The content of the Mako template.
Returns:
A list of Expressions.
"""
start_delim
=
'${'
start_index
=
0
expressions
=
[]
while
True
:
start_index
=
mako_template
.
find
(
start_delim
,
start_index
)
if
start_index
<
0
:
break
result
=
self
.
_find_closing_char_index
(
start_delim
,
'{'
,
'}'
,
mako_template
,
start_index
=
start_index
+
len
(
start_delim
)
)
if
result
is
None
:
expression
=
Expression
(
start_index
)
# for parsing error, restart search right after the start of the
# current expression
start_index
=
start_index
+
len
(
start_delim
)
else
:
close_char_index
=
result
[
'close_char_index'
]
expression
=
mako_template
[
start_index
:
close_char_index
+
1
]
expression
=
Expression
(
start_index
,
end_index
=
close_char_index
+
1
,
template
=
mako_template
,
start_delim
=
start_delim
,
end_delim
=
'}'
,
strings
=
result
[
'strings'
],
)
# restart search after the current expression
start_index
=
expression
.
end_index
expressions
.
append
(
expression
)
return
expressions
def
_process_file
(
full_path
,
template_linters
,
options
,
out
):
def
_process_file
(
full_path
,
template_linters
,
options
,
out
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment