Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
fb13dc64
Commit
fb13dc64
authored
May 02, 2016
by
Robert Raposa
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Move MakoLinter.
parent
a6b9ba7d
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
667 additions
and
667 deletions
+667
-667
scripts/safe_template_linter.py
+667
-667
No files found.
scripts/safe_template_linter.py
View file @
fb13dc64
...
...
@@ -1407,855 +1407,855 @@ class JavaScriptLinter(BaseLinter):
return
False
class
MakoTemplate
Linter
(
BaseLinter
):
class
Python
Linter
(
BaseLinter
):
"""
The linter for Mako template files.
The linter for Python files.
The current implementation of the linter does naive Python parsing. It does
not use the parser. One known issue is that parsing errors found inside a
docstring need to be disabled, rather than being automatically skipped.
Skipping docstrings is an enhancement that could be added.
"""
javaScriptLinter
=
JavaScriptLinter
()
def
__init__
(
self
):
"""
Init method.
"""
super
(
PythonLinter
,
self
)
.
__init__
()
self
.
_skip_python_dirs
=
self
.
_skip_dirs
+
(
'tests'
,
'test/acceptance'
)
def
process_file
(
self
,
directory
,
file_name
):
"""
Process file to determine if it is a
Mako template
file and
Process file to determine if it is a
Python
file and
if it is safe.
Arguments:
directory (string): The directory of the file to be checked
file_name (string): A filename for a potential
Mako
file
file_name (string): A filename for a potential
Python
file
Returns:
The file results containing any violations.
"""
mako_
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
results
=
FileResults
(
mako_
file_full_path
)
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
results
=
FileResults
(
file_full_path
)
if
not
results
.
is_file
:
return
results
if
not
self
.
_is_valid_directory
(
directory
)
:
if
file_name
.
lower
()
.
endswith
(
'.py'
)
is
False
:
return
results
# TODO: When safe-by-default is turned on at the platform level, will we:
# 1. Turn it on for .html only, or
# 2. Turn it on for all files, and have different rulesets that have
# different rules of .xml, .html, .js, .txt Mako templates (e.g. use
# the n filter to turn off h for some of these)?
# For now, we only check .html and .xml files
if
not
(
file_name
.
lower
()
.
endswith
(
'.html'
)
or
file_name
.
lower
()
.
endswith
(
'.xml'
)):
# skip this linter code (i.e. safe_template_linter.py)
if
file_name
==
os
.
path
.
basename
(
__file__
):
return
results
return
self
.
_load_and_check_file_is_safe
(
mako_file_full_path
,
self
.
_check_mako_file_is_safe
,
results
)
def
_is_valid_directory
(
self
,
directory
):
"""
Determines if the provided directory is a directory that could contain
Mako template files that need to be linted.
Arguments:
directory: The directory to be linted.
Returns:
True if this directory should be linted for Mako template violations
and False otherwise.
"""
if
self
.
_is_skip_dir
(
self
.
_skip_dirs
,
directory
):
return
False
# TODO: This is an imperfect guess concerning the Mako template
# directories. This needs to be reviewed before turning on safe by
# default at the platform level.
if
(
'/templates/'
in
directory
)
or
directory
.
endswith
(
'/templates'
):
return
True
if
not
self
.
_is_valid_directory
(
self
.
_skip_python_dirs
,
directory
):
return
results
return
False
return
self
.
_load_and_check_file_is_safe
(
file_full_path
,
self
.
check_python_file_is_safe
,
results
)
def
_check_mako_file_is_safe
(
self
,
mako_template
,
results
):
def
check_python_file_is_safe
(
self
,
file_contents
,
results
):
"""
Checks for violations in a
Mako templat
e.
Checks for violations in a
Python fil
e.
Arguments:
mako_template: The contents of the Mako templat
e.
file_contents: The contents of the Python fil
e.
results: A file results objects to which violations will be added.
"""
if
self
.
_is_django_template
(
mako_template
):
return
has_page_default
=
self
.
_has_page_default
(
mako_template
,
results
)
self
.
_check_mako_expressions
(
mako_template
,
has_page_default
,
results
)
results
.
prepare_results
(
mako_template
,
line_comment_delim
=
'##'
)
def
_is_django_template
(
self
,
mako_template
):
"""
Determines if the template is actually a Django template.
Arguments:
mako_template: The template code.
Returns:
True if this is really a Django template, and False otherwise.
"""
if
re
.
search
(
'({
%.*%
})|({{.*}})'
,
mako_template
)
is
not
None
:
return
True
return
False
def
_get_page_tag_count
(
self
,
mako_template
):
"""
Determines the number of page expressions in the Mako template. Ignores
page expressions that are commented out.
Arguments:
mako_template: The contents of the Mako template.
Returns:
The number of page expressions
"""
count
=
len
(
re
.
findall
(
'<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
count_commented
=
len
(
re
.
findall
(
r'##\s+<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
return
max
(
0
,
count
-
count_commented
)
self
.
_check_concat_with_html
(
file_contents
,
Rules
.
python_concat_html
,
results
)
self
.
_check_deprecated_display_name
(
file_contents
,
results
)
self
.
_check_custom_escape
(
file_contents
,
results
)
self
.
_check_html
(
file_contents
,
results
)
results
.
prepare_results
(
file_contents
,
line_comment_delim
=
'#'
)
def
_
has_page_default
(
self
,
mako_template
,
results
):
def
_
check_deprecated_display_name
(
self
,
file_contents
,
results
):
"""
Checks
if the Mako template contains the page expression marking it as
safe by default
.
Checks
that the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem
.
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
Side effect:
Adds violations regarding page default if necessary
Returns:
True if the template has the page default, and False otherwise.
"""
page_tag_count
=
self
.
_get_page_tag_count
(
mako_template
)
# check if there are too many page expressions
if
2
<=
page_tag_count
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_multiple_page_tags
))
return
False
# make sure there is exactly 1 page expression, excluding commented out
# page expressions, before proceeding
elif
page_tag_count
!=
1
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
False
# check that safe by default (h filter) is turned on
page_h_filter_regex
=
re
.
compile
(
'<
%
page[^>]*expression_filter=(?:"h"|
\'
h
\'
)[^>]*/>'
)
page_match
=
page_h_filter_regex
.
search
(
mako_template
)
if
not
page_match
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
page_match
for
match
in
re
.
finditer
(
r'\.display_name_with_default_escaped'
,
file_contents
):
expression
=
Expression
(
match
.
start
(),
match
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_deprecated_display_name
,
expression
))
def
_check_
mako_expressions
(
self
,
mako_template
,
has_page_default
,
results
):
def
_check_
custom_escape
(
self
,
file_contents
,
results
):
"""
Searches for Mako expressions and then checks if they contain
violations, including checking JavaScript contexts for JavaScript
violations.
Checks for custom escaping calls, rather than using a standard escaping
method.
Arguments:
mako_template: The contents of the Mako template.
has_page_default: True if the page is marked as default, False
otherwise.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
"""
expressions
=
self
.
_find_mako_expressions
(
mako_template
)
contexts
=
self
.
_get_contexts
(
mako_template
)
self
.
_check_javascript_contexts
(
mako_template
,
contexts
,
results
)
for
expression
in
expressions
:
if
expression
.
end_index
is
None
:
for
match
in
re
.
finditer
(
"(<.*<|<.*<)"
,
file_contents
):
expression
=
Expression
(
match
.
start
(),
match
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unparseable_expression
,
expression
Rules
.
python_custom_escape
,
expression
))
continue
context
=
self
.
_get_context
(
contexts
,
expression
.
start_index
)
self
.
_check_filters
(
mako_template
,
expression
,
context
,
has_page_default
,
results
)
self
.
_check_deprecated_display_name
(
expression
,
results
)
self
.
_check_html_and_text
(
expression
,
has_page_default
,
results
)
def
_check_
javascript_contexts
(
self
,
mako_template
,
contex
ts
,
results
):
def
_check_
html
(
self
,
file_conten
ts
,
results
):
"""
Lint the JavaScript contexts for JavaScript violations inside a Mako
template.
Checks many rules related to HTML in a Python file.
Arguments:
mako_template: The contents of the Mako template.
contexts: A list of context dicts with 'type' and 'index'.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
Side effect:
Adds JavaScript violations to results.
"""
javascript_start_index
=
None
for
context
in
contexts
:
if
context
[
'type'
]
==
'javascript'
:
if
javascript_start_index
<
0
:
javascript_start_index
=
context
[
'index'
]
else
:
if
javascript_start_index
is
not
None
:
javascript_end_index
=
context
[
'index'
]
javascript_code
=
mako_template
[
javascript_start_index
:
javascript_end_index
]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
javascript_start_index
=
None
if
javascript_start_index
is
not
None
:
javascript_code
=
mako_template
[
javascript_start_index
:]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
# Text() Expressions keyed by its end index
text_calls_by_end_index
=
{}
# HTML() Expressions keyed by its end index
html_calls_by_end_index
=
{}
start_index
=
0
while
True
:
def
_check_javascript_context
(
self
,
javascript_code
,
start_offset
,
results
):
"""
Lint a single JavaScript context for JavaScript violations inside a Mako
template.
# check HTML(), Text() and format() calls
result
=
self
.
_check_html_text_format
(
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
)
next_start_index
=
result
[
'next_start_index'
]
interpolate_end_index
=
result
[
'interpolate_end_index'
]
Arguments:
javascript_code: The template contents of the JavaScript context.
start_offset: The offset of the JavaScript context inside the
original Mako template.
results: A list of results into which violations will be added.
# check for interpolation including HTML outside of function calls
self
.
_check_interpolate_with_html
(
file_contents
,
start_index
,
interpolate_end_index
,
results
)
Side effect:
Adds JavaScript violations to results.
# advance the search
start_index
=
next_start_index
"""
javascript_results
=
FileResults
(
""
)
self
.
javaScriptLinter
.
check_javascript_file_is_safe
(
javascript_code
,
javascript_results
)
# translate the violations into the location within the original
# Mako template
for
violation
in
javascript_results
.
violations
:
expression
=
violation
.
expression
expression
.
start_index
+=
start_offset
if
expression
.
end_index
is
not
None
:
expression
.
end_index
+=
start_offset
results
.
violations
.
append
(
ExpressionRuleViolation
(
violation
.
rule
,
expression
))
# end if there is nothing left to search
if
interpolate_end_index
is
None
:
break
def
_check_deprecated_display_name
(
self
,
expression
,
results
):
def
_check_html_text_format
(
self
,
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
):
"""
Checks
that the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem
.
Checks
for HTML(), Text() and format() calls, and various rules related
to these calls
.
Arguments:
expression: An Expression
file_contents: The contents of the Python file
start_index: The index at which to begin searching for a function
call.
text_calls_by_end_index: Text() Expressions keyed by its end index.
html_calls_by_end_index: HTML() Expressions keyed by its end index.
results: A list of results into which violations will be added.
"""
if
'.display_name_with_default_escaped'
in
expression
.
expression
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_deprecated_display_name
,
expression
))
def
_check_html_and_text
(
self
,
expression
,
has_page_default
,
results
):
"""
Checks rules related to proper use of HTML() and Text().
Arguments:
expression: A Mako Expression.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
Returns:
A dict with the following keys:
'next_start_index': The start index of the next search for a
function call.
'interpolate_end_index': The end index of the next next search
for interpolation with html, or None if the end of file
should be used.
"""
expression_inner
=
expression
.
expression_inner
# use find to get the template relative inner expression start index
# due to possible skipped white space
template_inner_start_index
=
expression
.
start_index
template_inner_start_index
+=
expression
.
expression
.
find
(
expression_inner
)
if
'HTML('
in
expression_inner
:
if
expression_inner
.
startswith
(
'HTML('
):
close_paren_index
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
len
(
'HTML('
)
)[
'close_char_index'
]
# check that the close paren is at the end of the stripped expression.
if
close_paren_index
!=
len
(
expression_inner
)
-
1
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_alone
,
expression
))
elif
expression_inner
.
startswith
(
'Text('
)
is
False
:
# used to find opening of .format(), Text() and HTML() calls
regex_function_open
=
re
.
compile
(
r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()"
)
interpolate_end_index
=
None
end_index
=
None
strings
=
None
html_calls
=
[]
while
True
:
# first search for HTML(), Text(), or .format()
if
end_index
is
None
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
)
else
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
,
end_index
)
if
function_match
is
not
None
:
if
interpolate_end_index
is
None
:
interpolate_end_index
=
function_match
.
start
()
function_close_result
=
self
.
_find_closing_char_index
(
None
,
'('
,
')'
,
file_contents
,
start_index
=
function_match
.
end
(),
)
if
function_close_result
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_requires_text
,
expression
Rules
.
python_parse_error
,
Expression
(
function_match
.
start
())
))
else
:
if
'Text('
in
expression_inner
:
expression
=
Expression
(
function_match
.
start
(),
function_close_result
[
'close_char_index'
]
+
1
,
file_contents
,
start_delim
=
function_match
.
group
(),
end_delim
=
")"
)
# if this an outer most Text(), HTML(), or format() call
if
end_index
is
None
:
end_index
=
expression
.
end_index
interpolate_end_index
=
expression
.
start_index
strings
=
function_close_result
[
'strings'
]
if
function_match
.
group
()
==
'.format('
:
if
'HTML('
in
expression
.
expression_inner
or
'Text('
in
expression
.
expression_inner
:
is_wrapped_with_text
=
str
(
function_match
.
start
())
in
text_calls_by_end_index
.
keys
()
is_wrapped_with_html
=
str
(
function_match
.
start
())
in
html_calls_by_end_index
.
keys
()
if
is_wrapped_with_text
is
False
and
is_wrapped_with_html
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_text_redundan
t
,
expression
Rules
.
python_requires_html_or_tex
t
,
expression
))
# strings to be checked for HTML
unwrapped_html_strings
=
expression
.
strings
for
match
in
re
.
finditer
(
r"(HTML\(|Text\()"
,
expression_inner
):
result
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
match
.
end
())
if
result
is
not
None
:
close_paren_index
=
result
[
'close_char_index'
]
# the argument sent to HTML() or Text()
argument
=
expression_inner
[
match
.
end
():
close_paren_index
]
if
".format("
in
argument
:
else
:
# expression is 'HTML(' or 'Text('
# HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
# Generally, format() would be the issue if there is one.
if
regex_function_open
.
search
(
expression
.
expression_inner
)
is
not
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
match
.
group
()
==
"HTML("
:
# remove expression strings wrapped in HTML()
for
string
in
list
(
unwrapped_html_strings
):
html_inner_start_index
=
template_inner_start_index
+
match
.
end
()
html_inner_end_index
=
template_inner_start_index
+
close_paren_index
if
html_inner_start_index
<=
string
.
start_index
and
string
.
end_index
<=
html_inner_end_index
:
unwrapped_html_strings
.
remove
(
string
)
if
function_match
.
group
()
==
'Text('
:
text_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
else
:
# function_match.group() == 'HTML(':
html_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
html_calls
.
append
(
expression
)
# check strings not wrapped in HTML() for '<'
for
string
in
unwrapped_html_strings
:
if
'<'
in
string
.
string_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
expression
))
break
# check strings not wrapped in HTML() for HTML entities
if
has_page_default
:
for
string
in
unwrapped_html_strings
:
if
re
.
search
(
r"&[#]?[a-zA-Z0-9]+;"
,
string
.
string_inner
):
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_entities
,
expression
))
start_index
=
function_match
.
end
()
else
:
break
def
_check_filters
(
self
,
mako_template
,
expression
,
context
,
has_page_default
,
results
):
# checks strings in the outer most call to ensure they are properly
# wrapped with HTML()
self
.
_check_format_html_strings_wrapped
(
strings
,
html_calls
,
results
)
# compute where to continue the search
if
function_match
is
None
and
end_index
is
None
:
next_start_index
=
start_index
elif
end_index
is
None
:
next_start_index
=
function_match
.
end
()
else
:
next_start_index
=
end_index
return
{
'next_start_index'
:
next_start_index
,
'interpolate_end_index'
:
interpolate_end_index
,
}
def
_check_format_html_strings_wrapped
(
self
,
strings
,
html_calls
,
results
):
"""
Checks that
the filters used in the given Mako expression are valid
for the given context. Adds violation to results if there is a problem
.
Checks that
any string inside a format call that seems to contain HTML
is wrapped with a call to HTML()
.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
context: The context of the page in which the expression was found
(e.g. javascript, html).
has_page_default: True if the page is marked as default, False
otherwise.
strings: A list of ParseStrings for each string inside the format()
call.
html_calls: A list of Expressions representing all of the HTML()
calls inside the format() call.
results: A list of results into which violations will be added.
"""
if
context
==
'unknown'
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unknown_context
,
expression
))
return
# Example: finds "| n, h}" when given "${x | n, h}"
filters_regex
=
re
.
compile
(
r'\|([.,\w\s]*)\}'
)
filters_match
=
filters_regex
.
search
(
expression
.
expression
)
if
filters_match
is
None
:
if
context
==
'javascript'
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
return
filters
=
filters_match
.
group
(
1
)
.
replace
(
" "
,
""
)
.
split
(
","
)
if
filters
==
[
'n'
,
'decode.utf8'
]:
# {x | n, decode.utf8} is valid in any context
pass
elif
context
==
'html'
:
if
filters
==
[
'h'
]:
if
has_page_default
:
# suppress this violation if the page default hasn't been set,
# otherwise the template might get less safe
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unwanted_html_filter
,
expression
))
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_html_filter
,
expression
))
elif
context
==
'javascript'
:
self
.
_check_js_expression_not_with_html
(
mako_template
,
expression
,
results
)
if
filters
==
[
'n'
,
'dump_js_escaped_json'
]:
# {x | n, dump_js_escaped_json} is valid
pass
elif
filters
==
[
'n'
,
'js_escaped_string'
]:
# {x | n, js_escaped_string} is valid, if surrounded by quotes
self
.
_check_js_string_expression_in_quotes
(
mako_template
,
expression
,
results
)
else
:
html_strings
=
[]
html_wrapped_strings
=
[]
if
strings
is
not
None
:
# find all strings that contain HTML
for
string
in
strings
:
if
'<'
in
string
.
string
:
html_strings
.
append
(
string
)
# check if HTML string is appropriately wrapped
for
html_call
in
html_calls
:
if
html_call
.
start_index
<
string
.
start_index
<
string
.
end_index
<
html_call
.
end_index
:
html_wrapped_strings
.
append
(
string
)
break
# loop through all unwrapped strings
for
unsafe_string
in
set
(
html_strings
)
-
set
(
html_wrapped_strings
):
unsafe_string_expression
=
Expression
(
unsafe_string
.
start_index
)
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
Rules
.
python_wrap_html
,
unsafe_string_
expression
))
def
_check_
js_string_expression_in_quotes
(
self
,
mako_template
,
expression
,
results
):
def
_check_
interpolate_with_html
(
self
,
file_contents
,
start_index
,
end_index
,
results
):
"""
Checks that a Mako expression using js_escaped_string is surrounded by
quotes
.
Find interpolations with html that fall outside of any calls to HTML(),
Text(), and .format()
.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
file_contents: The contents of the Python file
start_index: The index to start the search, or None if nothing to
search
end_index: The index to end the search, or None if the end of file
should be used.
results: A list of results into which violations will be added.
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
if
parse_string
is
None
:
# used to find interpolation with HTML
pattern_interpolate_html_inner
=
r'(<.*
%
s|
%
s.*<|<.*{\w*}|{\w*}.*<)'
regex_interpolate_html
=
re
.
compile
(
r"""(".*{}.*"|'.*{}.*')"""
.
format
(
pattern_interpolate_html_inner
,
pattern_interpolate_html_inner
))
if
end_index
is
None
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
)
else
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
,
end_index
)
for
match_html_string
in
interpolate_string_iter
:
expression
=
Expression
(
match_html_string
.
start
(),
match_html_string
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_missing_quotes
,
expression
Rules
.
python_interpolate_html
,
expression
))
def
_check_js_expression_not_with_html
(
self
,
mako_template
,
expression
,
results
):
"""
Checks that a Mako expression in a JavaScript context does not appear in
a string that also contains HTML.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
class
MakoTemplateLinter
(
BaseLinter
):
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
if
parse_string
is
not
None
and
re
.
search
(
'[<>]'
,
parse_string
.
string
)
is
not
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_html_string
,
expression
))
The linter for Mako template files.
"""
javaScriptLinter
=
JavaScriptLinter
()
def
_find_string_wrapping_expression
(
self
,
mako_template
,
expression
):
def
process_file
(
self
,
directory
,
file_name
):
"""
Finds the string wrapping the Mako expression if there is one.
Process file to determine if it is a Mako template file and
if it is safe.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
directory (string): The directory of the file to be checked
file_name (string): A filename for a potential Mako file
Returns:
ParseString representing a scrubbed version of the wrapped string,
where the Mako expression was replaced with "${...}", if a wrapped
string was found. Otherwise, returns None if none found.
"""
lines
=
StringLines
(
mako_template
)
start_index
=
lines
.
index_to_line_start_index
(
expression
.
start_index
)
if
expression
.
end_index
is
not
None
:
end_index
=
lines
.
index_to_line_end_index
(
expression
.
end_index
)
else
:
return
None
# scrub out the actual expression so any code inside the expression
# doesn't interfere with rules applied to the surrounding code (i.e.
# checking JavaScript).
scrubbed_lines
=
""
.
join
((
mako_template
[
start_index
:
expression
.
start_index
],
"${...}"
,
mako_template
[
expression
.
end_index
:
end_index
]
))
adjusted_start_index
=
expression
.
start_index
-
start_index
start_index
=
0
while
True
:
parse_string
=
ParseString
(
scrubbed_lines
,
start_index
,
len
(
scrubbed_lines
))
# check for validly parsed string
if
0
<=
parse_string
.
start_index
<
parse_string
.
end_index
:
# check if expression is contained in the given string
if
parse_string
.
start_index
<
adjusted_start_index
<
parse_string
.
end_index
:
return
parse_string
else
:
# move to check next string
start_index
=
parse_string
.
end_index
else
:
break
return
None
def
_get_contexts
(
self
,
mako_template
):
"""
Returns a data structure that represents the indices at which the
template changes from HTML context to JavaScript and back.
The file results containing any violations.
Return:
A list of dicts where each dict contains:
- index: the index of the context.
- type: the context type (e.g. 'html' or 'javascript').
"""
contexts_re
=
re
.
compile
(
r"""
<script.*?> | # script tag start
</script> | # script tag end
<
%
static:require_module.*?> | # require js script tag start
</
%
static:require_module> | # require js script tag end
<
%
block[ ]*name=['"]requirejs['"]\w*> | # require js tag start
</
%
block> # require js tag end
"""
,
re
.
VERBOSE
|
re
.
IGNORECASE
)
media_type_re
=
re
.
compile
(
r"""type=['"].*?['"]"""
,
re
.
IGNORECASE
)
contexts
=
[{
'index'
:
0
,
'type'
:
'html'
}]
javascript_types
=
[
'text/javascript'
,
'text/ecmascript'
,
'application/ecmascript'
,
'application/javascript'
,
'text/x-mathjax-config'
,
'json/xblock-args'
]
html_types
=
[
'text/template'
]
for
context
in
contexts_re
.
finditer
(
mako_template
):
match_string
=
context
.
group
()
.
lower
()
if
match_string
.
startswith
(
"<script"
):
match_type
=
media_type_re
.
search
(
match_string
)
context_type
=
'javascript'
if
match_type
is
not
None
:
# get media type (e.g. get text/javascript from
# type="text/javascript")
match_type
=
match_type
.
group
()[
6
:
-
1
]
.
lower
()
if
match_type
in
html_types
:
context_type
=
'html'
elif
match_type
not
in
javascript_types
:
context_type
=
'unknown'
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
context_type
})
elif
match_string
.
startswith
(
"</"
):
contexts
.
append
({
'index'
:
context
.
start
(),
'type'
:
'html'
})
else
:
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
'javascript'
})
mako_file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
results
=
FileResults
(
mako_file_full_path
)
return
contexts
if
not
results
.
is_file
:
return
results
def
_get_context
(
self
,
contexts
,
index
):
"""
Gets the context (e.g. javascript, html) of the template at the given
index.
if
not
self
.
_is_valid_directory
(
directory
):
return
results
Arguments:
contexts: A list of dicts where each dict contains the 'index' of the context
and the context 'type' (e.g. 'html' or 'javascript').
index: The index for which we want the context.
# TODO: When safe-by-default is turned on at the platform level, will we:
# 1. Turn it on for .html only, or
# 2. Turn it on for all files, and have different rulesets that have
# different rules of .xml, .html, .js, .txt Mako templates (e.g. use
# the n filter to turn off h for some of these)?
# For now, we only check .html and .xml files
if
not
(
file_name
.
lower
()
.
endswith
(
'.html'
)
or
file_name
.
lower
()
.
endswith
(
'.xml'
)):
return
results
Returns:
The context (e.g. javascript or html) for the given index.
"""
current_context
=
contexts
[
0
][
'type'
]
for
context
in
contexts
:
if
context
[
'index'
]
<=
index
:
current_context
=
context
[
'type'
]
else
:
break
return
current_context
return
self
.
_load_and_check_file_is_safe
(
mako_file_full_path
,
self
.
_check_mako_file_is_safe
,
results
)
def
_
find_mako_expressions
(
self
,
mako_template
):
def
_
is_valid_directory
(
self
,
directory
):
"""
Finds all the Mako expressions in a Mako template and creates a list
of dicts for each expression
.
Determines if the provided directory is a directory that could contain
Mako template files that need to be linted
.
Arguments:
mako_template: The content of the Mako template
.
directory: The directory to be linted
.
Returns:
A list of Expressions.
True if this directory should be linted for Mako template violations
and False otherwise.
"""
start_delim
=
'${'
start_index
=
0
expressions
=
[]
while
True
:
start_index
=
mako_template
.
find
(
start_delim
,
start_index
)
if
start_index
<
0
:
break
if
self
.
_is_skip_dir
(
self
.
_skip_dirs
,
directory
):
return
False
result
=
self
.
_find_closing_char_index
(
start_delim
,
'{'
,
'}'
,
mako_template
,
start_index
=
start_index
+
len
(
start_delim
)
)
if
result
is
None
:
expression
=
Expression
(
start_index
)
# for parsing error, restart search right after the start of the
# current expression
start_index
=
start_index
+
len
(
start_delim
)
else
:
close_char_index
=
result
[
'close_char_index'
]
expression
=
mako_template
[
start_index
:
close_char_index
+
1
]
expression
=
Expression
(
start_index
,
end_index
=
close_char_index
+
1
,
template
=
mako_template
,
start_delim
=
start_delim
,
end_delim
=
'}'
,
strings
=
result
[
'strings'
],
)
# restart search after the current expression
start_index
=
expression
.
end_index
expressions
.
append
(
expression
)
return
expressions
# TODO: This is an imperfect guess concerning the Mako template
# directories. This needs to be reviewed before turning on safe by
# default at the platform level.
if
(
'/templates/'
in
directory
)
or
directory
.
endswith
(
'/templates'
):
return
True
return
False
class
PythonLinter
(
BaseLinter
):
def
_check_mako_file_is_safe
(
self
,
mako_template
,
results
):
"""
The linter for Python files
.
Checks for violations in a Mako template
.
The current implementation of the linter does naive Python parsing. It does
not use the parser. One known issue is that parsing errors found inside a
docstring need to be disabled, rather than being automatically skipped.
Skipping docstrings is an enhancement that could be added.
"""
Arguments:
mako_template: The contents of the Mako template.
results: A file results objects to which violations will be added.
def
__init__
(
self
):
"""
Init method.
"""
super
(
PythonLinter
,
self
)
.
__init__
()
self
.
_skip_python_dirs
=
self
.
_skip_dirs
+
(
'tests'
,
'test/acceptance'
)
if
self
.
_is_django_template
(
mako_template
):
return
has_page_default
=
self
.
_has_page_default
(
mako_template
,
results
)
self
.
_check_mako_expressions
(
mako_template
,
has_page_default
,
results
)
results
.
prepare_results
(
mako_template
,
line_comment_delim
=
'##'
)
def
process_file
(
self
,
directory
,
file_nam
e
):
def
_is_django_template
(
self
,
mako_templat
e
):
"""
Process file to determine if it is a Python file and
if it is safe.
Determines if the template is actually a Django template.
Arguments:
directory (string): The directory of the file to be checked
file_name (string): A filename for a potential Python file
mako_template: The template code.
Returns:
T
he file results containing any violations
.
T
rue if this is really a Django template, and False otherwise
.
"""
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
results
=
FileResults
(
file_full_path
)
if
not
results
.
is_file
:
return
results
if
file_name
.
lower
()
.
endswith
(
'.py'
)
is
False
:
return
results
# skip this linter code (i.e. safe_template_linter.py)
if
file_name
==
os
.
path
.
basename
(
__file__
):
return
results
if
not
self
.
_is_valid_directory
(
self
.
_skip_python_dirs
,
directory
):
return
results
return
self
.
_load_and_check_file_is_safe
(
file_full_path
,
self
.
check_python_file_is_safe
,
results
)
if
re
.
search
(
'({
%.*%
})|({{.*}})'
,
mako_template
)
is
not
None
:
return
True
return
False
def
check_python_file_is_safe
(
self
,
file_contents
,
results
):
def
_get_page_tag_count
(
self
,
mako_template
):
"""
Checks for violations in a Python file.
Determines the number of page expressions in the Mako template. Ignores
page expressions that are commented out.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
mako_template: The contents of the Mako template.
Returns:
The number of page expressions
"""
self
.
_check_concat_with_html
(
file_contents
,
Rules
.
python_concat_html
,
results
)
self
.
_check_deprecated_display_name
(
file_contents
,
results
)
self
.
_check_custom_escape
(
file_contents
,
results
)
self
.
_check_html
(
file_contents
,
results
)
results
.
prepare_results
(
file_contents
,
line_comment_delim
=
'#'
)
count
=
len
(
re
.
findall
(
'<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
count_commented
=
len
(
re
.
findall
(
r'##\s+<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
return
max
(
0
,
count
-
count_commented
)
def
_
check_deprecated_display_name
(
self
,
file_contents
,
results
):
def
_
has_page_default
(
self
,
mako_template
,
results
):
"""
Checks
that the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem
.
Checks
if the Mako template contains the page expression marking it as
safe by default
.
Arguments:
file_contents: The contents of the Python file
mako_template: The contents of the Mako template.
results: A list of results into which violations will be added.
"""
for
match
in
re
.
finditer
(
r'\.display_name_with_default_escaped'
,
file_contents
):
expression
=
Expression
(
match
.
start
(),
match
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_deprecated_display_name
,
expression
))
Side effect:
Adds violations regarding page default if necessary
def
_check_custom_escape
(
self
,
file_contents
,
results
):
"""
Checks for custom escaping calls, rather than using a standard escaping
method.
Arguments:
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
Returns:
True if the template has the page default, and False otherwise.
"""
for
match
in
re
.
finditer
(
"(<.*<|<.*<)"
,
file_contents
):
expression
=
Expression
(
match
.
start
(),
match
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_custom_escape
,
expression
))
page_tag_count
=
self
.
_get_page_tag_count
(
mako_template
)
# check if there are too many page expressions
if
2
<=
page_tag_count
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_multiple_page_tags
))
return
False
# make sure there is exactly 1 page expression, excluding commented out
# page expressions, before proceeding
elif
page_tag_count
!=
1
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
False
# check that safe by default (h filter) is turned on
page_h_filter_regex
=
re
.
compile
(
'<
%
page[^>]*expression_filter=(?:"h"|
\'
h
\'
)[^>]*/>'
)
page_match
=
page_h_filter_regex
.
search
(
mako_template
)
if
not
page_match
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
page_match
def
_check_
html
(
self
,
file_contents
,
results
):
def
_check_
mako_expressions
(
self
,
mako_template
,
has_page_default
,
results
):
"""
Checks many rules related to HTML in a Python file.
Searches for Mako expressions and then checks if they contain
violations, including checking JavaScript contexts for JavaScript
violations.
Arguments:
file_contents: The contents of the Python file
mako_template: The contents of the Mako template.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
"""
# Text() Expressions keyed by its end index
text_calls_by_end_index
=
{}
# HTML() Expressions keyed by its end index
html_calls_by_end_index
=
{}
start_index
=
0
while
True
:
# check HTML(), Text() and format() calls
result
=
self
.
_check_html_text_format
(
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
)
next_start_index
=
result
[
'next_start_index'
]
interpolate_end_index
=
result
[
'interpolate_end_index'
]
# check for interpolation including HTML outside of function calls
self
.
_check_interpolate_with_html
(
file_contents
,
start_index
,
interpolate_end_index
,
results
)
# advance the search
start_index
=
next_start_index
expressions
=
self
.
_find_mako_expressions
(
mako_template
)
contexts
=
self
.
_get_contexts
(
mako_template
)
self
.
_check_javascript_contexts
(
mako_template
,
contexts
,
results
)
for
expression
in
expressions
:
if
expression
.
end_index
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unparseable_expression
,
expression
))
continue
# end if there is nothing left to search
if
interpolate_end_index
is
None
:
break
context
=
self
.
_get_context
(
contexts
,
expression
.
start_index
)
self
.
_check_filters
(
mako_template
,
expression
,
context
,
has_page_default
,
results
)
self
.
_check_deprecated_display_name
(
expression
,
results
)
self
.
_check_html_and_text
(
expression
,
has_page_default
,
results
)
def
_check_html_text_format
(
self
,
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
):
def
_check_javascript_contexts
(
self
,
mako_template
,
contexts
,
results
):
"""
Checks for HTML(), Text() and format() calls, and various rules related
t
o these calls
.
Lint the JavaScript contexts for JavaScript violations inside a Mako
t
emplate
.
Arguments:
file_contents: The contents of the Python file
start_index: The index at which to begin searching for a function
call.
text_calls_by_end_index: Text() Expressions keyed by its end index.
html_calls_by_end_index: HTML() Expressions keyed by its end index.
mako_template: The contents of the Mako template.
contexts: A list of context dicts with 'type' and 'index'.
results: A list of results into which violations will be added.
Returns:
A dict with the following keys:
'next_start_index': The start index of the next search for a
function call.
'interpolate_end_index': The end index of the next next search
for interpolation with html, or None if the end of file
should be used.
Side effect:
Adds JavaScript violations to results.
"""
# used to find opening of .format(), Text() and HTML() calls
regex_function_open
=
re
.
compile
(
r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()"
)
interpolate_end_index
=
None
end_index
=
None
strings
=
None
html_calls
=
[]
while
True
:
# first search for HTML(), Text(), or .format()
if
end_index
is
None
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
)
else
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
,
end_index
)
if
function_match
is
not
None
:
if
interpolate_end_index
is
None
:
interpolate_end_index
=
function_match
.
start
()
function_close_result
=
self
.
_find_closing_char_index
(
None
,
'('
,
')'
,
file_contents
,
start_index
=
function_match
.
end
(),
)
if
function_close_result
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_parse_error
,
Expression
(
function_match
.
start
())
))
javascript_start_index
=
None
for
context
in
contexts
:
if
context
[
'type'
]
==
'javascript'
:
if
javascript_start_index
<
0
:
javascript_start_index
=
context
[
'index'
]
else
:
expression
=
Expression
(
function_match
.
start
(),
function_close_result
[
'close_char_index'
]
+
1
,
file_contents
,
start_delim
=
function_match
.
group
(),
end_delim
=
")"
)
# if this an outer most Text(), HTML(), or format() call
if
end_index
is
None
:
end_index
=
expression
.
end_index
interpolate_end_index
=
expression
.
start_index
strings
=
function_close_result
[
'strings'
]
if
function_match
.
group
()
==
'.format('
:
if
'HTML('
in
expression
.
expression_inner
or
'Text('
in
expression
.
expression_inner
:
is_wrapped_with_text
=
str
(
function_match
.
start
())
in
text_calls_by_end_index
.
keys
()
is_wrapped_with_html
=
str
(
function_match
.
start
())
in
html_calls_by_end_index
.
keys
()
if
is_wrapped_with_text
is
False
and
is_wrapped_with_html
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_requires_html_or_text
,
expression
))
else
:
# expression is 'HTML(' or 'Text('
# HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
# Generally, format() would be the issue if there is one.
if
regex_function_open
.
search
(
expression
.
expression_inner
)
is
not
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
function_match
.
group
()
==
'Text('
:
text_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
else
:
# function_match.group() == 'HTML(':
html_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
html_calls
.
append
(
expression
)
if
javascript_start_index
is
not
None
:
javascript_end_index
=
context
[
'index'
]
javascript_code
=
mako_template
[
javascript_start_index
:
javascript_end_index
]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
javascript_start_index
=
None
if
javascript_start_index
is
not
None
:
javascript_code
=
mako_template
[
javascript_start_index
:]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
start_index
=
function_match
.
end
()
else
:
break
def
_check_javascript_context
(
self
,
javascript_code
,
start_offset
,
results
):
"""
Lint a single JavaScript context for JavaScript violations inside a Mako
template.
# checks strings in the outer most call to ensure they are properly
# wrapped with HTML()
self
.
_check_format_html_strings_wrapped
(
strings
,
html_calls
,
results
)
Arguments:
javascript_code: The template contents of the JavaScript context.
start_offset: The offset of the JavaScript context inside the
original Mako template.
results: A list of results into which violations will be added.
# compute where to continue the search
if
function_match
is
None
and
end_index
is
None
:
next_start_index
=
start_index
elif
end_index
is
None
:
next_start_index
=
function_match
.
end
()
else
:
next_start_index
=
end_index
Side effect:
Adds JavaScript violations to results.
return
{
'next_start_index'
:
next_start_index
,
'interpolate_end_index'
:
interpolate_end_index
,
}
"""
javascript_results
=
FileResults
(
""
)
self
.
javaScriptLinter
.
check_javascript_file_is_safe
(
javascript_code
,
javascript_results
)
# translate the violations into the location within the original
# Mako template
for
violation
in
javascript_results
.
violations
:
expression
=
violation
.
expression
expression
.
start_index
+=
start_offset
if
expression
.
end_index
is
not
None
:
expression
.
end_index
+=
start_offset
results
.
violations
.
append
(
ExpressionRuleViolation
(
violation
.
rule
,
expression
))
def
_check_
format_html_strings_wrapped
(
self
,
strings
,
html_calls
,
results
):
def
_check_
deprecated_display_name
(
self
,
expression
,
results
):
"""
Checks that
any string inside a format call that seems to contain HTML
is wrapped with a call to HTML()
.
Checks that
the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem
.
Arguments:
strings: A list of ParseStrings for each string inside the format()
call.
html_calls: A list of Expressions representing all of the HTML()
calls inside the format() call.
expression: An Expression
results: A list of results into which violations will be added.
"""
html_strings
=
[]
html_wrapped_strings
=
[]
if
strings
is
not
None
:
# find all strings that contain HTML
for
string
in
strings
:
if
'<'
in
string
.
string
:
html_strings
.
append
(
string
)
# check if HTML string is appropriately wrapped
for
html_call
in
html_calls
:
if
html_call
.
start_index
<
string
.
start_index
<
string
.
end_index
<
html_call
.
end_index
:
html_wrapped_strings
.
append
(
string
)
break
# loop through all unwrapped strings
for
unsafe_string
in
set
(
html_strings
)
-
set
(
html_wrapped_strings
):
unsafe_string_expression
=
Expression
(
unsafe_string
.
start_index
)
if
'.display_name_with_default_escaped'
in
expression
.
expression
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
unsafe_string_
expression
Rules
.
python_deprecated_display_name
,
expression
))
def
_check_
interpolate_with_html
(
self
,
file_contents
,
start_index
,
end_index
,
results
):
def
_check_
html_and_text
(
self
,
expression
,
has_page_default
,
results
):
"""
Find interpolations with html that fall outside of any calls to HTML(),
Text(), and .format().
Checks rules related to proper use of HTML() and Text().
Arguments:
file_contents: The contents of the Python file
start_index: The index to start the search, or None if nothing to
search
end_index: The index to end the search, or None if the end of file
should be used.
expression: A Mako Expression.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
"""
# used to find interpolation with HTML
pattern_interpolate_html_inner
=
r'(<.*
%
s|
%
s.*<|<.*{\w*}|{\w*}.*<)'
regex_interpolate_html
=
re
.
compile
(
r"""(".*{}.*"|'.*{}.*')"""
.
format
(
pattern_interpolate_html_inner
,
pattern_interpolate_html_inner
))
if
end_index
is
None
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
)
else
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
,
end_index
)
for
match_html_string
in
interpolate_string_iter
:
expression
=
Expression
(
match_html_string
.
start
(),
match_html_string
.
end
())
expression_inner
=
expression
.
expression_inner
# use find to get the template relative inner expression start index
# due to possible skipped white space
template_inner_start_index
=
expression
.
start_index
template_inner_start_index
+=
expression
.
expression
.
find
(
expression_inner
)
if
'HTML('
in
expression_inner
:
if
expression_inner
.
startswith
(
'HTML('
):
close_paren_index
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
len
(
'HTML('
)
)[
'close_char_index'
]
# check that the close paren is at the end of the stripped expression.
if
close_paren_index
!=
len
(
expression_inner
)
-
1
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_interpolate_html
,
expression
Rules
.
mako_html_alone
,
expression
))
elif
expression_inner
.
startswith
(
'Text('
)
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_requires_text
,
expression
))
else
:
if
'Text('
in
expression_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_text_redundant
,
expression
))
# strings to be checked for HTML
unwrapped_html_strings
=
expression
.
strings
for
match
in
re
.
finditer
(
r"(HTML\(|Text\()"
,
expression_inner
):
result
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
match
.
end
())
if
result
is
not
None
:
close_paren_index
=
result
[
'close_char_index'
]
# the argument sent to HTML() or Text()
argument
=
expression_inner
[
match
.
end
():
close_paren_index
]
if
".format("
in
argument
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
match
.
group
()
==
"HTML("
:
# remove expression strings wrapped in HTML()
for
string
in
list
(
unwrapped_html_strings
):
html_inner_start_index
=
template_inner_start_index
+
match
.
end
()
html_inner_end_index
=
template_inner_start_index
+
close_paren_index
if
html_inner_start_index
<=
string
.
start_index
and
string
.
end_index
<=
html_inner_end_index
:
unwrapped_html_strings
.
remove
(
string
)
# check strings not wrapped in HTML() for '<'
for
string
in
unwrapped_html_strings
:
if
'<'
in
string
.
string_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
expression
))
break
# check strings not wrapped in HTML() for HTML entities
if
has_page_default
:
for
string
in
unwrapped_html_strings
:
if
re
.
search
(
r"&[#]?[a-zA-Z0-9]+;"
,
string
.
string_inner
):
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_entities
,
expression
))
break
def
_check_filters
(
self
,
mako_template
,
expression
,
context
,
has_page_default
,
results
):
"""
Checks that the filters used in the given Mako expression are valid
for the given context. Adds violation to results if there is a problem.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
context: The context of the page in which the expression was found
(e.g. javascript, html).
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
"""
if
context
==
'unknown'
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unknown_context
,
expression
))
return
# Example: finds "| n, h}" when given "${x | n, h}"
filters_regex
=
re
.
compile
(
r'\|([.,\w\s]*)\}'
)
filters_match
=
filters_regex
.
search
(
expression
.
expression
)
if
filters_match
is
None
:
if
context
==
'javascript'
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
return
filters
=
filters_match
.
group
(
1
)
.
replace
(
" "
,
""
)
.
split
(
","
)
if
filters
==
[
'n'
,
'decode.utf8'
]:
# {x | n, decode.utf8} is valid in any context
pass
elif
context
==
'html'
:
if
filters
==
[
'h'
]:
if
has_page_default
:
# suppress this violation if the page default hasn't been set,
# otherwise the template might get less safe
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unwanted_html_filter
,
expression
))
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_html_filter
,
expression
))
elif
context
==
'javascript'
:
self
.
_check_js_expression_not_with_html
(
mako_template
,
expression
,
results
)
if
filters
==
[
'n'
,
'dump_js_escaped_json'
]:
# {x | n, dump_js_escaped_json} is valid
pass
elif
filters
==
[
'n'
,
'js_escaped_string'
]:
# {x | n, js_escaped_string} is valid, if surrounded by quotes
self
.
_check_js_string_expression_in_quotes
(
mako_template
,
expression
,
results
)
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
def _check_js_string_expression_in_quotes(self, mako_template, expression, results):
    """
    Verifies that a Mako expression filtered with js_escaped_string sits
    inside a quoted string, and records a violation when no wrapping
    string is found.

    Arguments:
        mako_template: The contents of the Mako template.
        expression: A Mako Expression.
        results: A list of results into which violations will be added.

    """
    wrapping_string = self._find_string_wrapping_expression(mako_template, expression)
    if wrapping_string is not None:
        # a wrapping string was found, so there is nothing to report
        return
    results.violations.append(ExpressionRuleViolation(
        Rules.mako_js_missing_quotes, expression
    ))
def _check_js_expression_not_with_html(self, mako_template, expression, results):
    """
    Verifies that a Mako expression found in a JavaScript context is not
    embedded in a string that also contains HTML (detected as any '<' or
    '>' character in the wrapping string).

    Arguments:
        mako_template: The contents of the Mako template.
        expression: A Mako Expression.
        results: A list of results into which violations will be added.

    """
    wrapping_string = self._find_string_wrapping_expression(mako_template, expression)
    if wrapping_string is None:
        # the expression is not inside a string at all
        return
    if re.search('[<>]', wrapping_string.string) is None:
        # the wrapping string has no angle brackets
        return
    results.violations.append(ExpressionRuleViolation(
        Rules.mako_js_html_string, expression
    ))
def _find_string_wrapping_expression(self, mako_template, expression):
    """
    Locates the string literal, if any, that surrounds the given Mako
    expression on the line(s) the expression occupies.

    Arguments:
        mako_template: The contents of the Mako template.
        expression: A Mako Expression.

    Returns:
        ParseString representing a scrubbed version of the wrapped string,
        where the Mako expression was replaced with "${...}", if a wrapped
        string was found. Otherwise, returns None if none found.

    """
    lines = StringLines(mako_template)
    line_start = lines.index_to_line_start_index(expression.start_index)
    if expression.end_index is None:
        # an expression without an end index cannot be scrubbed
        return None
    line_end = lines.index_to_line_end_index(expression.end_index)

    # scrub out the actual expression so any code inside the expression
    # doesn't interfere with rules applied to the surrounding code (i.e.
    # checking JavaScript).
    before_expression = mako_template[line_start:expression.start_index]
    after_expression = mako_template[expression.end_index:line_end]
    scrubbed_lines = "".join((before_expression, "${...}", after_expression))

    # position of the expression relative to the scrubbed text
    expression_offset = expression.start_index - line_start
    scrubbed_length = len(scrubbed_lines)
    search_from = 0
    while True:
        candidate = ParseString(scrubbed_lines, search_from, scrubbed_length)
        # stop once no validly parsed string remains
        if not 0 <= candidate.start_index < candidate.end_index:
            return None
        # check if expression is contained in the given string
        if candidate.start_index < expression_offset < candidate.end_index:
            return candidate
        # move to check next string
        search_from = candidate.end_index
def
_get_contexts
(
self
,
mako_template
):
"""
Returns a data structure that represents the indices at which the
template changes from HTML context to JavaScript and back.
Return:
A list of dicts where each dict contains:
- index: the index of the context.
- type: the context type (e.g. 'html' or 'javascript').
"""
contexts_re
=
re
.
compile
(
r"""
<script.*?> | # script tag start
</script> | # script tag end
<
%
static:require_module.*?> | # require js script tag start
</
%
static:require_module> | # require js script tag end
<
%
block[ ]*name=['"]requirejs['"]\w*> | # require js tag start
</
%
block> # require js tag end
"""
,
re
.
VERBOSE
|
re
.
IGNORECASE
)
media_type_re
=
re
.
compile
(
r"""type=['"].*?['"]"""
,
re
.
IGNORECASE
)
contexts
=
[{
'index'
:
0
,
'type'
:
'html'
}]
javascript_types
=
[
'text/javascript'
,
'text/ecmascript'
,
'application/ecmascript'
,
'application/javascript'
,
'text/x-mathjax-config'
,
'json/xblock-args'
]
html_types
=
[
'text/template'
]
for
context
in
contexts_re
.
finditer
(
mako_template
):
match_string
=
context
.
group
()
.
lower
()
if
match_string
.
startswith
(
"<script"
):
match_type
=
media_type_re
.
search
(
match_string
)
context_type
=
'javascript'
if
match_type
is
not
None
:
# get media type (e.g. get text/javascript from
# type="text/javascript")
match_type
=
match_type
.
group
()[
6
:
-
1
]
.
lower
()
if
match_type
in
html_types
:
context_type
=
'html'
elif
match_type
not
in
javascript_types
:
context_type
=
'unknown'
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
context_type
})
elif
match_string
.
startswith
(
"</"
):
contexts
.
append
({
'index'
:
context
.
start
(),
'type'
:
'html'
})
else
:
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
'javascript'
})
return
contexts
def
_get_context
(
self
,
contexts
,
index
):
"""
Gets the context (e.g. javascript, html) of the template at the given
index.
Arguments:
contexts: A list of dicts where each dict contains the 'index' of the context
and the context 'type' (e.g. 'html' or 'javascript').
index: The index for which we want the context.
Returns:
The context (e.g. javascript or html) for the given index.
"""
current_context
=
contexts
[
0
][
'type'
]
for
context
in
contexts
:
if
context
[
'index'
]
<=
index
:
current_context
=
context
[
'type'
]
else
:
break
return
current_context
def
_find_mako_expressions
(
self
,
mako_template
):
"""
Finds all the Mako expressions in a Mako template and creates a list
of dicts for each expression.
Arguments:
mako_template: The content of the Mako template.
Returns:
A list of Expressions.
"""
start_delim
=
'${'
start_index
=
0
expressions
=
[]
while
True
:
start_index
=
mako_template
.
find
(
start_delim
,
start_index
)
if
start_index
<
0
:
break
result
=
self
.
_find_closing_char_index
(
start_delim
,
'{'
,
'}'
,
mako_template
,
start_index
=
start_index
+
len
(
start_delim
)
)
if
result
is
None
:
expression
=
Expression
(
start_index
)
# for parsing error, restart search right after the start of the
# current expression
start_index
=
start_index
+
len
(
start_delim
)
else
:
close_char_index
=
result
[
'close_char_index'
]
expression
=
mako_template
[
start_index
:
close_char_index
+
1
]
expression
=
Expression
(
start_index
,
end_index
=
close_char_index
+
1
,
template
=
mako_template
,
start_delim
=
start_delim
,
end_delim
=
'}'
,
strings
=
result
[
'strings'
],
)
# restart search after the current expression
start_index
=
expression
.
end_index
expressions
.
append
(
expression
)
return
expressions
def
_process_file
(
full_path
,
template_linters
,
options
,
out
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment