Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
fb13dc64
Commit
fb13dc64
authored
May 02, 2016
by
Robert Raposa
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Move MakoLinter.
parent
a6b9ba7d
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
655 additions
and
655 deletions
+655
-655
scripts/safe_template_linter.py
+655
-655
No files found.
scripts/safe_template_linter.py
View file @
fb13dc64
...
...
@@ -1407,855 +1407,855 @@ class JavaScriptLinter(BaseLinter):
return
False
class
MakoTemplate
Linter
(
BaseLinter
):
class
Python
Linter
(
BaseLinter
):
"""
The linter for Mako template files.
The linter for Python files.
The current implementation of the linter does naive Python parsing. It does
not use the parser. One known issue is that parsing errors found inside a
docstring need to be disabled, rather than being automatically skipped.
Skipping docstrings is an enhancement that could be added.
"""
javaScriptLinter
=
JavaScriptLinter
()
def
__init__
(
self
):
"""
Init method.
"""
super
(
PythonLinter
,
self
)
.
__init__
()
self
.
_skip_python_dirs
=
self
.
_skip_dirs
+
(
'tests'
,
'test/acceptance'
)
def
process_file
(
self
,
directory
,
file_name
):
"""
Process file to determine if it is a
Mako template
file and
Process file to determine if it is a
Python
file and
if it is safe.
Arguments:
directory (string): The directory of the file to be checked
file_name (string): A filename for a potential
Mako
file
file_name (string): A filename for a potential
Python
file
Returns:
The file results containing any violations.
"""
mako_
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
results
=
FileResults
(
mako_
file_full_path
)
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
results
=
FileResults
(
file_full_path
)
if
not
results
.
is_file
:
return
results
if
not
self
.
_is_valid_directory
(
directory
)
:
if
file_name
.
lower
()
.
endswith
(
'.py'
)
is
False
:
return
results
# TODO: When safe-by-default is turned on at the platform level, will we:
# 1. Turn it on for .html only, or
# 2. Turn it on for all files, and have different rulesets that have
# different rules of .xml, .html, .js, .txt Mako templates (e.g. use
# the n filter to turn off h for some of these)?
# For now, we only check .html and .xml files
if
not
(
file_name
.
lower
()
.
endswith
(
'.html'
)
or
file_name
.
lower
()
.
endswith
(
'.xml'
)):
# skip this linter code (i.e. safe_template_linter.py)
if
file_name
==
os
.
path
.
basename
(
__file__
):
return
results
return
self
.
_load_and_check_file_is_safe
(
mako_file_full_path
,
self
.
_check_mako_file_is_safe
,
results
)
def
_is_valid_directory
(
self
,
directory
):
"""
Determines if the provided directory is a directory that could contain
Mako template files that need to be linted.
Arguments:
directory: The directory to be linted.
Returns:
True if this directory should be linted for Mako template violations
and False otherwise.
"""
if
self
.
_is_skip_dir
(
self
.
_skip_dirs
,
directory
):
return
False
# TODO: This is an imperfect guess concerning the Mako template
# directories. This needs to be reviewed before turning on safe by
# default at the platform level.
if
(
'/templates/'
in
directory
)
or
directory
.
endswith
(
'/templates'
):
return
True
if
not
self
.
_is_valid_directory
(
self
.
_skip_python_dirs
,
directory
):
return
results
return
False
return
self
.
_load_and_check_file_is_safe
(
file_full_path
,
self
.
check_python_file_is_safe
,
results
)
def
_check_mako_file_is_safe
(
self
,
mako_template
,
results
):
def
check_python_file_is_safe
(
self
,
file_contents
,
results
):
"""
Checks for violations in a
Mako templat
e.
Checks for violations in a
Python fil
e.
Arguments:
mako_template: The contents of the Mako templat
e.
file_contents: The contents of the Python fil
e.
results: A file results objects to which violations will be added.
"""
if
self
.
_is_django_template
(
mako_template
):
return
has_page_default
=
self
.
_has_page_default
(
mako_template
,
results
)
self
.
_check_mako_expressions
(
mako_template
,
has_page_default
,
results
)
results
.
prepare_results
(
mako_template
,
line_comment_delim
=
'##'
)
def
_is_django_template
(
self
,
mako_template
):
"""
Determines if the template is actually a Django template.
Arguments:
mako_template: The template code.
Returns:
True if this is really a Django template, and False otherwise.
"""
if
re
.
search
(
'({
%.*%
})|({{.*}})'
,
mako_template
)
is
not
None
:
return
True
return
False
self
.
_check_concat_with_html
(
file_contents
,
Rules
.
python_concat_html
,
results
)
self
.
_check_deprecated_display_name
(
file_contents
,
results
)
self
.
_check_custom_escape
(
file_contents
,
results
)
self
.
_check_html
(
file_contents
,
results
)
results
.
prepare_results
(
file_contents
,
line_comment_delim
=
'#'
)
def
_
get_page_tag_count
(
self
,
mako_template
):
def
_
check_deprecated_display_name
(
self
,
file_contents
,
results
):
"""
Determines the number of page expressions in the Mako template. Ignores
page expressions that are commented out
.
Checks that the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem
.
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
Returns:
The number of page expressions
"""
count
=
len
(
re
.
findall
(
'<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
count_commented
=
len
(
re
.
findall
(
r'##\s+<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
return
max
(
0
,
count
-
count_commented
)
for
match
in
re
.
finditer
(
r'\.display_name_with_default_escaped'
,
file_contents
):
expression
=
Expression
(
match
.
start
(),
match
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_deprecated_display_name
,
expression
))
def
_
has_page_default
(
self
,
mako_template
,
results
):
def
_
check_custom_escape
(
self
,
file_contents
,
results
):
"""
Checks
if the Mako template contains the page expression marking it as
safe by default
.
Checks
for custom escaping calls, rather than using a standard escaping
method
.
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
Side effect:
Adds violations regarding page default if necessary
Returns:
True if the template has the page default, and False otherwise.
"""
page_tag_count
=
self
.
_get_page_tag_count
(
mako_template
)
# check if there are too many page expressions
if
2
<=
page_tag_count
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_multiple_page_tags
))
return
False
# make sure there is exactly 1 page expression, excluding commented out
# page expressions, before proceeding
elif
page_tag_count
!=
1
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
False
# check that safe by default (h filter) is turned on
page_h_filter_regex
=
re
.
compile
(
'<
%
page[^>]*expression_filter=(?:"h"|
\'
h
\'
)[^>]*/>'
)
page_match
=
page_h_filter_regex
.
search
(
mako_template
)
if
not
page_match
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
page_match
for
match
in
re
.
finditer
(
"(<.*<|<.*<)"
,
file_contents
):
expression
=
Expression
(
match
.
start
(),
match
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_custom_escape
,
expression
))
def
_check_
mako_expressions
(
self
,
mako_template
,
has_page_default
,
results
):
def
_check_
html
(
self
,
file_contents
,
results
):
"""
Searches for Mako expressions and then checks if they contain
violations, including checking JavaScript contexts for JavaScript
violations.
Checks many rules related to HTML in a Python file.
Arguments:
mako_template: The contents of the Mako template.
has_page_default: True if the page is marked as default, False
otherwise.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
"""
expressions
=
self
.
_find_mako_expressions
(
mako_template
)
contexts
=
self
.
_get_contexts
(
mako_template
)
self
.
_check_javascript_contexts
(
mako_template
,
contexts
,
results
)
for
expression
in
expressions
:
if
expression
.
end_index
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unparseable_expression
,
expression
))
continue
# Text() Expressions keyed by its end index
text_calls_by_end_index
=
{}
# HTML() Expressions keyed by its end index
html_calls_by_end_index
=
{}
start_index
=
0
while
True
:
context
=
self
.
_get_context
(
contexts
,
expression
.
start_index
)
self
.
_check_filters
(
mako_template
,
expression
,
context
,
has_page_default
,
results
)
self
.
_check_deprecated_display_name
(
expression
,
results
)
self
.
_check_html_and_text
(
expression
,
has_page_default
,
results
)
# check HTML(), Text() and format() calls
result
=
self
.
_check_html_text_format
(
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
)
next_start_index
=
result
[
'next_start_index'
]
interpolate_end_index
=
result
[
'interpolate_end_index'
]
def
_check_javascript_contexts
(
self
,
mako_template
,
contexts
,
results
):
"""
Lint the JavaScript contexts for JavaScript violations inside a Mako
template.
# check for interpolation including HTML outside of function calls
self
.
_check_interpolate_with_html
(
file_contents
,
start_index
,
interpolate_end_index
,
results
)
Arguments:
mako_template: The contents of the Mako template.
contexts: A list of context dicts with 'type' and 'index'.
results: A list of results into which violations will be added.
# advance the search
start_index
=
next_start_index
Side effect:
Adds JavaScript violations to results.
"""
javascript_start_index
=
None
for
context
in
contexts
:
if
context
[
'type'
]
==
'javascript'
:
if
javascript_start_index
<
0
:
javascript_start_index
=
context
[
'index'
]
else
:
if
javascript_start_index
is
not
None
:
javascript_end_index
=
context
[
'index'
]
javascript_code
=
mako_template
[
javascript_start_index
:
javascript_end_index
]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
javascript_start_index
=
None
if
javascript_start_index
is
not
None
:
javascript_code
=
mako_template
[
javascript_start_index
:]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
# end if there is nothing left to search
if
interpolate_end_index
is
None
:
break
def
_check_javascript_context
(
self
,
javascript_code
,
start_offset
,
results
):
def
_check_html_text_format
(
self
,
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
):
"""
Lint a single JavaScript context for JavaScript violations inside a Mako
t
emplate
.
Checks for HTML(), Text() and format() calls, and various rules related
t
o these calls
.
Arguments:
javascript_code: The template contents of the JavaScript context.
start_offset: The offset of the JavaScript context inside the
original Mako template.
file_contents: The contents of the Python file
start_index: The index at which to begin searching for a function
call.
text_calls_by_end_index: Text() Expressions keyed by its end index.
html_calls_by_end_index: HTML() Expressions keyed by its end index.
results: A list of results into which violations will be added.
Side effect:
Adds JavaScript violations to results.
"""
javascript_results
=
FileResults
(
""
)
self
.
javaScriptLinter
.
check_javascript_file_is_safe
(
javascript_code
,
javascript_results
)
# translate the violations into the location within the original
# Mako template
for
violation
in
javascript_results
.
violations
:
expression
=
violation
.
expression
expression
.
start_index
+=
start_offset
if
expression
.
end_index
is
not
None
:
expression
.
end_index
+=
start_offset
results
.
violations
.
append
(
ExpressionRuleViolation
(
violation
.
rule
,
expression
))
Returns:
A dict with the following keys:
'next_start_index': The start index of the next search for a
function call.
'interpolate_end_index': The end index of the next next search
for interpolation with html, or None if the end of file
should be used.
def
_check_deprecated_display_name
(
self
,
expression
,
results
):
"""
Checks that the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem.
# used to find opening of .format(), Text() and HTML() calls
regex_function_open
=
re
.
compile
(
r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()"
)
interpolate_end_index
=
None
end_index
=
None
strings
=
None
html_calls
=
[]
while
True
:
# first search for HTML(), Text(), or .format()
if
end_index
is
None
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
)
else
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
,
end_index
)
if
function_match
is
not
None
:
if
interpolate_end_index
is
None
:
interpolate_end_index
=
function_match
.
start
()
function_close_result
=
self
.
_find_closing_char_index
(
None
,
'('
,
')'
,
file_contents
,
start_index
=
function_match
.
end
(),
)
if
function_close_result
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_parse_error
,
Expression
(
function_match
.
start
())
))
else
:
expression
=
Expression
(
function_match
.
start
(),
function_close_result
[
'close_char_index'
]
+
1
,
file_contents
,
start_delim
=
function_match
.
group
(),
end_delim
=
")"
)
# if this an outer most Text(), HTML(), or format() call
if
end_index
is
None
:
end_index
=
expression
.
end_index
interpolate_end_index
=
expression
.
start_index
strings
=
function_close_result
[
'strings'
]
if
function_match
.
group
()
==
'.format('
:
if
'HTML('
in
expression
.
expression_inner
or
'Text('
in
expression
.
expression_inner
:
is_wrapped_with_text
=
str
(
function_match
.
start
())
in
text_calls_by_end_index
.
keys
()
is_wrapped_with_html
=
str
(
function_match
.
start
())
in
html_calls_by_end_index
.
keys
()
if
is_wrapped_with_text
is
False
and
is_wrapped_with_html
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_requires_html_or_text
,
expression
))
else
:
# expression is 'HTML(' or 'Text('
# HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
# Generally, format() would be the issue if there is one.
if
regex_function_open
.
search
(
expression
.
expression_inner
)
is
not
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
function_match
.
group
()
==
'Text('
:
text_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
else
:
# function_match.group() == 'HTML(':
html_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
html_calls
.
append
(
expression
)
Arguments:
e
xpression: An Expression
results: A list of results into which violations will be added.
start_index
=
function_match
.
end
()
e
lse
:
break
"""
if
'.display_name_with_default_escaped'
in
expression
.
expression
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_deprecated_display_name
,
expression
))
# checks strings in the outer most call to ensure they are properly
# wrapped with HTML()
self
.
_check_format_html_strings_wrapped
(
strings
,
html_calls
,
results
)
def
_check_html_and_text
(
self
,
expression
,
has_page_default
,
results
):
# compute where to continue the search
if
function_match
is
None
and
end_index
is
None
:
next_start_index
=
start_index
elif
end_index
is
None
:
next_start_index
=
function_match
.
end
()
else
:
next_start_index
=
end_index
return
{
'next_start_index'
:
next_start_index
,
'interpolate_end_index'
:
interpolate_end_index
,
}
def
_check_format_html_strings_wrapped
(
self
,
strings
,
html_calls
,
results
):
"""
Checks rules related to proper use of HTML() and Text().
Checks that any string inside a format call that seems to contain HTML
is wrapped with a call to HTML().
Arguments:
expression: A Mako Expression.
has_page_default: True if the page is marked as default, False
otherwise.
strings: A list of ParseStrings for each string inside the format()
call.
html_calls: A list of Expressions representing all of the HTML()
calls inside the format() call.
results: A list of results into which violations will be added.
"""
expression_inner
=
expression
.
expression_inner
# use find to get the template relative inner expression start index
# due to possible skipped white space
template_inner_start_index
=
expression
.
start_index
template_inner_start_index
+=
expression
.
expression
.
find
(
expression_inner
)
if
'HTML('
in
expression_inner
:
if
expression_inner
.
startswith
(
'HTML('
):
close_paren_index
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
len
(
'HTML('
)
)[
'close_char_index'
]
# check that the close paren is at the end of the stripped expression.
if
close_paren_index
!=
len
(
expression_inner
)
-
1
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_alone
,
expression
))
elif
expression_inner
.
startswith
(
'Text('
)
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_requires_text
,
expression
))
else
:
if
'Text('
in
expression_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_text_redundant
,
expression
))
# strings to be checked for HTML
unwrapped_html_strings
=
expression
.
strings
for
match
in
re
.
finditer
(
r"(HTML\(|Text\()"
,
expression_inner
):
result
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
match
.
end
())
if
result
is
not
None
:
close_paren_index
=
result
[
'close_char_index'
]
# the argument sent to HTML() or Text()
argument
=
expression_inner
[
match
.
end
():
close_paren_index
]
if
".format("
in
argument
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
match
.
group
()
==
"HTML("
:
# remove expression strings wrapped in HTML()
for
string
in
list
(
unwrapped_html_strings
):
html_inner_start_index
=
template_inner_start_index
+
match
.
end
()
html_inner_end_index
=
template_inner_start_index
+
close_paren_index
if
html_inner_start_index
<=
string
.
start_index
and
string
.
end_index
<=
html_inner_end_index
:
unwrapped_html_strings
.
remove
(
string
)
# check strings not wrapped in HTML() for '<'
for
string
in
unwrapped_html_strings
:
if
'<'
in
string
.
string_inner
:
html_strings
=
[]
html_wrapped_strings
=
[]
if
strings
is
not
None
:
# find all strings that contain HTML
for
string
in
strings
:
if
'<'
in
string
.
string
:
html_strings
.
append
(
string
)
# check if HTML string is appropriately wrapped
for
html_call
in
html_calls
:
if
html_call
.
start_index
<
string
.
start_index
<
string
.
end_index
<
html_call
.
end_index
:
html_wrapped_strings
.
append
(
string
)
break
# loop through all unwrapped strings
for
unsafe_string
in
set
(
html_strings
)
-
set
(
html_wrapped_strings
):
unsafe_string_expression
=
Expression
(
unsafe_string
.
start_index
)
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
expression
Rules
.
python_wrap_html
,
unsafe_string_
expression
))
break
# check strings not wrapped in HTML() for HTML entities
if
has_page_default
:
for
string
in
unwrapped_html_strings
:
if
re
.
search
(
r"&[#]?[a-zA-Z0-9]+;"
,
string
.
string_inner
):
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_entities
,
expression
))
break
def
_check_
filters
(
self
,
mako_template
,
expression
,
context
,
has_page_default
,
results
):
def
_check_
interpolate_with_html
(
self
,
file_contents
,
start_index
,
end_index
,
results
):
"""
Checks that the filters used in the given Mako expression are valid
for the given context. Adds violation to results if there is a problem
.
Find interpolations with html that fall outside of any calls to HTML(),
Text(), and .format()
.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
context: The context of the page in which the expression was found
(e.g. javascript, html).
has_page_default: True if the page is marked as default, False
otherwise.
file_contents: The contents of the Python file
start_index: The index to start the search, or None if nothing to
search
end_index: The index to end the search, or None if the end of file
should be used.
results: A list of results into which violations will be added.
"""
if
context
==
'unknown'
:
# used to find interpolation with HTML
pattern_interpolate_html_inner
=
r'(<.*
%
s|
%
s.*<|<.*{\w*}|{\w*}.*<)'
regex_interpolate_html
=
re
.
compile
(
r"""(".*{}.*"|'.*{}.*')"""
.
format
(
pattern_interpolate_html_inner
,
pattern_interpolate_html_inner
))
if
end_index
is
None
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
)
else
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
,
end_index
)
for
match_html_string
in
interpolate_string_iter
:
expression
=
Expression
(
match_html_string
.
start
(),
match_html_string
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unknown_context
,
expression
Rules
.
python_interpolate_html
,
expression
))
return
# Example: finds "| n, h}" when given "${x | n, h}"
filters_regex
=
re
.
compile
(
r'\|([.,\w\s]*)\}'
)
filters_match
=
filters_regex
.
search
(
expression
.
expression
)
if
filters_match
is
None
:
if
context
==
'javascript'
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
return
filters
=
filters_match
.
group
(
1
)
.
replace
(
" "
,
""
)
.
split
(
","
)
if
filters
==
[
'n'
,
'decode.utf8'
]:
# {x | n, decode.utf8} is valid in any context
pass
elif
context
==
'html'
:
if
filters
==
[
'h'
]:
if
has_page_default
:
# suppress this violation if the page default hasn't been set,
# otherwise the template might get less safe
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unwanted_html_filter
,
expression
))
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_html_filter
,
expression
))
elif
context
==
'javascript'
:
self
.
_check_js_expression_not_with_html
(
mako_template
,
expression
,
results
)
if
filters
==
[
'n'
,
'dump_js_escaped_json'
]:
# {x | n, dump_js_escaped_json} is valid
pass
elif
filters
==
[
'n'
,
'js_escaped_string'
]:
# {x | n, js_escaped_string} is valid, if surrounded by quotes
self
.
_check_js_string_expression_in_quotes
(
mako_template
,
expression
,
results
)
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
class
MakoTemplateLinter
(
BaseLinter
):
"""
The linter for Mako template files.
"""
javaScriptLinter
=
JavaScriptLinter
()
def
_check_js_string_expression_in_quotes
(
self
,
mako_template
,
expression
,
results
):
def
process_file
(
self
,
directory
,
file_name
):
"""
Checks that a Mako expression using js_escaped_string is surrounded by
quotes
.
Process file to determine if it is a Mako template file and
if it is safe
.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
if
parse_string
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_missing_quotes
,
expression
))
directory (string): The directory of the file to be checked
file_name (string): A filename for a potential Mako file
def
_check_js_expression_not_with_html
(
self
,
mako_template
,
expression
,
results
):
"""
Checks that a Mako expression in a JavaScript context does not appear in
a string that also contains HTML.
Returns:
The file results containing any violations.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
if
parse_string
is
not
None
and
re
.
search
(
'[<>]'
,
parse_string
.
string
)
is
not
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_html_string
,
expression
))
mako_file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
results
=
FileResults
(
mako_file_full_path
)
def
_find_string_wrapping_expression
(
self
,
mako_template
,
expression
):
if
not
results
.
is_file
:
return
results
if
not
self
.
_is_valid_directory
(
directory
):
return
results
# TODO: When safe-by-default is turned on at the platform level, will we:
# 1. Turn it on for .html only, or
# 2. Turn it on for all files, and have different rulesets that have
# different rules of .xml, .html, .js, .txt Mako templates (e.g. use
# the n filter to turn off h for some of these)?
# For now, we only check .html and .xml files
if
not
(
file_name
.
lower
()
.
endswith
(
'.html'
)
or
file_name
.
lower
()
.
endswith
(
'.xml'
)):
return
results
return
self
.
_load_and_check_file_is_safe
(
mako_file_full_path
,
self
.
_check_mako_file_is_safe
,
results
)
def
_is_valid_directory
(
self
,
directory
):
"""
Finds the string wrapping the Mako expression if there is one.
Determines if the provided directory is a directory that could contain
Mako template files that need to be linted.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
directory: The directory to be linted.
Returns:
ParseString representing a scrubbed version of the wrapped string,
where the Mako expression was replaced with "${...}", if a wrapped
string was found. Otherwise, returns None if none found.
True if this directory should be linted for Mako template violations
and False otherwise.
"""
lines
=
StringLines
(
mako_template
)
start_index
=
lines
.
index_to_line_start_index
(
expression
.
start_index
)
if
expression
.
end_index
is
not
None
:
end_index
=
lines
.
index_to_line_end_index
(
expression
.
end_index
)
else
:
return
None
# scrub out the actual expression so any code inside the expression
# doesn't interfere with rules applied to the surrounding code (i.e.
# checking JavaScript).
scrubbed_lines
=
""
.
join
((
mako_template
[
start_index
:
expression
.
start_index
],
"${...}"
,
mako_template
[
expression
.
end_index
:
end_index
]
))
adjusted_start_index
=
expression
.
start_index
-
start_index
start_index
=
0
while
True
:
parse_string
=
ParseString
(
scrubbed_lines
,
start_index
,
len
(
scrubbed_lines
))
# check for validly parsed string
if
0
<=
parse_string
.
start_index
<
parse_string
.
end_index
:
# check if expression is contained in the given string
if
parse_string
.
start_index
<
adjusted_start_index
<
parse_string
.
end_index
:
return
parse_string
else
:
# move to check next string
start_index
=
parse_string
.
end_index
else
:
break
return
None
if
self
.
_is_skip_dir
(
self
.
_skip_dirs
,
directory
):
return
False
def
_get_contexts
(
self
,
mako_template
):
"""
Returns a data structure that represents the indices at which the
template changes from HTML context to JavaScript and back.
# TODO: This is an imperfect guess concerning the Mako template
# directories. This needs to be reviewed before turning on safe by
# default at the platform level.
if
(
'/templates/'
in
directory
)
or
directory
.
endswith
(
'/templates'
):
return
True
Return:
A list of dicts where each dict contains:
- index: the index of the context.
- type: the context type (e.g. 'html' or 'javascript').
return
False
def
_check_mako_file_is_safe
(
self
,
mako_template
,
results
):
"""
contexts_re
=
re
.
compile
(
r"""
<script.*?> | # script tag start
</script> | # script tag end
<
%
static:require_module.*?> | # require js script tag start
</
%
static:require_module> | # require js script tag end
<
%
block[ ]*name=['"]requirejs['"]\w*> | # require js tag start
</
%
block> # require js tag end
"""
,
re
.
VERBOSE
|
re
.
IGNORECASE
)
media_type_re
=
re
.
compile
(
r"""type=['"].*?['"]"""
,
re
.
IGNORECASE
)
Checks for violations in a Mako template.
contexts
=
[{
'index'
:
0
,
'type'
:
'html'
}]
javascript_types
=
[
'text/javascript'
,
'text/ecmascript'
,
'application/ecmascript'
,
'application/javascript'
,
'text/x-mathjax-config'
,
'json/xblock-args'
]
html_types
=
[
'text/template'
]
for
context
in
contexts_re
.
finditer
(
mako_template
):
match_string
=
context
.
group
()
.
lower
()
if
match_string
.
startswith
(
"<script"
):
match_type
=
media_type_re
.
search
(
match_string
)
context_type
=
'javascript'
if
match_type
is
not
None
:
# get media type (e.g. get text/javascript from
# type="text/javascript")
match_type
=
match_type
.
group
()[
6
:
-
1
]
.
lower
()
if
match_type
in
html_types
:
context_type
=
'html'
elif
match_type
not
in
javascript_types
:
context_type
=
'unknown'
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
context_type
})
elif
match_string
.
startswith
(
"</"
):
contexts
.
append
({
'index'
:
context
.
start
(),
'type'
:
'html'
})
else
:
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
'javascript'
})
Arguments:
mako_template: The contents of the Mako template.
results: A file results objects to which violations will be added.
return
contexts
"""
if
self
.
_is_django_template
(
mako_template
):
return
has_page_default
=
self
.
_has_page_default
(
mako_template
,
results
)
self
.
_check_mako_expressions
(
mako_template
,
has_page_default
,
results
)
results
.
prepare_results
(
mako_template
,
line_comment_delim
=
'##'
)
def
_
get_context
(
self
,
contexts
,
index
):
def
_
is_django_template
(
self
,
mako_template
):
"""
Gets the context (e.g. javascript, html) of the template at the given
index.
Determines if the template is actually a Django template.
Arguments:
contexts: A list of dicts where each dict contains the 'index' of the context
and the context 'type' (e.g. 'html' or 'javascript').
index: The index for which we want the context.
mako_template: The template code.
Returns:
The context (e.g. javascript or html) for the given index.
True if this is really a Django template, and False otherwise.
"""
current_context
=
contexts
[
0
][
'type'
]
for
context
in
contexts
:
if
context
[
'index'
]
<=
index
:
current_context
=
context
[
'type'
]
else
:
break
return
current_context
if
re
.
search
(
'({
%.*%
})|({{.*}})'
,
mako_template
)
is
not
None
:
return
True
return
False
def
_
find_mako_expressions
(
self
,
mako_template
):
def
_
get_page_tag_count
(
self
,
mako_template
):
"""
Finds all the Mako expressions in a Mako template and creates a list
of dicts for each expression
.
Determines the number of page expressions in the Mako template. Ignores
page expressions that are commented out
.
Arguments:
mako_template: The content of the Mako template.
mako_template: The content
s
of the Mako template.
Returns:
A list of Expressions.
The number of page expressions
"""
start_delim
=
'${'
start_index
=
0
expressions
=
[]
while
True
:
start_index
=
mako_template
.
find
(
start_delim
,
start_index
)
if
start_index
<
0
:
break
count
=
len
(
re
.
findall
(
'<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
count_commented
=
len
(
re
.
findall
(
r'##\s+<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
return
max
(
0
,
count
-
count_commented
)
result
=
self
.
_find_closing_char_index
(
start_delim
,
'{'
,
'}'
,
mako_template
,
start_index
=
start_index
+
len
(
start_delim
)
)
if
result
is
None
:
expression
=
Expression
(
start_index
)
# for parsing error, restart search right after the start of the
# current expression
start_index
=
start_index
+
len
(
start_delim
)
else
:
close_char_index
=
result
[
'close_char_index'
]
expression
=
mako_template
[
start_index
:
close_char_index
+
1
]
expression
=
Expression
(
start_index
,
end_index
=
close_char_index
+
1
,
template
=
mako_template
,
start_delim
=
start_delim
,
end_delim
=
'}'
,
strings
=
result
[
'strings'
],
)
# restart search after the current expression
start_index
=
expression
.
end_index
expressions
.
append
(
expression
)
return
expressions
def
_has_page_default
(
self
,
mako_template
,
results
):
"""
Checks if the Mako template contains the page expression marking it as
safe by default.
Arguments:
mako_template: The contents of the Mako template.
results: A list of results into which violations will be added.
class
PythonLinter
(
BaseLinter
):
"""
The linter for Python files.
Side effect:
Adds violations regarding page default if necessary
The current implementation of the linter does naive Python parsing. It does
not use the parser. One known issue is that parsing errors found inside a
docstring need to be disabled, rather than being automatically skipped.
Skipping docstrings is an enhancement that could be added.
"""
Returns:
True if the template has the page default, and False otherwise.
def
__init__
(
self
):
"""
Init method.
"""
super
(
PythonLinter
,
self
)
.
__init__
()
self
.
_skip_python_dirs
=
self
.
_skip_dirs
+
(
'tests'
,
'test/acceptance'
)
page_tag_count
=
self
.
_get_page_tag_count
(
mako_template
)
# check if there are too many page expressions
if
2
<=
page_tag_count
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_multiple_page_tags
))
return
False
# make sure there is exactly 1 page expression, excluding commented out
# page expressions, before proceeding
elif
page_tag_count
!=
1
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
False
# check that safe by default (h filter) is turned on
page_h_filter_regex
=
re
.
compile
(
'<
%
page[^>]*expression_filter=(?:"h"|
\'
h
\'
)[^>]*/>'
)
page_match
=
page_h_filter_regex
.
search
(
mako_template
)
if
not
page_match
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
page_match
def
process_file
(
self
,
directory
,
file_name
):
def
_check_mako_expressions
(
self
,
mako_template
,
has_page_default
,
results
):
"""
Process file to determine if it is a Python file and
if it is safe.
Searches for Mako expressions and then checks if they contain
violations, including checking JavaScript contexts for JavaScript
violations.
Arguments:
directory (string): The directory of the file to be checked
file_name (string): A filename for a potential Python file
Returns:
The file results containing any violations.
mako_template: The contents of the Mako template.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
"""
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
results
=
FileResults
(
file_full_path
)
if
not
results
.
is_file
:
return
results
expressions
=
self
.
_find_mako_expressions
(
mako_template
)
contexts
=
self
.
_get_contexts
(
mako_template
)
self
.
_check_javascript_contexts
(
mako_template
,
contexts
,
results
)
for
expression
in
expressions
:
if
expression
.
end_index
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unparseable_expression
,
expression
))
continue
if
file_name
.
lower
()
.
endswith
(
'.py'
)
is
False
:
return
results
context
=
self
.
_get_context
(
contexts
,
expression
.
start_index
)
self
.
_check_filters
(
mako_template
,
expression
,
context
,
has_page_default
,
results
)
self
.
_check_deprecated_display_name
(
expression
,
results
)
self
.
_check_html_and_text
(
expression
,
has_page_default
,
results
)
# skip this linter code (i.e. safe_template_linter.py)
if
file_name
==
os
.
path
.
basename
(
__file__
):
return
results
def
_check_javascript_contexts
(
self
,
mako_template
,
contexts
,
results
):
"""
Lint the JavaScript contexts for JavaScript violations inside a Mako
template.
if
not
self
.
_is_valid_directory
(
self
.
_skip_python_dirs
,
directory
):
return
results
Arguments:
mako_template: The contents of the Mako template.
contexts: A list of context dicts with 'type' and 'index'.
results: A list of results into which violations will be added.
return
self
.
_load_and_check_file_is_safe
(
file_full_path
,
self
.
check_python_file_is_safe
,
results
)
Side effect:
Adds JavaScript violations to results.
"""
javascript_start_index
=
None
for
context
in
contexts
:
if
context
[
'type'
]
==
'javascript'
:
if
javascript_start_index
<
0
:
javascript_start_index
=
context
[
'index'
]
else
:
if
javascript_start_index
is
not
None
:
javascript_end_index
=
context
[
'index'
]
javascript_code
=
mako_template
[
javascript_start_index
:
javascript_end_index
]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
javascript_start_index
=
None
if
javascript_start_index
is
not
None
:
javascript_code
=
mako_template
[
javascript_start_index
:]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
def
check_python_file_is_safe
(
self
,
file_contents
,
results
):
def
_check_javascript_context
(
self
,
javascript_code
,
start_offset
,
results
):
"""
Checks for violations in a Python file.
Lint a single JavaScript context for JavaScript violations inside a Mako
template.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
javascript_code: The template contents of the JavaScript context.
start_offset: The offset of the JavaScript context inside the
original Mako template.
results: A list of results into which violations will be added.
Side effect:
Adds JavaScript violations to results.
"""
self
.
_check_concat_with_html
(
file_contents
,
Rules
.
python_concat_html
,
results
)
self
.
_check_deprecated_display_name
(
file_contents
,
results
)
self
.
_check_custom_escape
(
file_contents
,
results
)
self
.
_check_html
(
file_contents
,
results
)
results
.
prepare_results
(
file_contents
,
line_comment_delim
=
'#'
)
javascript_results
=
FileResults
(
""
)
self
.
javaScriptLinter
.
check_javascript_file_is_safe
(
javascript_code
,
javascript_results
)
# translate the violations into the location within the original
# Mako template
for
violation
in
javascript_results
.
violations
:
expression
=
violation
.
expression
expression
.
start_index
+=
start_offset
if
expression
.
end_index
is
not
None
:
expression
.
end_index
+=
start_offset
results
.
violations
.
append
(
ExpressionRuleViolation
(
violation
.
rule
,
expression
))
def
_check_deprecated_display_name
(
self
,
file_contents
,
results
):
def
_check_deprecated_display_name
(
self
,
expression
,
results
):
"""
Checks that the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem.
Arguments:
file_contents: The contents of the Python file
expression: An Expression
results: A list of results into which violations will be added.
"""
for
match
in
re
.
finditer
(
r'\.display_name_with_default_escaped'
,
file_contents
):
expression
=
Expression
(
match
.
start
(),
match
.
end
())
if
'.display_name_with_default_escaped'
in
expression
.
expression
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_deprecated_display_name
,
expression
))
def
_check_
custom_escape
(
self
,
file_contents
,
results
):
def
_check_
html_and_text
(
self
,
expression
,
has_page_default
,
results
):
"""
Checks for custom escaping calls, rather than using a standard escaping
method.
Checks rules related to proper use of HTML() and Text().
Arguments:
file_contents: The contents of the Python file
expression: A Mako Expression.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
"""
for
match
in
re
.
finditer
(
"(<.*<|<.*<)"
,
file_contents
):
expression
=
Expression
(
match
.
start
(),
match
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_custom_escape
,
expression
))
expression_inner
=
expression
.
expression_inner
# use find to get the template relative inner expression start index
# due to possible skipped white space
template_inner_start_index
=
expression
.
start_index
template_inner_start_index
+=
expression
.
expression
.
find
(
expression_inner
)
if
'HTML('
in
expression_inner
:
if
expression_inner
.
startswith
(
'HTML('
):
close_paren_index
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
len
(
'HTML('
)
)[
'close_char_index'
]
# check that the close paren is at the end of the stripped expression.
if
close_paren_index
!=
len
(
expression_inner
)
-
1
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_alone
,
expression
))
elif
expression_inner
.
startswith
(
'Text('
)
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_requires_text
,
expression
))
else
:
if
'Text('
in
expression_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_text_redundant
,
expression
))
def
_check_html
(
self
,
file_contents
,
results
):
# strings to be checked for HTML
unwrapped_html_strings
=
expression
.
strings
for
match
in
re
.
finditer
(
r"(HTML\(|Text\()"
,
expression_inner
):
result
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
match
.
end
())
if
result
is
not
None
:
close_paren_index
=
result
[
'close_char_index'
]
# the argument sent to HTML() or Text()
argument
=
expression_inner
[
match
.
end
():
close_paren_index
]
if
".format("
in
argument
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
match
.
group
()
==
"HTML("
:
# remove expression strings wrapped in HTML()
for
string
in
list
(
unwrapped_html_strings
):
html_inner_start_index
=
template_inner_start_index
+
match
.
end
()
html_inner_end_index
=
template_inner_start_index
+
close_paren_index
if
html_inner_start_index
<=
string
.
start_index
and
string
.
end_index
<=
html_inner_end_index
:
unwrapped_html_strings
.
remove
(
string
)
# check strings not wrapped in HTML() for '<'
for
string
in
unwrapped_html_strings
:
if
'<'
in
string
.
string_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
expression
))
break
# check strings not wrapped in HTML() for HTML entities
if
has_page_default
:
for
string
in
unwrapped_html_strings
:
if
re
.
search
(
r"&[#]?[a-zA-Z0-9]+;"
,
string
.
string_inner
):
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_entities
,
expression
))
break
def
_check_filters
(
self
,
mako_template
,
expression
,
context
,
has_page_default
,
results
):
"""
Checks many rules related to HTML in a Python file.
Checks that the filters used in the given Mako expression are valid
for the given context. Adds violation to results if there is a problem.
Arguments:
file_contents: The contents of the Python file
mako_template: The contents of the Mako template.
expression: A Mako Expression.
context: The context of the page in which the expression was found
(e.g. javascript, html).
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
"""
# Text() Expressions keyed by its end index
text_calls_by_end_index
=
{}
# HTML() Expressions keyed by its end index
html_calls_by_end_index
=
{}
start_index
=
0
while
True
:
# check HTML(), Text() and format() calls
result
=
self
.
_check_html_text_format
(
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
)
next_start_index
=
result
[
'next_start_index'
]
interpolate_end_index
=
result
[
'interpolate_end_index'
]
# check for interpolation including HTML outside of function calls
self
.
_check_interpolate_with_html
(
file_contents
,
start_index
,
interpolate_end_index
,
results
)
if
context
==
'unknown'
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unknown_context
,
expression
))
return
# advance the search
start_index
=
next_start_index
# Example: finds "| n, h}" when given "${x | n, h}"
filters_regex
=
re
.
compile
(
r'\|([.,\w\s]*)\}'
)
filters_match
=
filters_regex
.
search
(
expression
.
expression
)
if
filters_match
is
None
:
if
context
==
'javascript'
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
return
# end if there is nothing left to search
if
interpolate_end_index
is
None
:
break
filters
=
filters_match
.
group
(
1
)
.
replace
(
" "
,
""
)
.
split
(
","
)
if
filters
==
[
'n'
,
'decode.utf8'
]:
# {x | n, decode.utf8} is valid in any context
pass
elif
context
==
'html'
:
if
filters
==
[
'h'
]:
if
has_page_default
:
# suppress this violation if the page default hasn't been set,
# otherwise the template might get less safe
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unwanted_html_filter
,
expression
))
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_html_filter
,
expression
))
elif
context
==
'javascript'
:
self
.
_check_js_expression_not_with_html
(
mako_template
,
expression
,
results
)
if
filters
==
[
'n'
,
'dump_js_escaped_json'
]:
# {x | n, dump_js_escaped_json} is valid
pass
elif
filters
==
[
'n'
,
'js_escaped_string'
]:
# {x | n, js_escaped_string} is valid, if surrounded by quotes
self
.
_check_js_string_expression_in_quotes
(
mako_template
,
expression
,
results
)
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
def
_check_html_text_format
(
self
,
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
):
def
_check_js_string_expression_in_quotes
(
self
,
mako_template
,
expression
,
results
):
"""
Checks
for HTML(), Text() and format() calls, and various rules related
to these call
s.
Checks
that a Mako expression using js_escaped_string is surrounded by
quote
s.
Arguments:
file_contents: The contents of the Python file
start_index: The index at which to begin searching for a function
call.
text_calls_by_end_index: Text() Expressions keyed by its end index.
html_calls_by_end_index: HTML() Expressions keyed by its end index.
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
if
parse_string
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_missing_quotes
,
expression
))
Returns:
A dict with the following keys:
'next_start_index': The start index of the next search for a
function call.
'interpolate_end_index': The end index of the next next search
for interpolation with html, or None if the end of file
should be used.
def
_check_js_expression_not_with_html
(
self
,
mako_template
,
expression
,
results
):
"""
Checks that a Mako expression in a JavaScript context does not appear in
a string that also contains HTML.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
"""
# used to find opening of .format(), Text() and HTML() calls
regex_function_open
=
re
.
compile
(
r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()"
)
interpolate_end_index
=
None
end_index
=
None
strings
=
None
html_calls
=
[]
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
if
parse_string
is
not
None
and
re
.
search
(
'[<>]'
,
parse_string
.
string
)
is
not
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_html_string
,
expression
))
def
_find_string_wrapping_expression
(
self
,
mako_template
,
expression
):
"""
Finds the string wrapping the Mako expression if there is one.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
Returns:
ParseString representing a scrubbed version of the wrapped string,
where the Mako expression was replaced with "${...}", if a wrapped
string was found. Otherwise, returns None if none found.
"""
lines
=
StringLines
(
mako_template
)
start_index
=
lines
.
index_to_line_start_index
(
expression
.
start_index
)
if
expression
.
end_index
is
not
None
:
end_index
=
lines
.
index_to_line_end_index
(
expression
.
end_index
)
else
:
return
None
# scrub out the actual expression so any code inside the expression
# doesn't interfere with rules applied to the surrounding code (i.e.
# checking JavaScript).
scrubbed_lines
=
""
.
join
((
mako_template
[
start_index
:
expression
.
start_index
],
"${...}"
,
mako_template
[
expression
.
end_index
:
end_index
]
))
adjusted_start_index
=
expression
.
start_index
-
start_index
start_index
=
0
while
True
:
# first search for HTML(), Text(), or .format()
if
end_index
is
None
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
)
else
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
,
end_index
)
if
function_match
is
not
None
:
if
interpolate_end_index
is
None
:
interpolate_end_index
=
function_match
.
start
()
function_close_result
=
self
.
_find_closing_char_index
(
None
,
'('
,
')'
,
file_contents
,
start_index
=
function_match
.
end
(),
)
if
function_close_result
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_parse_error
,
Expression
(
function_match
.
start
())
))
parse_string
=
ParseString
(
scrubbed_lines
,
start_index
,
len
(
scrubbed_lines
))
# check for validly parsed string
if
0
<=
parse_string
.
start_index
<
parse_string
.
end_index
:
# check if expression is contained in the given string
if
parse_string
.
start_index
<
adjusted_start_index
<
parse_string
.
end_index
:
return
parse_string
else
:
expression
=
Expression
(
function_match
.
start
(),
function_close_result
[
'close_char_index'
]
+
1
,
file_contents
,
start_delim
=
function_match
.
group
(),
end_delim
=
")"
)
# if this an outer most Text(), HTML(), or format() call
if
end_index
is
None
:
end_index
=
expression
.
end_index
interpolate_end_index
=
expression
.
start_index
strings
=
function_close_result
[
'strings'
]
if
function_match
.
group
()
==
'.format('
:
if
'HTML('
in
expression
.
expression_inner
or
'Text('
in
expression
.
expression_inner
:
is_wrapped_with_text
=
str
(
function_match
.
start
())
in
text_calls_by_end_index
.
keys
()
is_wrapped_with_html
=
str
(
function_match
.
start
())
in
html_calls_by_end_index
.
keys
()
if
is_wrapped_with_text
is
False
and
is_wrapped_with_html
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_requires_html_or_text
,
expression
))
else
:
# expression is 'HTML(' or 'Text('
# HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
# Generally, format() would be the issue if there is one.
if
regex_function_open
.
search
(
expression
.
expression_inner
)
is
not
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
function_match
.
group
()
==
'Text('
:
text_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
else
:
# function_match.group() == 'HTML(':
html_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
html_calls
.
append
(
expression
)
start_index
=
function_match
.
end
()
# move to check next string
start_index
=
parse_string
.
end_index
else
:
break
return
None
# checks strings in the outer most call to ensure they are properly
# wrapped with HTML()
self
.
_check_format_html_strings_wrapped
(
strings
,
html_calls
,
results
)
def
_get_contexts
(
self
,
mako_template
):
"""
Returns a data structure that represents the indices at which the
template changes from HTML context to JavaScript and back.
# compute where to continue the search
if
function_match
is
None
and
end_index
is
None
:
next_start_index
=
start_index
elif
end_index
is
None
:
next_start_index
=
function_match
.
end
()
else
:
next_start_index
=
end_index
Return:
A list of dicts where each dict contains:
- index: the index of the context.
- type: the context type (e.g. 'html' or 'javascript').
"""
contexts_re
=
re
.
compile
(
r"""
<script.*?> | # script tag start
</script> | # script tag end
<
%
static:require_module.*?> | # require js script tag start
</
%
static:require_module> | # require js script tag end
<
%
block[ ]*name=['"]requirejs['"]\w*> | # require js tag start
</
%
block> # require js tag end
"""
,
re
.
VERBOSE
|
re
.
IGNORECASE
)
media_type_re
=
re
.
compile
(
r"""type=['"].*?['"]"""
,
re
.
IGNORECASE
)
return
{
'next_start_index'
:
next_start_index
,
'interpolate_end_index'
:
interpolate_end_index
,
}
contexts
=
[{
'index'
:
0
,
'type'
:
'html'
}]
javascript_types
=
[
'text/javascript'
,
'text/ecmascript'
,
'application/ecmascript'
,
'application/javascript'
,
'text/x-mathjax-config'
,
'json/xblock-args'
]
html_types
=
[
'text/template'
]
for
context
in
contexts_re
.
finditer
(
mako_template
):
match_string
=
context
.
group
()
.
lower
()
if
match_string
.
startswith
(
"<script"
):
match_type
=
media_type_re
.
search
(
match_string
)
context_type
=
'javascript'
if
match_type
is
not
None
:
# get media type (e.g. get text/javascript from
# type="text/javascript")
match_type
=
match_type
.
group
()[
6
:
-
1
]
.
lower
()
if
match_type
in
html_types
:
context_type
=
'html'
elif
match_type
not
in
javascript_types
:
context_type
=
'unknown'
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
context_type
})
elif
match_string
.
startswith
(
"</"
):
contexts
.
append
({
'index'
:
context
.
start
(),
'type'
:
'html'
})
else
:
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
'javascript'
})
def
_check_format_html_strings_wrapped
(
self
,
strings
,
html_calls
,
results
):
return
contexts
def
_get_context
(
self
,
contexts
,
index
):
"""
Checks that any string inside a format call that seems to contain HTML
i
s wrapped with a call to HTML()
.
Gets the context (e.g. javascript, html) of the template at the given
i
ndex
.
Arguments:
strings: A list of ParseStrings for each string inside the format()
call.
html_calls: A list of Expressions representing all of the HTML()
calls inside the format() call.
results: A list of results into which violations will be added.
contexts: A list of dicts where each dict contains the 'index' of the context
and the context 'type' (e.g. 'html' or 'javascript').
index: The index for which we want the context.
Returns:
The context (e.g. javascript or html) for the given index.
"""
html_strings
=
[]
html_wrapped_strings
=
[]
if
strings
is
not
None
:
# find all strings that contain HTML
for
string
in
strings
:
if
'<'
in
string
.
string
:
html_strings
.
append
(
string
)
# check if HTML string is appropriately wrapped
for
html_call
in
html_calls
:
if
html_call
.
start_index
<
string
.
start_index
<
string
.
end_index
<
html_call
.
end_index
:
html_wrapped_strings
.
append
(
string
)
break
# loop through all unwrapped strings
for
unsafe_string
in
set
(
html_strings
)
-
set
(
html_wrapped_strings
):
unsafe_string_expression
=
Expression
(
unsafe_string
.
start_index
)
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
unsafe_string_expression
))
current_context
=
contexts
[
0
][
'type'
]
for
context
in
contexts
:
if
context
[
'index'
]
<=
index
:
current_context
=
context
[
'type'
]
else
:
break
return
current_context
def
_
check_interpolate_with_html
(
self
,
file_contents
,
start_index
,
end_index
,
results
):
def
_
find_mako_expressions
(
self
,
mako_template
):
"""
Find
interpolations with html that fall outside of any calls to HTML(),
Text(), and .format()
.
Find
s all the Mako expressions in a Mako template and creates a list
of dicts for each expression
.
Arguments:
file_contents: The contents of the Python file
start_index: The index to start the search, or None if nothing to
search
end_index: The index to end the search, or None if the end of file
should be used.
results: A list of results into which violations will be added.
mako_template: The content of the Mako template.
Returns:
A list of Expressions.
"""
# used to find interpolation with HTML
pattern_interpolate_html_inner
=
r'(<.*
%
s|
%
s.*<|<.*{\w*}|{\w*}.*<)'
regex_interpolate_html
=
re
.
compile
(
r"""(".*{}.*"|'.*{}.*')"""
.
format
(
pattern_interpolate_html_inner
,
pattern_interpolate_html_inner
))
if
end_index
is
None
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
)
else
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
,
end_index
)
for
match_html_string
in
interpolate_string_iter
:
expression
=
Expression
(
match_html_string
.
start
(),
match_html_string
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_interpolate_html
,
expression
))
start_delim
=
'${'
start_index
=
0
expressions
=
[]
while
True
:
start_index
=
mako_template
.
find
(
start_delim
,
start_index
)
if
start_index
<
0
:
break
result
=
self
.
_find_closing_char_index
(
start_delim
,
'{'
,
'}'
,
mako_template
,
start_index
=
start_index
+
len
(
start_delim
)
)
if
result
is
None
:
expression
=
Expression
(
start_index
)
# for parsing error, restart search right after the start of the
# current expression
start_index
=
start_index
+
len
(
start_delim
)
else
:
close_char_index
=
result
[
'close_char_index'
]
expression
=
mako_template
[
start_index
:
close_char_index
+
1
]
expression
=
Expression
(
start_index
,
end_index
=
close_char_index
+
1
,
template
=
mako_template
,
start_delim
=
start_delim
,
end_delim
=
'}'
,
strings
=
result
[
'strings'
],
)
# restart search after the current expression
start_index
=
expression
.
end_index
expressions
.
append
(
expression
)
return
expressions
def
_process_file
(
full_path
,
template_linters
,
options
,
out
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment