Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
b1313671
Commit
b1313671
authored
May 02, 2016
by
Robert Raposa
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #12319 from edx/robrap/linter-move-mako
Move MakoTemplateLinter.
parents
15fc53c7
fb13dc64
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
655 additions
and
655 deletions
+655
-655
scripts/safe_template_linter.py
+655
-655
No files found.
scripts/safe_template_linter.py
View file @
b1313671
...
@@ -1407,855 +1407,855 @@ class JavaScriptLinter(BaseLinter):
...
@@ -1407,855 +1407,855 @@ class JavaScriptLinter(BaseLinter):
return
False
return
False
class
MakoTemplate
Linter
(
BaseLinter
):
class
Python
Linter
(
BaseLinter
):
"""
"""
The linter for Mako template files.
The linter for Python files.
The current implementation of the linter does naive Python parsing. It does
not use the parser. One known issue is that parsing errors found inside a
docstring need to be disabled, rather than being automatically skipped.
Skipping docstrings is an enhancement that could be added.
"""
"""
javaScriptLinter
=
JavaScriptLinter
()
def
__init__
(
self
):
"""
Init method.
"""
super
(
PythonLinter
,
self
)
.
__init__
()
self
.
_skip_python_dirs
=
self
.
_skip_dirs
+
(
'tests'
,
'test/acceptance'
)
def
process_file
(
self
,
directory
,
file_name
):
def
process_file
(
self
,
directory
,
file_name
):
"""
"""
Process file to determine if it is a
Mako template
file and
Process file to determine if it is a
Python
file and
if it is safe.
if it is safe.
Arguments:
Arguments:
directory (string): The directory of the file to be checked
directory (string): The directory of the file to be checked
file_name (string): A filename for a potential
Mako
file
file_name (string): A filename for a potential
Python
file
Returns:
Returns:
The file results containing any violations.
The file results containing any violations.
"""
"""
mako_
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
results
=
FileResults
(
mako_
file_full_path
)
results
=
FileResults
(
file_full_path
)
if
not
results
.
is_file
:
if
not
results
.
is_file
:
return
results
return
results
if
not
self
.
_is_valid_directory
(
directory
)
:
if
file_name
.
lower
()
.
endswith
(
'.py'
)
is
False
:
return
results
return
results
# TODO: When safe-by-default is turned on at the platform level, will we:
# skip this linter code (i.e. safe_template_linter.py)
# 1. Turn it on for .html only, or
if
file_name
==
os
.
path
.
basename
(
__file__
):
# 2. Turn it on for all files, and have different rulesets that have
# different rules of .xml, .html, .js, .txt Mako templates (e.g. use
# the n filter to turn off h for some of these)?
# For now, we only check .html and .xml files
if
not
(
file_name
.
lower
()
.
endswith
(
'.html'
)
or
file_name
.
lower
()
.
endswith
(
'.xml'
)):
return
results
return
results
return
self
.
_load_and_check_file_is_safe
(
mako_file_full_path
,
self
.
_check_mako_file_is_safe
,
results
)
if
not
self
.
_is_valid_directory
(
self
.
_skip_python_dirs
,
directory
):
return
results
def
_is_valid_directory
(
self
,
directory
):
"""
Determines if the provided directory is a directory that could contain
Mako template files that need to be linted.
Arguments:
directory: The directory to be linted.
Returns:
True if this directory should be linted for Mako template violations
and False otherwise.
"""
if
self
.
_is_skip_dir
(
self
.
_skip_dirs
,
directory
):
return
False
# TODO: This is an imperfect guess concerning the Mako template
# directories. This needs to be reviewed before turning on safe by
# default at the platform level.
if
(
'/templates/'
in
directory
)
or
directory
.
endswith
(
'/templates'
):
return
True
return
False
return
self
.
_load_and_check_file_is_safe
(
file_full_path
,
self
.
check_python_file_is_safe
,
results
)
def
_check_mako_file_is_safe
(
self
,
mako_template
,
results
):
def
check_python_file_is_safe
(
self
,
file_contents
,
results
):
"""
"""
Checks for violations in a
Mako templat
e.
Checks for violations in a
Python fil
e.
Arguments:
Arguments:
mako_template: The contents of the Mako templat
e.
file_contents: The contents of the Python fil
e.
results: A file results objects to which violations will be added.
results: A file results objects to which violations will be added.
"""
"""
if
self
.
_is_django_template
(
mako_template
):
self
.
_check_concat_with_html
(
file_contents
,
Rules
.
python_concat_html
,
results
)
return
self
.
_check_deprecated_display_name
(
file_contents
,
results
)
has_page_default
=
self
.
_has_page_default
(
mako_template
,
results
)
self
.
_check_custom_escape
(
file_contents
,
results
)
self
.
_check_mako_expressions
(
mako_template
,
has_page_default
,
results
)
self
.
_check_html
(
file_contents
,
results
)
results
.
prepare_results
(
mako_template
,
line_comment_delim
=
'##'
)
results
.
prepare_results
(
file_contents
,
line_comment_delim
=
'#'
)
def
_is_django_template
(
self
,
mako_template
):
"""
Determines if the template is actually a Django template.
Arguments:
mako_template: The template code.
Returns:
True if this is really a Django template, and False otherwise.
"""
if
re
.
search
(
'({
%.*%
})|({{.*}})'
,
mako_template
)
is
not
None
:
return
True
return
False
def
_
get_page_tag_count
(
self
,
mako_template
):
def
_
check_deprecated_display_name
(
self
,
file_contents
,
results
):
"""
"""
Determines the number of page expressions in the Mako template. Ignores
Checks that the deprecated display_name_with_default_escaped is not
page expressions that are commented out
.
used. Adds violation to results if there is a problem
.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
Returns:
The number of page expressions
"""
"""
count
=
len
(
re
.
findall
(
'<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
for
match
in
re
.
finditer
(
r'\.display_name_with_default_escaped'
,
file_contents
):
count_commented
=
len
(
re
.
findall
(
r'##\s+<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
expression
=
Expression
(
match
.
start
(),
match
.
end
())
return
max
(
0
,
count
-
count_commented
)
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_deprecated_display_name
,
expression
))
def
_
has_page_default
(
self
,
mako_template
,
results
):
def
_
check_custom_escape
(
self
,
file_contents
,
results
):
"""
"""
Checks
if the Mako template contains the page expression marking it as
Checks
for custom escaping calls, rather than using a standard escaping
safe by default
.
method
.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
Side effect:
Adds violations regarding page default if necessary
Returns:
True if the template has the page default, and False otherwise.
"""
"""
page_tag_count
=
self
.
_get_page_tag_count
(
mako_template
)
for
match
in
re
.
finditer
(
"(<.*<|<.*<)"
,
file_contents
):
# check if there are too many page expressions
expression
=
Expression
(
match
.
start
(),
match
.
end
())
if
2
<=
page_tag_count
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_multiple_page_tags
))
Rules
.
python_custom_escape
,
expression
return
False
))
# make sure there is exactly 1 page expression, excluding commented out
# page expressions, before proceeding
elif
page_tag_count
!=
1
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
False
# check that safe by default (h filter) is turned on
page_h_filter_regex
=
re
.
compile
(
'<
%
page[^>]*expression_filter=(?:"h"|
\'
h
\'
)[^>]*/>'
)
page_match
=
page_h_filter_regex
.
search
(
mako_template
)
if
not
page_match
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
page_match
def
_check_
mako_expressions
(
self
,
mako_template
,
has_page_default
,
results
):
def
_check_
html
(
self
,
file_contents
,
results
):
"""
"""
Searches for Mako expressions and then checks if they contain
Checks many rules related to HTML in a Python file.
violations, including checking JavaScript contexts for JavaScript
violations.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
expressions
=
self
.
_find_mako_expressions
(
mako_template
)
# Text() Expressions keyed by its end index
contexts
=
self
.
_get_contexts
(
mako_template
)
text_calls_by_end_index
=
{}
self
.
_check_javascript_contexts
(
mako_template
,
contexts
,
results
)
# HTML() Expressions keyed by its end index
for
expression
in
expressions
:
html_calls_by_end_index
=
{}
if
expression
.
end_index
is
None
:
start_index
=
0
results
.
violations
.
append
(
ExpressionRuleViolation
(
while
True
:
Rules
.
mako_unparseable_expression
,
expression
))
continue
context
=
self
.
_get_context
(
contexts
,
expression
.
start_index
)
# check HTML(), Text() and format() calls
self
.
_check_filters
(
mako_template
,
expression
,
context
,
has_page_default
,
results
)
result
=
self
.
_check_html_text_format
(
self
.
_check_deprecated_display_name
(
expression
,
results
)
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
self
.
_check_html_and_text
(
expression
,
has_page_default
,
results
)
)
next_start_index
=
result
[
'next_start_index'
]
interpolate_end_index
=
result
[
'interpolate_end_index'
]
def
_check_javascript_contexts
(
self
,
mako_template
,
contexts
,
results
):
# check for interpolation including HTML outside of function calls
"""
self
.
_check_interpolate_with_html
(
Lint the JavaScript contexts for JavaScript violations inside a Mako
file_contents
,
start_index
,
interpolate_end_index
,
results
template.
)
Arguments:
# advance the search
mako_template: The contents of the Mako template.
start_index
=
next_start_index
contexts: A list of context dicts with 'type' and 'index'.
results: A list of results into which violations will be added.
Side effect:
# end if there is nothing left to search
Adds JavaScript violations to results.
if
interpolate_end_index
is
None
:
"""
break
javascript_start_index
=
None
for
context
in
contexts
:
if
context
[
'type'
]
==
'javascript'
:
if
javascript_start_index
<
0
:
javascript_start_index
=
context
[
'index'
]
else
:
if
javascript_start_index
is
not
None
:
javascript_end_index
=
context
[
'index'
]
javascript_code
=
mako_template
[
javascript_start_index
:
javascript_end_index
]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
javascript_start_index
=
None
if
javascript_start_index
is
not
None
:
javascript_code
=
mako_template
[
javascript_start_index
:]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
def
_check_javascript_context
(
self
,
javascript_code
,
start_offset
,
results
):
def
_check_html_text_format
(
self
,
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
):
"""
"""
Lint a single JavaScript context for JavaScript violations inside a Mako
Checks for HTML(), Text() and format() calls, and various rules related
t
emplate
.
t
o these calls
.
Arguments:
Arguments:
javascript_code: The template contents of the JavaScript context.
file_contents: The contents of the Python file
start_offset: The offset of the JavaScript context inside the
start_index: The index at which to begin searching for a function
original Mako template.
call.
text_calls_by_end_index: Text() Expressions keyed by its end index.
html_calls_by_end_index: HTML() Expressions keyed by its end index.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
Side effect:
Returns:
Adds JavaScript violations to results.
A dict with the following keys:
'next_start_index': The start index of the next search for a
"""
function call.
javascript_results
=
FileResults
(
""
)
'interpolate_end_index': The end index of the next next search
self
.
javaScriptLinter
.
check_javascript_file_is_safe
(
javascript_code
,
javascript_results
)
for interpolation with html, or None if the end of file
# translate the violations into the location within the original
should be used.
# Mako template
for
violation
in
javascript_results
.
violations
:
expression
=
violation
.
expression
expression
.
start_index
+=
start_offset
if
expression
.
end_index
is
not
None
:
expression
.
end_index
+=
start_offset
results
.
violations
.
append
(
ExpressionRuleViolation
(
violation
.
rule
,
expression
))
def
_check_deprecated_display_name
(
self
,
expression
,
results
):
"""
"""
Checks that the deprecated display_name_with_default_escaped is not
# used to find opening of .format(), Text() and HTML() calls
used. Adds violation to results if there is a problem.
regex_function_open
=
re
.
compile
(
r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()"
)
interpolate_end_index
=
None
end_index
=
None
strings
=
None
html_calls
=
[]
while
True
:
# first search for HTML(), Text(), or .format()
if
end_index
is
None
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
)
else
:
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
,
end_index
)
if
function_match
is
not
None
:
if
interpolate_end_index
is
None
:
interpolate_end_index
=
function_match
.
start
()
function_close_result
=
self
.
_find_closing_char_index
(
None
,
'('
,
')'
,
file_contents
,
start_index
=
function_match
.
end
(),
)
if
function_close_result
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_parse_error
,
Expression
(
function_match
.
start
())
))
else
:
expression
=
Expression
(
function_match
.
start
(),
function_close_result
[
'close_char_index'
]
+
1
,
file_contents
,
start_delim
=
function_match
.
group
(),
end_delim
=
")"
)
# if this an outer most Text(), HTML(), or format() call
if
end_index
is
None
:
end_index
=
expression
.
end_index
interpolate_end_index
=
expression
.
start_index
strings
=
function_close_result
[
'strings'
]
if
function_match
.
group
()
==
'.format('
:
if
'HTML('
in
expression
.
expression_inner
or
'Text('
in
expression
.
expression_inner
:
is_wrapped_with_text
=
str
(
function_match
.
start
())
in
text_calls_by_end_index
.
keys
()
is_wrapped_with_html
=
str
(
function_match
.
start
())
in
html_calls_by_end_index
.
keys
()
if
is_wrapped_with_text
is
False
and
is_wrapped_with_html
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_requires_html_or_text
,
expression
))
else
:
# expression is 'HTML(' or 'Text('
# HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
# Generally, format() would be the issue if there is one.
if
regex_function_open
.
search
(
expression
.
expression_inner
)
is
not
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
function_match
.
group
()
==
'Text('
:
text_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
else
:
# function_match.group() == 'HTML(':
html_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
html_calls
.
append
(
expression
)
Arguments:
start_index
=
function_match
.
end
()
e
xpression: An Expression
e
lse
:
results: A list of results into which violations will be added.
break
"""
# checks strings in the outer most call to ensure they are properly
if
'.display_name_with_default_escaped'
in
expression
.
expression
:
# wrapped with HTML()
results
.
violations
.
append
(
ExpressionRuleViolation
(
self
.
_check_format_html_strings_wrapped
(
strings
,
html_calls
,
results
)
Rules
.
python_deprecated_display_name
,
expression
))
def
_check_html_and_text
(
self
,
expression
,
has_page_default
,
results
):
# compute where to continue the search
if
function_match
is
None
and
end_index
is
None
:
next_start_index
=
start_index
elif
end_index
is
None
:
next_start_index
=
function_match
.
end
()
else
:
next_start_index
=
end_index
return
{
'next_start_index'
:
next_start_index
,
'interpolate_end_index'
:
interpolate_end_index
,
}
def
_check_format_html_strings_wrapped
(
self
,
strings
,
html_calls
,
results
):
"""
"""
Checks rules related to proper use of HTML() and Text().
Checks that any string inside a format call that seems to contain HTML
is wrapped with a call to HTML().
Arguments:
Arguments:
expression: A Mako Expression.
strings: A list of ParseStrings for each string inside the format()
has_page_default: True if the page is marked as default, False
call.
otherwise.
html_calls: A list of Expressions representing all of the HTML()
calls inside the format() call.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
expression_inner
=
expression
.
expression_inner
html_strings
=
[]
# use find to get the template relative inner expression start index
html_wrapped_strings
=
[]
# due to possible skipped white space
if
strings
is
not
None
:
template_inner_start_index
=
expression
.
start_index
# find all strings that contain HTML
template_inner_start_index
+=
expression
.
expression
.
find
(
expression_inner
)
for
string
in
strings
:
if
'HTML('
in
expression_inner
:
if
'<'
in
string
.
string
:
if
expression_inner
.
startswith
(
'HTML('
):
html_strings
.
append
(
string
)
close_paren_index
=
self
.
_find_closing_char_index
(
# check if HTML string is appropriately wrapped
None
,
"("
,
")"
,
expression_inner
,
start_index
=
len
(
'HTML('
)
for
html_call
in
html_calls
:
)[
'close_char_index'
]
if
html_call
.
start_index
<
string
.
start_index
<
string
.
end_index
<
html_call
.
end_index
:
# check that the close paren is at the end of the stripped expression.
html_wrapped_strings
.
append
(
string
)
if
close_paren_index
!=
len
(
expression_inner
)
-
1
:
break
results
.
violations
.
append
(
ExpressionRuleViolation
(
# loop through all unwrapped strings
Rules
.
mako_html_alone
,
expression
for
unsafe_string
in
set
(
html_strings
)
-
set
(
html_wrapped_strings
):
))
unsafe_string_expression
=
Expression
(
unsafe_string
.
start_index
)
elif
expression_inner
.
startswith
(
'Text('
)
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_requires_text
,
expression
))
else
:
if
'Text('
in
expression_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_text_redundant
,
expression
))
# strings to be checked for HTML
unwrapped_html_strings
=
expression
.
strings
for
match
in
re
.
finditer
(
r"(HTML\(|Text\()"
,
expression_inner
):
result
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
match
.
end
())
if
result
is
not
None
:
close_paren_index
=
result
[
'close_char_index'
]
# the argument sent to HTML() or Text()
argument
=
expression_inner
[
match
.
end
():
close_paren_index
]
if
".format("
in
argument
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
match
.
group
()
==
"HTML("
:
# remove expression strings wrapped in HTML()
for
string
in
list
(
unwrapped_html_strings
):
html_inner_start_index
=
template_inner_start_index
+
match
.
end
()
html_inner_end_index
=
template_inner_start_index
+
close_paren_index
if
html_inner_start_index
<=
string
.
start_index
and
string
.
end_index
<=
html_inner_end_index
:
unwrapped_html_strings
.
remove
(
string
)
# check strings not wrapped in HTML() for '<'
for
string
in
unwrapped_html_strings
:
if
'<'
in
string
.
string_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
expression
Rules
.
python_wrap_html
,
unsafe_string_
expression
))
))
break
# check strings not wrapped in HTML() for HTML entities
if
has_page_default
:
for
string
in
unwrapped_html_strings
:
if
re
.
search
(
r"&[#]?[a-zA-Z0-9]+;"
,
string
.
string_inner
):
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_entities
,
expression
))
break
def
_check_
filters
(
self
,
mako_template
,
expression
,
context
,
has_page_default
,
results
):
def
_check_
interpolate_with_html
(
self
,
file_contents
,
start_index
,
end_index
,
results
):
"""
"""
Checks that the filters used in the given Mako expression are valid
Find interpolations with html that fall outside of any calls to HTML(),
for the given context. Adds violation to results if there is a problem
.
Text(), and .format()
.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
file_contents: The contents of the Python file
expression: A Mako Expression.
start_index: The index to start the search, or None if nothing to
context: The context of the page in which the expression was found
search
(e.g. javascript, html).
end_index: The index to end the search, or None if the end of file
has_page_default: True if the page is marked as default, False
should be used.
otherwise.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
if
context
==
'unknown'
:
# used to find interpolation with HTML
pattern_interpolate_html_inner
=
r'(<.*
%
s|
%
s.*<|<.*{\w*}|{\w*}.*<)'
regex_interpolate_html
=
re
.
compile
(
r"""(".*{}.*"|'.*{}.*')"""
.
format
(
pattern_interpolate_html_inner
,
pattern_interpolate_html_inner
))
if
end_index
is
None
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
)
else
:
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
,
end_index
)
for
match_html_string
in
interpolate_string_iter
:
expression
=
Expression
(
match_html_string
.
start
(),
match_html_string
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unknown_context
,
expression
Rules
.
python_interpolate_html
,
expression
))
))
return
# Example: finds "| n, h}" when given "${x | n, h}"
filters_regex
=
re
.
compile
(
r'\|([.,\w\s]*)\}'
)
filters_match
=
filters_regex
.
search
(
expression
.
expression
)
if
filters_match
is
None
:
if
context
==
'javascript'
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
return
filters
=
filters_match
.
group
(
1
)
.
replace
(
" "
,
""
)
.
split
(
","
)
class
MakoTemplateLinter
(
BaseLinter
):
if
filters
==
[
'n'
,
'decode.utf8'
]:
"""
# {x | n, decode.utf8} is valid in any context
The linter for Mako template files.
pass
"""
elif
context
==
'html'
:
javaScriptLinter
=
JavaScriptLinter
()
if
filters
==
[
'h'
]:
if
has_page_default
:
# suppress this violation if the page default hasn't been set,
# otherwise the template might get less safe
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unwanted_html_filter
,
expression
))
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_html_filter
,
expression
))
elif
context
==
'javascript'
:
self
.
_check_js_expression_not_with_html
(
mako_template
,
expression
,
results
)
if
filters
==
[
'n'
,
'dump_js_escaped_json'
]:
# {x | n, dump_js_escaped_json} is valid
pass
elif
filters
==
[
'n'
,
'js_escaped_string'
]:
# {x | n, js_escaped_string} is valid, if surrounded by quotes
self
.
_check_js_string_expression_in_quotes
(
mako_template
,
expression
,
results
)
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
def
_check_js_string_expression_in_quotes
(
self
,
mako_template
,
expression
,
results
):
def
process_file
(
self
,
directory
,
file_name
):
"""
"""
Checks that a Mako expression using js_escaped_string is surrounded by
Process file to determine if it is a Mako template file and
quotes
.
if it is safe
.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
directory (string): The directory of the file to be checked
expression: A Mako Expression.
file_name (string): A filename for a potential Mako file
results: A list of results into which violations will be added.
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
if
parse_string
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_missing_quotes
,
expression
))
def
_check_js_expression_not_with_html
(
self
,
mako_template
,
expression
,
results
):
Returns:
"""
The file results containing any violations.
Checks that a Mako expression in a JavaScript context does not appear in
a string that also contains HTML.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
"""
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
mako_file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
if
parse_string
is
not
None
and
re
.
search
(
'[<>]'
,
parse_string
.
string
)
is
not
None
:
results
=
FileResults
(
mako_file_full_path
)
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_html_string
,
expression
))
def
_find_string_wrapping_expression
(
self
,
mako_template
,
expression
):
if
not
results
.
is_file
:
return
results
if
not
self
.
_is_valid_directory
(
directory
):
return
results
# TODO: When safe-by-default is turned on at the platform level, will we:
# 1. Turn it on for .html only, or
# 2. Turn it on for all files, and have different rulesets that have
# different rules of .xml, .html, .js, .txt Mako templates (e.g. use
# the n filter to turn off h for some of these)?
# For now, we only check .html and .xml files
if
not
(
file_name
.
lower
()
.
endswith
(
'.html'
)
or
file_name
.
lower
()
.
endswith
(
'.xml'
)):
return
results
return
self
.
_load_and_check_file_is_safe
(
mako_file_full_path
,
self
.
_check_mako_file_is_safe
,
results
)
def
_is_valid_directory
(
self
,
directory
):
"""
"""
Finds the string wrapping the Mako expression if there is one.
Determines if the provided directory is a directory that could contain
Mako template files that need to be linted.
Arguments:
Arguments:
mako_template: The contents of the Mako template.
directory: The directory to be linted.
expression: A Mako Expression.
Returns:
Returns:
ParseString representing a scrubbed version of the wrapped string,
True if this directory should be linted for Mako template violations
where the Mako expression was replaced with "${...}", if a wrapped
and False otherwise.
string was found. Otherwise, returns None if none found.
"""
"""
lines
=
StringLines
(
mako_template
)
if
self
.
_is_skip_dir
(
self
.
_skip_dirs
,
directory
):
start_index
=
lines
.
index_to_line_start_index
(
expression
.
start_index
)
return
False
if
expression
.
end_index
is
not
None
:
end_index
=
lines
.
index_to_line_end_index
(
expression
.
end_index
)
else
:
return
None
# scrub out the actual expression so any code inside the expression
# doesn't interfere with rules applied to the surrounding code (i.e.
# checking JavaScript).
scrubbed_lines
=
""
.
join
((
mako_template
[
start_index
:
expression
.
start_index
],
"${...}"
,
mako_template
[
expression
.
end_index
:
end_index
]
))
adjusted_start_index
=
expression
.
start_index
-
start_index
start_index
=
0
while
True
:
parse_string
=
ParseString
(
scrubbed_lines
,
start_index
,
len
(
scrubbed_lines
))
# check for validly parsed string
if
0
<=
parse_string
.
start_index
<
parse_string
.
end_index
:
# check if expression is contained in the given string
if
parse_string
.
start_index
<
adjusted_start_index
<
parse_string
.
end_index
:
return
parse_string
else
:
# move to check next string
start_index
=
parse_string
.
end_index
else
:
break
return
None
def
_get_contexts
(
self
,
mako_template
):
# TODO: This is an imperfect guess concerning the Mako template
"""
# directories. This needs to be reviewed before turning on safe by
Returns a data structure that represents the indices at which the
# default at the platform level.
template changes from HTML context to JavaScript and back.
if
(
'/templates/'
in
directory
)
or
directory
.
endswith
(
'/templates'
):
return
True
Return:
return
False
A list of dicts where each dict contains:
- index: the index of the context.
def
_check_mako_file_is_safe
(
self
,
mako_template
,
results
):
- type: the context type (e.g. 'html' or 'javascript').
"""
"""
contexts_re
=
re
.
compile
(
Checks for violations in a Mako template.
r"""
<script.*?> | # script tag start
</script> | # script tag end
<
%
static:require_module.*?> | # require js script tag start
</
%
static:require_module> | # require js script tag end
<
%
block[ ]*name=['"]requirejs['"]\w*> | # require js tag start
</
%
block> # require js tag end
"""
,
re
.
VERBOSE
|
re
.
IGNORECASE
)
media_type_re
=
re
.
compile
(
r"""type=['"].*?['"]"""
,
re
.
IGNORECASE
)
contexts
=
[{
'index'
:
0
,
'type'
:
'html'
}]
Arguments:
javascript_types
=
[
mako_template: The contents of the Mako template.
'text/javascript'
,
'text/ecmascript'
,
'application/ecmascript'
,
'application/javascript'
,
results: A file results objects to which violations will be added.
'text/x-mathjax-config'
,
'json/xblock-args'
]
html_types
=
[
'text/template'
]
for
context
in
contexts_re
.
finditer
(
mako_template
):
match_string
=
context
.
group
()
.
lower
()
if
match_string
.
startswith
(
"<script"
):
match_type
=
media_type_re
.
search
(
match_string
)
context_type
=
'javascript'
if
match_type
is
not
None
:
# get media type (e.g. get text/javascript from
# type="text/javascript")
match_type
=
match_type
.
group
()[
6
:
-
1
]
.
lower
()
if
match_type
in
html_types
:
context_type
=
'html'
elif
match_type
not
in
javascript_types
:
context_type
=
'unknown'
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
context_type
})
elif
match_string
.
startswith
(
"</"
):
contexts
.
append
({
'index'
:
context
.
start
(),
'type'
:
'html'
})
else
:
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
'javascript'
})
return
contexts
"""
if
self
.
_is_django_template
(
mako_template
):
return
has_page_default
=
self
.
_has_page_default
(
mako_template
,
results
)
self
.
_check_mako_expressions
(
mako_template
,
has_page_default
,
results
)
results
.
prepare_results
(
mako_template
,
line_comment_delim
=
'##'
)
def
_
get_context
(
self
,
contexts
,
index
):
def
_
is_django_template
(
self
,
mako_template
):
"""
"""
Gets the context (e.g. javascript, html) of the template at the given
Determines if the template is actually a Django template.
index.
Arguments:
Arguments:
contexts: A list of dicts where each dict contains the 'index' of the context
mako_template: The template code.
and the context 'type' (e.g. 'html' or 'javascript').
index: The index for which we want the context.
Returns:
Returns:
The context (e.g. javascript or html) for the given index.
True if this is really a Django template, and False otherwise.
"""
"""
current_context
=
contexts
[
0
][
'type'
]
if
re
.
search
(
'({
%.*%
})|({{.*}})'
,
mako_template
)
is
not
None
:
for
context
in
contexts
:
return
True
if
context
[
'index'
]
<=
index
:
return
False
current_context
=
context
[
'type'
]
else
:
break
return
current_context
def
_
find_mako_expressions
(
self
,
mako_template
):
def
_
get_page_tag_count
(
self
,
mako_template
):
"""
"""
Finds all the Mako expressions in a Mako template and creates a list
Determines the number of page expressions in the Mako template. Ignores
of dicts for each expression
.
page expressions that are commented out
.
Arguments:
Arguments:
mako_template: The content of the Mako template.
mako_template: The content
s
of the Mako template.
Returns:
Returns:
A list of Expressions.
The number of page expressions
"""
"""
start_delim
=
'${'
count
=
len
(
re
.
findall
(
'<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
start_index
=
0
count_commented
=
len
(
re
.
findall
(
r'##\s+<
%
page '
,
mako_template
,
re
.
IGNORECASE
))
expressions
=
[]
return
max
(
0
,
count
-
count_commented
)
while
True
:
start_index
=
mako_template
.
find
(
start_delim
,
start_index
)
if
start_index
<
0
:
break
result
=
self
.
_find_closing_char_index
(
def
_has_page_default
(
self
,
mako_template
,
results
):
start_delim
,
'{'
,
'}'
,
mako_template
,
start_index
=
start_index
+
len
(
start_delim
)
"""
)
Checks if the Mako template contains the page expression marking it as
if
result
is
None
:
safe by default.
expression
=
Expression
(
start_index
)
# for parsing error, restart search right after the start of the
# current expression
start_index
=
start_index
+
len
(
start_delim
)
else
:
close_char_index
=
result
[
'close_char_index'
]
expression
=
mako_template
[
start_index
:
close_char_index
+
1
]
expression
=
Expression
(
start_index
,
end_index
=
close_char_index
+
1
,
template
=
mako_template
,
start_delim
=
start_delim
,
end_delim
=
'}'
,
strings
=
result
[
'strings'
],
)
# restart search after the current expression
start_index
=
expression
.
end_index
expressions
.
append
(
expression
)
return
expressions
Arguments:
mako_template: The contents of the Mako template.
results: A list of results into which violations will be added.
class
PythonLinter
(
BaseLinter
):
Side effect:
"""
Adds violations regarding page default if necessary
The linter for Python files.
The current implementation of the linter does naive Python parsing. It does
Returns:
not use the parser. One known issue is that parsing errors found inside a
True if the template has the page default, and False otherwise.
docstring need to be disabled, rather than being automatically skipped.
Skipping docstrings is an enhancement that could be added.
"""
def
__init__
(
self
):
"""
"""
Init method.
page_tag_count
=
self
.
_get_page_tag_count
(
mako_template
)
"""
# check if there are too many page expressions
super
(
PythonLinter
,
self
)
.
__init__
()
if
2
<=
page_tag_count
:
self
.
_skip_python_dirs
=
self
.
_skip_dirs
+
(
'tests'
,
'test/acceptance'
)
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_multiple_page_tags
))
return
False
# make sure there is exactly 1 page expression, excluding commented out
# page expressions, before proceeding
elif
page_tag_count
!=
1
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
False
# check that safe by default (h filter) is turned on
page_h_filter_regex
=
re
.
compile
(
'<
%
page[^>]*expression_filter=(?:"h"|
\'
h
\'
)[^>]*/>'
)
page_match
=
page_h_filter_regex
.
search
(
mako_template
)
if
not
page_match
:
results
.
violations
.
append
(
RuleViolation
(
Rules
.
mako_missing_default
))
return
page_match
def
process_file
(
self
,
directory
,
file_name
):
def
_check_mako_expressions
(
self
,
mako_template
,
has_page_default
,
results
):
"""
"""
Process file to determine if it is a Python file and
Searches for Mako expressions and then checks if they contain
if it is safe.
violations, including checking JavaScript contexts for JavaScript
violations.
Arguments:
Arguments:
directory (string): The directory of the file to be checked
mako_template: The contents of the Mako template.
file_name (string): A filename for a potential Python file
has_page_default: True if the page is marked as default, False
otherwise.
Returns:
results: A list of results into which violations will be added.
The file results containing any violations.
"""
"""
file_full_path
=
os
.
path
.
normpath
(
directory
+
'/'
+
file_name
)
expressions
=
self
.
_find_mako_expressions
(
mako_template
)
results
=
FileResults
(
file_full_path
)
contexts
=
self
.
_get_contexts
(
mako_template
)
self
.
_check_javascript_contexts
(
mako_template
,
contexts
,
results
)
if
not
results
.
is_file
:
for
expression
in
expressions
:
return
results
if
expression
.
end_index
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unparseable_expression
,
expression
))
continue
if
file_name
.
lower
()
.
endswith
(
'.py'
)
is
False
:
context
=
self
.
_get_context
(
contexts
,
expression
.
start_index
)
return
results
self
.
_check_filters
(
mako_template
,
expression
,
context
,
has_page_default
,
results
)
self
.
_check_deprecated_display_name
(
expression
,
results
)
self
.
_check_html_and_text
(
expression
,
has_page_default
,
results
)
# skip this linter code (i.e. safe_template_linter.py)
def
_check_javascript_contexts
(
self
,
mako_template
,
contexts
,
results
):
if
file_name
==
os
.
path
.
basename
(
__file__
):
"""
return
results
Lint the JavaScript contexts for JavaScript violations inside a Mako
template.
if
not
self
.
_is_valid_directory
(
self
.
_skip_python_dirs
,
directory
):
Arguments:
return
results
mako_template: The contents of the Mako template.
contexts: A list of context dicts with 'type' and 'index'.
results: A list of results into which violations will be added.
return
self
.
_load_and_check_file_is_safe
(
file_full_path
,
self
.
check_python_file_is_safe
,
results
)
Side effect:
Adds JavaScript violations to results.
"""
javascript_start_index
=
None
for
context
in
contexts
:
if
context
[
'type'
]
==
'javascript'
:
if
javascript_start_index
<
0
:
javascript_start_index
=
context
[
'index'
]
else
:
if
javascript_start_index
is
not
None
:
javascript_end_index
=
context
[
'index'
]
javascript_code
=
mako_template
[
javascript_start_index
:
javascript_end_index
]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
javascript_start_index
=
None
if
javascript_start_index
is
not
None
:
javascript_code
=
mako_template
[
javascript_start_index
:]
self
.
_check_javascript_context
(
javascript_code
,
javascript_start_index
,
results
)
def
check_python_file_is_safe
(
self
,
file_contents
,
results
):
def
_check_javascript_context
(
self
,
javascript_code
,
start_offset
,
results
):
"""
"""
Checks for violations in a Python file.
Lint a single JavaScript context for JavaScript violations inside a Mako
template.
Arguments:
Arguments:
file_contents: The contents of the Python file.
javascript_code: The template contents of the JavaScript context.
results: A file results objects to which violations will be added.
start_offset: The offset of the JavaScript context inside the
original Mako template.
results: A list of results into which violations will be added.
Side effect:
Adds JavaScript violations to results.
"""
"""
self
.
_check_concat_with_html
(
file_contents
,
Rules
.
python_concat_html
,
results
)
javascript_results
=
FileResults
(
""
)
self
.
_check_deprecated_display_name
(
file_contents
,
results
)
self
.
javaScriptLinter
.
check_javascript_file_is_safe
(
javascript_code
,
javascript_results
)
self
.
_check_custom_escape
(
file_contents
,
results
)
# translate the violations into the location within the original
self
.
_check_html
(
file_contents
,
results
)
# Mako template
results
.
prepare_results
(
file_contents
,
line_comment_delim
=
'#'
)
for
violation
in
javascript_results
.
violations
:
expression
=
violation
.
expression
expression
.
start_index
+=
start_offset
if
expression
.
end_index
is
not
None
:
expression
.
end_index
+=
start_offset
results
.
violations
.
append
(
ExpressionRuleViolation
(
violation
.
rule
,
expression
))
def
_check_deprecated_display_name
(
self
,
file_contents
,
results
):
def
_check_deprecated_display_name
(
self
,
expression
,
results
):
"""
"""
Checks that the deprecated display_name_with_default_escaped is not
Checks that the deprecated display_name_with_default_escaped is not
used. Adds violation to results if there is a problem.
used. Adds violation to results if there is a problem.
Arguments:
Arguments:
file_contents: The contents of the Python file
expression: An Expression
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
for
match
in
re
.
finditer
(
r'\.display_name_with_default_escaped'
,
file_contents
):
if
'.display_name_with_default_escaped'
in
expression
.
expression
:
expression
=
Expression
(
match
.
start
(),
match
.
end
())
results
.
violations
.
append
(
ExpressionRuleViolation
(
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_deprecated_display_name
,
expression
Rules
.
python_deprecated_display_name
,
expression
))
))
def
_check_
custom_escape
(
self
,
file_contents
,
results
):
def
_check_
html_and_text
(
self
,
expression
,
has_page_default
,
results
):
"""
"""
Checks for custom escaping calls, rather than using a standard escaping
Checks rules related to proper use of HTML() and Text().
method.
Arguments:
Arguments:
file_contents: The contents of the Python file
expression: A Mako Expression.
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
for
match
in
re
.
finditer
(
"(<.*<|<.*<)"
,
file_contents
):
expression_inner
=
expression
.
expression_inner
expression
=
Expression
(
match
.
start
(),
match
.
end
())
# use find to get the template relative inner expression start index
results
.
violations
.
append
(
ExpressionRuleViolation
(
# due to possible skipped white space
Rules
.
python_custom_escape
,
expression
template_inner_start_index
=
expression
.
start_index
))
template_inner_start_index
+=
expression
.
expression
.
find
(
expression_inner
)
if
'HTML('
in
expression_inner
:
if
expression_inner
.
startswith
(
'HTML('
):
close_paren_index
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
len
(
'HTML('
)
)[
'close_char_index'
]
# check that the close paren is at the end of the stripped expression.
if
close_paren_index
!=
len
(
expression_inner
)
-
1
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_alone
,
expression
))
elif
expression_inner
.
startswith
(
'Text('
)
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_requires_text
,
expression
))
else
:
if
'Text('
in
expression_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_text_redundant
,
expression
))
def
_check_html
(
self
,
file_contents
,
results
):
# strings to be checked for HTML
unwrapped_html_strings
=
expression
.
strings
for
match
in
re
.
finditer
(
r"(HTML\(|Text\()"
,
expression_inner
):
result
=
self
.
_find_closing_char_index
(
None
,
"("
,
")"
,
expression_inner
,
start_index
=
match
.
end
())
if
result
is
not
None
:
close_paren_index
=
result
[
'close_char_index'
]
# the argument sent to HTML() or Text()
argument
=
expression_inner
[
match
.
end
():
close_paren_index
]
if
".format("
in
argument
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
match
.
group
()
==
"HTML("
:
# remove expression strings wrapped in HTML()
for
string
in
list
(
unwrapped_html_strings
):
html_inner_start_index
=
template_inner_start_index
+
match
.
end
()
html_inner_end_index
=
template_inner_start_index
+
close_paren_index
if
html_inner_start_index
<=
string
.
start_index
and
string
.
end_index
<=
html_inner_end_index
:
unwrapped_html_strings
.
remove
(
string
)
# check strings not wrapped in HTML() for '<'
for
string
in
unwrapped_html_strings
:
if
'<'
in
string
.
string_inner
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
expression
))
break
# check strings not wrapped in HTML() for HTML entities
if
has_page_default
:
for
string
in
unwrapped_html_strings
:
if
re
.
search
(
r"&[#]?[a-zA-Z0-9]+;"
,
string
.
string_inner
):
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_html_entities
,
expression
))
break
def
_check_filters
(
self
,
mako_template
,
expression
,
context
,
has_page_default
,
results
):
"""
"""
Checks many rules related to HTML in a Python file.
Checks that the filters used in the given Mako expression are valid
for the given context. Adds violation to results if there is a problem.
Arguments:
Arguments:
file_contents: The contents of the Python file
mako_template: The contents of the Mako template.
expression: A Mako Expression.
context: The context of the page in which the expression was found
(e.g. javascript, html).
has_page_default: True if the page is marked as default, False
otherwise.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
"""
# Text() Expressions keyed by its end index
if
context
==
'unknown'
:
text_calls_by_end_index
=
{}
results
.
violations
.
append
(
ExpressionRuleViolation
(
# HTML() Expressions keyed by its end index
Rules
.
mako_unknown_context
,
expression
html_calls_by_end_index
=
{}
))
start_index
=
0
return
while
True
:
# check HTML(), Text() and format() calls
result
=
self
.
_check_html_text_format
(
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
)
next_start_index
=
result
[
'next_start_index'
]
interpolate_end_index
=
result
[
'interpolate_end_index'
]
# check for interpolation including HTML outside of function calls
self
.
_check_interpolate_with_html
(
file_contents
,
start_index
,
interpolate_end_index
,
results
)
# advance the search
# Example: finds "| n, h}" when given "${x | n, h}"
start_index
=
next_start_index
filters_regex
=
re
.
compile
(
r'\|([.,\w\s]*)\}'
)
filters_match
=
filters_regex
.
search
(
expression
.
expression
)
if
filters_match
is
None
:
if
context
==
'javascript'
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
return
# end if there is nothing left to search
filters
=
filters_match
.
group
(
1
)
.
replace
(
" "
,
""
)
.
split
(
","
)
if
interpolate_end_index
is
None
:
if
filters
==
[
'n'
,
'decode.utf8'
]:
break
# {x | n, decode.utf8} is valid in any context
pass
elif
context
==
'html'
:
if
filters
==
[
'h'
]:
if
has_page_default
:
# suppress this violation if the page default hasn't been set,
# otherwise the template might get less safe
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_unwanted_html_filter
,
expression
))
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_html_filter
,
expression
))
elif
context
==
'javascript'
:
self
.
_check_js_expression_not_with_html
(
mako_template
,
expression
,
results
)
if
filters
==
[
'n'
,
'dump_js_escaped_json'
]:
# {x | n, dump_js_escaped_json} is valid
pass
elif
filters
==
[
'n'
,
'js_escaped_string'
]:
# {x | n, js_escaped_string} is valid, if surrounded by quotes
self
.
_check_js_string_expression_in_quotes
(
mako_template
,
expression
,
results
)
else
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_invalid_js_filter
,
expression
))
def
_check_html_text_format
(
def
_check_js_string_expression_in_quotes
(
self
,
mako_template
,
expression
,
results
):
self
,
file_contents
,
start_index
,
text_calls_by_end_index
,
html_calls_by_end_index
,
results
):
"""
"""
Checks
for HTML(), Text() and format() calls, and various rules related
Checks
that a Mako expression using js_escaped_string is surrounded by
to these call
s.
quote
s.
Arguments:
Arguments:
file_contents: The contents of the Python file
mako_template: The contents of the Mako template.
start_index: The index at which to begin searching for a function
expression: A Mako Expression.
call.
text_calls_by_end_index: Text() Expressions keyed by its end index.
html_calls_by_end_index: HTML() Expressions keyed by its end index.
results: A list of results into which violations will be added.
results: A list of results into which violations will be added.
"""
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
if
parse_string
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
mako_js_missing_quotes
,
expression
))
Returns:
def
_check_js_expression_not_with_html
(
self
,
mako_template
,
expression
,
results
):
A dict with the following keys:
"""
'next_start_index': The start index of the next search for a
Checks that a Mako expression in a JavaScript context does not appear in
function call.
a string that also contains HTML.
'interpolate_end_index': The end index of the next next search
for interpolation with html, or None if the end of file
should be used.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
results: A list of results into which violations will be added.
"""
"""
# used to find opening of .format(), Text() and HTML() calls
parse_string
=
self
.
_find_string_wrapping_expression
(
mako_template
,
expression
)
regex_function_open
=
re
.
compile
(
r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()"
)
if
parse_string
is
not
None
and
re
.
search
(
'[<>]'
,
parse_string
.
string
)
is
not
None
:
interpolate_end_index
=
None
results
.
violations
.
append
(
ExpressionRuleViolation
(
end_index
=
None
Rules
.
mako_js_html_string
,
expression
strings
=
None
))
html_calls
=
[]
def
_find_string_wrapping_expression
(
self
,
mako_template
,
expression
):
"""
Finds the string wrapping the Mako expression if there is one.
Arguments:
mako_template: The contents of the Mako template.
expression: A Mako Expression.
Returns:
ParseString representing a scrubbed version of the wrapped string,
where the Mako expression was replaced with "${...}", if a wrapped
string was found. Otherwise, returns None if none found.
"""
lines
=
StringLines
(
mako_template
)
start_index
=
lines
.
index_to_line_start_index
(
expression
.
start_index
)
if
expression
.
end_index
is
not
None
:
end_index
=
lines
.
index_to_line_end_index
(
expression
.
end_index
)
else
:
return
None
# scrub out the actual expression so any code inside the expression
# doesn't interfere with rules applied to the surrounding code (i.e.
# checking JavaScript).
scrubbed_lines
=
""
.
join
((
mako_template
[
start_index
:
expression
.
start_index
],
"${...}"
,
mako_template
[
expression
.
end_index
:
end_index
]
))
adjusted_start_index
=
expression
.
start_index
-
start_index
start_index
=
0
while
True
:
while
True
:
# first search for HTML(), Text(), or .format()
parse_string
=
ParseString
(
scrubbed_lines
,
start_index
,
len
(
scrubbed_lines
))
if
end_index
is
None
:
# check for validly parsed string
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
)
if
0
<=
parse_string
.
start_index
<
parse_string
.
end_index
:
else
:
# check if expression is contained in the given string
function_match
=
regex_function_open
.
search
(
file_contents
,
start_index
,
end_index
)
if
parse_string
.
start_index
<
adjusted_start_index
<
parse_string
.
end_index
:
if
function_match
is
not
None
:
return
parse_string
if
interpolate_end_index
is
None
:
interpolate_end_index
=
function_match
.
start
()
function_close_result
=
self
.
_find_closing_char_index
(
None
,
'('
,
')'
,
file_contents
,
start_index
=
function_match
.
end
(),
)
if
function_close_result
is
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_parse_error
,
Expression
(
function_match
.
start
())
))
else
:
else
:
expression
=
Expression
(
# move to check next string
function_match
.
start
(),
function_close_result
[
'close_char_index'
]
+
1
,
file_contents
,
start_index
=
parse_string
.
end_index
start_delim
=
function_match
.
group
(),
end_delim
=
")"
)
# if this an outer most Text(), HTML(), or format() call
if
end_index
is
None
:
end_index
=
expression
.
end_index
interpolate_end_index
=
expression
.
start_index
strings
=
function_close_result
[
'strings'
]
if
function_match
.
group
()
==
'.format('
:
if
'HTML('
in
expression
.
expression_inner
or
'Text('
in
expression
.
expression_inner
:
is_wrapped_with_text
=
str
(
function_match
.
start
())
in
text_calls_by_end_index
.
keys
()
is_wrapped_with_html
=
str
(
function_match
.
start
())
in
html_calls_by_end_index
.
keys
()
if
is_wrapped_with_text
is
False
and
is_wrapped_with_html
is
False
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_requires_html_or_text
,
expression
))
else
:
# expression is 'HTML(' or 'Text('
# HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
# Generally, format() would be the issue if there is one.
if
regex_function_open
.
search
(
expression
.
expression_inner
)
is
not
None
:
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_close_before_format
,
expression
))
if
function_match
.
group
()
==
'Text('
:
text_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
else
:
# function_match.group() == 'HTML(':
html_calls_by_end_index
[
str
(
expression
.
end_index
)]
=
expression
html_calls
.
append
(
expression
)
start_index
=
function_match
.
end
()
else
:
else
:
break
break
return
None
# checks strings in the outer most call to ensure they are properly
def
_get_contexts
(
self
,
mako_template
):
# wrapped with HTML()
"""
self
.
_check_format_html_strings_wrapped
(
strings
,
html_calls
,
results
)
Returns a data structure that represents the indices at which the
template changes from HTML context to JavaScript and back.
# compute where to continue the search
Return:
if
function_match
is
None
and
end_index
is
None
:
A list of dicts where each dict contains:
next_start_index
=
start_index
- index: the index of the context.
elif
end_index
is
None
:
- type: the context type (e.g. 'html' or 'javascript').
next_start_index
=
function_match
.
end
()
"""
else
:
contexts_re
=
re
.
compile
(
next_start_index
=
end_index
r"""
<script.*?> | # script tag start
</script> | # script tag end
<
%
static:require_module.*?> | # require js script tag start
</
%
static:require_module> | # require js script tag end
<
%
block[ ]*name=['"]requirejs['"]\w*> | # require js tag start
</
%
block> # require js tag end
"""
,
re
.
VERBOSE
|
re
.
IGNORECASE
)
media_type_re
=
re
.
compile
(
r"""type=['"].*?['"]"""
,
re
.
IGNORECASE
)
return
{
contexts
=
[{
'index'
:
0
,
'type'
:
'html'
}]
'next_start_index'
:
next_start_index
,
javascript_types
=
[
'interpolate_end_index'
:
interpolate_end_index
,
'text/javascript'
,
'text/ecmascript'
,
'application/ecmascript'
,
'application/javascript'
,
}
'text/x-mathjax-config'
,
'json/xblock-args'
]
html_types
=
[
'text/template'
]
for
context
in
contexts_re
.
finditer
(
mako_template
):
match_string
=
context
.
group
()
.
lower
()
if
match_string
.
startswith
(
"<script"
):
match_type
=
media_type_re
.
search
(
match_string
)
context_type
=
'javascript'
if
match_type
is
not
None
:
# get media type (e.g. get text/javascript from
# type="text/javascript")
match_type
=
match_type
.
group
()[
6
:
-
1
]
.
lower
()
if
match_type
in
html_types
:
context_type
=
'html'
elif
match_type
not
in
javascript_types
:
context_type
=
'unknown'
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
context_type
})
elif
match_string
.
startswith
(
"</"
):
contexts
.
append
({
'index'
:
context
.
start
(),
'type'
:
'html'
})
else
:
contexts
.
append
({
'index'
:
context
.
end
(),
'type'
:
'javascript'
})
def
_check_format_html_strings_wrapped
(
self
,
strings
,
html_calls
,
results
):
return
contexts
def
_get_context
(
self
,
contexts
,
index
):
"""
"""
Checks that any string inside a format call that seems to contain HTML
Gets the context (e.g. javascript, html) of the template at the given
i
s wrapped with a call to HTML()
.
i
ndex
.
Arguments:
Arguments:
strings: A list of ParseStrings for each string inside the format()
contexts: A list of dicts where each dict contains the 'index' of the context
call.
and the context 'type' (e.g. 'html' or 'javascript').
html_calls: A list of Expressions representing all of the HTML()
index: The index for which we want the context.
calls inside the format() call.
results: A list of results into which violations will be added.
Returns:
The context (e.g. javascript or html) for the given index.
"""
"""
html_strings
=
[]
current_context
=
contexts
[
0
][
'type'
]
html_wrapped_strings
=
[]
for
context
in
contexts
:
if
strings
is
not
None
:
if
context
[
'index'
]
<=
index
:
# find all strings that contain HTML
current_context
=
context
[
'type'
]
for
string
in
strings
:
else
:
if
'<'
in
string
.
string
:
break
html_strings
.
append
(
string
)
return
current_context
# check if HTML string is appropriately wrapped
for
html_call
in
html_calls
:
if
html_call
.
start_index
<
string
.
start_index
<
string
.
end_index
<
html_call
.
end_index
:
html_wrapped_strings
.
append
(
string
)
break
# loop through all unwrapped strings
for
unsafe_string
in
set
(
html_strings
)
-
set
(
html_wrapped_strings
):
unsafe_string_expression
=
Expression
(
unsafe_string
.
start_index
)
results
.
violations
.
append
(
ExpressionRuleViolation
(
Rules
.
python_wrap_html
,
unsafe_string_expression
))
def
_
check_interpolate_with_html
(
self
,
file_contents
,
start_index
,
end_index
,
results
):
def
_
find_mako_expressions
(
self
,
mako_template
):
"""
"""
Find
interpolations with html that fall outside of any calls to HTML(),
Find
s all the Mako expressions in a Mako template and creates a list
Text(), and .format()
.
of dicts for each expression
.
Arguments:
Arguments:
file_contents: The contents of the Python file
mako_template: The content of the Mako template.
start_index: The index to start the search, or None if nothing to
search
Returns:
end_index: The index to end the search, or None if the end of file
A list of Expressions.
should be used.
results: A list of results into which violations will be added.
"""
"""
# used to find interpolation with HTML
start_delim
=
'${'
pattern_interpolate_html_inner
=
r'(<.*
%
s|
%
s.*<|<.*{\w*}|{\w*}.*<)'
start_index
=
0
regex_interpolate_html
=
re
.
compile
(
r"""(".*{}.*"|'.*{}.*')"""
.
format
(
expressions
=
[]
pattern_interpolate_html_inner
,
pattern_interpolate_html_inner
))
while
True
:
if
end_index
is
None
:
start_index
=
mako_template
.
find
(
start_delim
,
start_index
)
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
)
if
start_index
<
0
:
else
:
break
interpolate_string_iter
=
regex_interpolate_html
.
finditer
(
file_contents
,
start_index
,
end_index
)
for
match_html_string
in
interpolate_string_iter
:
result
=
self
.
_find_closing_char_index
(
expression
=
Expression
(
match_html_string
.
start
(),
match_html_string
.
end
())
start_delim
,
'{'
,
'}'
,
mako_template
,
start_index
=
start_index
+
len
(
start_delim
)
results
.
violations
.
append
(
ExpressionRuleViolation
(
)
Rules
.
python_interpolate_html
,
expression
if
result
is
None
:
))
expression
=
Expression
(
start_index
)
# for parsing error, restart search right after the start of the
# current expression
start_index
=
start_index
+
len
(
start_delim
)
else
:
close_char_index
=
result
[
'close_char_index'
]
expression
=
mako_template
[
start_index
:
close_char_index
+
1
]
expression
=
Expression
(
start_index
,
end_index
=
close_char_index
+
1
,
template
=
mako_template
,
start_delim
=
start_delim
,
end_delim
=
'}'
,
strings
=
result
[
'strings'
],
)
# restart search after the current expression
start_index
=
expression
.
end_index
expressions
.
append
(
expression
)
return
expressions
def
_process_file
(
full_path
,
template_linters
,
options
,
out
):
def
_process_file
(
full_path
,
template_linters
,
options
,
out
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment