Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
d2cfaa35
Commit
d2cfaa35
authored
Mar 25, 2015
by
Anirudh
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
MNT: fixed pep8 errors for files in tools folder
parent
13fc199b
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
71 additions
and
44 deletions
+71
-44
tools/find_deprecated.py
+42
-25
tools/global_replace.py
+8
-7
tools/nltk_term_index.py
+14
-9
tools/run_doctests.py
+3
-1
tools/svnmime.py
+4
-2
No files found.
tools/find_deprecated.py
View file @
d2cfaa35
#!/usr/bin/env python
#!/usr/bin/env python
#
#
#
#
Natural Language Toolkit: Deprecated Function & Class Finder
# Natural Language Toolkit: Deprecated Function & Class Finder
#
#
# Copyright (C) 2001-2015 NLTK Project
# Copyright (C) 2001-2015 NLTK Project
# Author: Edward Loper <edloper@gmail.com>
# Author: Edward Loper <edloper@gmail.com>
...
@@ -23,8 +23,13 @@ identifier will be highlighted in red.
...
@@ -23,8 +23,13 @@ identifier will be highlighted in red.
# Imports
# Imports
######################################################################
######################################################################
import
os
,
re
,
sys
,
tokenize
,
textwrap
import
os
import
nltk
,
nltk
.
corpus
import
re
import
sys
import
tokenize
import
textwrap
import
nltk
import
nltk.corpus
from
doctest
import
DocTestParser
,
register_optionflag
from
doctest
import
DocTestParser
,
register_optionflag
from
cStringIO
import
StringIO
from
cStringIO
import
StringIO
from
nltk
import
defaultdict
from
nltk
import
defaultdict
...
@@ -69,6 +74,7 @@ try:
...
@@ -69,6 +74,7 @@ try:
from
epydoc.cli
import
TerminalController
from
epydoc.cli
import
TerminalController
except
ImportError
:
except
ImportError
:
class
TerminalController
:
class
TerminalController
:
def
__getattr__
(
self
,
attr
):
return
''
def
__getattr__
(
self
,
attr
):
return
''
term
=
TerminalController
()
term
=
TerminalController
()
...
@@ -78,7 +84,9 @@ term = TerminalController()
...
@@ -78,7 +84,9 @@ term = TerminalController()
######################################################################
######################################################################
# If we're using py24, then ignore the +SKIP directive.
# If we're using py24, then ignore the +SKIP directive.
if
sys
.
version_info
[:
2
]
<
(
2
,
5
):
register_optionflag
(
'SKIP'
)
if
sys
.
version_info
[:
2
]
<
(
2
,
5
):
register_optionflag
(
'SKIP'
)
def
strip_quotes
(
s
):
def
strip_quotes
(
s
):
s
=
s
.
strip
()
s
=
s
.
strip
()
...
@@ -89,14 +97,17 @@ def strip_quotes(s):
...
@@ -89,14 +97,17 @@ def strip_quotes(s):
s
=
s
.
strip
()
s
=
s
.
strip
()
return
s
return
s
def
find_class
(
s
,
index
):
def
find_class
(
s
,
index
):
lines
=
s
[:
index
]
.
split
(
'
\n
'
)
lines
=
s
[:
index
]
.
split
(
'
\n
'
)
while
lines
:
while
lines
:
m
=
CLASS_DEF_RE
.
match
(
lines
[
-
1
])
m
=
CLASS_DEF_RE
.
match
(
lines
[
-
1
])
if
m
:
return
m
.
group
(
1
)
+
'.'
if
m
:
return
m
.
group
(
1
)
+
'.'
lines
.
pop
()
lines
.
pop
()
return
'?.'
return
'?.'
def
find_deprecated_defs
(
pkg_dir
):
def
find_deprecated_defs
(
pkg_dir
):
"""
"""
Return a list of all functions marked with the @deprecated
Return a list of all functions marked with the @deprecated
...
@@ -117,16 +128,19 @@ def find_deprecated_defs(pkg_dir):
...
@@ -117,16 +128,19 @@ def find_deprecated_defs(pkg_dir):
msg
=
' '
.
join
(
msg
.
split
())
msg
=
' '
.
join
(
msg
.
split
())
if
m
.
group
()[
0
]
in
'
\t
'
:
if
m
.
group
()[
0
]
in
'
\t
'
:
cls
=
find_class
(
s
,
m
.
start
())
cls
=
find_class
(
s
,
m
.
start
())
deprecated_methods
[
name
]
.
add
(
(
msg
,
cls
,
'()'
)
)
deprecated_methods
[
name
]
.
add
(
(
msg
,
cls
,
'()'
)
)
else
:
else
:
deprecated_funcs
[
name
]
.
add
(
(
msg
,
''
,
'()'
)
)
deprecated_funcs
[
name
]
.
add
(
(
msg
,
''
,
'()'
)
)
else
:
else
:
name
=
m
.
group
(
3
)
name
=
m
.
group
(
3
)
m2
=
STRING_RE
.
match
(
s
,
m
.
end
())
m2
=
STRING_RE
.
match
(
s
,
m
.
end
())
if
m2
:
msg
=
strip_quotes
(
m2
.
group
())
if
m2
:
else
:
msg
=
''
msg
=
strip_quotes
(
m2
.
group
())
else
:
msg
=
''
msg
=
' '
.
join
(
msg
.
split
())
msg
=
' '
.
join
(
msg
.
split
())
deprecated_classes
[
name
]
.
add
(
(
msg
,
''
,
''
))
deprecated_classes
[
name
]
.
add
((
msg
,
''
,
''
))
def
print_deprecated_uses
(
paths
):
def
print_deprecated_uses
(
paths
):
dep_names
=
set
()
dep_names
=
set
()
...
@@ -134,7 +148,7 @@ def print_deprecated_uses(paths):
...
@@ -134,7 +148,7 @@ def print_deprecated_uses(paths):
for
path
in
sorted
(
paths
):
for
path
in
sorted
(
paths
):
if
os
.
path
.
isdir
(
path
):
if
os
.
path
.
isdir
(
path
):
dep_names
.
update
(
print_deprecated_uses
(
dep_names
.
update
(
print_deprecated_uses
(
[
os
.
path
.
join
(
path
,
f
)
for
f
in
os
.
listdir
(
path
)]))
[
os
.
path
.
join
(
path
,
f
)
for
f
in
os
.
listdir
(
path
)]))
elif
path
.
endswith
(
'.py'
):
elif
path
.
endswith
(
'.py'
):
print_deprecated_uses_in
(
open
(
path
)
.
readline
,
path
,
print_deprecated_uses_in
(
open
(
path
)
.
readline
,
path
,
dep_files
,
dep_names
,
0
)
dep_files
,
dep_names
,
0
)
...
@@ -146,9 +160,10 @@ def print_deprecated_uses(paths):
...
@@ -146,9 +160,10 @@ def print_deprecated_uses(paths):
dep_names
,
example
.
lineno
)
dep_names
,
example
.
lineno
)
except
tokenize
.
TokenError
:
except
tokenize
.
TokenError
:
print
(
term
.
RED
+
'Caught TokenError -- '
print
(
term
.
RED
+
'Caught TokenError -- '
'malformatted doctest?'
+
term
.
NORMAL
)
'malformatted doctest?'
+
term
.
NORMAL
)
return
dep_names
return
dep_names
def
print_deprecated_uses_in
(
readline
,
path
,
dep_files
,
dep_names
,
def
print_deprecated_uses_in
(
readline
,
path
,
dep_files
,
dep_names
,
lineno_offset
):
lineno_offset
):
tokiter
=
tokenize
.
generate_tokens
(
readline
)
tokiter
=
tokenize
.
generate_tokens
(
readline
)
...
@@ -158,7 +173,8 @@ def print_deprecated_uses_in(readline, path, dep_files, dep_names,
...
@@ -158,7 +173,8 @@ def print_deprecated_uses_in(readline, path, dep_files, dep_names,
# the @deprecated decorator.
# the @deprecated decorator.
if
line
is
not
context
[
-
1
]:
if
line
is
not
context
[
-
1
]:
context
.
append
(
line
)
context
.
append
(
line
)
if
len
(
context
)
>
10
:
del
context
[
0
]
if
len
(
context
)
>
10
:
del
context
[
0
]
esctok
=
re
.
escape
(
tok
)
esctok
=
re
.
escape
(
tok
)
# Ignore all tokens except deprecated names.
# Ignore all tokens except deprecated names.
if
not
(
tok
in
deprecated_classes
or
if
not
(
tok
in
deprecated_classes
or
...
@@ -175,17 +191,20 @@ def print_deprecated_uses_in(readline, path, dep_files, dep_names,
...
@@ -175,17 +191,20 @@ def print_deprecated_uses_in(readline, path, dep_files, dep_names,
continue
continue
# Print a header for the first use in a file:
# Print a header for the first use in a file:
if
path
not
in
dep_files
:
if
path
not
in
dep_files
:
print
(
'
\n
'
+
term
.
BOLD
+
path
+
term
.
NORMAL
)
print
(
'
\n
'
+
term
.
BOLD
+
path
+
term
.
NORMAL
)
print
(
'
%
slinenum
%
s'
%
(
term
.
YELLOW
,
term
.
NORMAL
))
print
(
'
%
slinenum
%
s'
%
(
term
.
YELLOW
,
term
.
NORMAL
))
dep_files
.
add
(
path
)
dep_files
.
add
(
path
)
# Mark the offending token.
# Mark the offending token.
dep_names
.
add
(
tok
)
dep_names
.
add
(
tok
)
if
term
.
RED
:
sub
=
term
.
RED
+
tok
+
term
.
NORMAL
if
term
.
RED
:
elif
term
.
BOLD
:
sub
=
term
.
BOLD
+
tok
+
term
.
NORMAL
sub
=
term
.
RED
+
tok
+
term
.
NORMAL
else
:
sub
=
'<<'
+
tok
+
'>>'
elif
term
.
BOLD
:
sub
=
term
.
BOLD
+
tok
+
term
.
NORMAL
else
:
sub
=
'<<'
+
tok
+
'>>'
line
=
re
.
sub
(
r'\b
%
s\b'
%
esctok
,
sub
,
line
)
line
=
re
.
sub
(
r'\b
%
s\b'
%
esctok
,
sub
,
line
)
# Print the offending line.
# Print the offending line.
print
(
'
%
s[
%5
d]
%
s
%
s'
%
(
term
.
YELLOW
,
start
[
0
]
+
lineno_offset
,
print
(
'
%
s[
%5
d]
%
s
%
s'
%
(
term
.
YELLOW
,
start
[
0
]
+
lineno_offset
,
term
.
NORMAL
,
line
.
rstrip
()))
term
.
NORMAL
,
line
.
rstrip
()))
...
@@ -208,19 +227,17 @@ def main():
...
@@ -208,19 +227,17 @@ def main():
if
not
dep_names
:
if
not
dep_names
:
print
(
'No deprecated funcs or classes found!'
)
print
(
'No deprecated funcs or classes found!'
)
else
:
else
:
print
(
"
\n
"
+
term
.
BOLD
+
"What you should use instead:"
+
term
.
NORMAL
)
print
(
"
\n
"
+
term
.
BOLD
+
"What you should use instead:"
+
term
.
NORMAL
)
for
name
in
sorted
(
dep_names
):
for
name
in
sorted
(
dep_names
):
msgs
=
deprecated_funcs
[
name
]
.
union
(
msgs
=
deprecated_funcs
[
name
]
.
union
(
deprecated_classes
[
name
])
.
union
(
deprecated_classes
[
name
])
.
union
(
deprecated_methods
[
name
])
deprecated_methods
[
name
])
for
msg
,
prefix
,
suffix
in
msgs
:
for
msg
,
prefix
,
suffix
in
msgs
:
print
(
textwrap
.
fill
(
term
.
RED
+
prefix
+
name
+
suffix
+
print
(
textwrap
.
fill
(
term
.
RED
+
prefix
+
name
+
suffix
+
term
.
NORMAL
+
': '
+
msg
,
term
.
NORMAL
+
': '
+
msg
,
width
=
75
,
initial_indent
=
' '
*
2
,
width
=
75
,
initial_indent
=
' '
*
2
,
subsequent_indent
=
' '
*
6
))
subsequent_indent
=
' '
*
6
))
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
main
()
main
()
tools/global_replace.py
View file @
d2cfaa35
#!/usr/bin/env python
#!/usr/bin/env python
#
#
#
# Natural Language Toolkit: substitute a pattern with a replacement in every file
#
Natural Language Toolkit: substitute a pattern with
#
#
a replacement in every file
# Copyright (C) 2001-2015 NLTK Project
# Copyright (C) 2001-2015 NLTK Project
# Author: Edward Loper <edloper@gmail.com>
# Author: Edward Loper <edloper@gmail.com>
# Steven Bird <stevenbird1@gmail.com>
# Steven Bird <stevenbird1@gmail.com>
# URL: <http://nltk.org/>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
# For license information, see LICENSE.TXT
# NB Should work on all platforms, http://www.python.org/doc/2.5.2/lib/os-file-dir.html
# NB Should work on all platforms,
# http://www.python.org/doc/2.5.2/lib/os-file-dir.html
import
os
import
stat
import
sys
import
os
,
stat
,
sys
def
update
(
file
,
pattern
,
replacement
):
def
update
(
file
,
pattern
,
replacement
):
...
@@ -56,6 +60,3 @@ if __name__ == '__main__':
...
@@ -56,6 +60,3 @@ if __name__ == '__main__':
count
+=
1
count
+=
1
print
(
"Updated
%
d files"
%
count
)
print
(
"Updated
%
d files"
%
count
)
tools/nltk_term_index.py
View file @
d2cfaa35
from
__future__
import
print_function
from
__future__
import
print_function
import
re
,
sys
import
re
import
sys
import
nltk
import
nltk
import
epydoc.docbuilder
,
epydoc
.
cli
import
epydoc.docbuilder
import
epydoc.cli
from
epydoc
import
log
from
epydoc
import
log
STOPLIST
=
'../../tools/nltk_term_index.stoplist'
STOPLIST
=
'../../tools/nltk_term_index.stoplist'
...
@@ -14,6 +16,7 @@ logger = epydoc.cli.ConsoleLogger(0)
...
@@ -14,6 +16,7 @@ logger = epydoc.cli.ConsoleLogger(0)
logger
.
_verbosity
=
5
logger
.
_verbosity
=
5
log
.
register_logger
(
logger
)
log
.
register_logger
(
logger
)
def
find_all_names
(
stoplist
):
def
find_all_names
(
stoplist
):
ROOT
=
[
'nltk'
]
ROOT
=
[
'nltk'
]
logger
.
_verbosity
=
0
logger
.
_verbosity
=
0
...
@@ -21,7 +24,7 @@ def find_all_names(stoplist):
...
@@ -21,7 +24,7 @@ def find_all_names(stoplist):
valdocs
=
sorted
(
docindex
.
reachable_valdocs
(
valdocs
=
sorted
(
docindex
.
reachable_valdocs
(
imports
=
False
,
imports
=
False
,
#packages=False, bases=False, submodules=False,
#packages=False, bases=False, submodules=False,
#subclasses=False,
#
subclasses=False,
private
=
False
))
private
=
False
))
logger
.
_verbosity
=
5
logger
.
_verbosity
=
5
names
=
nltk
.
defaultdict
(
list
)
names
=
nltk
.
defaultdict
(
list
)
...
@@ -29,12 +32,14 @@ def find_all_names(stoplist):
...
@@ -29,12 +32,14 @@ def find_all_names(stoplist):
for
valdoc
in
valdocs
:
for
valdoc
in
valdocs
:
name
=
valdoc
.
canonical_name
name
=
valdoc
.
canonical_name
if
(
name
is
not
epydoc
.
apidoc
.
UNKNOWN
and
if
(
name
is
not
epydoc
.
apidoc
.
UNKNOWN
and
name
is
not
None
and
name
[
0
]
==
'nltk'
):
name
is
not
None
and
name
[
0
]
==
'nltk'
):
n
+=
1
n
+=
1
for
i
in
range
(
len
(
name
)):
for
i
in
range
(
len
(
name
)):
key
=
str
(
name
[
i
:])
key
=
str
(
name
[
i
:])
if
len
(
key
)
==
1
:
continue
if
len
(
key
)
==
1
:
if
key
in
stoplist
:
continue
continue
if
key
in
stoplist
:
continue
names
[
key
]
.
append
(
valdoc
)
names
[
key
]
.
append
(
valdoc
)
log
.
info
(
'Found
%
s names from
%
s objects'
%
(
len
(
names
),
n
))
log
.
info
(
'Found
%
s names from
%
s objects'
%
(
len
(
names
),
n
))
...
@@ -51,6 +56,7 @@ LINE_RE = re.compile('.*')
...
@@ -51,6 +56,7 @@ LINE_RE = re.compile('.*')
INDEXTERM
=
'<indexterm type="nltk"><primary>
%
s</primary></indexterm>'
INDEXTERM
=
'<indexterm type="nltk"><primary>
%
s</primary></indexterm>'
def
scan_xml
(
filenames
,
names
):
def
scan_xml
(
filenames
,
names
):
fdist
=
nltk
.
FreqDist
()
fdist
=
nltk
.
FreqDist
()
...
@@ -81,8 +87,8 @@ def scan_xml(filenames, names):
...
@@ -81,8 +87,8 @@ def scan_xml(filenames, names):
out
.
close
()
out
.
close
()
for
word
in
fdist
:
for
word
in
fdist
:
namestr
=
(
'
\n
'
+
38
*
' '
)
.
join
([
str
(
v
.
canonical_name
[:
-
1
])
namestr
=
(
'
\n
'
+
38
*
' '
)
.
join
([
str
(
v
.
canonical_name
[:
-
1
])
for
v
in
names
[
word
][:
1
]])
for
v
in
names
[
word
][:
1
]])
print
(
'[
%3
d]
%-30
s
%
s'
%
(
fdist
[
word
],
word
,
namestr
))
print
(
'[
%3
d]
%-30
s
%
s'
%
(
fdist
[
word
],
word
,
namestr
))
sys
.
stdout
.
flush
()
sys
.
stdout
.
flush
()
...
@@ -99,4 +105,3 @@ def main():
...
@@ -99,4 +105,3 @@ def main():
scan_xml
(
FILENAMES
,
names
)
scan_xml
(
FILENAMES
,
names
)
main
()
main
()
tools/run_doctests.py
View file @
d2cfaa35
...
@@ -5,7 +5,9 @@ run doctests
...
@@ -5,7 +5,9 @@ run doctests
"""
"""
from
__future__
import
print_function
from
__future__
import
print_function
import
sys
,
subprocess
,
os
import
sys
import
subprocess
import
os
for
root
,
dirs
,
filenames
in
os
.
walk
(
'.'
):
for
root
,
dirs
,
filenames
in
os
.
walk
(
'.'
):
for
filename
in
filenames
:
for
filename
in
filenames
:
...
...
tools/svnmime.py
View file @
d2cfaa35
...
@@ -36,7 +36,8 @@ types_map = {
...
@@ -36,7 +36,8 @@ types_map = {
'xml'
:
'text/xml'
,
'xml'
:
'text/xml'
,
'xsl'
:
'text/plain'
,
'xsl'
:
'text/plain'
,
'zip'
:
'application/zip'
,
'zip'
:
'application/zip'
,
}
}
def
usage
():
def
usage
():
exit
(
"Usage: svnmime files"
)
exit
(
"Usage: svnmime files"
)
...
@@ -45,6 +46,7 @@ for file in sys.argv[1:]:
...
@@ -45,6 +46,7 @@ for file in sys.argv[1:]:
if
"."
in
file
:
if
"."
in
file
:
extension
=
file
.
rsplit
(
'.'
,
1
)[
1
]
extension
=
file
.
rsplit
(
'.'
,
1
)[
1
]
if
extension
in
types_map
:
if
extension
in
types_map
:
os
.
system
(
"svn propset svn:mime-type
%
s
%
s"
%
(
types_map
[
extension
],
file
))
os
.
system
(
"svn propset svn:mime-type
%
s
%
s"
%
(
types_map
[
extension
],
file
))
else
:
else
:
print
(
"Unrecognized extension"
,
extension
)
print
(
"Unrecognized extension"
,
extension
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment