Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
d2cfaa35
Commit
d2cfaa35
authored
Mar 25, 2015
by
Anirudh
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
MNT: fixed pep8 errors for files in tools folder
parent
13fc199b
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
71 additions
and
44 deletions
+71
-44
tools/find_deprecated.py
+42
-25
tools/global_replace.py
+8
-7
tools/nltk_term_index.py
+14
-9
tools/run_doctests.py
+3
-1
tools/svnmime.py
+4
-2
No files found.
tools/find_deprecated.py
View file @
d2cfaa35
#!/usr/bin/env python
#
#
#
Natural Language Toolkit: Deprecated Function & Class Finder
# Natural Language Toolkit: Deprecated Function & Class Finder
#
# Copyright (C) 2001-2015 NLTK Project
# Author: Edward Loper <edloper@gmail.com>
...
...
@@ -23,8 +23,13 @@ identifier will be highlighted in red.
# Imports
######################################################################
import
os
,
re
,
sys
,
tokenize
,
textwrap
import
nltk
,
nltk
.
corpus
import
os
import
re
import
sys
import
tokenize
import
textwrap
import
nltk
import
nltk.corpus
from
doctest
import
DocTestParser
,
register_optionflag
from
cStringIO
import
StringIO
from
nltk
import
defaultdict
...
...
@@ -69,6 +74,7 @@ try:
from
epydoc.cli
import
TerminalController
except
ImportError
:
class
TerminalController
:
def
__getattr__
(
self
,
attr
):
return
''
term
=
TerminalController
()
...
...
@@ -78,7 +84,9 @@ term = TerminalController()
######################################################################
# If we're using py24, then ignore the +SKIP directive.
if
sys
.
version_info
[:
2
]
<
(
2
,
5
):
register_optionflag
(
'SKIP'
)
if
sys
.
version_info
[:
2
]
<
(
2
,
5
):
register_optionflag
(
'SKIP'
)
def
strip_quotes
(
s
):
s
=
s
.
strip
()
...
...
@@ -89,14 +97,17 @@ def strip_quotes(s):
s
=
s
.
strip
()
return
s
def
find_class
(
s
,
index
):
lines
=
s
[:
index
]
.
split
(
'
\n
'
)
while
lines
:
m
=
CLASS_DEF_RE
.
match
(
lines
[
-
1
])
if
m
:
return
m
.
group
(
1
)
+
'.'
if
m
:
return
m
.
group
(
1
)
+
'.'
lines
.
pop
()
return
'?.'
def
find_deprecated_defs
(
pkg_dir
):
"""
Return a list of all functions marked with the @deprecated
...
...
@@ -117,16 +128,19 @@ def find_deprecated_defs(pkg_dir):
msg
=
' '
.
join
(
msg
.
split
())
if
m
.
group
()[
0
]
in
'
\t
'
:
cls
=
find_class
(
s
,
m
.
start
())
deprecated_methods
[
name
]
.
add
(
(
msg
,
cls
,
'()'
)
)
deprecated_methods
[
name
]
.
add
(
(
msg
,
cls
,
'()'
)
)
else
:
deprecated_funcs
[
name
]
.
add
(
(
msg
,
''
,
'()'
)
)
deprecated_funcs
[
name
]
.
add
(
(
msg
,
''
,
'()'
)
)
else
:
name
=
m
.
group
(
3
)
m2
=
STRING_RE
.
match
(
s
,
m
.
end
())
if
m2
:
msg
=
strip_quotes
(
m2
.
group
())
else
:
msg
=
''
if
m2
:
msg
=
strip_quotes
(
m2
.
group
())
else
:
msg
=
''
msg
=
' '
.
join
(
msg
.
split
())
deprecated_classes
[
name
]
.
add
(
(
msg
,
''
,
''
))
deprecated_classes
[
name
]
.
add
((
msg
,
''
,
''
))
def
print_deprecated_uses
(
paths
):
dep_names
=
set
()
...
...
@@ -134,7 +148,7 @@ def print_deprecated_uses(paths):
for
path
in
sorted
(
paths
):
if
os
.
path
.
isdir
(
path
):
dep_names
.
update
(
print_deprecated_uses
(
[
os
.
path
.
join
(
path
,
f
)
for
f
in
os
.
listdir
(
path
)]))
[
os
.
path
.
join
(
path
,
f
)
for
f
in
os
.
listdir
(
path
)]))
elif
path
.
endswith
(
'.py'
):
print_deprecated_uses_in
(
open
(
path
)
.
readline
,
path
,
dep_files
,
dep_names
,
0
)
...
...
@@ -146,9 +160,10 @@ def print_deprecated_uses(paths):
dep_names
,
example
.
lineno
)
except
tokenize
.
TokenError
:
print
(
term
.
RED
+
'Caught TokenError -- '
'malformatted doctest?'
+
term
.
NORMAL
)
'malformatted doctest?'
+
term
.
NORMAL
)
return
dep_names
def
print_deprecated_uses_in
(
readline
,
path
,
dep_files
,
dep_names
,
lineno_offset
):
tokiter
=
tokenize
.
generate_tokens
(
readline
)
...
...
@@ -158,7 +173,8 @@ def print_deprecated_uses_in(readline, path, dep_files, dep_names,
# the @deprecated decorator.
if
line
is
not
context
[
-
1
]:
context
.
append
(
line
)
if
len
(
context
)
>
10
:
del
context
[
0
]
if
len
(
context
)
>
10
:
del
context
[
0
]
esctok
=
re
.
escape
(
tok
)
# Ignore all tokens except deprecated names.
if
not
(
tok
in
deprecated_classes
or
...
...
@@ -175,17 +191,20 @@ def print_deprecated_uses_in(readline, path, dep_files, dep_names,
continue
# Print a header for the first use in a file:
if
path
not
in
dep_files
:
print
(
'
\n
'
+
term
.
BOLD
+
path
+
term
.
NORMAL
)
print
(
'
\n
'
+
term
.
BOLD
+
path
+
term
.
NORMAL
)
print
(
'
%
slinenum
%
s'
%
(
term
.
YELLOW
,
term
.
NORMAL
))
dep_files
.
add
(
path
)
# Mark the offending token.
dep_names
.
add
(
tok
)
if
term
.
RED
:
sub
=
term
.
RED
+
tok
+
term
.
NORMAL
elif
term
.
BOLD
:
sub
=
term
.
BOLD
+
tok
+
term
.
NORMAL
else
:
sub
=
'<<'
+
tok
+
'>>'
if
term
.
RED
:
sub
=
term
.
RED
+
tok
+
term
.
NORMAL
elif
term
.
BOLD
:
sub
=
term
.
BOLD
+
tok
+
term
.
NORMAL
else
:
sub
=
'<<'
+
tok
+
'>>'
line
=
re
.
sub
(
r'\b
%
s\b'
%
esctok
,
sub
,
line
)
# Print the offending line.
print
(
'
%
s[
%5
d]
%
s
%
s'
%
(
term
.
YELLOW
,
start
[
0
]
+
lineno_offset
,
print
(
'
%
s[
%5
d]
%
s
%
s'
%
(
term
.
YELLOW
,
start
[
0
]
+
lineno_offset
,
term
.
NORMAL
,
line
.
rstrip
()))
...
...
@@ -208,19 +227,17 @@ def main():
if
not
dep_names
:
print
(
'No deprecated funcs or classes found!'
)
else
:
print
(
"
\n
"
+
term
.
BOLD
+
"What you should use instead:"
+
term
.
NORMAL
)
print
(
"
\n
"
+
term
.
BOLD
+
"What you should use instead:"
+
term
.
NORMAL
)
for
name
in
sorted
(
dep_names
):
msgs
=
deprecated_funcs
[
name
]
.
union
(
deprecated_classes
[
name
])
.
union
(
deprecated_methods
[
name
])
for
msg
,
prefix
,
suffix
in
msgs
:
print
(
textwrap
.
fill
(
term
.
RED
+
prefix
+
name
+
suffix
+
term
.
NORMAL
+
': '
+
msg
,
width
=
75
,
initial_indent
=
' '
*
2
,
subsequent_indent
=
' '
*
6
))
print
(
textwrap
.
fill
(
term
.
RED
+
prefix
+
name
+
suffix
+
term
.
NORMAL
+
': '
+
msg
,
width
=
75
,
initial_indent
=
' '
*
2
,
subsequent_indent
=
' '
*
6
))
if
__name__
==
'__main__'
:
main
()
tools/global_replace.py
View file @
d2cfaa35
#!/usr/bin/env python
#
#
# Natural Language Toolkit: substitute a pattern with a replacement in every file
#
#
Natural Language Toolkit: substitute a pattern with
#
a replacement in every file
# Copyright (C) 2001-2015 NLTK Project
# Author: Edward Loper <edloper@gmail.com>
# Steven Bird <stevenbird1@gmail.com>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
# NB Should work on all platforms, http://www.python.org/doc/2.5.2/lib/os-file-dir.html
# NB Should work on all platforms,
# http://www.python.org/doc/2.5.2/lib/os-file-dir.html
import
os
import
stat
import
sys
import
os
,
stat
,
sys
def
update
(
file
,
pattern
,
replacement
):
...
...
@@ -56,6 +60,3 @@ if __name__ == '__main__':
count
+=
1
print
(
"Updated
%
d files"
%
count
)
tools/nltk_term_index.py
View file @
d2cfaa35
from
__future__
import
print_function
import
re
,
sys
import
re
import
sys
import
nltk
import
epydoc.docbuilder
,
epydoc
.
cli
import
epydoc.docbuilder
import
epydoc.cli
from
epydoc
import
log
STOPLIST
=
'../../tools/nltk_term_index.stoplist'
...
...
@@ -14,6 +16,7 @@ logger = epydoc.cli.ConsoleLogger(0)
logger
.
_verbosity
=
5
log
.
register_logger
(
logger
)
def
find_all_names
(
stoplist
):
ROOT
=
[
'nltk'
]
logger
.
_verbosity
=
0
...
...
@@ -21,7 +24,7 @@ def find_all_names(stoplist):
valdocs
=
sorted
(
docindex
.
reachable_valdocs
(
imports
=
False
,
#packages=False, bases=False, submodules=False,
#subclasses=False,
#
subclasses=False,
private
=
False
))
logger
.
_verbosity
=
5
names
=
nltk
.
defaultdict
(
list
)
...
...
@@ -29,12 +32,14 @@ def find_all_names(stoplist):
for
valdoc
in
valdocs
:
name
=
valdoc
.
canonical_name
if
(
name
is
not
epydoc
.
apidoc
.
UNKNOWN
and
name
is
not
None
and
name
[
0
]
==
'nltk'
):
name
is
not
None
and
name
[
0
]
==
'nltk'
):
n
+=
1
for
i
in
range
(
len
(
name
)):
key
=
str
(
name
[
i
:])
if
len
(
key
)
==
1
:
continue
if
key
in
stoplist
:
continue
if
len
(
key
)
==
1
:
continue
if
key
in
stoplist
:
continue
names
[
key
]
.
append
(
valdoc
)
log
.
info
(
'Found
%
s names from
%
s objects'
%
(
len
(
names
),
n
))
...
...
@@ -51,6 +56,7 @@ LINE_RE = re.compile('.*')
INDEXTERM
=
'<indexterm type="nltk"><primary>
%
s</primary></indexterm>'
def
scan_xml
(
filenames
,
names
):
fdist
=
nltk
.
FreqDist
()
...
...
@@ -81,8 +87,8 @@ def scan_xml(filenames, names):
out
.
close
()
for
word
in
fdist
:
namestr
=
(
'
\n
'
+
38
*
' '
)
.
join
([
str
(
v
.
canonical_name
[:
-
1
])
for
v
in
names
[
word
][:
1
]])
namestr
=
(
'
\n
'
+
38
*
' '
)
.
join
([
str
(
v
.
canonical_name
[:
-
1
])
for
v
in
names
[
word
][:
1
]])
print
(
'[
%3
d]
%-30
s
%
s'
%
(
fdist
[
word
],
word
,
namestr
))
sys
.
stdout
.
flush
()
...
...
@@ -99,4 +105,3 @@ def main():
scan_xml
(
FILENAMES
,
names
)
main
()
tools/run_doctests.py
View file @
d2cfaa35
...
...
@@ -5,7 +5,9 @@ run doctests
"""
from
__future__
import
print_function
import
sys
,
subprocess
,
os
import
sys
import
subprocess
import
os
for
root
,
dirs
,
filenames
in
os
.
walk
(
'.'
):
for
filename
in
filenames
:
...
...
tools/svnmime.py
View file @
d2cfaa35
...
...
@@ -36,7 +36,8 @@ types_map = {
'xml'
:
'text/xml'
,
'xsl'
:
'text/plain'
,
'zip'
:
'application/zip'
,
}
}
def
usage
():
exit
(
"Usage: svnmime files"
)
...
...
@@ -45,6 +46,7 @@ for file in sys.argv[1:]:
if
"."
in
file
:
extension
=
file
.
rsplit
(
'.'
,
1
)[
1
]
if
extension
in
types_map
:
os
.
system
(
"svn propset svn:mime-type
%
s
%
s"
%
(
types_map
[
extension
],
file
))
os
.
system
(
"svn propset svn:mime-type
%
s
%
s"
%
(
types_map
[
extension
],
file
))
else
:
print
(
"Unrecognized extension"
,
extension
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment