Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
f8a2c52b
Commit
f8a2c52b
authored
Jun 12, 2007
by
Steven Bird
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
changed StringType checks to StringTypes, to admit unicode strings
svn/trunk@4664
parent
a102f9d9
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
15 additions
and
48 deletions
+15
-48
nltk/chunk/regexp.py
+1
-1
nltk/wordnet/dictionary.py
+3
-3
nltk/wordnet/util.py
+11
-44
No files found.
nltk/chunk/regexp.py
View file @
f8a2c52b
...
...
@@ -57,7 +57,7 @@ class RegexpChunkRule(object):
if
type
(
regexp
)
.
__name__
==
'SRE_Pattern'
:
regexp
=
regexp
.
pattern
self
.
_repl
=
repl
self
.
_descr
=
descr
if
type
(
regexp
)
==
types
.
StringType
:
if
type
(
regexp
)
in
types
.
StringTypes
:
self
.
_regexp
=
re
.
compile
(
regexp
)
else
:
self
.
_regexp
=
regexp
...
...
nltk/wordnet/dictionary.py
View file @
f8a2c52b
...
...
@@ -10,7 +10,7 @@
# Dictionary classes, which allow users to access
# Wordnet data via a handy dict notation (see below).
from
types
import
IntType
,
StringType
import
types
from
util
import
*
from
cache
import
entityCache
...
...
@@ -137,10 +137,10 @@ class Dictionary(object):
>>> N[0]
'hood(n.)
"""
if
isinstance
(
index
,
StringType
)
:
if
type
(
index
)
in
types
.
StringTypes
:
return
self
.
getWord
(
index
)
elif
isinstance
(
index
,
IntType
)
:
elif
type
(
index
)
==
types
.
IntType
:
line
=
self
.
indexFile
[
index
]
return
self
.
getWord
(
string
.
replace
(
line
[:
string
.
find
(
line
,
' '
)],
'_'
,
' '
),
line
)
...
...
nltk/wordnet/util.py
View file @
f8a2c52b
...
...
@@ -7,9 +7,8 @@
# URL: <http://nltk.sf.net>
# For license information, see LICENSE.TXT
import
os
,
string
import
os
,
string
,
types
from
nltk.corpora
import
get_basedir
from
types
import
IntType
,
StringType
ANTONYM
=
'antonym'
HYPERNYM
=
'hypernym'
...
...
@@ -182,7 +181,6 @@ def dataFilePathname(filenameroot):
if
os
.
name
in
(
'dos'
,
'nt'
):
path
=
os
.
path
.
join
(
get_basedir
(),
"wordnet"
,
filenameroot
+
".dat"
)
if
os
.
path
.
exists
(
path
):
return
path
...
...
@@ -214,30 +212,24 @@ def binarySearchFile(file, key, cache={}, cacheDepth=-1):
else
:
file
.
seek
(
max
(
0
,
middle
-
1
))
if
middle
>
0
:
file
.
readline
()
offset
,
line
=
file
.
tell
(),
file
.
readline
()
if
currentDepth
<
cacheDepth
:
cache
[
middle
]
=
(
offset
,
line
)
if
offset
>
end
:
assert
end
!=
middle
-
1
,
"infinite loop"
end
=
middle
-
1
elif
line
[:
keylen
]
==
key
:
return
line
elif
line
>
key
:
assert
end
!=
middle
-
1
,
"infinite loop"
end
=
middle
-
1
elif
line
<
key
:
start
=
offset
+
len
(
line
)
-
1
currentDepth
=
currentDepth
+
1
currentDepth
+=
1
thisState
=
start
,
end
if
lastState
==
thisState
:
...
...
@@ -293,13 +285,11 @@ class IndexFile(object):
"""
self
.
file
.
seek
(
0
)
while
1
:
while
True
:
offset
=
self
.
file
.
tell
()
line
=
self
.
file
.
readline
()
if
(
line
[
0
]
!=
' '
):
break
self
.
nextIndex
=
0
self
.
nextOffset
=
offset
...
...
@@ -307,53 +297,36 @@ class IndexFile(object):
return
1
def
__len__
(
self
):
if
hasattr
(
self
,
'indexCache'
):
return
len
(
self
.
indexCache
)
self
.
rewind
()
lines
=
0
while
1
:
while
True
:
line
=
self
.
file
.
readline
()
if
line
==
""
:
break
lines
=
lines
+
1
lines
+=
1
return
lines
def
__nonzero__
(
self
):
return
1
def
__getitem__
(
self
,
index
):
if
isinstance
(
index
,
StringType
):
if
type
(
index
)
in
types
.
StringTypes
:
if
hasattr
(
self
,
'indexCache'
):
return
self
.
indexCache
[
index
]
return
binarySearchFile
(
self
.
file
,
index
,
self
.
offsetLineCache
,
8
)
elif
isinstance
(
index
,
IntType
):
elif
type
(
index
)
==
types
.
IntType
:
if
hasattr
(
self
,
'indexCache'
):
return
self
.
get
(
self
.
keys
[
index
])
if
index
<
self
.
nextIndex
:
self
.
rewind
()
while
self
.
nextIndex
<=
index
:
self
.
file
.
seek
(
self
.
nextOffset
)
line
=
self
.
file
.
readline
()
if
line
==
""
:
raise
IndexError
,
"index out of range"
self
.
nextIndex
=
self
.
nextIndex
+
1
self
.
nextIndex
+=
1
self
.
nextOffset
=
self
.
file
.
tell
()
return
line
else
:
raise
TypeError
,
"
%
s is not a String or Int"
%
`index`
...
...
@@ -366,7 +339,6 @@ class IndexFile(object):
"""
try
:
return
self
[
key
]
except
LookupError
:
return
default
...
...
@@ -379,19 +351,14 @@ class IndexFile(object):
keys
=
self
.
indexCache
.
keys
()
keys
.
sort
()
return
keys
else
:
keys
=
[]
self
.
rewind
()
while
1
:
while
True
:
line
=
self
.
file
.
readline
()
if
not
line
:
break
key
=
line
.
split
(
' '
,
1
)[
0
]
keys
.
append
(
key
.
replace
(
'_'
,
' '
))
return
keys
def
has_key
(
self
,
key
):
...
...
@@ -420,7 +387,7 @@ class IndexFile(object):
self
.
rewind
()
count
=
0
while
1
:
while
True
:
offset
,
line
=
self
.
file
.
tell
(),
self
.
file
.
readline
()
if
not
line
:
break
key
=
line
[:
string
.
find
(
line
,
' '
)]
...
...
@@ -429,7 +396,7 @@ class IndexFile(object):
import
sys
sys
.
stdout
.
flush
()
indexCache
[
key
]
=
line
count
=
count
+
1
count
+=
1
indexCache
.
close
()
os
.
rename
(
tempname
,
self
.
shelfname
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment