Commit f8a2c52b by Steven Bird

Changed `StringType` checks to `StringTypes`, to admit unicode strings.

svn/trunk@4664
parent a102f9d9
...@@ -57,7 +57,7 @@ class RegexpChunkRule(object): ...@@ -57,7 +57,7 @@ class RegexpChunkRule(object):
if type(regexp).__name__ == 'SRE_Pattern': regexp = regexp.pattern if type(regexp).__name__ == 'SRE_Pattern': regexp = regexp.pattern
self._repl = repl self._repl = repl
self._descr = descr self._descr = descr
if type(regexp) == types.StringType: if type(regexp) in types.StringTypes:
self._regexp = re.compile(regexp) self._regexp = re.compile(regexp)
else: else:
self._regexp = regexp self._regexp = regexp
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
# Dictionary classes, which allow users to access # Dictionary classes, which allow users to access
# Wordnet data via a handy dict notation (see below). # Wordnet data via a handy dict notation (see below).
from types import IntType, StringType import types
from util import * from util import *
from cache import entityCache from cache import entityCache
...@@ -137,10 +137,10 @@ class Dictionary(object): ...@@ -137,10 +137,10 @@ class Dictionary(object):
>>> N[0] >>> N[0]
'hood(n.) 'hood(n.)
""" """
if isinstance(index, StringType): if type(index) in types.StringTypes:
return self.getWord(index) return self.getWord(index)
elif isinstance(index, IntType): elif type(index) == types.IntType:
line = self.indexFile[index] line = self.indexFile[index]
return self.getWord(string.replace(line[:string.find(line, ' ')], '_', ' '), line) return self.getWord(string.replace(line[:string.find(line, ' ')], '_', ' '), line)
......
...@@ -7,9 +7,8 @@ ...@@ -7,9 +7,8 @@
# URL: <http://nltk.sf.net> # URL: <http://nltk.sf.net>
# For license information, see LICENSE.TXT # For license information, see LICENSE.TXT
import os, string import os, string, types
from nltk.corpora import get_basedir from nltk.corpora import get_basedir
from types import IntType, StringType
ANTONYM = 'antonym' ANTONYM = 'antonym'
HYPERNYM = 'hypernym' HYPERNYM = 'hypernym'
...@@ -182,7 +181,6 @@ def dataFilePathname(filenameroot): ...@@ -182,7 +181,6 @@ def dataFilePathname(filenameroot):
if os.name in ('dos', 'nt'): if os.name in ('dos', 'nt'):
path = os.path.join(get_basedir(), "wordnet", filenameroot + ".dat") path = os.path.join(get_basedir(), "wordnet", filenameroot + ".dat")
if os.path.exists(path): if os.path.exists(path):
return path return path
...@@ -214,30 +212,24 @@ def binarySearchFile(file, key, cache={}, cacheDepth=-1): ...@@ -214,30 +212,24 @@ def binarySearchFile(file, key, cache={}, cacheDepth=-1):
else: else:
file.seek(max(0, middle - 1)) file.seek(max(0, middle - 1))
if middle > 0: if middle > 0:
file.readline() file.readline()
offset, line = file.tell(), file.readline() offset, line = file.tell(), file.readline()
if currentDepth < cacheDepth: if currentDepth < cacheDepth:
cache[middle] = (offset, line) cache[middle] = (offset, line)
if offset > end: if offset > end:
assert end != middle - 1, "infinite loop" assert end != middle - 1, "infinite loop"
end = middle - 1 end = middle - 1
elif line[:keylen] == key: elif line[:keylen] == key:
return line return line
elif line > key: elif line > key:
assert end != middle - 1, "infinite loop" assert end != middle - 1, "infinite loop"
end = middle - 1 end = middle - 1
elif line < key: elif line < key:
start = offset + len(line) - 1 start = offset + len(line) - 1
currentDepth = currentDepth + 1 currentDepth += 1
thisState = start, end thisState = start, end
if lastState == thisState: if lastState == thisState:
...@@ -293,13 +285,11 @@ class IndexFile(object): ...@@ -293,13 +285,11 @@ class IndexFile(object):
""" """
self.file.seek(0) self.file.seek(0)
while 1: while True:
offset = self.file.tell() offset = self.file.tell()
line = self.file.readline() line = self.file.readline()
if (line[0] != ' '): if (line[0] != ' '):
break break
self.nextIndex = 0 self.nextIndex = 0
self.nextOffset = offset self.nextOffset = offset
...@@ -307,53 +297,36 @@ class IndexFile(object): ...@@ -307,53 +297,36 @@ class IndexFile(object):
return 1 return 1
def __len__(self): def __len__(self):
if hasattr(self, 'indexCache'): if hasattr(self, 'indexCache'):
return len(self.indexCache) return len(self.indexCache)
self.rewind() self.rewind()
lines = 0 lines = 0
while True:
while 1:
line = self.file.readline() line = self.file.readline()
if line == "": if line == "":
break break
lines += 1
lines = lines + 1
return lines return lines
def __nonzero__(self):
return 1
def __getitem__(self, index): def __getitem__(self, index):
if type(index) in types.StringTypes:
if isinstance(index, StringType):
if hasattr(self, 'indexCache'): if hasattr(self, 'indexCache'):
return self.indexCache[index] return self.indexCache[index]
return binarySearchFile(self.file, index, self.offsetLineCache, 8) return binarySearchFile(self.file, index, self.offsetLineCache, 8)
elif isinstance(index, IntType): elif type(index) == types.IntType:
if hasattr(self, 'indexCache'): if hasattr(self, 'indexCache'):
return self.get(self.keys[index]) return self.get(self.keys[index])
if index < self.nextIndex: if index < self.nextIndex:
self.rewind() self.rewind()
while self.nextIndex <= index: while self.nextIndex <= index:
self.file.seek(self.nextOffset) self.file.seek(self.nextOffset)
line = self.file.readline() line = self.file.readline()
if line == "": if line == "":
raise IndexError, "index out of range" raise IndexError, "index out of range"
self.nextIndex += 1
self.nextIndex = self.nextIndex + 1
self.nextOffset = self.file.tell() self.nextOffset = self.file.tell()
return line return line
else: raise TypeError, "%s is not a String or Int" % `index` else: raise TypeError, "%s is not a String or Int" % `index`
...@@ -366,7 +339,6 @@ class IndexFile(object): ...@@ -366,7 +339,6 @@ class IndexFile(object):
""" """
try: try:
return self[key] return self[key]
except LookupError: except LookupError:
return default return default
...@@ -379,19 +351,14 @@ class IndexFile(object): ...@@ -379,19 +351,14 @@ class IndexFile(object):
keys = self.indexCache.keys() keys = self.indexCache.keys()
keys.sort() keys.sort()
return keys return keys
else: else:
keys = [] keys = []
self.rewind() self.rewind()
while True:
while 1:
line = self.file.readline() line = self.file.readline()
if not line: break if not line: break
key = line.split(' ', 1)[0] key = line.split(' ', 1)[0]
keys.append(key.replace('_', ' ')) keys.append(key.replace('_', ' '))
return keys return keys
def has_key(self, key): def has_key(self, key):
...@@ -420,7 +387,7 @@ class IndexFile(object): ...@@ -420,7 +387,7 @@ class IndexFile(object):
self.rewind() self.rewind()
count = 0 count = 0
while 1: while True:
offset, line = self.file.tell(), self.file.readline() offset, line = self.file.tell(), self.file.readline()
if not line: break if not line: break
key = line[:string.find(line, ' ')] key = line[:string.find(line, ' ')]
...@@ -429,7 +396,7 @@ class IndexFile(object): ...@@ -429,7 +396,7 @@ class IndexFile(object):
import sys import sys
sys.stdout.flush() sys.stdout.flush()
indexCache[key] = line indexCache[key] = line
count = count + 1 count += 1
indexCache.close() indexCache.close()
os.rename(tempname, self.shelfname) os.rename(tempname, self.shelfname)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment