Commit e3e8cebe by Steven Bird

minor refactor of code for adding PY3 to paths, resolves #880

parent 5982c0e4
...@@ -372,20 +372,22 @@ else: ...@@ -372,20 +372,22 @@ else:
"taggers/maxent_treebank_pos_tagger", "taggers/maxent_treebank_pos_tagger",
"tokenizers/punkt"] "tokenizers/punkt"]
def add_py3_data(path):
    """Return ``path`` with a ``/PY3`` component inserted where applicable.

    When ``PY3`` is true, the entries of ``_PY3_DATA_UPDATES`` are checked in
    order.  For the first entry that occurs in ``str(path)`` -- provided
    ``str(path)`` does not already contain ``"/PY3"`` -- the string ``"/PY3"``
    is inserted directly after that entry, or directly after a ``".zip"``
    suffix when one immediately follows the entry.

    :param path: the resource path or URL to adjust; it is indexed, sliced
        and concatenated with ``str``, so a string is presumably expected
        (verify against callers).
    :return: the adjusted path, or ``path`` unchanged when ``PY3`` is false,
        no entry matches, or ``"/PY3"`` is already present.
    """
    if not PY3:
        return path
    for resource in _PY3_DATA_UPDATES:
        path_text = str(path)
        if resource not in path_text or "/PY3" in path_text:
            continue
        # Position just past the matched resource name.
        insert_at = path.index(resource) + len(resource)
        # Step over a ".zip" suffix so "/PY3" lands after the archive name.
        if path[insert_at:insert_at + 4] == ".zip":
            insert_at += 4
        # Only the first matching entry is applied.
        return path[:insert_at] + "/PY3" + path[insert_at:]
    return path
# for use in adding /PY3 to the second (filename) argument # for use in adding /PY3 to the second (filename) argument
# of the file pointers in data.py # of the file pointers in data.py
def py3_data(init_func): def py3_data(init_func):
def _decorator(*args, **kwargs): def _decorator(*args, **kwargs):
if PY3: args = (args[0], add_py3_data(args[1])) + args[2:]
path = args[1]
for item in _PY3_DATA_UPDATES:
if item in str(path) and "/PY3" not in str(path):
pos = path.index(item) + len(item)
if path[pos:pos+4] == ".zip":
pos += 4
path = path[:pos] + "/PY3" + path[pos:]
args = (args[0], path) + args[2:]
break
return init_func(*args, **kwargs) return init_func(*args, **kwargs)
return wraps(init_func)(_decorator) return wraps(init_func)(_decorator)
......
...@@ -56,7 +56,9 @@ except ImportError: ...@@ -56,7 +56,9 @@ except ImportError:
# this import should be more specific: # this import should be more specific:
import nltk import nltk
from nltk.compat import py3_data, text_type, string_types, BytesIO, urlopen, url2pathname from nltk.compat import py3_data, add_py3_data
from nltk.compat import text_type, string_types, BytesIO, urlopen, url2pathname
###################################################################### ######################################################################
# Search Path # Search Path
...@@ -744,7 +746,8 @@ def load(resource_url, format='auto', cache=True, verbose=False, ...@@ -744,7 +746,8 @@ def load(resource_url, format='auto', cache=True, verbose=False,
:type encoding: str :type encoding: str
:param encoding: the encoding of the input; only used for text formats. :param encoding: the encoding of the input; only used for text formats.
""" """
resource_url=normalize_resource_url(resource_url) resource_url = normalize_resource_url(resource_url)
resource_url = add_py3_data(resource_url)
# Determine the format of the resource. # Determine the format of the resource.
if format == 'auto': if format == 'auto':
......
...@@ -77,15 +77,10 @@ from nltk.tag.senna import SennaTagger, SennaChunkTagger, SennaNERTagger ...@@ -77,15 +77,10 @@ from nltk.tag.senna import SennaTagger, SennaChunkTagger, SennaNERTagger
from nltk.tag.mapping import tagset_mapping, map_tag from nltk.tag.mapping import tagset_mapping, map_tag
from nltk.data import load from nltk.data import load
from nltk.compat import PY3
# Standard treebank POS tagger # Standard treebank POS tagger
_POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'
if PY3:
_POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/PY3/english.pickle'
else:
_POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'
def pos_tag(tokens): def pos_tag(tokens):
""" """
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment