Commit e3e8cebe by Steven Bird

minor refactor of code for adding PY3 to paths, resolves #880

parent 5982c0e4
......@@ -372,20 +372,22 @@ else:
"taggers/maxent_treebank_pos_tagger",
"tokenizers/punkt"]
def add_py3_data(path):
    """
    Return ``path`` with a ``/PY3`` component inserted when running
    under Python 3.

    The entries of ``_PY3_DATA_UPDATES`` are tried in order.  For the
    first entry that occurs in ``str(path)`` -- provided ``"/PY3"`` does
    not already occur there -- ``"/PY3"`` is inserted immediately after
    that entry, or after a ``".zip"`` suffix that directly follows it.
    Only that first matching entry is applied.

    :param path: the resource path to adjust; it is indexed and sliced
        directly, so it must support ``index`` and slicing.
    :return: the adjusted path, or ``path`` unchanged when ``PY3`` is
        false or no entry applies.
    """
    if not PY3:
        return path

    for marker in _PY3_DATA_UPDATES:
        as_text = str(path)
        if marker not in as_text or "/PY3" in as_text:
            continue
        # Insertion point: just past the matched entry...
        cut = path.index(marker) + len(marker)
        # ...and past a ".zip" extension when one follows directly.
        if path[cut:cut + 4] == ".zip":
            cut += 4
        return path[:cut] + "/PY3" + path[cut:]

    return path
# for use in adding /PY3 to the second (filename) argument
# of the file pointers in data.py
# NOTE(review): this hunk is rendered without +/- markers or indentation,
# so removed and added lines are interleaved below -- confirm against the
# repository before treating it as runnable code.
def py3_data(init_func):
# Decorator: wraps init_func so its second positional argument can be
# rewritten before the call.
def _decorator(*args, **kwargs):
if PY3:
# NOTE(review): the inline loop from here to "break" appears to be the
# pre-commit body; it matches the body of add_py3_data line for line
# apart from rebuilding args itself.
path = args[1]
for item in _PY3_DATA_UPDATES:
if item in str(path) and "/PY3" not in str(path):
pos = path.index(item) + len(item)
if path[pos:pos+4] == ".zip":
pos += 4
path = path[:pos] + "/PY3" + path[pos:]
args = (args[0], path) + args[2:]
break
# NOTE(review): this appears to be the post-commit replacement for the
# loop above: the rewrite of args[1] is delegated to add_py3_data and
# the remaining positional arguments are passed through untouched.
args = (args[0], add_py3_data(args[1])) + args[2:]
return init_func(*args, **kwargs)
# wraps is presumably functools.wraps (its import is not visible here).
return wraps(init_func)(_decorator)
......
......@@ -56,7 +56,9 @@ except ImportError:
# this import should be more specific:
import nltk
from nltk.compat import py3_data, text_type, string_types, BytesIO, urlopen, url2pathname
from nltk.compat import py3_data, add_py3_data
from nltk.compat import text_type, string_types, BytesIO, urlopen, url2pathname
######################################################################
# Search Path
......@@ -744,7 +746,8 @@ def load(resource_url, format='auto', cache=True, verbose=False,
:type encoding: str
:param encoding: the encoding of the input; only used for text formats.
"""
resource_url=normalize_resource_url(resource_url)
resource_url = normalize_resource_url(resource_url)
resource_url = add_py3_data(resource_url)
# Determine the format of the resource.
if format == 'auto':
......
......@@ -77,15 +77,10 @@ from nltk.tag.senna import SennaTagger, SennaChunkTagger, SennaNERTagger
from nltk.tag.mapping import tagset_mapping, map_tag
from nltk.data import load
from nltk.compat import PY3
# Standard treebank POS tagger
# NOTE(review): this hunk is rendered without +/- markers, so old and new
# lines are interleaved.  The if/else below appears to be the pre-commit
# code, which selected the /PY3 variant of the pickle explicitly.
if PY3:
_POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/PY3/english.pickle'
else:
_POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'
# NOTE(review): this unconditional assignment appears to be the post-commit
# replacement; the same commit makes load() in data.py pass resource URLs
# through add_py3_data -- confirm against the repository.
_POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'
def pos_tag(tokens):
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment