Commit 8505c04a by Ewan Klein

Merge branch 'develop' into twitter

parents abf75286 2288eecd
......@@ -592,6 +592,23 @@ def find_jar_iter(name_pattern, path_to_jar=None, env_vars=(),
print('[Found %s: %s]' % (name_pattern, cp))
yielded = True
yield cp
# The case where user put directory containing the jar file in the classpath
if os.path.isdir(cp):
if not is_regex:
if os.path.isfile(os.path.join(cp,name_pattern)):
if verbose:
print('[Found %s: %s]' % (name_pattern, cp))
yielded = True
yield os.path.join(cp,name_pattern)
else:
# Look for file using regular expression
for file_name in os.listdir(cp):
if re.match(name_pattern,file_name):
if verbose:
print('[Found %s: %s]' % (name_pattern, os.path.join(cp,file_name)))
yielded = True
yield os.path.join(cp,file_name)
else:
jar_env = os.environ[env_var]
jar_iter = ((os.path.join(jar_env, path_to_jar) for path_to_jar in os.listdir(jar_env))
......
......@@ -465,7 +465,7 @@ class DependencyGraph(object):
for n in nx_nodelist:
self.nx_labels[n] = self.nodes[n]['word']
g = NX.XDiGraph()
g = NX.MultiDiGraph()
g.add_nodes_from(nx_nodelist)
g.add_edges_from(nx_edgelist)
......@@ -552,13 +552,11 @@ def cycle_finding_demo():
dg = DependencyGraph(treebank_data)
print(dg.contains_cycle())
cyclic_dg = DependencyGraph()
top = {'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0}
child1 = {'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1}
child2 = {'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2}
child3 = {'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3}
child4 = {'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4}
cyclic_dg.nodelist = [top, child1, child2, child3, child4]
cyclic_dg.root = top
cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0})
cyclic_dg.add_node({'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1})
cyclic_dg.add_node({'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2})
cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3})
cyclic_dg.add_node({'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4})
print(cyclic_dg.contains_cycle())
treebank_data = """Pierre NNP 2 NMOD
......
......@@ -83,7 +83,7 @@ from nltk.data import load
# Standard treebank POS tagger
_POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'
def pos_tag(tokens):
def pos_tag(tokens, tagset=None):
"""
Use NLTK's currently recommended part of speech tagger to
tag the given list of tokens.
......@@ -101,6 +101,8 @@ def pos_tag(tokens):
:rtype: list(tuple(str, str))
"""
tagger = load(_POS_TAGGER)
if tagset:
return [(token, map_tag('en-ptb', tagset, tag)) for (token, tag) in tagger.tag(tokens)]
return tagger.tag(tokens)
def pos_tag_sents(sentences):
......
......@@ -24,7 +24,7 @@ class CRFTagger(TaggerI):
"""
A module for POS tagging using CRFSuite https://pypi.python.org/pypi/python-crfsuite
>>> from nltk.tag.crf import CRFTagger
>>> from nltk.tag import CRFTagger
>>> ct = CRFTagger()
>>> train_data = [[('University','Noun'), ('is','Verb'), ('a','Det'), ('good','Adj'), ('place','Noun')],
......
......@@ -33,7 +33,7 @@ class HunposTagger(TaggerI):
Example:
>>> from nltk.tag.hunpos import HunposTagger
>>> from nltk.tag import HunposTagger
>>> ht = HunposTagger('en_wsj.model')
>>> ht.tag('What is the airspeed of an unladen swallow ?'.split())
[('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'NN'), ('swallow', 'VB'), ('?', '.')]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment