Commit 8505c04a by Ewan Klein

Merge branch 'develop' into twitter

parents abf75286 2288eecd
@@ -592,6 +592,23 @@ def find_jar_iter(name_pattern, path_to_jar=None, env_vars=(),
                                 print('[Found %s: %s]' % (name_pattern, cp))
                             yielded = True
                             yield cp
+                    # The case where user put directory containing the jar file in the classpath
+                    if os.path.isdir(cp):
+                        if not is_regex:
+                            if os.path.isfile(os.path.join(cp, name_pattern)):
+                                if verbose:
+                                    print('[Found %s: %s]' % (name_pattern, cp))
+                                yielded = True
+                                yield os.path.join(cp, name_pattern)
+                        else:
+                            # Look for file using regular expression
+                            for file_name in os.listdir(cp):
+                                if re.match(name_pattern, file_name):
+                                    if verbose:
+                                        print('[Found %s: %s]' % (name_pattern, os.path.join(cp, file_name)))
+                                    yielded = True
+                                    yield os.path.join(cp, file_name)
             else:
                 jar_env = os.environ[env_var]
                 jar_iter = ((os.path.join(jar_env, path_to_jar) for path_to_jar in os.listdir(jar_env))
...
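With this change a CLASSPATH entry may be a directory rather than the jar itself: the directory is scanned for a file matching name_pattern, either literally or as a regular expression when is_regex is set. A minimal sketch of that lookup, written as a standalone helper rather than NLTK's own find_jar_iter, with an illustrative jar name and path:

    # Standalone sketch of the directory-in-CLASSPATH lookup added above.
    # The helper name, the jar name and the path below are illustrative only.
    import os
    import re

    def jars_in_classpath_dir(cp, name_pattern, is_regex=False):
        """Yield files inside directory `cp` that match `name_pattern`."""
        if not os.path.isdir(cp):
            return
        if not is_regex:
            candidate = os.path.join(cp, name_pattern)
            if os.path.isfile(candidate):
                yield candidate
        else:
            for file_name in os.listdir(cp):
                if re.match(name_pattern, file_name):
                    yield os.path.join(cp, file_name)

    # e.g. with CLASSPATH=/opt/stanford-postagger, the directory is searched
    # for stanford-postagger.jar instead of requiring the full jar path.
    print(list(jars_in_classpath_dir('/opt/stanford-postagger', 'stanford-postagger.jar')))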
@@ -465,7 +465,7 @@ class DependencyGraph(object):
         for n in nx_nodelist:
             self.nx_labels[n] = self.nodes[n]['word']
 
-        g = NX.XDiGraph()
+        g = NX.MultiDiGraph()
         g.add_nodes_from(nx_nodelist)
         g.add_edges_from(nx_edgelist)
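NX.XDiGraph no longer exists in recent networkx releases; NX.MultiDiGraph is the closest current class (directed, allows parallel edges) and takes the same add_nodes_from/add_edges_from calls used here. A small illustrative sketch with made-up node and edge lists:

    # Illustrative only: these node/edge lists are invented, not NLTK data.
    import networkx as NX

    nx_nodelist = [1, 2, 3]
    nx_edgelist = [(2, 1, {'rel': 'NMOD'}), (3, 1, {'rel': 'OBJ'})]

    g = NX.MultiDiGraph()          # drop-in for the removed NX.XDiGraph()
    g.add_nodes_from(nx_nodelist)
    g.add_edges_from(nx_edgelist)  # 3-tuples carry an edge-attribute dict
    print(g.number_of_edges())     # 2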
@@ -552,13 +552,11 @@ def cycle_finding_demo():
     dg = DependencyGraph(treebank_data)
     print(dg.contains_cycle())
     cyclic_dg = DependencyGraph()
-    top = {'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0}
-    child1 = {'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1}
-    child2 = {'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2}
-    child3 = {'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3}
-    child4 = {'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4}
-    cyclic_dg.nodelist = [top, child1, child2, child3, child4]
-    cyclic_dg.root = top
+    cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0})
+    cyclic_dg.add_node({'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1})
+    cyclic_dg.add_node({'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2})
+    cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3})
+    cyclic_dg.add_node({'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4})
     print(cyclic_dg.contains_cycle())
 
 treebank_data = """Pierre  NNP     2       NMOD
...
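The demo now builds its deliberately cyclic graph through DependencyGraph.add_node() instead of assigning nodelist and root directly; the node dicts are unchanged, and contains_cycle() still has to detect the 1 -> 2 -> 4 -> 3 -> 1 loop in the deps fields. A sketch of the same construction, assuming an NLTK version in which the demo above runs as written:

    # Mirrors the updated demo; assumes DependencyGraph.add_node() and
    # contains_cycle() behave as in the code above.
    from nltk.parse.dependencygraph import DependencyGraph

    cyclic_dg = DependencyGraph()
    for address, dep in [(0, 1), (1, 2), (2, 4), (3, 1), (4, 3)]:
        rel = 'TOP' if address == 0 else 'NTOP'
        cyclic_dg.add_node({'word': None, 'deps': [dep], 'rel': rel, 'address': address})

    # deps chain 1 -> 2 -> 4 -> 3 -> 1, so a cycle is reported (False otherwise).
    print(cyclic_dg.contains_cycle())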
@@ -83,7 +83,7 @@ from nltk.data import load
 # Standard treebank POS tagger
 _POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'
 
-def pos_tag(tokens):
+def pos_tag(tokens, tagset=None):
     """
     Use NLTK's currently recommended part of speech tagger to
     tag the given list of tokens.
@@ -101,6 +101,8 @@ def pos_tag(tokens):
     :rtype: list(tuple(str, str))
     """
     tagger = load(_POS_TAGGER)
+    if tagset:
+        return [(token, map_tag('en-ptb', tagset, tag)) for (token, tag) in tagger.tag(tokens)]
     return tagger.tag(tokens)
 
 def pos_tag_sents(sentences):
...
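pos_tag() keeps returning Penn Treebank tags by default; when the new tagset argument is given, each tag is passed through map_tag('en-ptb', tagset, ...) before being returned. A usage sketch, assuming the pickled treebank tagger and the tagset mapping data have already been downloaded:

    # Usage sketch; assumes the maxent_treebank_pos_tagger and universal_tagset
    # resources are available via nltk.download().
    from nltk.tag import pos_tag

    tokens = ['The', 'swallow', 'flies', 'south']
    print(pos_tag(tokens))                      # Penn Treebank tags, e.g. ('flies', 'VBZ')
    print(pos_tag(tokens, tagset='universal'))  # coarse tags, e.g. ('flies', 'VERB')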
@@ -24,7 +24,7 @@ class CRFTagger(TaggerI):
     """
     A module for POS tagging using CRFSuite https://pypi.python.org/pypi/python-crfsuite
 
-    >>> from nltk.tag.crf import CRFTagger
+    >>> from nltk.tag import CRFTagger
     >>> ct = CRFTagger()
     >>> train_data = [[('University','Noun'), ('is','Verb'), ('a','Det'), ('good','Adj'), ('place','Noun')],
...
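The CRFTagger docstring now imports from nltk.tag rather than nltk.tag.crf. The doctest is truncated above; a sketch of the usual train/tag round trip it illustrates, assuming the python-crfsuite backend is installed and using illustrative data and a throwaway model filename:

    # Sketch only: the training data and 'model.crf.tagger' are illustrative.
    from nltk.tag import CRFTagger

    train_data = [[('University', 'Noun'), ('is', 'Verb'), ('a', 'Det'),
                   ('good', 'Adj'), ('place', 'Noun')],
                  [('dog', 'Noun'), ('eat', 'Verb'), ('meat', 'Noun')]]

    ct = CRFTagger()
    ct.train(train_data, 'model.crf.tagger')   # writes the model file
    print(ct.tag_sents([['dog', 'is', 'good'], ['Cat', 'eat', 'meat']]))

    # An existing model can be loaded without retraining:
    ct2 = CRFTagger()
    ct2.set_model_file('model.crf.tagger')
    print(ct2.tag(['dog', 'is', 'good']))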
@@ -33,7 +33,7 @@ class HunposTagger(TaggerI):
     Example:
 
-        >>> from nltk.tag.hunpos import HunposTagger
+        >>> from nltk.tag import HunposTagger
         >>> ht = HunposTagger('en_wsj.model')
         >>> ht.tag('What is the airspeed of an unladen swallow ?'.split())
         [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'NN'), ('swallow', 'VB'), ('?', '.')]
...