Merge branch 'develop' into twitter

8505c04a · Ewan Klein · abf75286 · 2288eecd · 8505c04a · 8505c04a
Commit 8505c04a authored May 10, 2015 by Ewan Klein
Hide whitespace changes
Inline Side-by-side

Showing with 28 additions and 11 deletions

nltk/internals.py
+17 -0

nltk/parse/dependencygraph.py
+6 -8

nltk/tag/__init__.py
+3 -1

nltk/tag/crf.py
+1 -1

nltk/tag/hunpos.py
+1 -1

No files found.
--- a/nltk/internals.py
+++ b/nltk/internals.py
@@ -592,6 +592,23 @@ def find_jar_iter(name_pattern, path_to_jar=None, env_vars=(),
                                print('[Found %s: %s]' % (name_pattern, cp))
                            yielded = True
                            yield cp
+                    # The case where the user put a directory containing the jar file in the classpath
+                    if os.path.isdir(cp):
+                        if not is_regex:
+                            if os.path.isfile(os.path.join(cp,name_pattern)):
+                                if verbose:
+                                    print('[Found %s: %s]' % (name_pattern, cp))
+                                yielded = True
+                                yield os.path.join(cp,name_pattern)
+                        else:
+                            # Look for files in the directory whose names match the regular expression
+                            for file_name in os.listdir(cp):
+                                if re.match(name_pattern,file_name):
+                                    if verbose:
+                                        print('[Found %s: %s]' % (name_pattern, os.path.join(cp,file_name)))
+                                    yielded = True
+                                    yield os.path.join(cp,file_name)
+                                
            else:
                jar_env = os.environ[env_var]
                jar_iter = ((os.path.join(jar_env, path_to_jar) for path_to_jar in os.listdir(jar_env))

--- a/nltk/parse/dependencygraph.py
+++ b/nltk/parse/dependencygraph.py
@@ -465,7 +465,7 @@ class DependencyGraph(object):
        for n in nx_nodelist:
            self.nx_labels[n] = self.nodes[n]['word']

-        g = NX.XDiGraph()
+        g = NX.MultiDiGraph()
        g.add_nodes_from(nx_nodelist)
        g.add_edges_from(nx_edgelist)

@@ -552,13 +552,11 @@ def cycle_finding_demo():
    dg = DependencyGraph(treebank_data)
    print(dg.contains_cycle())
    cyclic_dg = DependencyGraph()
-    top = {'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0}
-    child1 = {'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1}
-    child2 = {'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2}
-    child3 = {'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3}
-    child4 = {'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4}
-    cyclic_dg.nodelist = [top, child1, child2, child3, child4]
-    cyclic_dg.root = top
+    cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0})
+    cyclic_dg.add_node({'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1})
+    cyclic_dg.add_node({'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2})
+    cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3})
+    cyclic_dg.add_node({'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4})
    print(cyclic_dg.contains_cycle())

 treebank_data = """Pierre  NNP     2       NMOD

--- a/nltk/tag/__init__.py
+++ b/nltk/tag/__init__.py
@@ -83,7 +83,7 @@ from nltk.data import load
 # Standard treebank POS tagger
 _POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'

-def pos_tag(tokens):
+def pos_tag(tokens, tagset=None):
    """
    Use NLTK's currently recommended part of speech tagger to
    tag the given list of tokens.
@@ -101,6 +101,8 @@ def pos_tag(tokens):
    :rtype: list(tuple(str, str))
    """
    tagger = load(_POS_TAGGER)
+    if tagset:
+        return [(token, map_tag('en-ptb', tagset, tag)) for (token, tag) in tagger.tag(tokens)]
    return tagger.tag(tokens)

 def pos_tag_sents(sentences):

--- a/nltk/tag/crf.py
+++ b/nltk/tag/crf.py
@@ -24,7 +24,7 @@ class CRFTagger(TaggerI):
    """
    A module for POS tagging using CRFSuite https://pypi.python.org/pypi/python-crfsuite
    
-    >>> from nltk.tag.crf import CRFTagger
+    >>> from nltk.tag import CRFTagger
    >>> ct = CRFTagger()
 
    >>> train_data = [[('University','Noun'), ('is','Verb'), ('a','Det'), ('good','Adj'), ('place','Noun')],

--- a/nltk/tag/hunpos.py
+++ b/nltk/tag/hunpos.py
@@ -33,7 +33,7 @@ class HunposTagger(TaggerI):

    Example:

-        >>> from nltk.tag.hunpos import HunposTagger
+        >>> from nltk.tag import HunposTagger
        >>> ht = HunposTagger('en_wsj.model')
        >>> ht.tag('What is the airspeed of an unladen swallow ?'.split())
        [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'NN'), ('swallow', 'VB'), ('?', '.')]