Merge pull request #887 from longdt219/ParserI

Modify files according to recent change of ParserI #876

Merge pull request #887 from longdt219/ParserI
Modify files according to recent change of ParserI #876
82aeb920 · Steven Bird · cdcf6701 · cf7dabbd · 82aeb920 · 82aeb920
Commit 82aeb920 authored Feb 14, 2015 by Steven Bird
Hide whitespace changes
Inline Side-by-side

Showing with 74 additions and 26 deletions

nltk/parse/malt.py
+7 -4

nltk/parse/pchart.py
+41 -4

nltk/parse/stanford.py
+21 -15

nltk/parse/transitionparser.py
+5 -3

No files found.
--- a/nltk/parse/malt.py
+++ b/nltk/parse/malt.py
@@ -109,9 +109,9 @@ class MaltParser(ParserI):
        :param sentence: Input sentence to parse
        :type sentence: list(tuple(str, str))
-        :return: ``DependencyGraph`` the dependency graph representation of the sentence
+        :return: an iterator over the possible ``DependencyGraph`` representations of the sentence
        """
-        return self.tagged_parse_sents([sentence], verbose)[0]
+        return next(self.tagged_parse_sents([sentence], verbose))
    def tagged_parse_sents(self, sentences, verbose=False):
        """
@@ -156,7 +156,8 @@ class MaltParser(ParserI):
                raise Exception("MaltParser parsing (%s) failed with exit "
                                "code %d" % (' '.join(cmd), ret))
-            return iter(DependencyGraph.load(output_file.name))
+            # Must return iter(iter(DependencyGraph)): each loaded graph is wrapped in its own iterator
+            return (iter([dep_graph]) for dep_graph in  DependencyGraph.load(output_file.name))
        finally:
            input_file.close()
            os.remove(input_file.name)
@@ -241,6 +242,8 @@ def demo():
    maltParser.parse_one(['John','sees','Mary'], verbose=verbose).tree().pprint()
    maltParser.parse_one(['a','man','runs'], verbose=verbose).tree().pprint()
+    next(maltParser.tagged_parse([('John','NNP'),('sees','VB'),('Mary','NNP')], verbose)).tree().pprint()
 if __name__ == '__main__':
    demo()
--- a/nltk/parse/pchart.py
+++ b/nltk/parse/pchart.py
@@ -212,7 +212,7 @@ class BottomUpProbabilisticChartParser(ParserI):
        # Initialize the chart.
        for edge in bu_init.apply(chart, grammar):
            if self._trace > 1:
-                print('  %-50s [%s]' % (chart.pp_edge(edge,width=2),
+                print('  %-50s [%s]' % (chart.pretty_format_edge(edge,width=2),
                                        edge.prob()))
            queue.append(edge)
@@ -227,7 +227,7 @@ class BottomUpProbabilisticChartParser(ParserI):
            # Get the best edge.
            edge = queue.pop()
            if self._trace > 0:
-                print('  %-50s [%s]' % (chart.pp_edge(edge,width=2),
+                print('  %-50s [%s]' % (chart.pretty_format_edge(edge,width=2),
                                        edge.prob()))
            # Apply BU & FR to it.
@@ -294,7 +294,7 @@ class BottomUpProbabilisticChartParser(ParserI):
            split = len(queue)-self.beam_size
            if self._trace > 2:
                for edge in queue[:split]:
-                    print('  %-50s [DISCARDED]' % chart.pp_edge(edge,2))
+                    print('  %-50s [DISCARDED]' % chart.pretty_format_edge(edge,2))
            del queue[:split]
 class InsideChartParser(BottomUpProbabilisticChartParser):
@@ -397,10 +397,47 @@ def demo(choice=None, draw_parses=None, print_parses=None):
    summary of the results are displayed.
    """
    import sys, time
-    from nltk import tokenize, toy_pcfg1, toy_pcfg2
+    from nltk import tokenize
    from nltk.parse import pchart
    # Define two demos.  Each demo has a sentence and a grammar.
+    toy_pcfg1 = PCFG.fromstring("""
+    S -> NP VP [1.0]
+    NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
+    Det -> 'the' [0.8] | 'my' [0.2]
+    N -> 'man' [0.5] | 'telescope' [0.5]
+    VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
+    V -> 'ate' [0.35] | 'saw' [0.65]
+    PP -> P NP [1.0]
+    P -> 'with' [0.61] | 'under' [0.39]
+    """)
+    toy_pcfg2 = PCFG.fromstring("""
+    S    -> NP VP         [1.0]
+    VP   -> V NP          [.59]
+    VP   -> V             [.40]
+    VP   -> VP PP         [.01]
+    NP   -> Det N         [.41]
+    NP   -> Name          [.28]
+    NP   -> NP PP         [.31]
+    PP   -> P NP          [1.0]
+    V    -> 'saw'         [.21]
+    V    -> 'ate'         [.51]
+    V    -> 'ran'         [.28]
+    N    -> 'boy'         [.11]
+    N    -> 'cookie'      [.12]
+    N    -> 'table'       [.13]
+    N    -> 'telescope'   [.14]
+    N    -> 'hill'        [.5]
+    Name -> 'Jack'        [.52]
+    Name -> 'Bob'         [.48]
+    P    -> 'with'        [.61]
+    P    -> 'under'       [.39]
+    Det  -> 'the'         [.41]
+    Det  -> 'a'           [.31]
+    Det  -> 'my'          [.28]
+    """)
    demos = [('I saw John with my telescope', toy_pcfg1),
             ('the boy saw Jack with Bob under the table with a telescope',
              toy_pcfg2)]

--- a/nltk/parse/stanford.py
+++ b/nltk/parse/stanford.py
@@ -29,10 +29,16 @@ class StanfordParser(ParserI):
    >>> parser=StanfordParser(
    ...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    ... )
-    >>> parser.raw_parse_sents((
+    >>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog"))
+    [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), 
+    Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), 
+    Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])]
+    >>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents((
    ...     "the quick brown fox jumps over the lazy dog",
    ...     "the quick grey wolf jumps over the lazy fox"
-    ... ))
+    ... ))], [])
    [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
    Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
@@ -40,17 +46,17 @@ class StanfordParser(ParserI):
    [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
    Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]
-    >>> parser.parse_sents((
+    >>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
-    ... ))
+    ... ))], [])
    [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
    Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
    [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
    Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', ['-LRB-']),
    Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', ['-RRB-'])])])])])])]
-    >>> parser.tagged_parse_sents((
+    >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents((
    ...     (
    ...         ("The", "DT"),
    ...         ("quick", "JJ"),
@@ -63,7 +69,7 @@ class StanfordParser(ParserI):
    ...         ("dog", "NN"),
    ...         (".", "."),
    ...     ),
-    ... ))
+    ... ))],[])
    [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
    [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
@@ -103,11 +109,11 @@ class StanfordParser(ParserI):
        cur_lines = []
        for line in output_.splitlines(False):
            if line == '':
-                res.append(Tree.fromstring('\n'.join(cur_lines)))
+                res.append(iter([Tree.fromstring('\n'.join(cur_lines))]))
                cur_lines = []
            else:
                cur_lines.append(line)
-        return res
+        return iter(res)
    def parse_sents(self, sentences, verbose=False):
        """
@@ -120,7 +126,7 @@ class StanfordParser(ParserI):
        :param sentences: Input sentences to parse
        :type sentences: list(list(str))
-        :rtype: list(Tree)
+        :rtype: iter(iter(Tree))
        """
        cmd = [
            'edu.stanford.nlp.parser.lexparser.LexicalizedParser',
@@ -141,9 +147,9 @@ class StanfordParser(ParserI):
        :param sentence: Input sentence to parse
        :type sentence: str
-        :rtype: Tree
+        :rtype: iter(Tree)
        """
-        return self.raw_parse_sents((sentence,), verbose)
+        return next(self.raw_parse_sents([sentence], verbose))
    def raw_parse_sents(self, sentences, verbose=False):
        """
@@ -153,7 +159,7 @@ class StanfordParser(ParserI):
        :param sentences: Input sentences to parse
        :type sentences: list(str)
-        :rtype: list(Tree)
+        :rtype: iter(iter(Tree))
        """
        cmd = [
            'edu.stanford.nlp.parser.lexparser.LexicalizedParser',
@@ -171,9 +177,9 @@ class StanfordParser(ParserI):
        :param sentence: Input sentence to parse
        :type sentence: list(tuple(str, str))
-        :rtype: Tree
+        :rtype: iter(Tree)
        """
-        return self.tagged_parse_sents([sentence], verbose)[0]
+        return next(self.tagged_parse_sents([sentence], verbose))
    def tagged_parse_sents(self, sentences, verbose=False):
        """
@@ -183,7 +189,7 @@ class StanfordParser(ParserI):
        :param sentences: Input sentences to parse
        :type sentences: list(list(tuple(str, str)))
-        :rtype: Tree
+        :rtype: iter(iter(Tree))
        """
        tag_separator = '/'
        cmd = [

--- a/nltk/parse/transitionparser.py
+++ b/nltk/parse/transitionparser.py
@@ -6,6 +6,9 @@
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import tempfile
 import pickle
@@ -20,6 +23,7 @@ from sklearn import svm
 from nltk.parse import ParserI, DependencyGraph, DependencyEvaluator
 class Configuration(object):
    """
    Class for holding configuration which is the partial analysis of the input sentence.
@@ -41,9 +45,7 @@ class Configuration(object):
        """
        # dep_graph.nodes contains the tokens of a sentence
        self.stack = [0]  # The root element
-        self.buffer = range(
+        self.buffer = list(range(1, len(dep_graph.nodes)))  # The rest is in the buffer
-            1, len(
-                dep_graph.nodes))  # The rest is in the buffer
        self.arcs = []  # empty set of arc
        self._tokens = dep_graph.nodes
        self._max_address = len(self.buffer)