Commit 00c52aa8 by Steven Bird

Merge branch 'test' of https://github.com/nltk/nltk into test

parents da669e67 82aeb920
...@@ -109,9 +109,9 @@ class MaltParser(ParserI): ...@@ -109,9 +109,9 @@ class MaltParser(ParserI):
:param sentence: Input sentence to parse :param sentence: Input sentence to parse
:type sentence: list(tuple(str, str)) :type sentence: list(tuple(str, str))
:return: ``DependencyGraph`` the dependency graph representation of the sentence :return: iter(DependencyGraph) the possible dependency graph representations of the sentence
""" """
return self.tagged_parse_sents([sentence], verbose)[0] return next(self.tagged_parse_sents([sentence], verbose))
def tagged_parse_sents(self, sentences, verbose=False): def tagged_parse_sents(self, sentences, verbose=False):
""" """
...@@ -156,7 +156,8 @@ class MaltParser(ParserI): ...@@ -156,7 +156,8 @@ class MaltParser(ParserI):
raise Exception("MaltParser parsing (%s) failed with exit " raise Exception("MaltParser parsing (%s) failed with exit "
"code %d" % (' '.join(cmd), ret)) "code %d" % (' '.join(cmd), ret))
return iter(DependencyGraph.load(output_file.name)) # Must return iter(iter(Tree))
return (iter([dep_graph]) for dep_graph in DependencyGraph.load(output_file.name))
finally: finally:
input_file.close() input_file.close()
os.remove(input_file.name) os.remove(input_file.name)
...@@ -241,6 +242,8 @@ def demo(): ...@@ -241,6 +242,8 @@ def demo():
maltParser.parse_one(['John','sees','Mary'], verbose=verbose).tree().pprint() maltParser.parse_one(['John','sees','Mary'], verbose=verbose).tree().pprint()
maltParser.parse_one(['a','man','runs'], verbose=verbose).tree().pprint() maltParser.parse_one(['a','man','runs'], verbose=verbose).tree().pprint()
next(maltParser.tagged_parse([('John','NNP'),('sees','VB'),('Mary','NNP')], verbose)).tree().pprint()
if __name__ == '__main__': if __name__ == '__main__':
demo() demo()
...@@ -397,10 +397,47 @@ def demo(choice=None, draw_parses=None, print_parses=None): ...@@ -397,10 +397,47 @@ def demo(choice=None, draw_parses=None, print_parses=None):
summary of the results are displayed. summary of the results are displayed.
""" """
import sys, time import sys, time
from nltk import tokenize, toy_pcfg1, toy_pcfg2 from nltk import tokenize
from nltk.parse import pchart from nltk.parse import pchart
# Define two demos. Each demo has a sentence and a grammar. # Define two demos. Each demo has a sentence and a grammar.
toy_pcfg1 = PCFG.fromstring("""
S -> NP VP [1.0]
NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
Det -> 'the' [0.8] | 'my' [0.2]
N -> 'man' [0.5] | 'telescope' [0.5]
VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
V -> 'ate' [0.35] | 'saw' [0.65]
PP -> P NP [1.0]
P -> 'with' [0.61] | 'under' [0.39]
""")
toy_pcfg2 = PCFG.fromstring("""
S -> NP VP [1.0]
VP -> V NP [.59]
VP -> V [.40]
VP -> VP PP [.01]
NP -> Det N [.41]
NP -> Name [.28]
NP -> NP PP [.31]
PP -> P NP [1.0]
V -> 'saw' [.21]
V -> 'ate' [.51]
V -> 'ran' [.28]
N -> 'boy' [.11]
N -> 'cookie' [.12]
N -> 'table' [.13]
N -> 'telescope' [.14]
N -> 'hill' [.5]
Name -> 'Jack' [.52]
Name -> 'Bob' [.48]
P -> 'with' [.61]
P -> 'under' [.39]
Det -> 'the' [.41]
Det -> 'a' [.31]
Det -> 'my' [.28]
""")
demos = [('I saw John with my telescope', toy_pcfg1), demos = [('I saw John with my telescope', toy_pcfg1),
('the boy saw Jack with Bob under the table with a telescope', ('the boy saw Jack with Bob under the table with a telescope',
toy_pcfg2)] toy_pcfg2)]
......
...@@ -29,10 +29,16 @@ class StanfordParser(ParserI): ...@@ -29,10 +29,16 @@ class StanfordParser(ParserI):
>>> parser=StanfordParser( >>> parser=StanfordParser(
... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" ... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
... ) ... )
>>> parser.raw_parse_sents((
>>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog"))
[Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])]
>>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents((
... "the quick brown fox jumps over the lazy dog", ... "the quick brown fox jumps over the lazy dog",
... "the quick grey wolf jumps over the lazy fox" ... "the quick grey wolf jumps over the lazy fox"
... )) ... ))], [])
[Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP', Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
...@@ -40,17 +46,17 @@ class StanfordParser(ParserI): ...@@ -40,17 +46,17 @@ class StanfordParser(ParserI):
[Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])] Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]
>>> parser.parse_sents(( >>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents((
... "I 'm a dog".split(), ... "I 'm a dog".split(),
... "This is my friends ' cat ( the tabby )".split(), ... "This is my friends ' cat ( the tabby )".split(),
... )) ... ))], [])
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]), [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP', Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
[Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']), [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', ['-LRB-']), Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', ['-LRB-']),
Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', ['-RRB-'])])])])])])] Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', ['-RRB-'])])])])])])]
>>> parser.tagged_parse_sents(( >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents((
... ( ... (
... ("The", "DT"), ... ("The", "DT"),
... ("quick", "JJ"), ... ("quick", "JJ"),
...@@ -63,7 +69,7 @@ class StanfordParser(ParserI): ...@@ -63,7 +69,7 @@ class StanfordParser(ParserI):
... ("dog", "NN"), ... ("dog", "NN"),
... (".", "."), ... (".", "."),
... ), ... ),
... )) ... ))],[])
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP', Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
[Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])] [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
...@@ -103,11 +109,11 @@ class StanfordParser(ParserI): ...@@ -103,11 +109,11 @@ class StanfordParser(ParserI):
cur_lines = [] cur_lines = []
for line in output_.splitlines(False): for line in output_.splitlines(False):
if line == '': if line == '':
res.append(Tree.fromstring('\n'.join(cur_lines))) res.append(iter([Tree.fromstring('\n'.join(cur_lines))]))
cur_lines = [] cur_lines = []
else: else:
cur_lines.append(line) cur_lines.append(line)
return res return iter(res)
def parse_sents(self, sentences, verbose=False): def parse_sents(self, sentences, verbose=False):
""" """
...@@ -120,7 +126,7 @@ class StanfordParser(ParserI): ...@@ -120,7 +126,7 @@ class StanfordParser(ParserI):
:param sentences: Input sentences to parse :param sentences: Input sentences to parse
:type sentences: list(list(str)) :type sentences: list(list(str))
:rtype: list(Tree) :rtype: iter(iter(Tree))
""" """
cmd = [ cmd = [
'edu.stanford.nlp.parser.lexparser.LexicalizedParser', 'edu.stanford.nlp.parser.lexparser.LexicalizedParser',
...@@ -141,9 +147,9 @@ class StanfordParser(ParserI): ...@@ -141,9 +147,9 @@ class StanfordParser(ParserI):
:param sentence: Input sentence to parse :param sentence: Input sentence to parse
:type sentence: str :type sentence: str
:rtype: Tree :rtype: iter(Tree)
""" """
return self.raw_parse_sents((sentence,), verbose) return next(self.raw_parse_sents([sentence], verbose))
def raw_parse_sents(self, sentences, verbose=False): def raw_parse_sents(self, sentences, verbose=False):
""" """
...@@ -153,7 +159,7 @@ class StanfordParser(ParserI): ...@@ -153,7 +159,7 @@ class StanfordParser(ParserI):
:param sentences: Input sentences to parse :param sentences: Input sentences to parse
:type sentences: list(str) :type sentences: list(str)
:rtype: list(Tree) :rtype: iter(iter(Tree))
""" """
cmd = [ cmd = [
'edu.stanford.nlp.parser.lexparser.LexicalizedParser', 'edu.stanford.nlp.parser.lexparser.LexicalizedParser',
...@@ -171,9 +177,9 @@ class StanfordParser(ParserI): ...@@ -171,9 +177,9 @@ class StanfordParser(ParserI):
:param sentence: Input sentence to parse :param sentence: Input sentence to parse
:type sentence: list(tuple(str, str)) :type sentence: list(tuple(str, str))
:rtype: Tree :rtype: iter(Tree)
""" """
return self.tagged_parse_sents([sentence], verbose)[0] return next(self.tagged_parse_sents([sentence], verbose))
def tagged_parse_sents(self, sentences, verbose=False): def tagged_parse_sents(self, sentences, verbose=False):
""" """
...@@ -183,7 +189,7 @@ class StanfordParser(ParserI): ...@@ -183,7 +189,7 @@ class StanfordParser(ParserI):
:param sentences: Input sentences to parse :param sentences: Input sentences to parse
:type sentences: list(list(tuple(str, str))) :type sentences: list(list(tuple(str, str)))
:rtype: Tree :rtype: iter(iter(Tree))
""" """
tag_separator = '/' tag_separator = '/'
cmd = [ cmd = [
......
...@@ -6,6 +6,9 @@ ...@@ -6,6 +6,9 @@
# URL: <http://nltk.org/> # URL: <http://nltk.org/>
# For license information, see LICENSE.TXT # For license information, see LICENSE.TXT
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tempfile import tempfile
import pickle import pickle
...@@ -20,6 +23,7 @@ from sklearn import svm ...@@ -20,6 +23,7 @@ from sklearn import svm
from nltk.parse import ParserI, DependencyGraph, DependencyEvaluator from nltk.parse import ParserI, DependencyGraph, DependencyEvaluator
class Configuration(object): class Configuration(object):
""" """
Class for holding configuration which is the partial analysis of the input sentence. Class for holding configuration which is the partial analysis of the input sentence.
...@@ -41,9 +45,7 @@ class Configuration(object): ...@@ -41,9 +45,7 @@ class Configuration(object):
""" """
# dep_graph.nodes contain list of token for a sentence # dep_graph.nodes contain list of token for a sentence
self.stack = [0] # The root element self.stack = [0] # The root element
self.buffer = range( self.buffer = list(range(1, len(dep_graph.nodes))) # The rest is in the buffer
1, len(
dep_graph.nodes)) # The rest is in the buffer
self.arcs = [] # empty set of arc self.arcs = [] # empty set of arc
self._tokens = dep_graph.nodes self._tokens = dep_graph.nodes
self._max_address = len(self.buffer) self._max_address = len(self.buffer)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment