Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
82aeb920
Commit
82aeb920
authored
Feb 14, 2015
by
Steven Bird
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #887 from longdt219/ParserI
Modify files according to recent change of ParserI #876
parents
cdcf6701
cf7dabbd
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
74 additions
and
26 deletions
+74
-26
nltk/parse/malt.py
+7
-4
nltk/parse/pchart.py
+41
-4
nltk/parse/stanford.py
+21
-15
nltk/parse/transitionparser.py
+5
-3
No files found.
nltk/parse/malt.py
View file @
82aeb920
...
@@ -109,9 +109,9 @@ class MaltParser(ParserI):
...
@@ -109,9 +109,9 @@ class MaltParser(ParserI):
:param sentence: Input sentence to parse
:param sentence: Input sentence to parse
:type sentence: list(tuple(str, str))
:type sentence: list(tuple(str, str))
:return:
``DependencyGraph`` the dependency graph representation
of the sentence
:return:
iter(DependencyGraph) the possible dependency graph representations
of the sentence
"""
"""
return
self
.
tagged_parse_sents
([
sentence
],
verbose
)[
0
]
return
next
(
self
.
tagged_parse_sents
([
sentence
],
verbose
))
def
tagged_parse_sents
(
self
,
sentences
,
verbose
=
False
):
def
tagged_parse_sents
(
self
,
sentences
,
verbose
=
False
):
"""
"""
...
@@ -156,7 +156,8 @@ class MaltParser(ParserI):
...
@@ -156,7 +156,8 @@ class MaltParser(ParserI):
raise
Exception
(
"MaltParser parsing (
%
s) failed with exit "
raise
Exception
(
"MaltParser parsing (
%
s) failed with exit "
"code
%
d"
%
(
' '
.
join
(
cmd
),
ret
))
"code
%
d"
%
(
' '
.
join
(
cmd
),
ret
))
return
iter
(
DependencyGraph
.
load
(
output_file
.
name
))
# Must return iter(iter(Tree))
return
(
iter
([
dep_graph
])
for
dep_graph
in
DependencyGraph
.
load
(
output_file
.
name
))
finally
:
finally
:
input_file
.
close
()
input_file
.
close
()
os
.
remove
(
input_file
.
name
)
os
.
remove
(
input_file
.
name
)
...
@@ -241,6 +242,8 @@ def demo():
...
@@ -241,6 +242,8 @@ def demo():
maltParser
.
parse_one
([
'John'
,
'sees'
,
'Mary'
],
verbose
=
verbose
)
.
tree
()
.
pprint
()
maltParser
.
parse_one
([
'John'
,
'sees'
,
'Mary'
],
verbose
=
verbose
)
.
tree
()
.
pprint
()
maltParser
.
parse_one
([
'a'
,
'man'
,
'runs'
],
verbose
=
verbose
)
.
tree
()
.
pprint
()
maltParser
.
parse_one
([
'a'
,
'man'
,
'runs'
],
verbose
=
verbose
)
.
tree
()
.
pprint
()
next
(
maltParser
.
tagged_parse
([(
'John'
,
'NNP'
),(
'sees'
,
'VB'
),(
'Mary'
,
'NNP'
)],
verbose
))
.
tree
()
.
pprint
()
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
demo
()
demo
()
nltk/parse/pchart.py
View file @
82aeb920
...
@@ -212,7 +212,7 @@ class BottomUpProbabilisticChartParser(ParserI):
...
@@ -212,7 +212,7 @@ class BottomUpProbabilisticChartParser(ParserI):
# Initialize the chart.
# Initialize the chart.
for
edge
in
bu_init
.
apply
(
chart
,
grammar
):
for
edge
in
bu_init
.
apply
(
chart
,
grammar
):
if
self
.
_trace
>
1
:
if
self
.
_trace
>
1
:
print
(
'
%-50
s [
%
s]'
%
(
chart
.
p
p
_edge
(
edge
,
width
=
2
),
print
(
'
%-50
s [
%
s]'
%
(
chart
.
p
retty_format
_edge
(
edge
,
width
=
2
),
edge
.
prob
()))
edge
.
prob
()))
queue
.
append
(
edge
)
queue
.
append
(
edge
)
...
@@ -227,7 +227,7 @@ class BottomUpProbabilisticChartParser(ParserI):
...
@@ -227,7 +227,7 @@ class BottomUpProbabilisticChartParser(ParserI):
# Get the best edge.
# Get the best edge.
edge
=
queue
.
pop
()
edge
=
queue
.
pop
()
if
self
.
_trace
>
0
:
if
self
.
_trace
>
0
:
print
(
'
%-50
s [
%
s]'
%
(
chart
.
p
p
_edge
(
edge
,
width
=
2
),
print
(
'
%-50
s [
%
s]'
%
(
chart
.
p
retty_format
_edge
(
edge
,
width
=
2
),
edge
.
prob
()))
edge
.
prob
()))
# Apply BU & FR to it.
# Apply BU & FR to it.
...
@@ -294,7 +294,7 @@ class BottomUpProbabilisticChartParser(ParserI):
...
@@ -294,7 +294,7 @@ class BottomUpProbabilisticChartParser(ParserI):
split
=
len
(
queue
)
-
self
.
beam_size
split
=
len
(
queue
)
-
self
.
beam_size
if
self
.
_trace
>
2
:
if
self
.
_trace
>
2
:
for
edge
in
queue
[:
split
]:
for
edge
in
queue
[:
split
]:
print
(
'
%-50
s [DISCARDED]'
%
chart
.
p
p
_edge
(
edge
,
2
))
print
(
'
%-50
s [DISCARDED]'
%
chart
.
p
retty_format
_edge
(
edge
,
2
))
del
queue
[:
split
]
del
queue
[:
split
]
class
InsideChartParser
(
BottomUpProbabilisticChartParser
):
class
InsideChartParser
(
BottomUpProbabilisticChartParser
):
...
@@ -397,10 +397,47 @@ def demo(choice=None, draw_parses=None, print_parses=None):
...
@@ -397,10 +397,47 @@ def demo(choice=None, draw_parses=None, print_parses=None):
summary of the results are displayed.
summary of the results are displayed.
"""
"""
import
sys
,
time
import
sys
,
time
from
nltk
import
tokenize
,
toy_pcfg1
,
toy_pcfg2
from
nltk
import
tokenize
from
nltk.parse
import
pchart
from
nltk.parse
import
pchart
# Define two demos. Each demo has a sentence and a grammar.
# Define two demos. Each demo has a sentence and a grammar.
toy_pcfg1
=
PCFG
.
fromstring
(
"""
S -> NP VP [1.0]
NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
Det -> 'the' [0.8] | 'my' [0.2]
N -> 'man' [0.5] | 'telescope' [0.5]
VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
V -> 'ate' [0.35] | 'saw' [0.65]
PP -> P NP [1.0]
P -> 'with' [0.61] | 'under' [0.39]
"""
)
toy_pcfg2
=
PCFG
.
fromstring
(
"""
S -> NP VP [1.0]
VP -> V NP [.59]
VP -> V [.40]
VP -> VP PP [.01]
NP -> Det N [.41]
NP -> Name [.28]
NP -> NP PP [.31]
PP -> P NP [1.0]
V -> 'saw' [.21]
V -> 'ate' [.51]
V -> 'ran' [.28]
N -> 'boy' [.11]
N -> 'cookie' [.12]
N -> 'table' [.13]
N -> 'telescope' [.14]
N -> 'hill' [.5]
Name -> 'Jack' [.52]
Name -> 'Bob' [.48]
P -> 'with' [.61]
P -> 'under' [.39]
Det -> 'the' [.41]
Det -> 'a' [.31]
Det -> 'my' [.28]
"""
)
demos
=
[(
'I saw John with my telescope'
,
toy_pcfg1
),
demos
=
[(
'I saw John with my telescope'
,
toy_pcfg1
),
(
'the boy saw Jack with Bob under the table with a telescope'
,
(
'the boy saw Jack with Bob under the table with a telescope'
,
toy_pcfg2
)]
toy_pcfg2
)]
...
...
nltk/parse/stanford.py
View file @
82aeb920
...
@@ -29,10 +29,16 @@ class StanfordParser(ParserI):
...
@@ -29,10 +29,16 @@ class StanfordParser(ParserI):
>>> parser=StanfordParser(
>>> parser=StanfordParser(
... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
... )
... )
>>> parser.raw_parse_sents((
>>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog"))
[Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])]
>>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents((
... "the quick brown fox jumps over the lazy dog",
... "the quick brown fox jumps over the lazy dog",
... "the quick grey wolf jumps over the lazy fox"
... "the quick grey wolf jumps over the lazy fox"
... ))
... ))
], [])
[Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
[Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
...
@@ -40,17 +46,17 @@ class StanfordParser(ParserI):
...
@@ -40,17 +46,17 @@ class StanfordParser(ParserI):
[Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
[Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]
Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]
>>> parser.parse_sents((
>>>
sum([list(dep_graphs) for dep_graphs in
parser.parse_sents((
... "I 'm a dog".split(),
... "I 'm a dog".split(),
... "This is my friends ' cat ( the tabby )".split(),
... "This is my friends ' cat ( the tabby )".split(),
... ))
... ))
], [])
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
[Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
[Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', ['-LRB-']),
Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', ['-LRB-']),
Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', ['-RRB-'])])])])])])]
Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', ['-RRB-'])])])])])])]
>>> parser.tagged_parse_sents((
>>>
sum([list(dep_graphs) for dep_graphs in
parser.tagged_parse_sents((
... (
... (
... ("The", "DT"),
... ("The", "DT"),
... ("quick", "JJ"),
... ("quick", "JJ"),
...
@@ -63,7 +69,7 @@ class StanfordParser(ParserI):
...
@@ -63,7 +69,7 @@ class StanfordParser(ParserI):
... ("dog", "NN"),
... ("dog", "NN"),
... (".", "."),
... (".", "."),
... ),
... ),
... ))
... ))
],[])
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
[Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
[Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
...
@@ -103,11 +109,11 @@ class StanfordParser(ParserI):
...
@@ -103,11 +109,11 @@ class StanfordParser(ParserI):
cur_lines
=
[]
cur_lines
=
[]
for
line
in
output_
.
splitlines
(
False
):
for
line
in
output_
.
splitlines
(
False
):
if
line
==
''
:
if
line
==
''
:
res
.
append
(
Tree
.
fromstring
(
'
\n
'
.
join
(
cur_lines
)
))
res
.
append
(
iter
([
Tree
.
fromstring
(
'
\n
'
.
join
(
cur_lines
))]
))
cur_lines
=
[]
cur_lines
=
[]
else
:
else
:
cur_lines
.
append
(
line
)
cur_lines
.
append
(
line
)
return
res
return
iter
(
res
)
def
parse_sents
(
self
,
sentences
,
verbose
=
False
):
def
parse_sents
(
self
,
sentences
,
verbose
=
False
):
"""
"""
...
@@ -120,7 +126,7 @@ class StanfordParser(ParserI):
...
@@ -120,7 +126,7 @@ class StanfordParser(ParserI):
:param sentences: Input sentences to parse
:param sentences: Input sentences to parse
:type sentences: list(list(str))
:type sentences: list(list(str))
:rtype:
list(Tree
)
:rtype:
iter(iter(Tree)
)
"""
"""
cmd
=
[
cmd
=
[
'edu.stanford.nlp.parser.lexparser.LexicalizedParser'
,
'edu.stanford.nlp.parser.lexparser.LexicalizedParser'
,
...
@@ -141,9 +147,9 @@ class StanfordParser(ParserI):
...
@@ -141,9 +147,9 @@ class StanfordParser(ParserI):
:param sentence: Input sentence to parse
:param sentence: Input sentence to parse
:type sentence: str
:type sentence: str
:rtype:
Tree
:rtype:
iter(Tree)
"""
"""
return
self
.
raw_parse_sents
((
sentence
,),
verbose
)
return
next
(
self
.
raw_parse_sents
([
sentence
],
verbose
)
)
def
raw_parse_sents
(
self
,
sentences
,
verbose
=
False
):
def
raw_parse_sents
(
self
,
sentences
,
verbose
=
False
):
"""
"""
...
@@ -153,7 +159,7 @@ class StanfordParser(ParserI):
...
@@ -153,7 +159,7 @@ class StanfordParser(ParserI):
:param sentences: Input sentences to parse
:param sentences: Input sentences to parse
:type sentences: list(str)
:type sentences: list(str)
:rtype:
list(Tree
)
:rtype:
iter(iter(Tree)
)
"""
"""
cmd
=
[
cmd
=
[
'edu.stanford.nlp.parser.lexparser.LexicalizedParser'
,
'edu.stanford.nlp.parser.lexparser.LexicalizedParser'
,
...
@@ -171,9 +177,9 @@ class StanfordParser(ParserI):
...
@@ -171,9 +177,9 @@ class StanfordParser(ParserI):
:param sentence: Input sentence to parse
:param sentence: Input sentence to parse
:type sentence: list(tuple(str, str))
:type sentence: list(tuple(str, str))
:rtype:
Tree
:rtype:
iter(Tree)
"""
"""
return
self
.
tagged_parse_sents
([
sentence
],
verbose
)[
0
]
return
next
(
self
.
tagged_parse_sents
([
sentence
],
verbose
))
def
tagged_parse_sents
(
self
,
sentences
,
verbose
=
False
):
def
tagged_parse_sents
(
self
,
sentences
,
verbose
=
False
):
"""
"""
...
@@ -183,7 +189,7 @@ class StanfordParser(ParserI):
...
@@ -183,7 +189,7 @@ class StanfordParser(ParserI):
:param sentences: Input sentences to parse
:param sentences: Input sentences to parse
:type sentences: list(list(tuple(str, str)))
:type sentences: list(list(tuple(str, str)))
:rtype:
Tree
:rtype:
iter(iter(Tree))
"""
"""
tag_separator
=
'/'
tag_separator
=
'/'
cmd
=
[
cmd
=
[
...
...
nltk/parse/transitionparser.py
View file @
82aeb920
...
@@ -6,6 +6,9 @@
...
@@ -6,6 +6,9 @@
# URL: <http://nltk.org/>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
# For license information, see LICENSE.TXT
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tempfile
import
tempfile
import
pickle
import
pickle
...
@@ -20,6 +23,7 @@ from sklearn import svm
...
@@ -20,6 +23,7 @@ from sklearn import svm
from
nltk.parse
import
ParserI
,
DependencyGraph
,
DependencyEvaluator
from
nltk.parse
import
ParserI
,
DependencyGraph
,
DependencyEvaluator
class
Configuration
(
object
):
class
Configuration
(
object
):
"""
"""
Class for holding configuration which is the partial analysis of the input sentence.
Class for holding configuration which is the partial analysis of the input sentence.
...
@@ -41,9 +45,7 @@ class Configuration(object):
...
@@ -41,9 +45,7 @@ class Configuration(object):
"""
"""
# dep_graph.nodes contain list of token for a sentence
# dep_graph.nodes contain list of token for a sentence
self
.
stack
=
[
0
]
# The root element
self
.
stack
=
[
0
]
# The root element
self
.
buffer
=
range
(
self
.
buffer
=
list
(
range
(
1
,
len
(
dep_graph
.
nodes
)))
# The rest is in the buffer
1
,
len
(
dep_graph
.
nodes
))
# The rest is in the buffer
self
.
arcs
=
[]
# empty set of arc
self
.
arcs
=
[]
# empty set of arc
self
.
_tokens
=
dep_graph
.
nodes
self
.
_tokens
=
dep_graph
.
nodes
self
.
_max_address
=
len
(
self
.
buffer
)
self
.
_max_address
=
len
(
self
.
buffer
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment