Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
0653e629
Commit
0653e629
authored
Feb 09, 2015
by
Steven Bird
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #879 from dimazest/dependency_graph_fixes
Updating NonprojectiveDependencyParser.parse().
parents
e933cca0
1a3742d8
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
89 additions
and
66 deletions
+89
-66
nltk/parse/dependencygraph.py
+15
-8
nltk/parse/nonprojectivedependencyparser.py
+32
-20
nltk/test/dependency.doctest
+42
-38
No files found.
nltk/parse/dependencygraph.py
View file @
0653e629
...
@@ -102,7 +102,7 @@ class DependencyGraph(object):
...
@@ -102,7 +102,7 @@ class DependencyGraph(object):
self
.
nodes
[
head_address
][
'deps'
]
.
setdefault
(
relation
,[])
self
.
nodes
[
head_address
][
'deps'
]
.
setdefault
(
relation
,[])
self
.
nodes
[
head_address
][
'deps'
][
relation
]
.
append
(
mod_address
)
self
.
nodes
[
head_address
][
'deps'
][
relation
]
.
append
(
mod_address
)
#self.nodes[head_address]['deps'].append(mod_address)
#self.nodes[head_address]['deps'].append(mod_address)
def
connect_graph
(
self
):
def
connect_graph
(
self
):
"""
"""
...
@@ -113,7 +113,7 @@ class DependencyGraph(object):
...
@@ -113,7 +113,7 @@ class DependencyGraph(object):
for
node2
in
self
.
nodes
.
values
():
for
node2
in
self
.
nodes
.
values
():
if
node1
[
'address'
]
!=
node2
[
'address'
]
and
node2
[
'rel'
]
!=
'TOP'
:
if
node1
[
'address'
]
!=
node2
[
'address'
]
and
node2
[
'rel'
]
!=
'TOP'
:
relation
=
node2
[
'rel'
]
relation
=
node2
[
'rel'
]
node1
[
'deps'
]
.
setdefault
(
relation
,
[])
node1
[
'deps'
]
.
setdefault
(
relation
,
[])
node1
[
'deps'
][
relation
]
.
append
(
node2
[
'address'
])
node1
[
'deps'
][
relation
]
.
append
(
node2
[
'address'
])
#node1['deps'].append(node2['address'])
#node1['deps'].append(node2['address'])
...
@@ -214,17 +214,21 @@ class DependencyGraph(object):
...
@@ -214,17 +214,21 @@ class DependencyGraph(object):
lines
=
(
l
.
rstrip
()
for
l
in
input_
)
lines
=
(
l
.
rstrip
()
for
l
in
input_
)
lines
=
(
l
for
l
in
lines
if
l
)
lines
=
(
l
for
l
in
lines
if
l
)
cell_number
=
None
for
index
,
line
in
enumerate
(
lines
,
start
=
1
):
for
index
,
line
in
enumerate
(
lines
,
start
=
1
):
cells
=
line
.
split
(
cell_separator
)
cells
=
line
.
split
(
cell_separator
)
nrCells
=
len
(
cells
)
if
cell_number
is
None
:
cell_number
=
len
(
cells
)
else
:
assert
cell_number
==
len
(
cells
)
if
cell_extractor
is
None
:
if
cell_extractor
is
None
:
try
:
try
:
cell_extractor
=
extractors
[
nrCells
]
cell_extractor
=
extractors
[
cell_number
]
except
KeyError
:
except
KeyError
:
raise
ValueError
(
raise
ValueError
(
'Number of tab-delimited fields ({0}) not supported by '
'Number of tab-delimited fields ({0}) not supported by '
'CoNLL(10) or Malt-Tab(4) format'
.
format
(
nrCells
)
'CoNLL(10) or Malt-Tab(4) format'
.
format
(
cell_number
)
)
)
word
,
lemma
,
ctag
,
tag
,
feats
,
head
,
rel
=
cell_extractor
(
cells
)
word
,
lemma
,
ctag
,
tag
,
feats
,
head
,
rel
=
cell_extractor
(
cells
)
...
@@ -246,6 +250,9 @@ class DependencyGraph(object):
...
@@ -246,6 +250,9 @@ class DependencyGraph(object):
}
}
)
)
# Make sure that the fake root node has labeled dependencies.
if
(
cell_number
==
3
)
and
(
head
==
0
):
rel
=
'ROOT'
self
.
nodes
[
head
][
'deps'
][
rel
]
.
append
(
index
)
self
.
nodes
[
head
][
'deps'
][
rel
]
.
append
(
index
)
if
not
self
.
nodes
[
0
][
'deps'
][
'ROOT'
]:
if
not
self
.
nodes
[
0
][
'deps'
][
'ROOT'
]:
...
@@ -271,7 +278,7 @@ class DependencyGraph(object):
...
@@ -271,7 +278,7 @@ class DependencyGraph(object):
"""
"""
node
=
self
.
get_by_address
(
i
)
node
=
self
.
get_by_address
(
i
)
word
=
node
[
'word'
]
word
=
node
[
'word'
]
deps
=
list
(
chain
.
from_iterable
(
node
[
'deps'
]
.
values
()))
deps
=
sorted
(
chain
.
from_iterable
(
node
[
'deps'
]
.
values
()))
if
deps
:
if
deps
:
return
Tree
(
word
,
[
self
.
_tree
(
dep
)
for
dep
in
deps
])
return
Tree
(
word
,
[
self
.
_tree
(
dep
)
for
dep
in
deps
])
...
@@ -286,7 +293,7 @@ class DependencyGraph(object):
...
@@ -286,7 +293,7 @@ class DependencyGraph(object):
node
=
self
.
root
node
=
self
.
root
word
=
node
[
'word'
]
word
=
node
[
'word'
]
deps
=
chain
.
from_iterable
(
node
[
'deps'
]
.
values
(
))
deps
=
sorted
(
chain
.
from_iterable
(
node
[
'deps'
]
.
values
()
))
return
Tree
(
word
,
[
self
.
_tree
(
dep
)
for
dep
in
deps
])
return
Tree
(
word
,
[
self
.
_tree
(
dep
)
for
dep
in
deps
])
def
triples
(
self
,
node
=
None
):
def
triples
(
self
,
node
=
None
):
...
@@ -299,7 +306,7 @@ class DependencyGraph(object):
...
@@ -299,7 +306,7 @@ class DependencyGraph(object):
node
=
self
.
root
node
=
self
.
root
head
=
(
node
[
'word'
],
node
[
'ctag'
])
head
=
(
node
[
'word'
],
node
[
'ctag'
])
for
i
in
node
[
'deps'
]
:
for
i
in
sorted
(
chain
.
from_iterable
(
node
[
'deps'
]
.
values
()))
:
dep
=
self
.
get_by_address
(
i
)
dep
=
self
.
get_by_address
(
i
)
yield
(
head
,
dep
[
'rel'
],
(
dep
[
'word'
],
dep
[
'ctag'
]))
yield
(
head
,
dep
[
'rel'
],
(
dep
[
'word'
],
dep
[
'ctag'
]))
for
triple
in
self
.
triples
(
node
=
dep
):
for
triple
in
self
.
triples
(
node
=
dep
):
...
...
nltk/parse/nonprojectivedependencyparser.py
View file @
0653e629
...
@@ -462,8 +462,8 @@ class ProbabilisticNonprojectiveParser(object):
...
@@ -462,8 +462,8 @@ class ProbabilisticNonprojectiveParser(object):
}
}
)
)
#print (g_graph.nodes)
#print (g_graph.nodes)
# Fully connect non-root nodes in g_graph
# Fully connect non-root nodes in g_graph
g_graph
.
connect_graph
()
g_graph
.
connect_graph
()
original_graph
=
DependencyGraph
()
original_graph
=
DependencyGraph
()
...
@@ -567,8 +567,10 @@ class ProbabilisticNonprojectiveParser(object):
...
@@ -567,8 +567,10 @@ class ProbabilisticNonprojectiveParser(object):
logger
.
debug
(
'Betas:
%
s'
,
betas
)
logger
.
debug
(
'Betas:
%
s'
,
betas
)
for
node
in
original_graph
.
nodes
.
values
():
for
node
in
original_graph
.
nodes
.
values
():
# deps must be a dictionary
# TODO: It's dangerous to assume that deps is a dictionary
#node['deps'] = []
# because it's a default dictionary. Ideally, here we should not
# be concerned how dependencies are stored inside of a dependency
# graph.
node
[
'deps'
]
=
{}
node
[
'deps'
]
=
{}
for
i
in
range
(
1
,
len
(
tokens
)
+
1
):
for
i
in
range
(
1
,
len
(
tokens
)
+
1
):
original_graph
.
add_arc
(
betas
[
i
][
0
],
betas
[
i
][
1
])
original_graph
.
add_arc
(
betas
[
i
][
0
],
betas
[
i
][
1
])
...
@@ -701,22 +703,32 @@ class NonprojectiveDependencyParser(object):
...
@@ -701,22 +703,32 @@ class NonprojectiveDependencyParser(object):
# Filter parses
# Filter parses
# ensure 1 root, everything has 1 head
# ensure 1 root, everything has 1 head
for
analysis
in
analyses
:
for
analysis
in
analyses
:
root_count
=
0
if
analysis
.
count
(
-
1
)
>
1
:
root
=
[]
# there are several root elements!
for
i
,
cell
in
enumerate
(
analysis
):
continue
if
cell
==
-
1
:
root_count
+=
1
graph
=
DependencyGraph
()
root
=
i
graph
.
root
=
graph
.
nodes
[
analysis
.
index
(
-
1
)
+
1
]
if
root_count
==
1
:
graph
=
DependencyGraph
()
for
address
,
(
token
,
head_index
)
in
enumerate
(
zip
(
tokens
,
analysis
),
start
=
1
):
graph
.
nodes
[
0
][
'deps'
]
=
root
+
1
head_address
=
head_index
+
1
for
i
in
range
(
len
(
tokens
)):
node
=
{
'word'
:
tokens
[
i
],
'address'
:
i
+
1
}
node
=
graph
.
nodes
[
address
]
node
[
'deps'
]
=
[
j
+
1
for
j
in
range
(
len
(
tokens
))
if
analysis
[
j
]
==
i
]
node
.
update
(
graph
.
nodes
[
i
+
1
]
=
node
{
# cycle = graph.contains_cycle()
'word'
:
token
,
# if not cycle:
'address'
:
address
,
yield
graph
}
)
if
head_address
==
0
:
rel
=
'ROOT'
else
:
rel
=
''
graph
.
nodes
[
head_index
+
1
][
'deps'
][
rel
]
.
append
(
address
)
# TODO: check for cycles
yield
graph
#################################################################
#################################################################
...
...
nltk/test/dependency.doctest
View file @
0653e629
...
@@ -35,30 +35,33 @@ CoNLL Data
...
@@ -35,30 +35,33 @@ CoNLL Data
... . . 9 VMOD
... . . 9 VMOD
... """
... """
>>> dg = DependencyGraph(treebank_data)
>>> dg = DependencyGraph(treebank_data)
>>>
print(dg.tree().pprint()
)
>>>
dg.tree().pprint(
)
(will
(will
(Vinken Pierre , (old (years 61)) ,)
(Vinken Pierre , (old (years 61)) ,)
(join (board the) (as (director a nonexecutive)) (Nov. 29) .))
(join (board the) (as (director a nonexecutive)) (Nov. 29) .))
>>> print(list(dg.triples()))
>>> for head, rel, dep in dg.triples():
[((u'will', u'MD'), u'SUB', (u'Vinken', u'NNP')),
... print(
((u'Vinken', u'NNP'), u'NMOD', (u'Pierre', u'NNP')),
... '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
((u'Vinken', u'NNP'), u'P', (u',', u',')),
... .format(h=head, r=rel, d=dep)
((u'Vinken', u'NNP'), u'NMOD', (u'old', u'JJ')),
... )
((u'old', u'JJ'), u'AMOD', (u'years', u'NNS')),
(will, MD), SUB, (Vinken, NNP)
((u'years', u'NNS'), u'NMOD', (u'61', u'CD')),
(Vinken, NNP), NMOD, (Pierre, NNP)
((u'Vinken', u'NNP'), u'P', (u',', u',')),
(Vinken, NNP), P, (,, ,)
((u'will', u'MD'), u'VC', (u'join', u'VB')),
(Vinken, NNP), NMOD, (old, JJ)
((u'join', u'VB'), u'OBJ', (u'board', u'NN')),
(old, JJ), AMOD, (years, NNS)
((u'board', u'NN'), u'NMOD', (u'the', u'DT')),
(years, NNS), NMOD, (61, CD)
((u'join', u'VB'), u'VMOD', (u'as', u'IN')),
(Vinken, NNP), P, (,, ,)
((u'as', u'IN'), u'PMOD', (u'director', u'NN')),
(will, MD), VC, (join, VB)
((u'director', u'NN'), u'NMOD', (u'a', u'DT')),
(join, VB), OBJ, (board, NN)
((u'director', u'NN'), u'NMOD', (u'nonexecutive', u'JJ')),
(board, NN), NMOD, (the, DT)
((u'join', u'VB'), u'VMOD', (u'Nov.', u'NNP')),
(join, VB), VMOD, (as, IN)
((u'Nov.', u'NNP'), u'NMOD', (u'29', u'CD')),
(as, IN), PMOD, (director, NN)
((u'join', u'VB'), u'VMOD', (u'.', u'.'))]
(director, NN), NMOD, (a, DT)
(director, NN), NMOD, (nonexecutive, JJ)
(join, VB), VMOD, (Nov., NNP)
(Nov., NNP), NMOD, (29, CD)
(join, VB), VMOD, (., .)
Using the dependency-parsed version of the Penn Treebank corpus sample.
Using the dependency-parsed version of the Penn Treebank corpus sample.
...
@@ -159,21 +162,22 @@ Non-Projective Dependency Parsing
...
@@ -159,21 +162,22 @@ Non-Projective Dependency Parsing
'dog' -> 'his'
'dog' -> 'his'
>>> dp = NonprojectiveDependencyParser(grammar)
>>> dp = NonprojectiveDependencyParser(grammar)
>>> for g in dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']):
>>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])
... print(g) # doctest: +NORMALIZE_WHITESPACE
{0: {'address': 0,
>>> print(g.root['word'])
'ctag': 'TOP',
taught
'deps': 3,
'feats': None,
>>> for _, node in sorted(g.nodes.items()):
'lemma': None,
... if node['word'] is not None:
'rel': 'TOP',
... print('{address} {word}: {d}'.format(d=node['deps'][''], **node))
'tag': 'TOP',
1 the: []
'word': None},
2 man: [1]
1: {'address': 1, 'deps': [], 'word': 'the'},
3 taught: [2, 7]
2: {'address': 2, 'deps': [1], 'word': 'man'},
4 his: []
3: {'address': 3, 'deps': [2, 7], 'word': 'taught'},
5 dog: [4]
4: {'address': 4, 'deps': [], 'word': 'his'},
6 to: []
5: {'address': 5, 'deps': [4], 'word': 'dog'},
7 play: [5, 6, 8]
6: {'address': 6, 'deps': [], 'word': 'to'},
8 golf: []
7: {'address': 7, 'deps': [5, 6, 8], 'word': 'play'},
8: {'address': 8, 'deps': [], 'word': 'golf'}}
>>> print(g.tree())
(taught (man the) (play (dog his) to golf))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment