Commit 2266fa40 by Steven Bird
parent 9fa28edd
...@@ -112,7 +112,7 @@ def list2sym(lst): ...@@ -112,7 +112,7 @@ def list2sym(lst):
sym = sym.replace('.', '') sym = sym.replace('.', '')
return sym return sym
def _tree2semi_rel(tree): def tree2semi_rel(tree):
""" """
Group a chunk structure into a list of 'semi-relations' of the form (list(str), ``Tree``). Group a chunk structure into a list of 'semi-relations' of the form (list(str), ``Tree``).
...@@ -143,7 +143,7 @@ def _tree2semi_rel(tree): ...@@ -143,7 +143,7 @@ def _tree2semi_rel(tree):
def semi_rel2reldict(pairs, window=5, trace=False): def semi_rel2reldict(pairs, window=5, trace=False):
""" """
Converts the pairs generated by ``_tree2semi_rel`` into a 'reldict': a dictionary which Converts the pairs generated by ``tree2semi_rel`` into a 'reldict': a dictionary which
stores information about the subject and object NEs plus the filler between them. stores information about the subject and object NEs plus the filler between them.
Additionally, a left and right context of length <= window are captured (within Additionally, a left and right context of length <= window are captured (within
a given input sentence). a given input sentence).
...@@ -211,9 +211,9 @@ def extract_rels(subjclass, objclass, doc, corpus='ace', pattern=None, window=10 ...@@ -211,9 +211,9 @@ def extract_rels(subjclass, objclass, doc, corpus='ace', pattern=None, window=10
raise ValueError("your value for the object type has not been recognized: %s" % objclass) raise ValueError("your value for the object type has not been recognized: %s" % objclass)
if corpus == 'ace' or corpus == 'conll2002': if corpus == 'ace' or corpus == 'conll2002':
pairs = _tree2semi_rel(doc) pairs = tree2semi_rel(doc)
elif corpus == 'ieer': elif corpus == 'ieer':
pairs = _tree2semi_rel(doc.text) + _tree2semi_rel(doc.headline) pairs = tree2semi_rel(doc.text) + tree2semi_rel(doc.headline)
else: else:
raise ValueError("corpus type not recognized") raise ValueError("corpus type not recognized")
......
...@@ -94,12 +94,12 @@ recognize pairs *(o, l)* of these kinds of entities such that *o* is ...@@ -94,12 +94,12 @@ recognize pairs *(o, l)* of these kinds of entities such that *o* is
located in *l*. located in *l*.
The `sem.relextract` module provides some tools to help carry out a The `sem.relextract` module provides some tools to help carry out a
simple version of this task. The `_tree2semi_rel()` function splits a chunk simple version of this task. The `tree2semi_rel()` function splits a chunk
document into a list of two-member lists, each of which consists of a document into a list of two-member lists, each of which consists of a
(possibly empty) string followed by a `Tree` (i.e., a Named Entity): (possibly empty) string followed by a `Tree` (i.e., a Named Entity):
>>> from nltk.sem import relextract >>> from nltk.sem import relextract
>>> pairs = relextract._tree2semi_rel(tree) >>> pairs = relextract.tree2semi_rel(tree)
>>> for s, tree in pairs[18:22]: >>> for s, tree in pairs[18:22]:
... print('("...%s", %s)' % (" ".join(s[-5:]),tree)) ... print('("...%s", %s)' % (" ".join(s[-5:]),tree))
("...about first-level questions,'' said Ms.", (PERSON Cohn)) ("...about first-level questions,'' said Ms.", (PERSON Cohn))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment