Commit 036ec1a4 by Steven Xu

fixed unit test bugs for wsd

parent 520148f1
......@@ -17,22 +17,20 @@ a the definitions of the ambiguous word.
Given an ambiguous word and the context in which the word occurs, Lesk returns
a Synset with the highest number of overlapping words between the context
sentence and different definitions form each Synset.
sentence and different definitions from each Synset.
>>> from nltk.wsd import lesk
>>> from nltk.wsd import lesk
>>> sent = "I went to the bank to deposit money."
>>> word = "bank"
>>> pos = "n"
>>> print lesk(sent, word, pos)
>>> print(lesk(sent, word, pos))
Synset('depository_financial_institution.n.01')
The definitions for "bank" are:
>>> from nltk.corpus import wordnet as wn
>>> for ss in wn.synsets('bank'):
... print ss, ss.definition()
...
... print(ss, ss.definition())
Synset('bank.n.01') sloping land (especially the slope beside a body of water)
Synset('depository_financial_institution.n.01') a financial institution that accepts deposits and channels the money into lending activities
Synset('bank.n.03') a long ridge or pile
......
......@@ -43,11 +43,11 @@ def lesk(context_sentence, ambiguous_word, pos=None, dictionary=None):
>>> sent = word_tokenize("I went to the bank to deposit money.")
>>> word = "bank"
>>> pos = "n"
>>> wsd(sent, word, pos)
>>> lesk(sent, word, pos)
Synset('depository_financial_institution.n.01')
:param context_sentence: The context sentence where the ambiguous word occurs.
:param ambiguous: The ambiguous word that requires WSD.
:param ambiguous_word: The ambiguous word that requires WSD.
:param pos: A specified Part-of-Speech (POS).
:param dictionary: A list of words that 'signifies' the ambiguous word.
:return: ``lesk_sense`` The Synset() object with the highest signature overlaps.
......@@ -56,7 +56,7 @@ def lesk(context_sentence, ambiguous_word, pos=None, dictionary=None):
dictionary = {}
for ss in wn.synsets(ambiguous_word):
dictionary[ss] = ss.definition().split()
best_sense = _compare_overlaps_greedy(context_sentence, \
best_sense = _compare_overlaps_greedy(context_sentence,
dictionary, pos)
return best_sense
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment