Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
aca50bcc
Commit
aca50bcc
authored
May 17, 2014
by
Steven Bird
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
WSD doesn't belong inside the sem package; may be a whole package on its own someday
parent
0f896856
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
14 deletions
+19
-14
nltk/__init__.py
+1
-1
nltk/wsd.py
+18
-13
No files found.
nltk/__init__.py
View file @
aca50bcc
...
@@ -165,7 +165,7 @@ else:
...
@@ -165,7 +165,7 @@ else:
from
nltk
import
align
,
ccg
,
chunk
,
classify
,
collocations
from
nltk
import
align
,
ccg
,
chunk
,
classify
,
collocations
from
nltk
import
data
,
featstruct
,
grammar
,
help
,
inference
,
metrics
from
nltk
import
data
,
featstruct
,
grammar
,
help
,
inference
,
metrics
from
nltk
import
misc
,
parse
,
probability
,
sem
,
stem
from
nltk
import
misc
,
parse
,
probability
,
sem
,
stem
,
wsd
from
nltk
import
tag
,
tbl
,
text
,
tokenize
,
tree
,
treetransforms
,
util
from
nltk
import
tag
,
tbl
,
text
,
tokenize
,
tree
,
treetransforms
,
util
# override any accidentally imported demo
# override any accidentally imported demo
...
...
nltk/
sem/lesk
.py
→
nltk/
wsd
.py
View file @
aca50bcc
# Natural Language Toolkit:
Lesk Algorithm
# Natural Language Toolkit:
Word Sense Disambiguation Algorithms
#
#
# Author: Liling Tan <alvations@gmail.com>
# Author: Liling Tan <alvations@gmail.com>
#
#
...
@@ -6,10 +6,13 @@
...
@@ -6,10 +6,13 @@
# URL: <http://nltk.org/>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
# For license information, see LICENSE.TXT
from
nltk
import
word_tokenize
from
nltk.corpus
import
wordnet
as
wn
from
nltk.corpus
import
wordnet
as
wn
def
compare_overlaps_greedy
(
context
,
synsets_signatures
,
pos
=
None
):
############################################################
# Lesk Algorithm
############################################################
def
_compare_overlaps_greedy
(
context
,
synsets_signatures
,
pos
=
None
):
"""
"""
Calculate overlaps between the context sentence and each synset signature
Calculate overlaps between the context sentence and each synset signature
and return the synset with the highest overlap.
and return the synset with the highest overlap.
...
@@ -30,11 +33,18 @@ def compare_overlaps_greedy(context, synsets_signatures, pos=None):
...
@@ -30,11 +33,18 @@ def compare_overlaps_greedy(context, synsets_signatures, pos=None):
max_overlaps
=
len
(
overlaps
)
max_overlaps
=
len
(
overlaps
)
return
lesk_sense
return
lesk_sense
def
wsd
(
context_sentence
,
ambiguous_word
,
pos
=
None
,
dictionary
=
None
):
def
lesk
(
context_sentence
,
ambiguous_word
,
pos
=
None
,
dictionary
=
None
):
"""
"""
This function is the implementation of the original Lesk algorithm (1986).
This function is the implementation of the original Lesk algorithm (1986).
It requires a dictionary which contains the definitions of the different
It requires a dictionary which contains the definitions of the different
senses of each word. See http://goo.gl/8TB15w
senses of each word. See http://goo.gl/8TB15w
>>> from nltk import word_tokenize
>>> sent = word_tokenize("I went to the bank to deposit money.")
>>> word = "bank"
>>> pos = "n"
>>> lesk(sent, word, pos)
Synset('depository_financial_institution.n.01')
:param context_sentence: The context sentence where the ambiguous word occurs.
:param context_sentence: The context sentence where the ambiguous word occurs.
:param ambiguous_word: The ambiguous word that requires WSD.
:param ambiguous_word: The ambiguous word that requires WSD.
...
@@ -46,16 +56,11 @@ def wsd(context_sentence, ambiguous_word, pos=None, dictionary=None):
...
@@ -46,16 +56,11 @@ def wsd(context_sentence, ambiguous_word, pos=None, dictionary=None):
dictionary
=
{}
dictionary
=
{}
for
ss
in
wn
.
synsets
(
ambiguous_word
):
for
ss
in
wn
.
synsets
(
ambiguous_word
):
dictionary
[
ss
]
=
ss
.
definition
()
.
split
()
dictionary
[
ss
]
=
ss
.
definition
()
.
split
()
best_sense
=
compare_overlaps_greedy
(
word_tokenize
(
context_sentence
)
,
\
best_sense
=
_compare_overlaps_greedy
(
context_sentence
,
\
dictionary
,
pos
)
dictionary
,
pos
)
return
best_sense
return
best_sense
def
demo
():
if
__name__
==
"__main__"
:
sent
=
"I went to the bank to deposit money."
import
doctest
word
=
"bank"
doctest
.
testmod
(
optionflags
=
doctest
.
NORMALIZE_WHITESPACE
)
pos
=
"n"
print
wsd
(
sent
,
word
,
pos
)
if
__name__
==
'__main__'
:
demo
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment