Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
b4386729
Commit
b4386729
authored
Jul 10, 2014
by
Steven Bird
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added basic documentation for sentiwordnet
parent
76187b5a
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
91 additions
and
18 deletions
+91
-18
nltk/corpus/reader/sentiwordnet.py
+47
-18
nltk/test/corpus.doctest
+5
-0
nltk/test/sentiwordnet.doctest
+39
-0
No files found.
nltk/corpus/reader/sentiwordnet.py
View file @
b4386729
...
...
@@ -17,13 +17,28 @@ For details about SentiWordNet see:
http://sentiwordnet.isti.cnr.it/
>>> from nltk.corpus import sentiwordnet as swn
>>> print(swn.senti_synset('breakdown.n.03'))
<breakdown.n.03: PosScore=0.0 NegScore=0.25>
>>> list(swn.senti_synsets('slow'))
[SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'),\
SentiSynset('slow.v.03'), SentiSynset('slow.a.01'),\
SentiSynset('slow.a.02'), SentiSynset('slow.a.04'),\
SentiSynset('slowly.r.01'), SentiSynset('behind.r.03')]
>>> happy = swn.senti_synsets('happy', 'a')
>>> happy0 = list(happy)[0]
>>> happy0.pos_score()
0.875
>>> happy0.neg_score()
0.0
>>> happy0.obj_score()
0.125
"""
import
re
from
nltk.compat
import
python_2_unicode_compatible
from
nltk.corpus.reader
import
CorpusReader
@python_2_unicode_compatible
class
SentiWordNetCorpusReader
(
CorpusReader
):
def
__init__
(
self
,
root
,
fileids
,
encoding
=
'utf-8'
):
"""
...
...
@@ -34,10 +49,10 @@ class SentiWordNetCorpusReader(CorpusReader):
encoding
=
encoding
)
if
len
(
self
.
_fileids
)
!=
1
:
raise
ValueError
(
'Exactly one file must be specified'
)
self
.
db
=
{}
self
.
parse_src_file
()
self
.
_
db
=
{}
self
.
_
parse_src_file
()
def
parse_src_file
(
self
):
def
_
parse_src_file
(
self
):
lines
=
self
.
open
(
self
.
_fileids
[
0
])
.
read
()
.
splitlines
()
lines
=
filter
((
lambda
x
:
not
re
.
search
(
r"^\s*#"
,
x
)),
lines
)
for
i
,
line
in
enumerate
(
lines
):
...
...
@@ -48,12 +63,12 @@ class SentiWordNetCorpusReader(CorpusReader):
raise
ValueError
(
'Line
%
s formatted incorrectly:
%
s
\n
'
%
(
i
,
line
))
if
pos
and
offset
:
offset
=
int
(
offset
)
self
.
db
[(
pos
,
offset
)]
=
(
float
(
pos_score
),
float
(
neg_score
))
self
.
_
db
[(
pos
,
offset
)]
=
(
float
(
pos_score
),
float
(
neg_score
))
def
senti_synset
(
self
,
*
vals
):
from
nltk.corpus
import
wordnet
as
wn
if
tuple
(
vals
)
in
self
.
db
:
pos_score
,
neg_score
=
self
.
db
[
tuple
(
vals
)]
if
tuple
(
vals
)
in
self
.
_
db
:
pos_score
,
neg_score
=
self
.
_
db
[
tuple
(
vals
)]
pos
,
offset
=
vals
synset
=
wn
.
_synset_from_pos_and_offset
(
pos
,
offset
)
return
SentiSynset
(
pos_score
,
neg_score
,
synset
)
...
...
@@ -61,8 +76,8 @@ class SentiWordNetCorpusReader(CorpusReader):
synset
=
wn
.
synset
(
vals
[
0
])
pos
=
synset
.
pos
()
offset
=
synset
.
offset
()
if
(
pos
,
offset
)
in
self
.
db
:
pos_score
,
neg_score
=
self
.
db
[(
pos
,
offset
)]
if
(
pos
,
offset
)
in
self
.
_
db
:
pos_score
,
neg_score
=
self
.
_
db
[(
pos
,
offset
)]
return
SentiSynset
(
pos_score
,
neg_score
,
synset
)
else
:
return
None
...
...
@@ -78,28 +93,42 @@ class SentiWordNetCorpusReader(CorpusReader):
def all_senti_synsets(self):
    """Yield a ``SentiSynset`` for every entry in the score table.

    Each key of ``self._db`` is a ``(pos, offset)`` pair and each value is
    a ``(pos_score, neg_score)`` pair; the synset itself is resolved
    through the WordNet corpus reader.

    :return: a generator of ``SentiSynset`` objects
    """
    # Imported here rather than at module level, as in the other lookup
    # methods of this reader.
    from nltk.corpus import wordnet as wn

    # Iterate the table exactly once. The earlier text carried two loop
    # headers (one over ``self.db`` and one over ``self._db``); only the
    # underscore-prefixed store is used here.
    for (pos, offset), (pos_score, neg_score) in self._db.items():
        synset = wn._synset_from_pos_and_offset(pos, offset)
        yield SentiSynset(pos_score, neg_score, synset)
@python_2_unicode_compatible
class SentiSynset(object):
    """A synset together with its positive, negative and objective scores.

    The scores are read through the ``pos_score()``, ``neg_score()`` and
    ``obj_score()`` methods; the wrapped synset is available as the
    ``synset`` attribute.
    """

    def __init__(self, pos_score, neg_score, synset):
        """
        :param pos_score: the positive score for ``synset``
        :param neg_score: the negative score for ``synset``
        :param synset: the wrapped synset; ``__str__`` calls its ``name()``
            method and ``__repr__`` uses its ``repr()``
        """
        # Keep the scores in underscore-prefixed attributes only. Storing
        # them under the names ``pos_score`` / ``neg_score`` / ``obj_score``
        # would shadow the accessor methods on the instance, so that
        # ``senti.pos_score()`` would try to call a float.
        self._pos_score = pos_score
        self._neg_score = neg_score
        # The objective score is what is left of 1.0 after the two others.
        self._obj_score = 1.0 - (self._pos_score + self._neg_score)
        self.synset = synset

    def pos_score(self):
        """Return the positive score."""
        return self._pos_score

    def neg_score(self):
        """Return the negative score."""
        return self._neg_score

    def obj_score(self):
        """Return the objective score, ``1.0 - (pos_score + neg_score)``."""
        return self._obj_score

    def __str__(self):
        """Prints just the Pos/Neg scores for now.

        The format is ``<name: PosScore=P NegScore=N>``.
        """
        return "<%s: PosScore=%s NegScore=%s>" % (
            self.synset.name(),
            self._pos_score,
            self._neg_score,
        )

    def __repr__(self):
        """Return the wrapped synset's ``repr()`` prefixed with ``Senti``."""
        return "Senti" + repr(self.synset)
if __name__ == "__main__":
    # When run as a script, execute this module's doctests; whitespace
    # differences between expected and actual output are ignored.
    from doctest import NORMALIZE_WHITESPACE, testmod

    testmod(optionflags=NORMALIZE_WHITESPACE)
nltk/test/corpus.doctest
View file @
b4386729
...
...
@@ -479,6 +479,11 @@ PropBank
Please see the separate PropBank howto.
SentiWordNet
============
Please see the separate SentiWordNet howto.
Categorized Corpora
===================
...
...
nltk/test/sentiwordnet.doctest
0 → 100644
View file @
b4386729
.. Copyright (C) 2001-2014 NLTK Project
.. For license information, see LICENSE.TXT
======================
SentiWordNet Interface
======================
SentiWordNet can be imported like this:
>>> from nltk.corpus import sentiwordnet as swn
------------
SentiSynsets
------------
>>> breakdown = swn.senti_synset('breakdown.n.03')
>>> print(breakdown)
<breakdown.n.03: PosScore=0.0 NegScore=0.25>
>>> breakdown.pos_score()
0.0
>>> breakdown.neg_score()
0.25
>>> breakdown.obj_score()
0.75
------
Lookup
------
>>> list(swn.senti_synsets('slow'))
[SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'),\
SentiSynset('slow.v.03'), SentiSynset('slow.a.01'),\
SentiSynset('slow.a.02'), SentiSynset('slow.a.04'),\
SentiSynset('slowly.r.01'), SentiSynset('behind.r.03')]
>>> happy = swn.senti_synsets('happy', 'a')
>>> all = swn.all_senti_synsets()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment