Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
aaf77f64
Commit
aaf77f64
authored
Apr 30, 2015
by
Ewan Klein
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Minor changes to docstrings. Re-worked error handling.
parent
d4717752
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
41 additions
and
39 deletions
+41
-39
nltk/twitter/util.py
+41
-39
No files found.
nltk/twitter/util.py
View file @
aaf77f64
...
@@ -53,7 +53,7 @@ def _get_entity_recursive(json, entity):
...
@@ -53,7 +53,7 @@ def _get_entity_recursive(json, entity):
if
isinstance
(
json
,
dict
):
if
isinstance
(
json
,
dict
):
for
key
,
value
in
json
.
iteritems
():
for
key
,
value
in
json
.
iteritems
():
if
key
==
entity
:
if
key
==
entity
:
return
value
return
value
candidate
=
_get_entity_recursive
(
value
,
entity
)
candidate
=
_get_entity_recursive
(
value
,
entity
)
if
candidate
!=
None
:
if
candidate
!=
None
:
return
candidate
return
candidate
...
@@ -75,11 +75,12 @@ def json2csv(infile, outfile, fields, encoding='utf8', errors='replace'):
...
@@ -75,11 +75,12 @@ def json2csv(infile, outfile, fields, encoding='utf8', errors='replace'):
This utility function allows a file of full tweets to be easily converted
This utility function allows a file of full tweets to be easily converted
to a CSV file for easier processing. For example, just tweetIDs or
to a CSV file for easier processing. For example, just tweetIDs or
just the text content of the tweets can be extracted.
just the text content of the tweets can be extracted.
Additionally, the function allows combinations of fields of other Twitter
Additionally, the function allows combinations of fields of other Twitter
objects (mainly the users, see below).
objects (mainly the users, see below).
For Twitter entities (e.g. hashtags of a tweet) see json2csv_entities
For Twitter entities (e.g. hashtags of a tweet), and for geolocation, see
`json2csv_entities`
:param str infile: The name of the file containing full tweets
:param str infile: The name of the file containing full tweets
...
@@ -90,13 +91,12 @@ def json2csv(infile, outfile, fields, encoding='utf8', errors='replace'):
...
@@ -90,13 +91,12 @@ def json2csv(infile, outfile, fields, encoding='utf8', errors='replace'):
are 'id_str' for the tweetID and 'text' for the text of the tweet. See
\
are 'id_str' for the tweetID and 'text' for the text of the tweet. See
\
<https://dev.twitter.com/overview/api/tweets> for a full list of fields.
<https://dev.twitter.com/overview/api/tweets> for a full list of fields.
e. g.: ['id_str'], ['id', 'text', 'favorite_count', 'retweet_count']
e. g.: ['id_str'], ['id', 'text', 'favorite_count', 'retweet_count']
Addionally, it allows fileds from other Twitter objects.
Addionally, it allows IDs from other Twitter objects, e. g.,
\
e. g.: ['id', 'text', {'user' : ['id', 'followers_count', 'friends_count']}]
['id', 'text', {'user' : ['id', 'followers_count', 'friends_count']}]
Not suitable for entities like hastags; use json2csv_entities instead.
Not for the place of a tweet; also use json2csv.
:param error: Behaviour for encoding errors, see
\
:param error: Behaviour for encoding errors, see
\
https://docs.python.org/3/library/codecs.html#codec-base-classes
https://docs.python.org/3/library/codecs.html#codec-base-classes
"""
"""
with
open
(
infile
)
as
inf
:
with
open
(
infile
)
as
inf
:
writer
=
get_outf_writer_compat
(
outfile
,
encoding
,
errors
)
writer
=
get_outf_writer_compat
(
outfile
,
encoding
,
errors
)
...
@@ -106,6 +106,9 @@ def json2csv(infile, outfile, fields, encoding='utf8', errors='replace'):
...
@@ -106,6 +106,9 @@ def json2csv(infile, outfile, fields, encoding='utf8', errors='replace'):
writer
.
writerow
(
row
)
writer
.
writerow
(
row
)
def
get_outf_writer_compat
(
outfile
,
encoding
,
errors
):
def
get_outf_writer_compat
(
outfile
,
encoding
,
errors
):
"""
Identify appropriate CSV writer given the Python version
"""
if
compat
.
PY3
==
True
:
if
compat
.
PY3
==
True
:
outf
=
open
(
outfile
,
'w'
,
encoding
=
encoding
,
errors
=
errors
)
outf
=
open
(
outfile
,
'w'
,
encoding
=
encoding
,
errors
=
errors
)
writer
=
csv
.
writer
(
outf
)
writer
=
csv
.
writer
(
outf
)
...
@@ -113,8 +116,8 @@ def get_outf_writer_compat(outfile, encoding, errors):
...
@@ -113,8 +116,8 @@ def get_outf_writer_compat(outfile, encoding, errors):
outf
=
open
(
outfile
,
'wb'
)
outf
=
open
(
outfile
,
'wb'
)
writer
=
compat
.
UnicodeWriter
(
outf
,
encoding
=
encoding
,
errors
=
errors
)
writer
=
compat
.
UnicodeWriter
(
outf
,
encoding
=
encoding
,
errors
=
errors
)
return
writer
return
writer
def
json2csv_entities
(
infile
,
outfile
,
main_fields
,
entity_name
,
entity_fields
,
def
json2csv_entities
(
infile
,
outfile
,
main_fields
,
entity_name
,
entity_fields
,
encoding
=
'utf8'
,
errors
=
'replace'
):
encoding
=
'utf8'
,
errors
=
'replace'
):
"""
"""
...
@@ -124,7 +127,7 @@ def json2csv_entities(infile, outfile, main_fields, entity_name, entity_fields,
...
@@ -124,7 +127,7 @@ def json2csv_entities(infile, outfile, main_fields, entity_name, entity_fields,
This utility function allows a file of full tweets to be easily converted
This utility function allows a file of full tweets to be easily converted
to a CSV file for easier processing of Twitter entities. For example, the
to a CSV file for easier processing of Twitter entities. For example, the
hashtags or media elements of a tweet can be extracted.
hashtags or media elements of a tweet can be extracted.
:param str infile: The name of the file containing full tweets
:param str infile: The name of the file containing full tweets
:param str outfile: The name of the text file where results should be
\
:param str outfile: The name of the text file where results should be
\
...
@@ -143,10 +146,10 @@ def json2csv_entities(infile, outfile, main_fields, entity_name, entity_fields,
...
@@ -143,10 +146,10 @@ def json2csv_entities(infile, outfile, main_fields, entity_name, entity_fields,
needs to be expressed as a dictionary: {'user' : 'urls'}. For the
\
needs to be expressed as a dictionary: {'user' : 'urls'}. For the
\
bounding box of the place from which a tweet was twitted, as a dict
\
bounding box of the place from which a tweet was twitted, as a dict
\
as well: {'place', 'bounding_box'}
as well: {'place', 'bounding_box'}
:param list entity_fields: The list of fields to be extracted from the
\
:param list entity_fields: The list of fields to be extracted from the
\
entity. E.g. ['text'] (of the hashtag)
entity. E.g. ['text'] (of the hashtag)
:param error: Behaviour for encoding errors, see
\
:param error: Behaviour for encoding errors, see
\
https://docs.python.org/3/library/codecs.html#codec-base-classes
https://docs.python.org/3/library/codecs.html#codec-base-classes
"""
"""
...
@@ -168,6 +171,7 @@ def json2csv_entities(infile, outfile, main_fields, entity_name, entity_fields,
...
@@ -168,6 +171,7 @@ def json2csv_entities(infile, outfile, main_fields, entity_name, entity_fields,
items
=
_get_entity_recursive
(
tweet
,
entity_name
)
items
=
_get_entity_recursive
(
tweet
,
entity_name
)
_write_to_file
(
tweet_fields
,
items
,
entity_fields
,
writer
)
_write_to_file
(
tweet_fields
,
items
,
entity_fields
,
writer
)
def
_write_to_file
(
object_fields
,
items
,
entity_fields
,
writer
):
def
_write_to_file
(
object_fields
,
items
,
entity_fields
,
writer
):
if
items
==
None
:
if
items
==
None
:
# it could be that the entity is just not present for the tweet
# it could be that the entity is just not present for the tweet
...
@@ -189,8 +193,8 @@ def _write_to_file(object_fields, items, entity_fields, writer):
...
@@ -189,8 +193,8 @@ def _write_to_file(object_fields, items, entity_fields, writer):
for
item
in
items
:
for
item
in
items
:
row
=
object_fields
+
extract_fields
(
item
,
entity_fields
)
row
=
object_fields
+
extract_fields
(
item
,
entity_fields
)
writer
.
writerow
(
row
)
writer
.
writerow
(
row
)
def
credsfromfile
(
creds_file
=
None
,
subdir
=
None
,
verbose
=
False
):
def
credsfromfile
(
creds_file
=
None
,
subdir
=
None
,
verbose
=
False
):
"""
"""
Read OAuth credentials from a text file.
Read OAuth credentials from a text file.
...
@@ -204,7 +208,6 @@ def credsfromfile(creds_file=None, subdir=None, verbose=False):
...
@@ -204,7 +208,6 @@ def credsfromfile(creds_file=None, subdir=None, verbose=False):
oauth_token_secret=OAUTH_TOKEN_SECRET
oauth_token_secret=OAUTH_TOKEN_SECRET
::
::
File format for OAuth 2
File format for OAuth 2
=======================
=======================
...
@@ -216,24 +219,25 @@ def credsfromfile(creds_file=None, subdir=None, verbose=False):
...
@@ -216,24 +219,25 @@ def credsfromfile(creds_file=None, subdir=None, verbose=False):
:param str file_name: File containing credentials. ``None`` (default) reads
\
:param str file_name: File containing credentials. ``None`` (default) reads
\
data from `TWITTER/'credentials.txt'`
data from `TWITTER/'credentials.txt'`
"""
"""
if
subdir
is
None
:
if
creds_file
is
None
:
creds_file
=
'credentials.txt'
if
not
subdir
:
try
:
try
:
subdir
=
os
.
environ
[
'TWITTER'
]
subdir
=
os
.
environ
[
'TWITTER'
]
creds_fullpath
=
os
.
path
.
normpath
(
os
.
path
.
join
(
subdir
,
creds_file
))
if
not
os
.
path
.
isfile
(
creds_fullpath
):
raise
OSError
(
'Cannot find file {}'
.
format
(
creds_fullpath
))
except
KeyError
:
except
KeyError
:
print
(
"""Supply a value to the 'subdir' parameter or set the
print
(
"Supply a value to the 'subdir' parameter or set the
\
environment variable TWITTER"""
)
TWITTER environment variable."
)
if
creds_file
is
None
:
raise
FileNotFoundError
from
KeyError
creds_file
=
'credentials.txt'
creds_fullpath
=
os
.
path
.
normpath
(
os
.
path
.
join
(
subdir
,
creds_file
))
if
not
os
.
path
.
isfile
(
creds_fullpath
):
raise
OSError
(
'Cannot find file {}'
.
format
(
creds_fullpath
))
with
open
(
creds_fullpath
)
as
f
:
with
open
(
creds_fullpath
)
as
infile
:
if
verbose
:
if
verbose
:
print
(
'Reading credentials file {}'
.
format
(
creds_fullpath
))
print
(
'Reading credentials file {}'
.
format
(
creds_fullpath
))
oauth
=
{}
oauth
=
{}
for
line
in
f
:
for
line
in
infile
:
if
'='
in
line
:
if
'='
in
line
:
name
,
value
=
line
.
split
(
'='
,
1
)
name
,
value
=
line
.
split
(
'='
,
1
)
oauth
[
name
.
strip
()]
=
value
.
strip
()
oauth
[
name
.
strip
()]
=
value
.
strip
()
...
@@ -242,7 +246,7 @@ def credsfromfile(creds_file=None, subdir=None, verbose=False):
...
@@ -242,7 +246,7 @@ def credsfromfile(creds_file=None, subdir=None, verbose=False):
return
oauth
return
oauth
def
_validate_creds_file
(
fn
,
oauth
,
verbose
=
False
):
def
_validate_creds_file
(
fn
ame
,
oauth
,
verbose
=
False
):
"""Check validity of a credentials file."""
"""Check validity of a credentials file."""
oauth1
=
False
oauth1
=
False
oauth1_keys
=
[
'app_key'
,
'app_secret'
,
'oauth_token'
,
'oauth_token_secret'
]
oauth1_keys
=
[
'app_key'
,
'app_secret'
,
'oauth_token'
,
'oauth_token_secret'
]
...
@@ -254,11 +258,11 @@ def _validate_creds_file(fn, oauth, verbose=False):
...
@@ -254,11 +258,11 @@ def _validate_creds_file(fn, oauth, verbose=False):
oauth2
=
True
oauth2
=
True
if
not
(
oauth1
or
oauth2
):
if
not
(
oauth1
or
oauth2
):
msg
=
'Missing or incorrect entries in {}
\n
'
.
format
(
fn
)
msg
=
'Missing or incorrect entries in {}
\n
'
.
format
(
fn
ame
)
msg
+=
pprint
.
pformat
(
oauth
)
msg
+=
pprint
.
pformat
(
oauth
)
raise
ValueError
(
msg
)
raise
ValueError
(
msg
)
elif
verbose
:
elif
verbose
:
print
(
'Credentials file "{}" looks good'
.
format
(
fn
))
print
(
'Credentials file "{}" looks good'
.
format
(
fn
ame
))
def
add_access_token
(
creds_file
=
None
):
def
add_access_token
(
creds_file
=
None
):
...
@@ -270,14 +274,14 @@ def add_access_token(creds_file=None):
...
@@ -270,14 +274,14 @@ def add_access_token(creds_file=None):
path
=
os
.
path
.
dirname
(
__file__
)
path
=
os
.
path
.
dirname
(
__file__
)
creds_file
=
os
.
path
.
join
(
path
,
'credentials2.txt'
)
creds_file
=
os
.
path
.
join
(
path
,
'credentials2.txt'
)
oauth2
=
credsfromfile
(
creds_file
=
creds_file
)
oauth2
=
credsfromfile
(
creds_file
=
creds_file
)
APP_KEY
=
oauth2
[
'app_key'
]
app_key
=
oauth2
[
'app_key'
]
APP_SECRET
=
oauth2
[
'app_secret'
]
app_secret
=
oauth2
[
'app_secret'
]
twitter
=
Twython
(
APP_KEY
,
APP_SECRET
,
oauth_version
=
2
)
twitter
=
Twython
(
app_key
,
app_secret
,
oauth_version
=
2
)
ACCESS_TOKEN
=
twitter
.
obtain_access_token
()
access_token
=
twitter
.
obtain_access_token
()
tok
=
'access_token={}
\n
'
.
format
(
ACCESS_TOKEN
)
tok
=
'access_token={}
\n
'
.
format
(
access_token
)
with
open
(
creds_file
,
'a'
)
as
f
:
with
open
(
creds_file
,
'a'
)
as
infile
:
print
(
tok
,
file
=
f
)
print
(
tok
,
file
=
infile
)
def
guess_path
(
pth
):
def
guess_path
(
pth
):
...
@@ -291,5 +295,3 @@ def guess_path(pth):
...
@@ -291,5 +295,3 @@ def guess_path(pth):
return
pth
return
pth
else
:
else
:
return
os
.
path
.
expanduser
(
os
.
path
.
join
(
"~"
,
pth
))
return
os
.
path
.
expanduser
(
os
.
path
.
join
(
"~"
,
pth
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment