Commit fc44d4d1 by Ewan Klein

minor edits to corpus reader demo

parent 10a99af5
...@@ -33,13 +33,14 @@ class TwitterCorpusReader(CorpusReader):
     If you made your own tweet collection in a directory called
     `twitter-files`, then you can initialise the reader as::

-        >>> reader = TwitterCorpusReader(root='twitter-files', '.*\.json') # doctest: +SKIP
+        from nltk.corpus import TwitterCorpusReader
+        reader = TwitterCorpusReader(root='/path/to/twitter-files', '.*\.json')

-    The recommended approach is to use this directory as the value of the
+    However, the recommended approach is to use this directory as the value of the
     environmental variable `TWITTER`, and then invoke the reader as::

-        >>> root = os.environ['TWITTER']
-        >>> reader = TwitterCorpusReader(root, '.*\.json') # doctest: +SKIP
+        root = os.environ['TWITTER']
+        reader = TwitterCorpusReader(root, '.*\.json')
     """
......
...@@ -191,16 +191,14 @@ def corpusreader_demo():
     * the result of tokenising the raw strings.
     """
-    #from nltk.corpus import TwitterCorpusReader
     from nltk.corpus import tweets
-    tweets.fileids()
-    #root = os.environ['TWITTER']
     #reader = TwitterCorpusReader(root, '1k_sample.json')
     #reader = TwitterCorpusReader('twitter', 'tweets.20150417.json')
     print()
     print("Complete tweet documents")
     print(SPACER)
-    for tweet in tweets.docs()[:2]:
+    for tweet in tweets.docs()[:1]:
         print(json.dumps(tweet, indent=1, sort_keys=True))
     print()
...@@ -212,13 +210,11 @@ def corpusreader_demo():
     print()
     print("Tokenized tweet strings:")
     print(SPACER)
-    for text in tweets.tokenized()[:15]:
-        print(text)
+    for toks in tweets.tokenized()[:15]:
+        print(toks)
-
-#def corpusreader_demo():
-    #from nltk.corpus import brown
-    #brown.words()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment