Commit e7ba3080 by Steven Bird

stylistic fixes, cleaned imports, fixed cyclic imports

parent 92e5fca0
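
The common thread in the hunks below is deferring imports from module load time to the point of use (inside a method, a doctest, or a try/except guard), which is the usual way to break a cycle where two modules import each other at the top level. A minimal sketch of the pattern, using hypothetical module and function names rather than the NLTK modules touched in this commit:

    # pkg/a.py  (hypothetical)
    def analyze(text):
        # Importing at call time instead of at module load time breaks the
        # cycle: pkg.b is only loaded once analyze() actually runs.
        from pkg.b import tokenize
        return tokenize(text)

    # pkg/b.py  (hypothetical)
    from pkg.a import analyze   # safe now: pkg/a.py no longer imports pkg.b at the top

    def tokenize(text):
        return text.split()
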
@@ -15,7 +15,6 @@
 from __future__ import division
 from collections import defaultdict
 from nltk.align import AlignedSent
-from nltk.corpus import comtrans

 class IBMModel1(object):
     """
@@ -28,6 +27,7 @@ class IBMModel1(object):
     Step 2 - Estimate the probability of translation according to the
     evidence from Step 1.

+    >>> from nltk.corpus import comtrans
     >>> bitexts = comtrans.aligned_sents()[:100]
     >>> ibm = IBMModel1(bitexts, 20)
...
@@ -9,7 +9,6 @@
 from __future__ import division
 from collections import defaultdict
 from nltk.align import AlignedSent
-from nltk.corpus import comtrans
 from nltk.align.ibm1 import IBMModel1

 class IBMModel2(object):
@@ -26,6 +25,7 @@ class IBMModel2(object):
     Step 3 - Estimate the probability of translation and alignment according
     to the evidence from Step 2.

+    >>> from nltk.corpus import comtrans
     >>> bitexts = comtrans.aligned_sents()[:100]
     >>> ibm = IBMModel2(bitexts, 5)
     >>> aligned_sent = ibm.align(bitexts[0])
...
@@ -14,7 +14,7 @@ from nltk.internals import find_binary
 try:
     import numpy
 except ImportError:
-    numpy = None
+    pass

 _tadm_bin = None
 def config_tadm(bin=None):
...
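
In the `config_tadm` hunk above, the `except ImportError` branch now does nothing instead of binding `numpy = None`. Either way the module imports cleanly when numpy is absent; the difference is only how downstream code notices the missing dependency. A sketch of the two idioms (illustrative only, not the module's actual checks):

    # Variant 1: bind a sentinel, so callers can test for it explicitly.
    try:
        import numpy
    except ImportError:
        numpy = None        # later: if numpy is None -> raise/skip with a clear message

    # Variant 2 (as in this commit): leave the name unbound, so the first
    # attempt to use numpy raises NameError at the point of use.
    try:
        import numpy
    except ImportError:
        pass
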
@@ -31,14 +31,10 @@ http://borel.slu.edu/crubadan/index.html
 # Ensure that literal strings default to unicode rather than str.
 from __future__ import print_function, unicode_literals

-import nltk
-import nltk.compat
-from nltk.corpus import CrubadanCorpusReader
+from nltk.compat import PY3
 from nltk.util import trigrams
-from nltk.tokenize import word_tokenize
-from nltk.probability import FreqDist

-if nltk.compat.PY3:
+if PY3:
     from sys import maxsize
 else:
     from sys import maxint
@@ -71,7 +67,8 @@ class TextCat(object):
                              "see https://pypi.python.org/pypi/regex for "
                              "further details.")

-        self._corpus = CrubadanCorpusReader(nltk.data.find('corpora/crubadan'), '.*\.txt')
+        from nltk.corpus import crubadan
+        self._corpus = crubadan
         # Load all language ngrams into cache
         for lang in self._corpus.langs():
             self._corpus.lang_freq(lang)
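
The hunk above swaps a hand-constructed `CrubadanCorpusReader` for the reader exposed by `nltk.corpus`, which is wrapped in NLTK's lazy corpus loader, and imports it inside `__init__` so `nltk.classify` no longer pulls in `nltk.corpus` at module load time. Roughly how that reads at the call site (a sketch; it assumes the crubadan corpus data is installed under `nltk_data`, as the old `'corpora/crubadan'` path suggests):

    from nltk.corpus import crubadan       # cheap: a lazy-loader proxy, nothing is read yet

    langs = crubadan.langs()               # first real access loads the corpus from disk
    counts = crubadan.lang_freq('eng')     # trigram counts for one language ('eng' assumed to be a valid code)
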
@@ -82,6 +79,8 @@ class TextCat(object):
     def profile(self, text):
         ''' Create FreqDist of trigrams within text '''
+        from nltk import word_tokenize, FreqDist
+
         clean_text = self.remove_punctuation(text)
         tokens = word_tokenize(clean_text)
@@ -115,7 +114,7 @@ class TextCat(object):
         # Arbitrary but should be larger than
         # any possible trigram file length
         # in terms of total lines
-        if nltk.compat.PY3:
+        if PY3:
             dist = maxsize
         else:
             dist = maxint
@@ -148,41 +147,47 @@ class TextCat(object):
         return min(self.last_distances, key=self.last_distances.get)
         #################################################')

-    def demo(self):
+def demo():
     from nltk.corpus import udhr

     langs = ['Kurdish-UTF8', 'Abkhaz-UTF8', 'Farsi_Persian-UTF8',
              'Hindi-UTF8', 'Hawaiian-UTF8', 'Russian-UTF8', 'Vietnamese-UTF8',
              'Serbian_Srpski-UTF8','Esperanto-UTF8']

     friendly = {'kmr':'Northern Kurdish',
                 'abk':'Abkhazian',
                 'pes':'Iranian Persian',
                 'hin':'Hindi',
                 'haw':'Hawaiian',
                 'rus':'Russian',
                 'vie':'Vietnamese',
                 'srp':'Serbian',
                 'epo':'Esperanto'}

+    tc = TextCat()
+
     for cur_lang in langs:
         # Get raw data from UDHR corpus
         raw_sentences = udhr.sents(cur_lang)
         rows = len(raw_sentences) - 1
         cols = list(map(len, raw_sentences))

         sample = ''

         # Generate a sample text of the language
         for i in range(0, rows):
             cur_sent = ''
             for j in range(0, cols[i]):
                 cur_sent += ' ' + raw_sentences[i][j]
             sample += cur_sent

         # Try to detect what it is
         print('Language snippet: ' + sample[0:140] + '...')
-        guess = self.guess_language(sample)
+        guess = tc.guess_language(sample)
         print('Language detection: %s (%s)' % (guess, friendly[guess]))
         print('#' * 140)
+
+if __name__ == '__main__':
+    demo()
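
With this change `demo()` is a module-level function that builds its own `TextCat` instance and can be run directly via the new `__main__` guard. A rough usage sketch after the change (assuming the module lives at `nltk.classify.textcat`, with the crubadan and udhr corpora installed; per the error text above, the constructor may also require the third-party `regex` package):

    from nltk.classify.textcat import TextCat, demo

    tc = TextCat()
    guess = tc.guess_language('Dies ist ein kurzer deutscher Beispieltext.')
    print(guess)   # a Crubadan language code, keyed like the 'friendly' mapping above

    demo()         # runs the UDHR-based demonstration shown in the diff
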
@@ -22,7 +22,7 @@ http://borel.slu.edu/crubadan/index.html
 from __future__ import print_function, unicode_literals

 import re
-import nltk.compat
+from nltk.compat import PY3
 from os import path
 from nltk.corpus.reader import CorpusReader
 from nltk.probability import FreqDist
@@ -75,7 +75,7 @@ class CrubadanCorpusReader(CorpusReader):
         if self._LANG_MAPPER_FILE not in self.fileids():
             raise RuntimeError("Could not find language mapper file: " + mapper_file)

-        if nltk.compat.PY3:
+        if PY3:
             raw = open(mapper_file, 'r', encoding='utf-8').read().strip()
         else:
             raw = open(mapper_file, 'rU').read().decode('utf-8').strip()
@@ -93,13 +93,13 @@ class CrubadanCorpusReader(CorpusReader):
             raise Runtime("Could not find language n-gram file for " + lang)

         counts = FreqDist()
-        if nltk.compat.PY3:
+        if PY3:
             f = open(ngram_file, 'r', encoding='utf-8')
         else:
             f = open(ngram_file, 'rU')

         for line in f:
-            if nltk.compat.PY3:
+            if PY3:
                 data = line.split(' ')
             else:
                 data = line.decode('utf8').split(' ')
...
@@ -15,7 +15,6 @@ from functools import reduce
 import subprocess

 from nltk.data import ZipFilePathPointer
-from nltk.tag import RegexpTagger
 from nltk.tokenize import word_tokenize
 from nltk.internals import find_binary
@@ -43,6 +42,7 @@ class MaltParser(ParserI):
         if tagger is not None:
             self.tagger = tagger
         else:
+            from nltk.tag import RegexpTagger
             self.tagger = RegexpTagger(
                 [(r'^-?[0-9]+(.[0-9]+)?$', 'CD'),   # cardinal numbers
                  (r'(The|the|A|a|An|an)$', 'AT'),   # articles
...
@@ -14,7 +14,6 @@ import logging
 from nltk.compat import xrange
 from nltk.parse.dependencygraph import DependencyGraph
-from nltk.classify import NaiveBayesClassifier

 logger = logging.getLogger(__name__)
@@ -111,6 +110,8 @@ class NaiveBayesDependencyScorer(DependencyScorerI):
         :param graphs: A list of dependency graphs to train the scorer.
         """
+        from nltk.classify import NaiveBayesClassifier
+
         # Create training labeled training examples
         labeled_examples = []
         for graph in graphs:
...
@@ -12,10 +12,14 @@ import pickle
 from os import remove
 from copy import deepcopy
 from operator import itemgetter
-from scipy import sparse
-from numpy import array
-from sklearn.datasets import load_svmlight_file
-from sklearn import svm
+try:
+    from numpy import array
+    from scipy import sparse
+    from sklearn.datasets import load_svmlight_file
+    from sklearn import svm
+except ImportError:
+    pass

 from nltk.parse import ParserI, DependencyGraph, DependencyEvaluator
...
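
The last hunk wraps the numpy/scipy/scikit-learn imports in a `try/except ImportError`, so importing the parser module no longer fails outright when those optional libraries are missing; code paths that need them fail only when exercised. A common way to make that failure explicit, sketched with hypothetical names (this is not the module's actual API):

    try:
        from sklearn import svm
    except ImportError:
        svm = None

    def train_svm(features, labels):
        # Raise a clear error only when the optional dependency is actually needed.
        if svm is None:
            raise ImportError('scikit-learn (plus numpy and scipy) is required for training')
        return svm.SVC(kernel='linear').fit(features, labels)
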