fixing up wildcard imports

a39be2c0 · Steven Bird · 26082247 · a39be2c0 · a39be2c0 · a39be2c0
Commit a39be2c0 authored Nov 14, 2011 by Steven Bird
Showing with 47 additions and 54 deletions

nltk/chunk/__init__.py
+5 -13

nltk/chunk/named_entity.py
+36 -29

nltk/chunk/regexp.py
+1 -2

nltk/chunk/util.py
+2 -4

nltk/downloader.py
+2 -2

nltk/parse/earleychart.py
+0 -3

nltk/tag/hmm.py
+1 -1

No files found.
--- a/nltk/chunk/__init__.py
+++ b/nltk/chunk/__init__.py
@@ -152,23 +152,15 @@ zero-length assertions).
     pattern is valid.
 """

-from api import *
-from util import *
-from regexp import *
-
-__all__ = [
-    # ChunkParser interface
-    'ChunkParserI',
-
-    # Parsers
-    'RegexpChunkParser', 'RegexpParser',
-
-    'ne_chunk', 'batch_ne_chunk',
-    ]
+from api import ChunkParserI
+from util import (ChunkScore, accuracy, tagstr2tree, conllstr2tree,
+                  conlltags2tree, tree2conllstr, tree2conlltags)
+from regexp import RegexpChunkParser, RegexpParser

 # Standard treebank POS tagger
 _BINARY_NE_CHUNKER = 'chunkers/maxent_ne_chunker/english_ace_binary.pickle'
 _MULTICLASS_NE_CHUNKER = 'chunkers/maxent_ne_chunker/english_ace_multiclass.pickle'
+
 def ne_chunk(tagged_tokens, binary=False):
    """
    Use NLTK's currently recommended named entity chunker to

--- a/nltk/chunk/named_entity.py
+++ b/nltk/chunk/named_entity.py
@@ -11,28 +11,35 @@ Named entity chunker

 import os, re, pickle
 from xml.etree import ElementTree as ET
-from nltk.chunk.api import *
-from nltk.chunk.util import *
-import nltk

-# This really shouldn't be loaded at import time.  But it's used by a
-# static method.  Do a lazy loading?
-_short_en_wordlist = set(nltk.corpus.words.words('en-basic'))
+from nltk.tag import ClassifierBasedTagger, pos_tag
+from nltk.classify import MaxentClassifier
+from nltk.tree import Tree
+from nltk.tokenize import word_tokenize
+from nltk.data import find

+from nltk.chunk.api import ChunkParserI
+from nltk.chunk.util import ChunkScore

-class NEChunkParserTagger(nltk.tag.ClassifierBasedTagger):
+class NEChunkParserTagger(ClassifierBasedTagger):
    """
    The IOB tagger used by the chunk parser.
    """
    def __init__(self, train):
-        nltk.tag.ClassifierBasedTagger.__init__(
+        ClassifierBasedTagger.__init__(
            self, train=train,
            classifier_builder=self._classifier_builder)

    def _classifier_builder(self, train):
-        return nltk.MaxentClassifier.train(train, algorithm='megam',
+        return MaxentClassifier.train(train, algorithm='megam',
                                           gaussian_prior_sigma=1,
                                           trace=2)
+
+    def _english_wordlist(self):  # lazily load and cache the 'en-basic' word set
+        if not getattr(self, '_en_wordlist', None):  # attribute is absent until first use
+            from nltk.corpus import words  # deferred so the corpus is not read at import time
+            self._en_wordlist = set(words.words('en-basic'))
+        return self._en_wordlist
    
    def _feature_detector(self, tokens, index, history):
        word = tokens[index][0]
@@ -79,7 +86,7 @@ class NEChunkParserTagger(nltk.tag.ClassifierBasedTagger):
            'suffix3': word[-3:].lower(),
            'pos': pos,
            'word': word,
-            'en-wordlist': (word in _short_en_wordlist), # xx!
+            'en-wordlist': (word in self._english_wordlist()),
            'prevtag': prevtag,
            'prevpos': prevpos,
            'nextpos': nextpos,
@@ -117,19 +124,19 @@ class NEChunkParser(ChunkParserI):
        """
        Convert a list of tagged tokens to a chunk-parse tree.
        """
-        sent = nltk.Tree('S', [])
+        sent = Tree('S', [])
        
        for (tok,tag) in tagged_tokens:
            if tag == 'O':
                sent.append(tok)
            elif tag.startswith('B-'):
-                sent.append(nltk.Tree(tag[2:], [tok]))
+                sent.append(Tree(tag[2:], [tok]))
            elif tag.startswith('I-'):
                if (sent and isinstance(sent[-1], Tree) and
                    sent[-1].node == tag[2:]):
                    sent[-1].append(tok)
                else:
-                    sent.append(nltk.Tree(tag[2:], [tok]))
+                    sent.append(Tree(tag[2:], [tok]))
        return sent

    @staticmethod
@@ -139,7 +146,7 @@ class NEChunkParser(ChunkParserI):
        """
        toks = []
        for child in sent:
-            if isinstance(child, nltk.Tree):
+            if isinstance(child, Tree):
                if len(child) == 0:
                    print "Warning -- empty chunk in sentence"
                    continue
@@ -171,10 +178,10 @@ def simplify_pos(s):
 def postag_tree(tree):
    # Part-of-speech tagging.
    words = tree.leaves()
-    tag_iter = (pos for (word, pos) in nltk.pos_tag(words))
+    tag_iter = (pos for (word, pos) in pos_tag(words))
    newtree = Tree('S', [])
    for child in tree:
-        if isinstance(child, nltk.Tree):
+        if isinstance(child, Tree):
            newtree.append(Tree(child.node, []))
            for subchild in child:
                newtree[-1].append( (subchild, tag_iter.next()) )
@@ -227,27 +234,27 @@ def load_ace_file(textfile, fmt):
    # Binary distinction (NE or not NE)
    if fmt == 'binary':
        i = 0
-        toks = nltk.Tree('S', [])
+        toks = Tree('S', [])
        for (s,e,typ) in sorted(entities):
            if s < i: s = i # Overlapping!  Deal with this better?
            if e <= s: continue
-            toks.extend(nltk.word_tokenize(text[i:s]))
-            toks.append(nltk.Tree('NE', text[s:e].split()))
+            toks.extend(word_tokenize(text[i:s]))
+            toks.append(Tree('NE', text[s:e].split()))
            i = e
-        toks.extend(nltk.word_tokenize(text[i:]))
+        toks.extend(word_tokenize(text[i:]))
        yield toks

    # Multiclass distinction (NE type)
    elif fmt == 'multiclass':
        i = 0
-        toks = nltk.Tree('S', [])
+        toks = Tree('S', [])
        for (s,e,typ) in sorted(entities):
            if s < i: s = i # Overlapping!  Deal with this better?
            if e <= s: continue
-            toks.extend(nltk.word_tokenize(text[i:s]))
-            toks.append(nltk.Tree(typ, text[s:e].split()))
+            toks.extend(word_tokenize(text[i:s]))
+            toks.append(Tree(typ, text[s:e].split()))
            i = e
-        toks.extend(nltk.word_tokenize(text[i:]))
+        toks.extend(word_tokenize(text[i:]))
        yield toks

    else:
@@ -271,10 +278,10 @@ def cmp_chunks(correct, guessed):

 def build_model(fmt='binary'):
    print 'Loading training data...'
-    train_paths = [nltk.data.find('corpora/ace_data/ace.dev'),
-                   nltk.data.find('corpora/ace_data/ace.heldout'),
-                   nltk.data.find('corpora/ace_data/bbn.dev'),
-                   nltk.data.find('corpora/ace_data/muc.dev')]
+    train_paths = [find('corpora/ace_data/ace.dev'),
+                   find('corpora/ace_data/ace.heldout'),
+                   find('corpora/ace_data/bbn.dev'),
+                   find('corpora/ace_data/muc.dev')]
    train_trees = load_ace_data(train_paths, fmt)
    train_data = [postag_tree(t) for t in train_trees]
    print 'Training...'
@@ -282,7 +289,7 @@ def build_model(fmt='binary'):
    del train_data

    print 'Loading eval data...'
-    eval_paths = [nltk.data.find('corpora/ace_data/ace.eval')]
+    eval_paths = [find('corpora/ace_data/ace.eval')]
    eval_trees = load_ace_data(eval_paths, fmt)
    eval_data = [postag_tree(t) for t in eval_trees]
    

--- a/nltk/chunk/regexp.py
+++ b/nltk/chunk/regexp.py
@@ -11,8 +11,7 @@ import types

 from nltk.tree import Tree

-from nltk.chunk.api import *
-from nltk.chunk.util import *
+from nltk.chunk.api import ChunkParserI

 ##//////////////////////////////////////////////////////
 ##  ChunkString

--- a/nltk/chunk/util.py
+++ b/nltk/chunk/util.py
@@ -10,9 +10,7 @@ import re
 import string

 from nltk.tree import Tree
-import nltk.tag.util
-
-from api import *
+from nltk.tag.util import str2tuple

 ##//////////////////////////////////////////////////////
 ## EVALUATION
@@ -338,7 +336,7 @@ def tagstr2tree(s, chunk_node="NP", top_node="S", sep='/'):
            if sep is None:
                stack[-1].append(text)
            else:
-                stack[-1].append(nltk.tag.util.str2tuple(text, sep))
+                stack[-1].append(str2tuple(text, sep))

    if len(stack) != 1:
        raise ValueError('Expected ] at char %d' % len(s))

--- a/nltk/downloader.py
+++ b/nltk/downloader.py
@@ -168,8 +168,8 @@ except:
    
 try:
    TKINTER = True
-    from Tkinter import *
-    from tkMessageBox import *
+    from Tkinter import Tk, Frame, Label, Entry, Button, Canvas, Menu, IntVar
+    from tkMessageBox import showerror
    from nltk.draw.table import Table
    from nltk.draw import ShowText
 except:

--- a/nltk/parse/earleychart.py
+++ b/nltk/parse/earleychart.py
@@ -28,9 +28,6 @@ The main parser class is L{EarleyChartParser}, which is a top-down
 algorithm, originally formulated by Jay Earley (1970).
 """

-#from nltk.grammar import *
-
-#from nltk.parse.api import ParserI
 from nltk.parse.chart import (Chart, ChartParser, EdgeI, LeafEdge, LeafInitRule,
                              BottomUpPredictRule, BottomUpPredictCombineRule,
                              TopDownInitRule, SingleEdgeFundamentalRule,

--- a/nltk/tag/hmm.py
+++ b/nltk/tag/hmm.py
@@ -72,7 +72,7 @@ which includes extensive demonstration code.

 import re
 import types
-from numpy import *
+from numpy import zeros, ones, float32, float64, log2, hstack, array, argmax

 from nltk.probability import (FreqDist, ConditionalFreqDist,
                              ConditionalProbDist, DictionaryProbDist,