Commit cdcf6701 by Steven Bird

Merge branch 'parseri' into test

parents 8dcae605 04f8a9b6
Version 3.0.2 2015-02-08
* make pretty-printing method names consistent
* improvements to Portuguese stemmer
* transition-based dependency parsers
* code clean ups, minor bug fixes
Thanks to the following contributors to 3.0.2:
Long Duong, Saimadhav Heblikar, Helder, Denis Krusko,
Felipe Madrigal, Dmitrijs Milajevs, Nathan Schneider,
0ssifrage, kiwipi.
Version 3.0.1 2015-01-12
* fix setup.py for new version of setuptools
Version 3.0.0 2014-09-07
* minor bugfixes
* added phrase extraction code by Liling Tan and Fredrik Hedman
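
The pretty-printing rename called out in the 3.0.2 entry works roughly as follows (method names are taken from the nltk/tree.py and doctest hunks further down; the toy tree itself is only illustrative):

    from nltk import Tree

    t = Tree.fromstring('(S (NP the dog) (VP chased (NP the cat)))')
    t.pprint()        # now prints the bracketed form directly; pformat() returns the string
    t.pretty_print()  # the ASCII/Unicode art drawing formerly reached via pprint()
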
......
@@ -145,7 +145,7 @@ try:
except ImportError:
pass
else:
-from nltk import cluster; from .cluster import *
+from nltk import cluster
from nltk.downloader import download, download_shell
try:
......
@@ -548,7 +548,7 @@ def demo():
s = "[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, will/MD join/VB [ the/DT board/NN ] ./."
import nltk
t = nltk.chunk.tagstr2tree(s, chunk_label='NP')
-print(t.pprint())
+t.pprint()
print()
s = """
@@ -582,7 +582,7 @@ better JJR I-ADJP
"""
conll_tree = conllstr2tree(s, chunk_types=('NP', 'PP'))
-print(conll_tree.pprint())
+conll_tree.pprint()
# Demonstrate CoNLL output
print("CoNLL output:")
......
@@ -147,6 +147,8 @@ movie_reviews = LazyCorpusLoader(
encoding='ascii')
names = LazyCorpusLoader(
'names', WordListCorpusReader, r'(?!\.).*\.txt', encoding='ascii')
+nkjp = LazyCorpusLoader(
+'nkjp', NKJPCorpusReader, r'', encoding='utf8')
nps_chat = LazyCorpusLoader(
'nps_chat', NPSChatCorpusReader, r'(?!README|\.).*\.xml', tagset='wsj')
pl196x = LazyCorpusLoader(
......
@@ -93,6 +93,7 @@ from nltk.corpus.reader.framenet import *
from nltk.corpus.reader.udhr import *
from nltk.corpus.reader.bnc import *
from nltk.corpus.reader.sentiwordnet import *
+from nltk.corpus.reader.nkjp import *
# Make sure that nltk.corpus.reader.bracket_parse gives the module, not
# the function bracket_parse() defined in nltk.tree:
@@ -127,5 +128,6 @@ __all__ = [
'CHILDESCorpusReader', 'AlignedCorpusReader',
'TimitTaggedCorpusReader', 'LinThesaurusCorpusReader',
'SemcorCorpusReader', 'FramenetCorpusReader', 'UdhrCorpusReader',
-'BNCCorpusReader', 'SentiWordNetCorpusReader', 'SentiSynset'
+'BNCCorpusReader', 'SentiWordNetCorpusReader', 'SentiSynset',
+'NKJPCorpusReader'
]
@@ -76,3 +76,4 @@ from nltk.parse.nonprojectivedependencyparser import (NonprojectiveDependencyPar
ProbabilisticNonprojectiveParser)
from nltk.parse.malt import MaltParser
from nltk.parse.evaluate import DependencyEvaluator
+from nltk.parse.transitionparser import TransitionParser
@@ -32,7 +32,7 @@ class ParserI(object):
"""
raise NotImplementedError()
-def parse(self, sent):
+def parse(self, sent, *args, **kwargs):
"""
:return: An iterator that generates parse trees for the sentence.
When possible this list is sorted from most likely to least likely.
@@ -42,25 +42,25 @@ class ParserI(object):
:rtype: iter(Tree)
"""
if overridden(self.parse_sents):
-return next(self.parse_sents([sent]))
+return next(self.parse_sents([sent], *args, **kwargs))
elif overridden(self.parse_one):
-return (tree for tree in [self.parse_one(sent)] if tree is not None)
+return (tree for tree in [self.parse_one(sent, *args, **kwargs)] if tree is not None)
elif overridden(self.parse_all):
-return iter(self.parse_all(sent))
+return iter(self.parse_all(sent, *args, **kwargs))
else:
raise NotImplementedError()
-def parse_sents(self, sents):
+def parse_sents(self, sents, *args, **kwargs):
"""
Apply ``self.parse()`` to each element of ``sents``.
:rtype: iter(iter(Tree))
"""
-return (self.parse(sent) for sent in sents)
+return (self.parse(sent, *args, **kwargs) for sent in sents)
-def parse_all(self, sent):
+def parse_all(self, sent, *args, **kwargs):
""":rtype: list(Tree)"""
-return list(self.parse(sent))
+return list(self.parse(sent, *args, **kwargs))
-def parse_one(self, sent):
+def parse_one(self, sent, *args, **kwargs):
""":rtype: Tree or None"""
-return next(self.parse(sent), None)
+return next(self.parse(sent, *args, **kwargs), None)
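
The net effect of the ParserI changes above: parse() is the one method a subclass must provide, it returns an iterator of Trees, extra positional and keyword arguments are forwarded, and parse_sents()/parse_all()/parse_one() are derived from it. A minimal sketch (FlatParser is a hypothetical subclass, for illustration only):

    from nltk import Tree
    from nltk.parse.api import ParserI

    class FlatParser(ParserI):
        def parse(self, sent, *args, **kwargs):
            # Yield one flat tree over the tokens; a real parser would yield
            # candidate Trees from most to least likely.
            yield Tree('S', list(sent))

    p = FlatParser()
    print(p.parse_one(['a', 'toy', 'sentence']))   # (S a toy sentence)
    print(p.parse_all(['a', 'toy', 'sentence']))   # [Tree('S', ['a', 'toy', 'sentence'])]
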
@@ -1346,9 +1346,9 @@ class ChartParser(ParserI):
# Return the final chart.
return chart
-def parse_all(self, tokens, tree_class=Tree):
+def parse(self, tokens, tree_class=Tree):
chart = self.chart_parse(tokens)
-return chart.parses(self._grammar.start(), tree_class=tree_class)
+return iter(chart.parses(self._grammar.start(), tree_class=tree_class))
class TopDownChartParser(ChartParser):
"""
@@ -1628,9 +1628,8 @@ def demo(choice=None,
print()
cp = ChartParser(grammar, strategies[strategy][1], trace=trace)
t = time.time()
-# parses = cp.parse_all(tokens)
+parses = cp.parse_all(tokens)
chart = cp.chart_parse(tokens)
-parses = list(chart.parses(grammar.start()))
times[strategies[strategy][0]] = time.time()-t
print("Nr edges in chart:", len(chart.edges()))
if numparses:
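
With the ChartParser change above, parse() returns an iterator of Trees and parse_all() falls back to the ParserI default. A small sketch with a toy grammar (the grammar is illustrative, not from this commit):

    import nltk

    grammar = nltk.CFG.fromstring("""
    S -> NP VP
    NP -> 'I' | 'him'
    VP -> V NP
    V -> 'saw'
    """)
    parser = nltk.ChartParser(grammar)
    tree = next(parser.parse(['I', 'saw', 'him']))      # first parse from the iterator
    tree.pprint()                                        # (S (NP I) (VP (V saw) (NP him)))
    all_trees = parser.parse_all(['I', 'saw', 'him'])    # list of all parses, via ParserI
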
......
@@ -102,7 +102,7 @@ class DependencyGraph(object):
self.nodes[head_address]['deps'].setdefault(relation,[])
self.nodes[head_address]['deps'][relation].append(mod_address)
#self.nodes[head_address]['deps'].append(mod_address)
def connect_graph(self):
"""
@@ -113,7 +113,7 @@ class DependencyGraph(object):
for node2 in self.nodes.values():
if node1['address'] != node2['address'] and node2['rel'] != 'TOP':
relation = node2['rel']
-node1['deps'].setdefault(relation,[])
+node1['deps'].setdefault(relation, [])
node1['deps'][relation].append(node2['address'])
#node1['deps'].append(node2['address'])
@@ -214,17 +214,21 @@ class DependencyGraph(object):
lines = (l.rstrip() for l in input_)
lines = (l for l in lines if l)
+cell_number = None
for index, line in enumerate(lines, start=1):
cells = line.split(cell_separator)
-nrCells = len(cells)
+if cell_number is None:
+cell_number = len(cells)
+else:
+assert cell_number == len(cells)
if cell_extractor is None:
try:
-cell_extractor = extractors[nrCells]
+cell_extractor = extractors[cell_number]
except KeyError:
raise ValueError(
'Number of tab-delimited fields ({0}) not supported by '
-'CoNLL(10) or Malt-Tab(4) format'.format(nrCells)
+'CoNLL(10) or Malt-Tab(4) format'.format(cell_number)
)
word, lemma, ctag, tag, feats, head, rel = cell_extractor(cells)
@@ -246,6 +250,9 @@ class DependencyGraph(object):
}
)
+# Make sure that he fake root node has labeled dependencies.
+if (cell_number == 3) and (head == 0):
+rel = 'ROOT'
self.nodes[head]['deps'][rel].append(index)
if not self.nodes[0]['deps']['ROOT']:
@@ -271,7 +278,7 @@
"""
node = self.get_by_address(i)
word = node['word']
-deps = list(chain.from_iterable(node['deps'].values()))
+deps = sorted(chain.from_iterable(node['deps'].values()))
if deps:
return Tree(word, [self._tree(dep) for dep in deps])
@@ -286,7 +293,7 @@
node = self.root
word = node['word']
-deps = chain.from_iterable(node['deps'].values())
+deps = sorted(chain.from_iterable(node['deps'].values()))
return Tree(word, [self._tree(dep) for dep in deps])
def triples(self, node=None):
@@ -299,7 +306,7 @@
node = self.root
head = (node['word'], node['ctag'])
-for i in node['deps']:
+for i in sorted(chain.from_iterable(node['deps'].values())):
dep = self.get_by_address(i)
yield (head, dep['rel'], (dep['word'], dep['ctag']))
for triple in self.triples(node=dep):
@@ -458,7 +465,7 @@ Nov. NNP 9 VMOD
. . 9 VMOD
""")
tree = dg.tree()
-print(tree.pprint())
+tree.pprint()
if nx:
# currently doesn't work
import networkx as NX
@@ -483,7 +490,7 @@ def conll_demo():
"""
dg = DependencyGraph(conll_data1)
tree = dg.tree()
-print(tree.pprint())
+tree.pprint()
print(dg)
print(dg.to_conll(4))
@@ -494,7 +501,8 @@ def conll_file_demo():
for entry in conll_data2.split('\n\n') if entry]
for graph in graphs:
tree = graph.tree()
-print('\n' + tree.pprint())
+print('\n')
+tree.pprint()
def cycle_finding_demo():
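
One consequence of the DependencyGraph changes above that the later doctests do not show: with three-column Malt-TAB input, a head of 0 now gets an explicit 'ROOT' relation, so the artificial root node ends up with labeled dependencies. A hedged sketch (assuming the three-column word/tag/head variant accepted by the extractors mentioned in the error message):

    from nltk.parse import DependencyGraph

    dg = DependencyGraph("""a DT 2
    man NN 3
    runs VBZ 0
    """)
    print(dg.nodes[0]['deps']['ROOT'])   # [3], the address of 'runs'
    dg.tree().pprint()                   # (runs (man a))
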
......
@@ -87,56 +87,19 @@ class MaltParser(ParserI):
url='http://www.maltparser.org/',
verbose=verbose)
-def parse_all(self, sentence, verbose=False):
-"""
-Use MaltParser to parse a sentence. Takes a sentence as a list of
-words; it will be automatically tagged with this MaltParser instance's
-tagger.
-:param sentence: Input sentence to parse
-:type sentence: list(str)
-:return: list(DependencyGraph)
-"""
-return self.parse_sents([sentence], verbose)
def parse_sents(self, sentences, verbose=False):
"""
-Use MaltParser to parse multiple sentence. Takes multiple sentences as a
+Use MaltParser to parse multiple sentences. Takes multiple sentences as a
list where each sentence is a list of words.
Each sentence will be automatically tagged with this MaltParser instance's
tagger.
:param sentences: Input sentences to parse
:type sentence: list(list(str))
-:return: list(DependencyGraph)
+:return: iter(DependencyGraph)
"""
tagged_sentences = [self.tagger.tag(sentence) for sentence in sentences]
-return self.tagged_parse_sents(tagged_sentences, verbose)
+return iter(self.tagged_parse_sents(tagged_sentences, verbose))
-def parse(self, sentence, verbose=False):
-"""
-Use MaltParser to parse a sentence. Takes a sentence as a list of words.
-The sentence will be automatically tagged with this MaltParser instance's
-tagger.
-:param sentence: Input sentence to parse
-:type sentence: list(str)
-:return: ``DependencyGraph`` the dependency graph representation of the sentence
-"""
-return self.parse_sents([sentence], verbose)[0]
-def raw_parse(self, sentence, verbose=False):
-"""
-Use MaltParser to parse a sentence. Takes a sentence as a string;
-before parsing, it will be automatically tokenized and tagged with this
-MaltParser instance's tagger.
-:param sentence: Input sentence to parse
-:type sentence: str
-:return: list(DependencyGraph)
-"""
-words = word_tokenize(sentence)
-return self.parse(words, verbose)
def tagged_parse(self, sentence, verbose=False):
"""
@@ -158,7 +121,7 @@ class MaltParser(ParserI):
:param sentences: Input sentences to parse
:type sentence: list(list(tuple(str, str)))
-:return: list(``DependencyGraph``) the dependency graph representation
+:return: iter(iter(``DependencyGraph``)) the dependency graph representation
of each sentence
"""
@@ -193,7 +156,7 @@
raise Exception("MaltParser parsing (%s) failed with exit "
"code %d" % (' '.join(cmd), ret))
-return DependencyGraph.load(output_file.name)
+return iter(DependencyGraph.load(output_file.name))
finally:
input_file.close()
os.remove(input_file.name)
@@ -276,8 +239,8 @@ def demo():
maltParser = MaltParser()
maltParser.train([dg1,dg2], verbose=verbose)
-print(maltParser.raw_parse('John sees Mary', verbose=verbose).tree().pprint())
-print(maltParser.raw_parse('a man runs', verbose=verbose).tree().pprint())
+maltParser.parse_one(['John','sees','Mary'], verbose=verbose).tree().pprint()
+maltParser.parse_one(['a','man','runs'], verbose=verbose).tree().pprint()
if __name__ == '__main__':
demo()
@@ -462,8 +462,8 @@ class ProbabilisticNonprojectiveParser(object):
}
)
#print (g_graph.nodes)
+# Fully connect non-root nodes in g_graph
g_graph.connect_graph()
original_graph = DependencyGraph()
@@ -567,8 +567,10 @@ class ProbabilisticNonprojectiveParser(object):
logger.debug('Betas: %s', betas)
for node in original_graph.nodes.values():
-# deps must be a dictionary
-#node['deps'] = []
+# TODO: It's dangerous to assume that deps it a dictionary
+# because it's a default dictionary. Ideally, here we should not
+# be concerned how dependencies are stored inside of a dependency
+# graph.
node['deps'] = {}
for i in range(1, len(tokens) + 1):
original_graph.add_arc(betas[i][0], betas[i][1])
@@ -701,22 +703,32 @@ class NonprojectiveDependencyParser(object):
# Filter parses
# ensure 1 root, every thing has 1 head
for analysis in analyses:
-root_count = 0
-root = []
-for i, cell in enumerate(analysis):
-if cell == -1:
-root_count += 1
-root = i
-if root_count == 1:
-graph = DependencyGraph()
-graph.nodes[0]['deps'] = root + 1
-for i in range(len(tokens)):
-node = {'word': tokens[i], 'address': i+1}
-node['deps'] = [j+1 for j in range(len(tokens)) if analysis[j] == i]
-graph.nodes[i + 1] = node
-# cycle = graph.contains_cycle()
-# if not cycle:
-yield graph
+if analysis.count(-1) > 1:
+# there are several root elements!
+continue
+graph = DependencyGraph()
+graph.root = graph.nodes[analysis.index(-1) + 1]
+for address, (token, head_index) in enumerate(zip(tokens, analysis), start=1):
+head_address = head_index + 1
+node = graph.nodes[address]
+node.update(
+{
+'word': token,
+'address': address,
+}
+)
+if head_address == 0:
+rel = 'ROOT'
+else:
+rel = ''
+graph.nodes[head_index + 1]['deps'][rel].append(address)
+# TODO: check for cycles
+yield graph
#################################################################
......
@@ -109,18 +109,6 @@ class StanfordParser(ParserI):
cur_lines.append(line)
return res
-def parse_all(self, sentence, verbose=False):
-"""
-Use StanfordParser to parse a sentence. Takes a sentence as a list of
-words; it will be automatically tagged with this StanfordParser instance's
-tagger.
-:param sentence: Input sentence to parse
-:type sentence: list(str)
-:rtype: Tree
-"""
-return self.parse_sents([sentence], verbose)
def parse_sents(self, sentences, verbose=False):
"""
Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
......
@@ -8,16 +8,16 @@
import tempfile
import pickle
-import os
-import copy
-import operator
-from nltk.parse.api import ParserI
-import scipy.sparse as sparse
-import numpy as np
+from os import remove
+from copy import deepcopy
+from operator import itemgetter
+from scipy import sparse
+from numpy import array
from sklearn.datasets import load_svmlight_file
from sklearn import svm
-from nltk.parse import DependencyGraph
-from evaluate import DependencyEvaluator
+from nltk.parse import ParserI, DependencyGraph, DependencyEvaluator
class Configuration(object):
@@ -522,7 +522,7 @@ class TransitionParser(ParserI):
# Save the model to file name (as pickle)
pickle.dump(model, open(modelfile, 'wb'))
finally:
-os.remove(input_file.name)
+remove(input_file.name)
def parse(self, depgraphs, modelFile):
"""
@@ -549,9 +549,9 @@
col.append(self._dictionary[feature])
row.append(0)
data.append(1.0)
-np_col = np.array(sorted(col)) # NB : index must be sorted
-np_row = np.array(row)
-np_data = np.array(data)
+np_col = array(sorted(col)) # NB : index must be sorted
+np_row = array(row)
+np_data = array(data)
x_test = sparse.csr_matrix((np_data, (np_row, np_col)), shape=(1, len(self._dictionary)))
@@ -570,7 +570,7 @@
# votes[j] +=1
# k +=1
# Sort votes according to the values
-#sorted_votes = sorted(votes.items(), key=operator.itemgetter(1), reverse=True)
+#sorted_votes = sorted(votes.items(), key=itemgetter(1), reverse=True)
# We will use predict_proba instead of decision_function
prob_dict = {}
@@ -579,7 +579,7 @@
prob_dict[i] = pred_prob[i]
sorted_Prob = sorted(
prob_dict.items(),
-key=operator.itemgetter(1),
+key=itemgetter(1),
reverse=True)
# Note that SHIFT is always a valid operation
@@ -609,7 +609,7 @@
# Finish with operations build the dependency graph from Conf.arcs
-new_depgraph = copy.deepcopy(depgraph)
+new_depgraph = deepcopy(depgraph)
for key in new_depgraph.nodes:
node = new_depgraph.nodes[key]
node['rel'] = ''
@@ -727,7 +727,7 @@ def demo():
Number of training examples : 1
Number of valid (projective) examples : 1
...
->>> os.remove(input_file.name)
+>>> remove(input_file.name)
B. Check the ARC-EAGER training
@@ -743,7 +743,7 @@ def demo():
Number of valid (projective) examples : 1
...
->>> os.remove(input_file.name)
+>>> remove(input_file.name)
###################### Check The Parsing Function ########################
......
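
Putting the new transition-based parser to work looks roughly like this (a hedged sketch: the 'arc-standard' option and the train() signature are assumptions based on this diff and its demo doctest; the model file name and one-sentence training set are illustrative):

    from nltk.parse import DependencyGraph, DependencyEvaluator
    from nltk.parse.transitionparser import TransitionParser

    # One gold sentence in four-column Malt-TAB form (word, tag, head, relation).
    gold = [DependencyGraph("""John NNP 2 SUB
    sees VBZ 0 ROOT
    Mary NNP 2 OBJ
    """)]

    tp = TransitionParser('arc-standard')            # 'arc-eager' is the other strategy
    tp.train(gold, 'temp.arcstd.model')              # extracts features, trains an SVM, pickles it
    parsed = tp.parse(gold, 'temp.arcstd.model')     # parse(depgraphs, modelFile), as in the diff
    print(DependencyEvaluator(parsed, gold).eval())  # attachment scores
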
@@ -35,30 +35,33 @@ CoNLL Data
... . . 9 VMOD
... """
>>> dg = DependencyGraph(treebank_data)
->>> print(dg.tree().pprint())
+>>> dg.tree().pprint()
(will
(Vinken Pierre , (old (years 61)) ,)
(join (board the) (as (director a nonexecutive)) (Nov. 29) .))
->>> print(list(dg.triples()))
-[((u'will', u'MD'), u'SUB', (u'Vinken', u'NNP')),
-((u'Vinken', u'NNP'), u'NMOD', (u'Pierre', u'NNP')),
-((u'Vinken', u'NNP'), u'P', (u',', u',')),
-((u'Vinken', u'NNP'), u'NMOD', (u'old', u'JJ')),
-((u'old', u'JJ'), u'AMOD', (u'years', u'NNS')),
-((u'years', u'NNS'), u'NMOD', (u'61', u'CD')),
-((u'Vinken', u'NNP'), u'P', (u',', u',')),
-((u'will', u'MD'), u'VC', (u'join', u'VB')),
-((u'join', u'VB'), u'OBJ', (u'board', u'NN')),
-((u'board', u'NN'), u'NMOD', (u'the', u'DT')),
-((u'join', u'VB'), u'VMOD', (u'as', u'IN')),
-((u'as', u'IN'), u'PMOD', (u'director', u'NN')),
-((u'director', u'NN'), u'NMOD', (u'a', u'DT')),
-((u'director', u'NN'), u'NMOD', (u'nonexecutive', u'JJ')),
-((u'join', u'VB'), u'VMOD', (u'Nov.', u'NNP')),
-((u'Nov.', u'NNP'), u'NMOD', (u'29', u'CD')),
-((u'join', u'VB'), u'VMOD', (u'.', u'.'))]
+>>> for head, rel, dep in dg.triples():
+...     print(
+...         '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
+...         .format(h=head, r=rel, d=dep)
+...     )
+(will, MD), SUB, (Vinken, NNP)
+(Vinken, NNP), NMOD, (Pierre, NNP)
+(Vinken, NNP), P, (,, ,)
+(Vinken, NNP), NMOD, (old, JJ)
+(old, JJ), AMOD, (years, NNS)
+(years, NNS), NMOD, (61, CD)
+(Vinken, NNP), P, (,, ,)
+(will, MD), VC, (join, VB)
+(join, VB), OBJ, (board, NN)
+(board, NN), NMOD, (the, DT)
+(join, VB), VMOD, (as, IN)
+(as, IN), PMOD, (director, NN)
+(director, NN), NMOD, (a, DT)
+(director, NN), NMOD, (nonexecutive, JJ)
+(join, VB), VMOD, (Nov., NNP)
+(Nov., NNP), NMOD, (29, CD)
+(join, VB), VMOD, (., .)
Using the dependency-parsed version of the Penn Treebank corpus sample.
@@ -159,21 +162,22 @@ Non-Projective Dependency Parsing
'dog' -> 'his'
>>> dp = NonprojectiveDependencyParser(grammar)
->>> for g in dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']):
-... print(g) # doctest: +NORMALIZE_WHITESPACE
-{0: {'address': 0,
-'ctag': 'TOP',
-'deps': 3,
-'feats': None,
-'lemma': None,
-'rel': 'TOP',
-'tag': 'TOP',
-'word': None},
-1: {'address': 1, 'deps': [], 'word': 'the'},
-2: {'address': 2, 'deps': [1], 'word': 'man'},
-3: {'address': 3, 'deps': [2, 7], 'word': 'taught'},
-4: {'address': 4, 'deps': [], 'word': 'his'},
-5: {'address': 5, 'deps': [4], 'word': 'dog'},
-6: {'address': 6, 'deps': [], 'word': 'to'},
-7: {'address': 7, 'deps': [5, 6, 8], 'word': 'play'},
-8: {'address': 8, 'deps': [], 'word': 'golf'}}
+>>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])
+>>> print(g.root['word'])
+taught
+>>> for _, node in sorted(g.nodes.items()):
+...     if node['word'] is not None:
+...         print('{address} {word}: {d}'.format(d=node['deps'][''], **node))
+1 the: []
+2 man: [1]
+3 taught: [2, 7]
+4 his: []
+5 dog: [4]
+6 to: []
+7 play: [5, 6, 8]
+8 golf: []
+>>> print(g.tree())
+(taught (man the) (play (dog his) to golf))
@@ -370,7 +370,7 @@ Dependency Graph to LFG f-structure
subj g:[pred 'John']]
>>> fstruct.to_depgraph().tree().pprint()
-'(sees (dog a) John)'
+(sees (dog a) John)
---------------------------------
LFG f-structure to Glue
......
@@ -41,7 +41,7 @@ tree object to one of several standard tree encodings:
There is also a fancy ASCII art representation:
->>> tree.pprint()
+>>> tree.pretty_print()
s
________|_____
| vp
@@ -52,7 +52,7 @@ There is also a fancy ASCII art representation:
| | | | |
the dog chased the cat
->>> tree.pprint(unicodelines=True, nodedist=4)
+>>> tree.pretty_print(unicodelines=True, nodedist=4)
s
┌──────────────┴────────┐
│ vp
......
@@ -685,7 +685,7 @@ class Tree(list):
from nltk.draw.tree import draw_trees
draw_trees(self)
-def pprint(self, sentence=None, highlight=(), **viz_args):
+def pretty_print(self, sentence=None, highlight=(), **viz_args):
"""
Pretty-print this tree as ASCII or Unicode art.
For explanation of the arguments, see the documentation for
@@ -734,6 +734,17 @@
def __str__(self):
return self.pformat()
+def pprint(self, **args):
+"""
+Print a string representation of this Tree to 'stream'
+"""
+if "stream" in args:
+stream = args["stream"]
+else:
+stream = None
+print(self.pformat(**args), file=stream)
def pformat(self, margin=70, indent=0, nodesep='', parens='()', quotes=False):
"""
:return: A pretty-printed string representation of this tree.
@@ -751,7 +762,7 @@ class Tree(list):
# Try writing it on one line.
s = self._pformat_flat(nodesep, parens, quotes)
-if len(s)+indent < margin:
+if len(s) + indent < margin:
return s
# If it doesn't fit on one line, then write it on multi-lines.
......
[tox]
-envlist = py26,py27,py32,py33,pypy,py26-nodeps,py27-nodeps,py32-nodeps,py33-nodeps,py26-jenkins,py32-jenkins
+envlist = py26,py27,py32,py33,py34,pypy,py26-nodeps,py27-nodeps,py32-nodeps,py33-nodeps,py34-nodeps,py26-jenkins,py32-jenkins,py34-jenkins
[testenv]
@@ -63,6 +63,20 @@ commands =
; python runtests.py --with-coverage --cover-inclusive --cover-package=nltk --cover-html --cover-html-dir={envdir}/docs []
python runtests.py []
+[testenv:py34]
+deps =
+    numpy
+    nose >= 1.2.1
+    coverage
+    text-unidecode
+commands =
+    ; scipy and scikit-learn requires numpy even to run setup.py so
+    ; they can't be installed in one command
+    pip install --download-cache={toxworkdir}/_download scipy scikit-learn
+    ; python runtests.py --with-coverage --cover-inclusive --cover-package=nltk --cover-html --cover-html-dir={envdir}/docs []
+    python runtests.py []
[testenv:py26-nodeps]
basepython = python2.6
@@ -84,6 +98,11 @@ basepython = python3.3
deps = nose >= 1.2.1
commands = python runtests.py []
+[testenv:py34-nodeps]
+basepython = python3.4
+deps = nose >= 1.2.1
+commands = python runtests.py []
[testenv:py26-jenkins]
basepython = python2.6
commands = {toxinidir}/jenkins.sh
@@ -99,3 +118,11 @@ setenv =
STANFORD_MODELS = {homedir}/third/stanford-parser/
STANFORD_PARSER = {homedir}/third/stanford-parser/
STANFORD_POSTAGGER = {homedir}/third/stanford-postagger/
+[testenv:py34-jenkins]
+basepython = python3.4
+commands = {toxinidir}/jenkins.sh
+setenv =
+    STANFORD_MODELS = {homedir}/third/stanford-parser/
+    STANFORD_PARSER = {homedir}/third/stanford-parser/
+    STANFORD_POSTAGGER = {homedir}/third/stanford-postagger/
@@ -44,7 +44,7 @@ master_doc = 'index'
# General information about the project.
project = 'NLTK'
-copyright = '2013, NLTK Project'
+copyright = '2015, NLTK Project'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
......
@@ -5,14 +5,14 @@ The Natural Language Toolkit exists thanks to the efforts of dozens
of voluntary developers who have contributed functionality and
bugfixes since the project began in 2000 (`contributors <https://github.com/nltk/nltk#contributing>`_).
-In 2014 we are especially keen to improve NLTK coverage for:
+In 2015 we are especially keen to improve NLTK coverage for:
`dependency parsing <https://github.com/nltk/nltk/wiki/Dependency-Parsing>`_,
`machine translation <https://github.com/nltk/nltk/wiki/Machine-Translation>`_,
`sentiment analysis <https://github.com/nltk/nltk/wiki/Sentiment-Analysis>`_,
`twitter processing <https://github.com/nltk/nltk/wiki/Twitter-Processing>`_.
New material in these areas will be covered in the second edition of
-the NLTK book, anticipated in 2015.
+the NLTK book, anticipated in early 2016.
* `desired enhancements <https://github.com/nltk/nltk/issues?labels=enhancement&page=1&state=open>`_
* `contribute a corpus <https://github.com/nltk/nltk/wiki/Adding-a-Corpus>`_
@@ -29,7 +29,6 @@ Individual packages are maintained by the following people:
:Parsing: `Peter Ljunglöf <http://www.cse.chalmers.se/~peb/>`_, Gothenburg, Sweden (``nltk.parse, nltk.featstruct``)
:Metrics: `Joel Nothman <http://joelnothman.com/>`_, Sydney, Australia (``nltk.metrics, nltk.tokenize.punkt``)
:Python 3: `Mikhail Korobov <http://kmike.ru/>`_, Ekaterinburg, Russia
-:Integration: `Morten Minde Neergaard <http://8d.no/>`_, Oslo, Norway
:Releases: `Steven Bird <http://estive.net>`_, Melbourne, Australia
......
NLTK News
=========
+2015
+----
+NLTK 3.0.1 released : January 2015
+Minor packaging update.
+2014
+----
NLTK 3.0.0 released : September 2014
Minor bugfixes. For full details see:
https://github.com/nltk/nltk/blob/develop/ChangeLog
@@ -26,6 +35,9 @@ NLTK 3.0a4 released : June 2014
https://github.com/nltk/nltk/blob/develop/ChangeLog
http://nltk.org/nltk3-alpha/
+2013
+----
NLTK Book Updates : October 2013
We are updating the NLTK book for Python 3 and NLTK 3; please see
http://nltk.org/book3/
......