Commit 58563e6d by Steven Bird

Merge branch 'nltk-3.0' into develop

parents b7ac4739 99b9ca6e
-Version 3.0b1 2014-07-11
+Version 3.0b2 2014-08-21
+* minor bugfixes and clean-ups
+* renamed remaining parse_ methods to read_ or load_, cf issue #656
+* added Paice's method of evaluating stemming algorithms
+Thanks to the following contributors to 3.0.0b2: Lars Buitinck,
+Cristian Capdevila, Lauri Hallila, Ofer Helman, Dmitrijs Milajevs,
+lade, Liling Tan, Steven Xu
+Version 3.0.0b1 2014-07-11
* Added SentiWordNet corpus and corpus reader
* Fixed support for 10-column dependency file format
* Changed Tree initialization to use fromstring
......
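Paice's method, noted in the changelog above, scores a stemmer against a reference grouping of word forms: an under-stemming index (UI) counts word pairs that should have been conflated but received different stems, and an over-stemming index (OI) counts pairs wrongly conflated under one stem, each normalised by the corresponding total of desired merges or non-merges. The sketch below is an independent, self-contained illustration of the metric, not the new nltk.metrics code; the lemma grouping and the three-character truncation "stemmer" are invented for the example.

    from collections import Counter, defaultdict

    def paice_indexes(lemma_groups, stem):
        """Under-/over-stemming indexes in the spirit of Paice's evaluation.

        lemma_groups maps each concept to the word forms that should conflate;
        stem is a callable mapping a word to its stem.
        """
        words = [w for group in lemma_groups.values() for w in group]
        total = len(words)
        gdmt = gdnt = gumt = 0.0
        stem_members = defaultdict(list)          # stem -> lemmas of the words mapped to it
        for lemma, group in lemma_groups.items():
            n = len(group)
            gdmt += n * (n - 1) / 2.0             # pairs that should merge
            gdnt += n * (total - n) / 2.0         # pairs that should stay apart
            stems_here = Counter(stem(w) for w in group)
            gumt += sum(u * (n - u) for u in stems_here.values()) / 2.0
            for w in group:
                stem_members[stem(w)].append(lemma)
        gwmt = 0.0
        for members in stem_members.values():     # pairs sharing a stem but from different concepts
            n_t = len(members)
            gwmt += sum(v * (n_t - v) for v in Counter(members).values()) / 2.0
        ui = gumt / gdmt if gdmt else 0.0         # under-stemming index
        oi = gwmt / gdnt if gdnt else 0.0         # over-stemming index
        return ui, oi

    groups = {'connect': ['connect', 'connected', 'connecting'],
              'relate':  ['relate', 'related'],
              'console': ['console', 'consoles']}
    print(paice_indexes(groups, lambda w: w[:3]))  # (0.0, 0.375): no under-stemming,
                                                   # but connect/console collapse to 'con'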
@@ -399,7 +399,7 @@ class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn):
def __str__(self):
return Production.__unicode__(self) + \
-        (' [1.0]' if (self.prob() == 1.0) else ' [%.g]' % self.prob())
+        (' [1.0]' if (self.prob() == 1.0) else ' [%g]' % self.prob())
def __eq__(self, other):
return (type(self) == type(other) and
......
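The [1] → [1.0] updates in the grammar doctests below follow from this display code: '%g' keeps up to six significant digits and drops trailing zeros (0.0714286 prints in full, but 1.0 would print as just '1'), hence the explicit ' [1.0]' branch for certainties. A quick illustration with a made-up grammar (the rules below are not from NLTK's test suite):

    from nltk import PCFG

    # '%g' trims trailing zeros ('%g' % 0.25 -> '0.25', '%g' % 1.0 -> '1'),
    # so productions with probability one get the explicit ' [1.0]' branch above.
    toy = PCFG.fromstring("""
        S  -> NP VP   [1.0]
        NP -> 'the' N [0.4] | N [0.6]
        N  -> 'dog'   [0.5] | 'cat' [0.5]
        VP -> 'barks' [1.0]
    """)
    for prod in toy.productions():
        print(prod)   # e.g. S -> NP VP [1.0], NP -> 'the' N [0.4], ...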
@@ -44,7 +44,6 @@ import random
import warnings
import array
from operator import itemgetter
-from itertools import islice
from collections import defaultdict
from functools import reduce
from nltk import compat
@@ -218,9 +217,7 @@ class FreqDist(Counter):
Plot samples from the frequency distribution
displaying the most frequent sample first. If an integer
parameter is supplied, stop after this many samples have been
-plotted. If two integer parameters m, n are supplied, plot a
-subset of the samples, beginning with m and stopping at n-1.
-For a cumulative plot, specify cumulative=True.
+plotted. For a cumulative plot, specify cumulative=True.
(Requires Matplotlib to be installed.)
:param title: The title for the graph
@@ -236,7 +233,7 @@ class FreqDist(Counter):
if len(args) == 0:
args = [len(self)]
-samples = list(islice(self, *args))
+samples = [item for item, _ in self.most_common(*args)]
cumulative = _get_kwarg(kwargs, 'cumulative', False)
if cumulative:
@@ -264,16 +261,14 @@ class FreqDist(Counter):
Tabulate the given samples from the frequency distribution (cumulative),
displaying the most frequent sample first. If an integer
parameter is supplied, stop after this many samples have been
-plotted. If two integer parameters m, n are supplied, plot a
-subset of the samples, beginning with m and stopping at n-1.
-(Requires Matplotlib to be installed.)
+plotted.
:param samples: The samples to plot (default is all samples)
:type samples: list
"""
if len(args) == 0:
args = [len(self)]
-samples = list(islice(self, *args))
+samples = [item for item, _ in self.most_common(*args)]
cumulative = _get_kwarg(kwargs, 'cumulative', False)
if cumulative:
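With this change both plot() and tabulate() draw their samples from most_common(), so a bare integer argument now selects the N most frequent samples rather than whatever order islice() over the underlying Counter happened to yield. A small session for illustration (the toy string is made up):

    from nltk import FreqDist

    fd = FreqDist('abracadabra')        # character counts: a:5, b:2, r:2, c:1, d:1
    print(fd.most_common(3))            # [('a', 5), ('b', 2), ('r', 2)]
    fd.tabulate(3)                      # a, b, r with counts 5 2 2, most frequent first
    fd.tabulate(3, cumulative=True)     # running totals: 5 7 9
    # fd.plot(3, cumulative=True)       # same selection, drawn with Matplotlib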
@@ -1668,6 +1663,7 @@ class ConditionalFreqDist(defaultdict):
the indexing operator:
>>> cfdist[3]
-FreqDist({'the': 3, 'dog': 2, 'not': 1})
+<FreqDist with 3 samples and 6 outcomes>
>>> cfdist[3].freq('the')
0.5
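For readers without the surrounding docstring: a ConditionalFreqDist like the one in this doctest conditions on word length. The sentence below is illustrative rather than the doctest's own (elided) setup, chosen so that the length-3 words reproduce the counts shown above:

    from nltk.probability import ConditionalFreqDist

    sent = 'the the the dog dog some other words that we do not care about'.split()
    cfd = ConditionalFreqDist((len(word), word) for word in sent)
    print(cfd[3])               # length-3 words: 'the' x3, 'dog' x2, 'not' x1 (6 outcomes)
    print(cfd[3].freq('the'))   # 3 of 6 outcomes -> 0.5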
......
@@ -108,7 +108,14 @@ Non-Projective Dependency Parsing
>>> dp = NonprojectiveDependencyParser(grammar)
>>> for g in dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']):
... print(g) # doctest: +NORMALIZE_WHITESPACE
-[{'address': 0, 'deps': 3, 'rel': 'TOP', 'tag': 'TOP', 'word': None},
+[{'address': 0,
+  'ctag': 'TOP',
+  'deps': 3,
+  'feats': None,
+  'lemma': None,
+  'rel': 'TOP',
+  'tag': 'TOP',
+  'word': None},
{'address': 1, 'deps': [], 'word': 'the'},
{'address': 2, 'deps': [1], 'word': 'man'},
{'address': 3, 'deps': [2, 7], 'word': 'taught'},
......
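The larger node dictionaries in the new expected output are CoNLL-style fields ('ctag', 'feats', 'lemma' alongside 'tag' and 'word') now carried for every token. The grammar the doctest uses is defined above the excerpt; a self-contained variant for experimenting might look like the sketch below, whose rules and sentence are invented for illustration and are not the doctest's:

    from nltk.grammar import DependencyGrammar
    from nltk.parse.nonprojectivedependencyparser import NonprojectiveDependencyParser

    # Illustrative head -> dependent rules, not the grammar from the doctest.
    grammar = DependencyGrammar.fromstring("""
        'chased' -> 'dog' | 'cat'
        'dog'    -> 'the'
        'cat'    -> 'the'
    """)
    parser = NonprojectiveDependencyParser(grammar)
    for analysis in parser.parse('the dog chased the cat'.split()):
        print(analysis)         # each analysis prints as a list of per-token node dicts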
@@ -556,7 +556,7 @@ Create a set of PCFG productions.
A
>>> grammar.productions()
-[A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1]]
+[A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1.0]]
Induce some productions using parsed Treebank data.
@@ -570,7 +570,7 @@ Induce some productions using parsed Treebank data.
<Grammar with 71 productions>
>>> sorted(grammar.productions(lhs=Nonterminal('PP')))[:2]
-[PP -> IN NP [1]]
+[PP -> IN NP [1.0]]
>>> sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2]
[NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]]
>>> sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2]
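The grammar in this block is induced with nltk.grammar.induce_pcfg from Treebank productions. The snippet below sets up a comparable induction; the corpus slice is a guess (the doctest's exact fileids are elided above), so the production counts and probabilities will differ:

    from nltk.corpus import treebank
    from nltk.grammar import Nonterminal, induce_pcfg

    # Requires the 'treebank' sample corpus (nltk.download('treebank')).
    productions = []
    for tree in treebank.parsed_sents()[:3]:
        productions += tree.productions()
    grammar = induce_pcfg(Nonterminal('S'), productions)
    print(grammar)                       # e.g. "Grammar with N productions (start state = S)"
    print(sorted(grammar.productions(lhs=Nonterminal('PP')))[:2])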
@@ -585,14 +585,14 @@ Unit tests for the Probabilistic Chart Parse classes
>>> grammar = toy_pcfg2
>>> print(grammar)
Grammar with 23 productions (start state = S)
-S -> NP VP [1]
+S -> NP VP [1.0]
VP -> V NP [0.59]
VP -> V [0.4]
VP -> VP PP [0.01]
NP -> Det N [0.41]
NP -> Name [0.28]
NP -> NP PP [0.31]
-PP -> P NP [1]
+PP -> P NP [1.0]
V -> 'saw' [0.21]
V -> 'ate' [0.51]
V -> 'ran' [0.28]
......
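To exercise toy_pcfg2 the way the elided doctest does, one of the probabilistic chart parsers can be run over a short sentence; the parser class and sentence here are illustrative choices, not necessarily the doctest's:

    from nltk.grammar import toy_pcfg2
    from nltk.parse import pchart

    parser = pchart.InsideChartParser(toy_pcfg2)
    for tree in parser.parse('Jack saw Bob with my cookie'.split()):
        print(tree)             # a ProbabilisticTree; tree.prob() gives its probability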
@@ -16,7 +16,7 @@ distributional similarity.
from __future__ import print_function, division, unicode_literals
from math import log
-from collections import defaultdict, Counter
+from collections import defaultdict
from functools import reduce
from itertools import islice
import re
@@ -26,7 +26,7 @@ from nltk.probability import ConditionalFreqDist as CFD
from nltk.util import tokenwrap, LazyConcatenation
from nltk.metrics import f_measure, BigramAssocMeasures
from nltk.collocations import BigramCollocationFinder
-from nltk.compat import python_2_unicode_compatible, text_type
+from nltk.compat import python_2_unicode_compatible, text_type, Counter
class ContextIndex(object):
......
NLTK News
=========
+NLTK 3.0.0b2 released : August 2014
+Minor bugfixes and clean-ups. For full details see:
+https://github.com/nltk/nltk/blob/develop/ChangeLog
NLTK Book Updates : July 2014
The NLTK book is being updated for Python 3 and NLTK 3 `here <http://nltk.org/book>`_.
The original Python 2 edition is still available `here <http://nltk.org/book_1ed>`_.
-NLTK 3.0b1 released : July 2014
+NLTK 3.0.0b1 released : July 2014
FrameNet, SentiWordNet, universal tagset, misc efficiency improvements and bugfixes
Several API changes, see https://github.com/nltk/nltk/wiki/Porting-your-code-to-NLTK-3.0
For full details see:
......