Commit 58563e6d by Steven Bird

Merge branch 'nltk-3.0' into develop

parents b7ac4739 99b9ca6e
-Version 3.0b1 2014-07-11
+Version 3.0b2 2014-08-21
+* minor bugfixes and clean-ups
+* renamed remaining parse_ methods to read_ or load_, cf issue #656
+* added Paice's method of evaluating stemming algorithms
+Thanks to the following contributors to 3.0.0b2: Lars Buitinck,
+Cristian Capdevila, Lauri Hallila, Ofer Helman, Dmitrijs Milajevs,
+lade, Liling Tan, Steven Xu
+Version 3.0.0b1 2014-07-11
* Added SentiWordNet corpus and corpus reader
* Fixed support for 10-column dependency file format
* Changed Tree initialization to use fromstring
......
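Paice's method, noted in the changelog above, scores a stemmer against a reference grouping of word forms: an under-stemming index (UI) counts word pairs that should have been conflated but received different stems, and an over-stemming index (OI) counts pairs wrongly conflated under one stem, each normalised by the corresponding total of desired merges or non-merges. The sketch below is an independent, self-contained illustration of the metric, not the new nltk.metrics code; the lemma grouping and the three-character truncation "stemmer" are invented for the example.

    from collections import Counter, defaultdict

    def paice_indexes(lemma_groups, stem):
        """Under-/over-stemming indexes in the spirit of Paice's evaluation.

        lemma_groups maps each concept to the word forms that should conflate;
        stem is a callable mapping a word to its stem.
        """
        words = [w for group in lemma_groups.values() for w in group]
        total = len(words)
        gdmt = gdnt = gumt = 0.0
        stem_members = defaultdict(list)          # stem -> lemmas of the words mapped to it
        for lemma, group in lemma_groups.items():
            n = len(group)
            gdmt += n * (n - 1) / 2.0             # pairs that should merge
            gdnt += n * (total - n) / 2.0         # pairs that should stay apart
            stems_here = Counter(stem(w) for w in group)
            gumt += sum(u * (n - u) for u in stems_here.values()) / 2.0
            for w in group:
                stem_members[stem(w)].append(lemma)
        gwmt = 0.0
        for members in stem_members.values():     # pairs sharing a stem but from different concepts
            n_t = len(members)
            gwmt += sum(v * (n_t - v) for v in Counter(members).values()) / 2.0
        ui = gumt / gdmt if gdmt else 0.0         # under-stemming index
        oi = gwmt / gdnt if gdnt else 0.0         # over-stemming index
        return ui, oi

    groups = {'connect': ['connect', 'connected', 'connecting'],
              'relate':  ['relate', 'related'],
              'console': ['console', 'consoles']}
    print(paice_indexes(groups, lambda w: w[:3]))  # (0.0, 0.375): no under-stemming,
                                                   # but connect/console collapse to 'con'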
@@ -399,7 +399,7 @@ class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn):
def __str__(self):
return Production.__unicode__(self) + \
-        (' [1.0]' if (self.prob() == 1.0) else ' [%.g]' % self.prob())
+        (' [1.0]' if (self.prob() == 1.0) else ' [%g]' % self.prob())
def __eq__(self, other):
return (type(self) == type(other) and
......
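The [1] → [1.0] updates in the grammar doctests below follow from this display code: '%g' keeps up to six significant digits and drops trailing zeros (0.0714286 prints in full, but 1.0 would print as just '1'), hence the explicit ' [1.0]' branch for certainties. A quick illustration with a made-up grammar (the rules below are not from NLTK's test suite):

    from nltk import PCFG

    # '%g' trims trailing zeros ('%g' % 0.25 -> '0.25', '%g' % 1.0 -> '1'),
    # so productions with probability one get the explicit ' [1.0]' branch above.
    toy = PCFG.fromstring("""
        S  -> NP VP   [1.0]
        NP -> 'the' N [0.4] | N [0.6]
        N  -> 'dog'   [0.5] | 'cat' [0.5]
        VP -> 'barks' [1.0]
    """)
    for prod in toy.productions():
        print(prod)   # e.g. S -> NP VP [1.0], NP -> 'the' N [0.4], ...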
@@ -44,7 +44,6 @@ import random
import warnings
import array
from operator import itemgetter
-from itertools import islice
from collections import defaultdict
from functools import reduce
from nltk import compat
@@ -218,9 +217,7 @@ class FreqDist(Counter):
Plot samples from the frequency distribution
displaying the most frequent sample first. If an integer
parameter is supplied, stop after this many samples have been
-plotted. If two integer parameters m, n are supplied, plot a
-subset of the samples, beginning with m and stopping at n-1.
-For a cumulative plot, specify cumulative=True.
+plotted. For a cumulative plot, specify cumulative=True.
(Requires Matplotlib to be installed.)
:param title: The title for the graph
@@ -236,7 +233,7 @@ class FreqDist(Counter):
if len(args) == 0:
args = [len(self)]
-samples = list(islice(self, *args))
+samples = [item for item, _ in self.most_common(*args)]
cumulative = _get_kwarg(kwargs, 'cumulative', False)
if cumulative:
@@ -264,16 +261,14 @@ class FreqDist(Counter):
Tabulate the given samples from the frequency distribution (cumulative),
displaying the most frequent sample first. If an integer
parameter is supplied, stop after this many samples have been
-plotted. If two integer parameters m, n are supplied, plot a
-subset of the samples, beginning with m and stopping at n-1.
-(Requires Matplotlib to be installed.)
+plotted.
:param samples: The samples to plot (default is all samples)
:type samples: list
"""
if len(args) == 0:
args = [len(self)]
-samples = list(islice(self, *args))
+samples = [item for item, _ in self.most_common(*args)]
cumulative = _get_kwarg(kwargs, 'cumulative', False)
if cumulative:
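With this change both plot() and tabulate() draw their samples from most_common(), so a bare integer argument now selects the N most frequent samples rather than whatever order islice() over the underlying Counter happened to yield. A small session for illustration (the toy string is made up):

    from nltk import FreqDist

    fd = FreqDist('abracadabra')        # character counts: a:5, b:2, r:2, c:1, d:1
    print(fd.most_common(3))            # [('a', 5), ('b', 2), ('r', 2)]
    fd.tabulate(3)                      # a, b, r with counts 5 2 2, most frequent first
    fd.tabulate(3, cumulative=True)     # running totals: 5 7 9
    # fd.plot(3, cumulative=True)       # same selection, drawn with Matplotlib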
@@ -1668,6 +1663,7 @@ class ConditionalFreqDist(defaultdict):
the indexing operator:
>>> cfdist[3]
-FreqDist({'the': 3, 'dog': 2, 'not': 1})
+<FreqDist with 3 samples and 6 outcomes>
>>> cfdist[3].freq('the')
0.5
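For readers without the surrounding docstring: a ConditionalFreqDist like the one in this doctest conditions on word length. The sentence below is illustrative rather than the doctest's own (elided) setup, chosen so that the length-3 words reproduce the counts shown above:

    from nltk.probability import ConditionalFreqDist

    sent = 'the the the dog dog some other words that we do not care about'.split()
    cfd = ConditionalFreqDist((len(word), word) for word in sent)
    print(cfd[3])               # length-3 words: 'the' x3, 'dog' x2, 'not' x1 (6 outcomes)
    print(cfd[3].freq('the'))   # 3 of 6 outcomes -> 0.5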
......
@@ -108,7 +108,14 @@ Non-Projective Dependency Parsing
>>> dp = NonprojectiveDependencyParser(grammar)
>>> for g in dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']):
... print(g) # doctest: +NORMALIZE_WHITESPACE
-[{'address': 0, 'deps': 3, 'rel': 'TOP', 'tag': 'TOP', 'word': None},
+[{'address': 0,
+  'ctag': 'TOP',
+  'deps': 3,
+  'feats': None,
+  'lemma': None,
+  'rel': 'TOP',
+  'tag': 'TOP',
+  'word': None},
{'address': 1, 'deps': [], 'word': 'the'},
{'address': 2, 'deps': [1], 'word': 'man'},
{'address': 3, 'deps': [2, 7], 'word': 'taught'},
......
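The larger node dictionaries in the new expected output are CoNLL-style fields ('ctag', 'feats', 'lemma' alongside 'tag' and 'word') now carried for every token. The grammar the doctest uses is defined above the excerpt; a self-contained variant for experimenting might look like the sketch below, whose rules and sentence are invented for illustration and are not the doctest's:

    from nltk.grammar import DependencyGrammar
    from nltk.parse.nonprojectivedependencyparser import NonprojectiveDependencyParser

    # Illustrative head -> dependent rules, not the grammar from the doctest.
    grammar = DependencyGrammar.fromstring("""
        'chased' -> 'dog' | 'cat'
        'dog'    -> 'the'
        'cat'    -> 'the'
    """)
    parser = NonprojectiveDependencyParser(grammar)
    for analysis in parser.parse('the dog chased the cat'.split()):
        print(analysis)         # each analysis prints as a list of per-token node dicts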
@@ -556,7 +556,7 @@ Create a set of PCFG productions.
A
>>> grammar.productions()
-[A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1]]
+[A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1.0]]
Induce some productions using parsed Treebank data.
@@ -570,7 +570,7 @@ Induce some productions using parsed Treebank data.
<Grammar with 71 productions>
>>> sorted(grammar.productions(lhs=Nonterminal('PP')))[:2]
-[PP -> IN NP [1]]
+[PP -> IN NP [1.0]]
>>> sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2]
[NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]]
>>> sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2]
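The grammar in this block is induced with nltk.grammar.induce_pcfg from Treebank productions. The snippet below sets up a comparable induction; the corpus slice is a guess (the doctest's exact fileids are elided above), so the production counts and probabilities will differ:

    from nltk.corpus import treebank
    from nltk.grammar import Nonterminal, induce_pcfg

    # Requires the 'treebank' sample corpus (nltk.download('treebank')).
    productions = []
    for tree in treebank.parsed_sents()[:3]:
        productions += tree.productions()
    grammar = induce_pcfg(Nonterminal('S'), productions)
    print(grammar)                       # e.g. "Grammar with N productions (start state = S)"
    print(sorted(grammar.productions(lhs=Nonterminal('PP')))[:2])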
@@ -585,14 +585,14 @@ Unit tests for the Probabilistic Chart Parse classes
>>> grammar = toy_pcfg2
>>> print(grammar)
Grammar with 23 productions (start state = S)
-S -> NP VP [1]
+S -> NP VP [1.0]
VP -> V NP [0.59]
VP -> V [0.4]
VP -> VP PP [0.01]
NP -> Det N [0.41]
NP -> Name [0.28]
NP -> NP PP [0.31]
-PP -> P NP [1]
+PP -> P NP [1.0]
V -> 'saw' [0.21]
V -> 'ate' [0.51]
V -> 'ran' [0.28]
......
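To exercise toy_pcfg2 the way the elided doctest does, one of the probabilistic chart parsers can be run over a short sentence; the parser class and sentence here are illustrative choices, not necessarily the doctest's:

    from nltk.grammar import toy_pcfg2
    from nltk.parse import pchart

    parser = pchart.InsideChartParser(toy_pcfg2)
    for tree in parser.parse('Jack saw Bob with my cookie'.split()):
        print(tree)             # a ProbabilisticTree; tree.prob() gives its probability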
@@ -16,7 +16,7 @@ distributional similarity.
from __future__ import print_function, division, unicode_literals
from math import log
-from collections import defaultdict, Counter
+from collections import defaultdict
from functools import reduce
from itertools import islice
import re
@@ -26,7 +26,7 @@ from nltk.probability import ConditionalFreqDist as CFD
from nltk.util import tokenwrap, LazyConcatenation
from nltk.metrics import f_measure, BigramAssocMeasures
from nltk.collocations import BigramCollocationFinder
-from nltk.compat import python_2_unicode_compatible, text_type
+from nltk.compat import python_2_unicode_compatible, text_type, Counter
class ContextIndex(object):
......
NLTK News
=========
+NLTK 3.0.0b2 released : August 2014
+Minor bugfixes and clean-ups. For full details see:
+https://github.com/nltk/nltk/blob/develop/ChangeLog
NLTK Book Updates : July 2014
The NLTK book is being updated for Python 3 and NLTK 3 `here <http://nltk.org/book>`_.
The original Python 2 edition is still available `here <http://nltk.org/book_1ed>`_.
-NLTK 3.0b1 released : July 2014
+NLTK 3.0.0b1 released : July 2014
FrameNet, SentiWordNet, universal tagset, misc efficiency improvements and bugfixes
Several API changes, see https://github.com/nltk/nltk/wiki/Porting-your-code-to-NLTK-3.0
For full details see:
......