Commit 58563e6d by Steven Bird

Merge branch 'nltk-3.0' into develop

parents b7ac4739 99b9ca6e
Version 3.0b1 2014-07-11 Version 3.0b2 2014-08-21
* minor bugfixes and clean-ups
* renamed remaining parse_ methods to read_ or load_, cf issue #656
* added Paice's method of evaluating stemming algorithms
Thanks to the following contributors to 3.0.0b2: Lars Buitinck,
Cristian Capdevila, Lauri Hallila, Ofer Helman, Dmitrijs Milajevs,
lade, Liling Tan, Steven Xu
Version 3.0.0b1 2014-07-11
* Added SentiWordNet corpus and corpus reader * Added SentiWordNet corpus and corpus reader
* Fixed support for 10-column dependency file format * Fixed support for 10-column dependency file format
* Changed Tree initialization to use fromstring * Changed Tree initialization to use fromstring
......
...@@ -399,7 +399,7 @@ class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn): ...@@ -399,7 +399,7 @@ class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn):
def __str__(self): def __str__(self):
return Production.__unicode__(self) + \ return Production.__unicode__(self) + \
(' [1.0]' if (self.prob() == 1.0) else ' [%.g]' % self.prob()) (' [1.0]' if (self.prob() == 1.0) else ' [%g]' % self.prob())
def __eq__(self, other): def __eq__(self, other):
return (type(self) == type(other) and return (type(self) == type(other) and
......
...@@ -44,7 +44,6 @@ import random ...@@ -44,7 +44,6 @@ import random
import warnings import warnings
import array import array
from operator import itemgetter from operator import itemgetter
from itertools import islice
from collections import defaultdict from collections import defaultdict
from functools import reduce from functools import reduce
from nltk import compat from nltk import compat
...@@ -218,9 +217,7 @@ class FreqDist(Counter): ...@@ -218,9 +217,7 @@ class FreqDist(Counter):
Plot samples from the frequency distribution Plot samples from the frequency distribution
displaying the most frequent sample first. If an integer displaying the most frequent sample first. If an integer
parameter is supplied, stop after this many samples have been parameter is supplied, stop after this many samples have been
plotted. If two integer parameters m, n are supplied, plot a plotted. For a cumulative plot, specify cumulative=True.
subset of the samples, beginning with m and stopping at n-1.
For a cumulative plot, specify cumulative=True.
(Requires Matplotlib to be installed.) (Requires Matplotlib to be installed.)
:param title: The title for the graph :param title: The title for the graph
...@@ -236,7 +233,7 @@ class FreqDist(Counter): ...@@ -236,7 +233,7 @@ class FreqDist(Counter):
if len(args) == 0: if len(args) == 0:
args = [len(self)] args = [len(self)]
samples = list(islice(self, *args)) samples = [item for item, _ in self.most_common(*args)]
cumulative = _get_kwarg(kwargs, 'cumulative', False) cumulative = _get_kwarg(kwargs, 'cumulative', False)
if cumulative: if cumulative:
...@@ -264,16 +261,14 @@ class FreqDist(Counter): ...@@ -264,16 +261,14 @@ class FreqDist(Counter):
Tabulate the given samples from the frequency distribution (cumulative), Tabulate the given samples from the frequency distribution (cumulative),
displaying the most frequent sample first. If an integer displaying the most frequent sample first. If an integer
parameter is supplied, stop after this many samples have been parameter is supplied, stop after this many samples have been
plotted. If two integer parameters m, n are supplied, plot a plotted.
subset of the samples, beginning with m and stopping at n-1.
(Requires Matplotlib to be installed.)
:param samples: The samples to plot (default is all samples) :param samples: The samples to plot (default is all samples)
:type samples: list :type samples: list
""" """
if len(args) == 0: if len(args) == 0:
args = [len(self)] args = [len(self)]
samples = list(islice(self, *args)) samples = [item for item, _ in self.most_common(*args)]
cumulative = _get_kwarg(kwargs, 'cumulative', False) cumulative = _get_kwarg(kwargs, 'cumulative', False)
if cumulative: if cumulative:
...@@ -1668,6 +1663,7 @@ class ConditionalFreqDist(defaultdict): ...@@ -1668,6 +1663,7 @@ class ConditionalFreqDist(defaultdict):
the indexing operator: the indexing operator:
>>> cfdist[3] >>> cfdist[3]
FreqDist({'the': 3, 'dog': 2, 'not': 1})
<FreqDist with 3 samples and 6 outcomes> <FreqDist with 3 samples and 6 outcomes>
>>> cfdist[3].freq('the') >>> cfdist[3].freq('the')
0.5 0.5
......
...@@ -108,7 +108,14 @@ Non-Projective Dependency Parsing ...@@ -108,7 +108,14 @@ Non-Projective Dependency Parsing
>>> dp = NonprojectiveDependencyParser(grammar) >>> dp = NonprojectiveDependencyParser(grammar)
>>> for g in dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']): >>> for g in dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']):
... print(g) # doctest: +NORMALIZE_WHITESPACE ... print(g) # doctest: +NORMALIZE_WHITESPACE
[{'address': 0, 'deps': 3, 'rel': 'TOP', 'tag': 'TOP', 'word': None}, [{'address': 0,
'ctag': 'TOP',
'deps': 3,
'feats': None,
'lemma': None,
'rel': 'TOP',
'tag': 'TOP',
'word': None},
{'address': 1, 'deps': [], 'word': 'the'}, {'address': 1, 'deps': [], 'word': 'the'},
{'address': 2, 'deps': [1], 'word': 'man'}, {'address': 2, 'deps': [1], 'word': 'man'},
{'address': 3, 'deps': [2, 7], 'word': 'taught'}, {'address': 3, 'deps': [2, 7], 'word': 'taught'},
......
...@@ -556,7 +556,7 @@ Create a set of PCFG productions. ...@@ -556,7 +556,7 @@ Create a set of PCFG productions.
A A
>>> grammar.productions() >>> grammar.productions()
[A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1]] [A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1.0]]
Induce some productions using parsed Treebank data. Induce some productions using parsed Treebank data.
...@@ -570,7 +570,7 @@ Induce some productions using parsed Treebank data. ...@@ -570,7 +570,7 @@ Induce some productions using parsed Treebank data.
<Grammar with 71 productions> <Grammar with 71 productions>
>>> sorted(grammar.productions(lhs=Nonterminal('PP')))[:2] >>> sorted(grammar.productions(lhs=Nonterminal('PP')))[:2]
[PP -> IN NP [1]] [PP -> IN NP [1.0]]
>>> sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2] >>> sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2]
[NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]] [NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]]
>>> sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2] >>> sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2]
...@@ -585,14 +585,14 @@ Unit tests for the Probabilistic Chart Parse classes ...@@ -585,14 +585,14 @@ Unit tests for the Probabilistic Chart Parse classes
>>> grammar = toy_pcfg2 >>> grammar = toy_pcfg2
>>> print(grammar) >>> print(grammar)
Grammar with 23 productions (start state = S) Grammar with 23 productions (start state = S)
S -> NP VP [1] S -> NP VP [1.0]
VP -> V NP [0.59] VP -> V NP [0.59]
VP -> V [0.4] VP -> V [0.4]
VP -> VP PP [0.01] VP -> VP PP [0.01]
NP -> Det N [0.41] NP -> Det N [0.41]
NP -> Name [0.28] NP -> Name [0.28]
NP -> NP PP [0.31] NP -> NP PP [0.31]
PP -> P NP [1] PP -> P NP [1.0]
V -> 'saw' [0.21] V -> 'saw' [0.21]
V -> 'ate' [0.51] V -> 'ate' [0.51]
V -> 'ran' [0.28] V -> 'ran' [0.28]
......
...@@ -16,7 +16,7 @@ distributional similarity. ...@@ -16,7 +16,7 @@ distributional similarity.
from __future__ import print_function, division, unicode_literals from __future__ import print_function, division, unicode_literals
from math import log from math import log
from collections import defaultdict, Counter from collections import defaultdict
from functools import reduce from functools import reduce
from itertools import islice from itertools import islice
import re import re
...@@ -26,7 +26,7 @@ from nltk.probability import ConditionalFreqDist as CFD ...@@ -26,7 +26,7 @@ from nltk.probability import ConditionalFreqDist as CFD
from nltk.util import tokenwrap, LazyConcatenation from nltk.util import tokenwrap, LazyConcatenation
from nltk.metrics import f_measure, BigramAssocMeasures from nltk.metrics import f_measure, BigramAssocMeasures
from nltk.collocations import BigramCollocationFinder from nltk.collocations import BigramCollocationFinder
from nltk.compat import python_2_unicode_compatible, text_type from nltk.compat import python_2_unicode_compatible, text_type, Counter
class ContextIndex(object): class ContextIndex(object):
......
NLTK News NLTK News
========= =========
NLTK 3.0.0b2 released : August 2014
Minor bugfixes and clean-ups. For full details see:
https://github.com/nltk/nltk/blob/develop/ChangeLog
NLTK Book Updates : July 2014 NLTK Book Updates : July 2014
The NLTK book is being updated for Python 3 and NLTK 3 `here <http://nltk.org/book>`_. The NLTK book is being updated for Python 3 and NLTK 3 `here <http://nltk.org/book>`_.
The original Python 2 edition is still available `here <http://nltk.org/book_1ed>`_. The original Python 2 edition is still available `here <http://nltk.org/book_1ed>`_.
NLTK 3.0b1 released : July 2014 NLTK 3.0.0b1 released : July 2014
FrameNet, SentiWordNet, universal tagset, misc efficiency improvements and bugfixes FrameNet, SentiWordNet, universal tagset, misc efficiency improvements and bugfixes
Several API changes, see https://github.com/nltk/nltk/wiki/Porting-your-code-to-NLTK-3.0 Several API changes, see https://github.com/nltk/nltk/wiki/Porting-your-code-to-NLTK-3.0
For full details see: For full details see:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment