fixed doctests in align module

e9e0f7dd · Steven Bird · a1e4de67 · e9e0f7dd · e9e0f7dd
Commit e9e0f7dd authored Jan 12, 2012 by Steven Bird
Hide whitespace changes
Inline Side-by-side

Showing with 30 additions and 42 deletions

nltk/align.py
+15 -19

nltk/test/align.doctest
+15 -23

No files found.
--- a/nltk/align.py
+++ b/nltk/align.py
@@ -18,22 +18,19 @@ class AlignedSent(object):
    Return an aligned sentence object, which encapsulates two sentences along with
    an ``Alignment`` between them.

-    .. doctest::
-        :options: +SKIP
-
        >>> from nltk.align import AlignedSent
        >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
-        ...     ['the', 'house', 'is', 'small'], '1-3 2-4 3-2 4-1')
+        ...     ['the', 'house', 'is', 'small'], '0-2 1-3 2-1 3-0')
        >>> algnsent.words
        ['klein', 'ist', 'das', 'Haus']
        >>> algnsent.mots
        ['the', 'house', 'is', 'small']
        >>> algnsent.alignment
-        Alignment([(1, 3), (2, 4), (3, 2), (4, 1)])
-        >>> algnsent.precision('1-3 2-4 3-2 4-4')
+        Alignment([(0, 2), (1, 3), (2, 1), (3, 0)])
+        >>> algnsent.precision('0-2 1-3 2-1 3-3')
        0.75
        >>> from nltk.corpus import comtrans
-        >>> comtrans.aligned_sents()[54]
+        >>> print comtrans.aligned_sents()[54]
        <AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
        >>> print comtrans.aligned_sents()[54].alignment
        0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13
@@ -84,9 +81,9 @@ class AlignedSent(object):
        :raise IndexError: if alignment is out of sentence boundary
        :rtype: boolean
        """
-        if not all([0 <= p[0] <= len(self._words) for p in a]):
+        if not all([0 <= p[0] < len(self._words) for p in a]):
            raise IndexError("Alignment is outside boundary of words")
-        if not all([0 <= p[1] <= len(self._mots) for p in a]):
+        if not all([0 <= p[1] < len(self._mots) for p in a]):
            raise IndexError("Alignment is outside boundary of mots")
        return True

@@ -216,19 +213,19 @@ class Alignment(frozenset):
    additional data, such as a boolean to indicate sure vs possible alignments).

        >>> from nltk.align import Alignment
-        >>> a = Alignment([(1, 1), (1, 2), (2, 3), (3, 3)])
+        >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)])
        >>> a.invert()
-        Alignment([(1, 1), (2, 1), (3, 2), (3, 3)])
+        Alignment([(0, 0), (1, 0), (2, 1), (2, 2)])
        >>> print a.invert()
-        1-1 2-1 3-2 3-3
-        >>> a[1]
-        [(1, 2), (1, 1)]
+        0-0 1-0 2-1 2-2
+        >>> a[0]
+        [(0, 1), (0, 0)]
        >>> a.invert()[2]
-        [(3, 2), (3, 3)]
-        >>> b = Alignment([(1, 1), (1, 2)])
+        [(2, 1), (2, 2)]
+        >>> b = Alignment([(0, 0), (0, 1)])
        >>> b.issubset(a)
        True
-        >>> c = Alignment('1-1 1-2')
+        >>> c = Alignment('0-0 0-1')
        >>> b == c
        True
    """
@@ -395,7 +392,6 @@ class IBMModel1(object):
                    num_converged, num_probs, 100.0*num_converged/num_probs))

        self.probabilities = dict(t)
-        return iteration_count

    def aligned(self):
        """
@@ -403,7 +399,7 @@ class IBMModel1(object):
        IBM-Model 1.
        """

-        if self.probablities is None:
+        if self.probabilities is None:
            raise ValueError("No probabilities calculated")

        aligned = []

--- a/nltk/test/align.doctest
+++ b/nltk/test/align.doctest
@@ -42,34 +42,26 @@ but they are easily inverted:
    Alignment([(0, 0), (1, 1), (2, 2), (3, 3)]))


-We can also set new alignments, but these need to be in the correct range of
+We can create new alignments, but these need to be in the correct range of
 the corresponding sentences:

-    >>> from nltk import align
-    >>> als.alignment = align.Alignment([(0, 0), (1, 4), (2, 1), (3, 3)])
+    >>> from nltk.align import Alignment, AlignedSent
+    >>> als = AlignedSent(['Reprise', 'de', 'la', 'session'],
+    ...                   ['Resumption', 'of', 'the', 'session'],
+    ...                   Alignment([(0, 0), (1, 4), (2, 1), (3, 3)]))
    Traceback (most recent call last):
        ...
    IndexError: Alignment is outside boundary of mots
-    >>> als.alignment = align.Alignment([(-1, 0), (1, 2), (2, 1), (3, 3)])
-    Traceback (most recent call last):
-        ...
-    IndexError: Alignment is outside boundary of words
-    >>> als.alignment
-    Alignment([(0, 0), (1, 1), (2, 2), (3, 3)])
-    >>> als.alignment = align.Alignment([(1, 3), (3, 2), (0, 1), (2, 0)])
-    >>> als.alignment
-    Alignment([(0, 1), (1, 3), (2, 0), (3, 2)])
+
+.. in Python 2.6 version, we will support:
+   als.alignment = Alignment([(0, 0), (1, 4), (2, 1), (3, 3)])


 You can create alignments from any sequence of tuples, so long as the first two
 elements of each tuple are the alignment indices:

-    >>> als.alignment = [(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))]
-    >>> als.alignment
+    >>> Alignment([(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))])
    Alignment([(0, 0), (1, 1), (2, 2, 'boat'), (3, 3, False, (1, 2))])
-    >>> als.alignment = ((0, 0), (1, 1), (2, 2), (3, 3))
-    >>> als.alignment
-    Alignment([(0, 0), (1, 1), (2, 2), (3, 3)])


 Alignment Algorithms
@@ -80,11 +72,11 @@ EM for IBM Model 1

 Here is an example from Koehn, 2010:

-    >>> corpus = [align.AlignedSent(['the', 'house'], ['das', 'Haus']),
-    ...         align.AlignedSent(['the', 'book'], ['das', 'Buch']),
-    ...         align.AlignedSent(['a', 'book'], ['ein', 'Buch'])]
-    >>> em_ibm1 = align.EMIBMModel1(corpus, 1e-3)
-    >>> iterations = em_ibm1.train()
+    >>> from nltk.align import IBMModel1
+    >>> corpus = [AlignedSent(['the', 'house'], ['das', 'Haus']),
+    ...           AlignedSent(['the', 'book'], ['das', 'Buch']),
+    ...           AlignedSent(['a', 'book'], ['ein', 'Buch'])]
+    >>> em_ibm1 = IBMModel1(corpus, 1e-3)
    >>> print round(em_ibm1.probabilities['the', 'das'], 1)
    1.0
    >>> print round(em_ibm1.probabilities['book', 'das'], 1)
@@ -132,7 +124,7 @@ This then gives us a very clean form for defining our evaluation metrics.

 Consider the following aligned sentence for evaluation:

-    >>> my_als = align.AlignedSent(['Resumption', 'of', 'the', 'session'],
+    >>> my_als = AlignedSent(['Resumption', 'of', 'the', 'session'],
    ...     ['Reprise', 'de', 'la', 'session'],
    ...     [(0, 0), (3, 3), (1, 2), (1, 1), (1, 3)])