Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
e9e0f7dd
Commit
e9e0f7dd
authored
Jan 12, 2012
by
Steven Bird
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed doctests in align module
parent
a1e4de67
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
30 additions
and
42 deletions
+30
-42
nltk/align.py
+15
-19
nltk/test/align.doctest
+15
-23
No files found.
nltk/align.py
View file @
e9e0f7dd
...
...
@@ -18,22 +18,19 @@ class AlignedSent(object):
Return an aligned sentence object, which encapsulates two sentences along with
an ``Alignment`` between them.
.. doctest::
:options: +SKIP
>>> from nltk.align import AlignedSent
>>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
... ['the', 'house', 'is', 'small'], '
1-3 2-4 3-2 4-1
')
... ['the', 'house', 'is', 'small'], '
0-2 1-3 2-1 3-0
')
>>> algnsent.words
['klein', 'ist', 'das', 'Haus']
>>> algnsent.mots
['the', 'house', 'is', 'small']
>>> algnsent.alignment
Alignment([(
1, 3), (2, 4), (3, 2), (4, 1
)])
>>> algnsent.precision('
1-3 2-4 3-2 4-4
')
Alignment([(
0, 2), (1, 3), (2, 1), (3, 0
)])
>>> algnsent.precision('
0-2 1-3 2-1 3-3
')
0.75
>>> from nltk.corpus import comtrans
>>> comtrans.aligned_sents()[54]
>>>
print
comtrans.aligned_sents()[54]
<AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
>>> print comtrans.aligned_sents()[54].alignment
0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13
...
...
@@ -84,9 +81,9 @@ class AlignedSent(object):
:raise IndexError: if alignment is out of sentence boundary
:rtype: boolean
"""
if
not
all
([
0
<=
p
[
0
]
<
=
len
(
self
.
_words
)
for
p
in
a
]):
if
not
all
([
0
<=
p
[
0
]
<
len
(
self
.
_words
)
for
p
in
a
]):
raise
IndexError
(
"Alignment is outside boundary of words"
)
if
not
all
([
0
<=
p
[
1
]
<
=
len
(
self
.
_mots
)
for
p
in
a
]):
if
not
all
([
0
<=
p
[
1
]
<
len
(
self
.
_mots
)
for
p
in
a
]):
raise
IndexError
(
"Alignment is outside boundary of mots"
)
return
True
...
...
@@ -216,19 +213,19 @@ class Alignment(frozenset):
additional data, such as a boolean to indicate sure vs possible alignments).
>>> from nltk.align import Alignment
>>> a = Alignment([(
1, 1), (1, 2), (2, 3), (3, 3
)])
>>> a = Alignment([(
0, 0), (0, 1), (1, 2), (2, 2
)])
>>> a.invert()
Alignment([(
1, 1), (2, 1), (3, 2), (3, 3
)])
Alignment([(
0, 0), (1, 0), (2, 1), (2, 2
)])
>>> print a.invert()
1-1 2-1 3-2 3-3
>>> a[
1
]
[(
1, 2), (1, 1
)]
0-0 1-0 2-1 2-2
>>> a[
0
]
[(
0, 1), (0, 0
)]
>>> a.invert()[3]
[(
3, 2), (3, 3
)]
>>> b = Alignment([(
1, 1), (1, 2
)])
[(
2, 1), (2, 2
)]
>>> b = Alignment([(
0, 0), (0, 1
)])
>>> b.issubset(a)
True
>>> c = Alignment('
1-1 1-2
')
>>> c = Alignment('
0-0 0-1
')
>>> b == c
True
"""
...
...
@@ -395,7 +392,6 @@ class IBMModel1(object):
num_converged
,
num_probs
,
100.0
*
num_converged
/
num_probs
))
self
.
probabilities
=
dict
(
t
)
return
iteration_count
def
aligned
(
self
):
"""
...
...
@@ -403,7 +399,7 @@ class IBMModel1(object):
IBM-Model 1.
"""
if
self
.
probablities
is
None
:
if
self
.
probab
i
lities
is
None
:
raise
ValueError
(
"No probabilities calculated"
)
aligned
=
[]
...
...
nltk/test/align.doctest
View file @
e9e0f7dd
...
...
@@ -42,34 +42,26 @@ but they are easily inverted:
Alignment([(0, 0), (1, 1), (2, 2), (3, 3)]))
We can
also set
new alignments, but these need to be in the correct range of
We can
create
new alignments, but these need to be in the correct range of
the corresponding sentences:
>>> from nltk import align
>>> als.alignment = align.Alignment([(0, 0), (1, 4), (2, 1), (3, 3)])
>>> from nltk.align import Alignment, AlignedSent
>>> als = AlignedSent(['Reprise', 'de', 'la', 'session'],
... ['Resumption', 'of', 'the', 'session'],
... Alignment([(0, 0), (1, 4), (2, 1), (3, 3)]))
Traceback (most recent call last):
...
IndexError: Alignment is outside boundary of mots
>>> als.alignment = align.Alignment([(-1, 0), (1, 2), (2, 1), (3, 3)])
Traceback (most recent call last):
...
IndexError: Alignment is outside boundary of words
>>> als.alignment
Alignment([(0, 0), (1, 1), (2, 2), (3, 3)])
>>> als.alignment = align.Alignment([(1, 3), (3, 2), (0, 1), (2, 0)])
>>> als.alignment
Alignment([(0, 1), (1, 3), (2, 0), (3, 2)])
.. in Python 2.6 version, we will support:
als.alignment = Alignment([(0, 0), (1, 4), (2, 1), (3, 3)])
You can set alignments with any sequence of tuples, so long as the first two
indexes of the tuple are the alignment indices:
>>> als.alignment = [(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))]
>>> als.alignment
>>> Alignment([(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))])
Alignment([(0, 0), (1, 1), (2, 2, 'boat'), (3, 3, False, (1, 2))])
>>> als.alignment = ((0, 0), (1, 1), (2, 2), (3, 3))
>>> als.alignment
Alignment([(0, 0), (1, 1), (2, 2), (3, 3)])
Alignment Algorithms
...
...
@@ -80,11 +72,11 @@ EM for IBM Model 1
Here is an example from Koehn, 2010:
>>>
corpus = [align.AlignedSent(['the', 'house'], ['das', 'Haus']),
... align.AlignedSent(['the', 'book'], ['das', 'Buch
']),
...
align.AlignedSent(['a', 'book'], ['ein', 'Buch'])]
>>> em_ibm1 = align.EMIBMModel1(corpus, 1e-3)
>>>
iterations = em_ibm1.train(
)
>>>
from nltk.align import IBMModel1
>>> corpus = [AlignedSent(['the', 'house'], ['das', 'Haus
']),
...
AlignedSent(['the', 'book'], ['das', 'Buch']),
... AlignedSent(['a', 'book'], ['ein', 'Buch'])]
>>>
em_ibm1 = IBMModel1(corpus, 1e-3
)
>>> print round(em_ibm1.probabilities['the', 'das'], 1)
1.0
>>> print round(em_ibm1.probabilities['book', 'das'], 1)
...
...
@@ -132,7 +124,7 @@ This then gives us a very clean form for defining our evaluation metrics.
Consider the following aligned sentence for evaluation:
>>> my_als =
align.
AlignedSent(['Resumption', 'of', 'the', 'session'],
>>> my_als = AlignedSent(['Resumption', 'of', 'the', 'session'],
... ['Reprise', 'de', 'la', 'session'],
... [(0, 0), (3, 3), (1, 2), (1, 1), (1, 3)])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment