Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
afcab733
Commit
afcab733
authored
Sep 07, 2012
by
Steven Bird
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #290 from pflaquerre/malt-thread-safety
Make MaltParser safe to use in parallel
parents
c09ae1a1
86b3909c
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
68 additions
and
56 deletions
+68
-56
nltk/parse/malt.py
+68
-56
No files found.
nltk/parse/malt.py
View file @
afcab733
...
@@ -10,7 +10,9 @@ import os
...
@@ -10,7 +10,9 @@ import os
import
tempfile
import
tempfile
import
glob
import
glob
from
operator
import
add
from
operator
import
add
import
subprocess
from
nltk.data
import
ZipFilePathPointer
from
nltk.tag
import
RegexpTagger
from
nltk.tag
import
RegexpTagger
from
nltk.tokenize
import
word_tokenize
from
nltk.tokenize
import
word_tokenize
from
nltk.internals
import
find_binary
from
nltk.internals
import
find_binary
...
@@ -24,8 +26,9 @@ class MaltParser(ParserI):
...
@@ -24,8 +26,9 @@ class MaltParser(ParserI):
"""
"""
An interface for parsing with the Malt Parser.
An interface for parsing with the Malt Parser.
:param mco: The full path to a pre-trained model. If
:param mco: The name of the pre-trained model. If provided, training
provided, then training will not be needed.
will not be required, and MaltParser will use the model file in
${working_dir}/${mco}.mco.
:type mco: str
:type mco: str
"""
"""
self
.
config_malt
()
self
.
config_malt
()
...
@@ -123,31 +126,35 @@ class MaltParser(ParserI):
...
@@ -123,31 +126,35 @@ class MaltParser(ParserI):
if
not
self
.
_trained
:
if
not
self
.
_trained
:
raise
Exception
(
"Parser has not been trained. Call train() first."
)
raise
Exception
(
"Parser has not been trained. Call train() first."
)
input_file
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
'malt_input.conll'
)
input_file
=
tempfile
.
NamedTemporaryFile
(
prefix
=
'malt_input.conll'
,
output_file
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
'malt_output.conll'
)
dir
=
self
.
working_dir
,
delete
=
False
)
output_file
=
tempfile
.
NamedTemporaryFile
(
prefix
=
'malt_output.conll'
,
dir
=
self
.
working_dir
,
delete
=
False
)
execute_string
=
'java -jar
%
s -w
%
s -c
%
s -i
%
s -o
%
s -m parse'
if
not
verbose
:
execute_string
+=
' > '
+
os
.
path
.
join
(
tempfile
.
gettempdir
(),
"malt.out"
)
f
=
None
try
:
try
:
f
=
open
(
input_file
,
'w'
)
for
(
i
,
(
word
,
tag
))
in
enumerate
(
sentence
,
start
=
1
):
input_file
.
write
(
'
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\n
'
%
for
(
i
,
(
word
,
tag
))
in
enumerate
(
sentence
):
(
i
,
word
,
'_'
,
tag
,
tag
,
'_'
,
'0'
,
'a'
,
'_'
,
'_'
))
f
.
write
(
'
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\n
'
%
input_file
.
write
(
'
\n
'
)
(
i
+
1
,
word
,
'_'
,
tag
,
tag
,
'_'
,
'0'
,
'a'
,
'_'
,
'_'
))
input_file
.
close
()
f
.
write
(
'
\n
'
)
f
.
close
()
cmd
=
[
'java'
,
'-jar'
,
self
.
_malt_bin
,
'-w'
,
self
.
working_dir
,
'-c'
,
self
.
mco
,
'-i'
,
input_file
.
name
,
cmd
=
[
'java'
,
'-jar
%
s'
%
self
.
_malt_bin
,
'-w
%
s'
%
tempfile
.
gettempdir
(),
'-o'
,
output_file
.
name
,
'-m'
,
'parse'
]
'-c
%
s'
%
self
.
mco
,
'-i
%
s'
%
input_file
,
'-o
%
s'
%
output_file
,
'-m parse'
]
ret
=
self
.
_execute
(
cmd
,
verbose
)
self
.
_execute
(
cmd
,
'parse'
,
verbose
)
if
ret
!=
0
:
raise
Exception
(
"MaltParser parsing (
%
s) failed with exit "
return
DependencyGraph
.
load
(
output_file
)
"code
%
d"
%
(
' '
.
join
(
cmd
),
ret
))
return
DependencyGraph
.
load
(
output_file
.
name
)
finally
:
finally
:
if
f
:
f
.
close
()
input_file
.
close
()
os
.
remove
(
input_file
.
name
)
output_file
.
close
()
os
.
remove
(
output_file
.
name
)
def
train
(
self
,
depgraphs
,
verbose
=
False
):
def
train
(
self
,
depgraphs
,
verbose
=
False
):
"""
"""
...
@@ -155,16 +162,16 @@ class MaltParser(ParserI):
...
@@ -155,16 +162,16 @@ class MaltParser(ParserI):
:param depgraphs: list of ``DependencyGraph`` objects for training input data
:param depgraphs: list of ``DependencyGraph`` objects for training input data
"""
"""
input_file
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
'malt_train.conll'
)
input_file
=
tempfile
.
NamedTemporaryFile
(
prefix
=
'malt_train.conll'
,
dir
=
self
.
working_dir
,
f
=
None
delete
=
False
)
try
:
try
:
f
=
open
(
input_file
,
'w'
)
input_file
.
write
(
'
\n
'
.
join
([
dg
.
to_conll
(
10
)
for
dg
in
depgraphs
]))
f
.
write
(
'
\n
'
.
join
([
dg
.
to_conll
(
10
)
for
dg
in
depgraphs
]))
input_file
.
close
()
self
.
train_from_file
(
input_file
.
name
,
verbose
=
verbose
)
finally
:
finally
:
if
f
:
f
.
close
()
input_file
.
close
()
os
.
remove
(
input_file
.
name
)
self
.
train_from_file
(
input_file
,
verbose
=
verbose
)
def
train_from_file
(
self
,
conll_file
,
verbose
=
False
):
def
train_from_file
(
self
,
conll_file
,
verbose
=
False
):
"""
"""
...
@@ -175,33 +182,38 @@ class MaltParser(ParserI):
...
@@ -175,33 +182,38 @@ class MaltParser(ParserI):
if
not
self
.
_malt_bin
:
if
not
self
.
_malt_bin
:
raise
Exception
(
"MaltParser location is not configured. Call config_malt() first."
)
raise
Exception
(
"MaltParser location is not configured. Call config_malt() first."
)
# If conll_file is a ZipFilePathPointer, then we need to do some extra massaging
# If conll_file is a ZipFilePathPointer, then we need to do some extra
f
=
None
# massaging
if
hasattr
(
conll_file
,
'zipfile'
):
if
isinstance
(
conll_file
,
ZipFilePathPointer
):
zip_conll_file
=
conll_file
input_file
=
tempfile
.
NamedTemporaryFile
(
prefix
=
'malt_train.conll'
,
conll_file
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
'malt_train.conll'
)
dir
=
self
.
working_dir
,
conll_str
=
zip_conll_file
.
open
()
.
read
()
delete
=
False
)
f
=
open
(
conll_file
,
'w'
)
try
:
f
.
write
(
conll_str
)
conll_str
=
conll_file
.
open
()
.
read
()
f
.
close
()
conll_file
.
close
()
input_file
.
write
(
conll_str
)
cmd
=
[
'java'
,
'-jar
%
s'
%
self
.
_malt_bin
,
'-w
%
s'
%
tempfile
.
gettempdir
(),
input_file
.
close
()
'-c
%
s'
%
self
.
mco
,
'-i
%
s'
%
conll_file
,
'-m learn'
]
return
self
.
train_from_file
(
input_file
.
name
,
verbose
=
verbose
)
finally
:
# p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
input_file
.
close
()
# stderr=subprocess.STDOUT,
os
.
remove
(
input_file
.
name
)
# stdin=subprocess.PIPE)
# (stdout, stderr) = p.communicate()
cmd
=
[
'java'
,
'-jar'
,
self
.
_malt_bin
,
'-w'
,
self
.
working_dir
,
'-c'
,
self
.
mco
,
'-i'
,
conll_file
,
'-m'
,
'learn'
]
self
.
_execute
(
cmd
,
'train'
,
verbose
)
ret
=
self
.
_execute
(
cmd
,
verbose
)
if
ret
!=
0
:
raise
Exception
(
"MaltParser training (
%
s) "
"failed with exit code
%
d"
%
(
' '
.
join
(
cmd
),
ret
))
self
.
_trained
=
True
self
.
_trained
=
True
def
_execute
(
self
,
cmd
,
type
,
verbose
=
False
):
@staticmethod
if
not
verbose
:
def
_execute
(
cmd
,
verbose
=
False
)
:
temp_dir
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
''
)
output
=
None
if
verbose
else
subprocess
.
PIPE
cmd
.
append
(
' >
%
smalt_
%
s.out 2>
%
smalt_
%
s.err'
%
((
temp_dir
,
type
)
*
2
)
)
p
=
subprocess
.
Popen
(
cmd
,
stdout
=
output
,
stderr
=
output
)
malt_exit
=
os
.
system
(
' '
.
join
(
cmd
)
)
return
p
.
wait
(
)
def
demo
():
def
demo
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment