Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
afcab733
Commit
afcab733
authored
Sep 07, 2012
by
Steven Bird
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #290 from pflaquerre/malt-thread-safety
Make MaltParser safe to use in parallel
parents
c09ae1a1
86b3909c
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
68 additions
and
56 deletions
+68
-56
nltk/parse/malt.py
+68
-56
No files found.
nltk/parse/malt.py
View file @
afcab733
...
...
@@ -10,7 +10,9 @@ import os
import
tempfile
import
glob
from
operator
import
add
import
subprocess
from
nltk.data
import
ZipFilePathPointer
from
nltk.tag
import
RegexpTagger
from
nltk.tokenize
import
word_tokenize
from
nltk.internals
import
find_binary
...
...
@@ -24,8 +26,9 @@ class MaltParser(ParserI):
"""
An interface for parsing with the Malt Parser.
:param mco: The full path to a pre-trained model. If
provided, then training will not be needed.
:param mco: The name of the pre-trained model. If provided, training
will not be required, and MaltParser will use the model file in
${working_dir}/${mco}.mco.
:type mco: str
"""
self
.
config_malt
()
...
...
@@ -123,31 +126,35 @@ class MaltParser(ParserI):
if
not
self
.
_trained
:
raise
Exception
(
"Parser has not been trained. Call train() first."
)
input_file
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
'malt_input.conll'
)
output_file
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
'malt_output.conll'
)
input_file
=
tempfile
.
NamedTemporaryFile
(
prefix
=
'malt_input.conll'
,
dir
=
self
.
working_dir
,
delete
=
False
)
output_file
=
tempfile
.
NamedTemporaryFile
(
prefix
=
'malt_output.conll'
,
dir
=
self
.
working_dir
,
delete
=
False
)
execute_string
=
'java -jar
%
s -w
%
s -c
%
s -i
%
s -o
%
s -m parse'
if
not
verbose
:
execute_string
+=
' > '
+
os
.
path
.
join
(
tempfile
.
gettempdir
(),
"malt.out"
)
f
=
None
try
:
f
=
open
(
input_file
,
'w'
)
for
(
i
,
(
word
,
tag
))
in
enumerate
(
sentence
):
f
.
write
(
'
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\n
'
%
(
i
+
1
,
word
,
'_'
,
tag
,
tag
,
'_'
,
'0'
,
'a'
,
'_'
,
'_'
))
f
.
write
(
'
\n
'
)
f
.
close
()
cmd
=
[
'java'
,
'-jar
%
s'
%
self
.
_malt_bin
,
'-w
%
s'
%
tempfile
.
gettempdir
(),
'-c
%
s'
%
self
.
mco
,
'-i
%
s'
%
input_file
,
'-o
%
s'
%
output_file
,
'-m parse'
]
self
.
_execute
(
cmd
,
'parse'
,
verbose
)
return
DependencyGraph
.
load
(
output_file
)
for
(
i
,
(
word
,
tag
))
in
enumerate
(
sentence
,
start
=
1
):
input_file
.
write
(
'
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\t
%
s
\n
'
%
(
i
,
word
,
'_'
,
tag
,
tag
,
'_'
,
'0'
,
'a'
,
'_'
,
'_'
))
input_file
.
write
(
'
\n
'
)
input_file
.
close
()
cmd
=
[
'java'
,
'-jar'
,
self
.
_malt_bin
,
'-w'
,
self
.
working_dir
,
'-c'
,
self
.
mco
,
'-i'
,
input_file
.
name
,
'-o'
,
output_file
.
name
,
'-m'
,
'parse'
]
ret
=
self
.
_execute
(
cmd
,
verbose
)
if
ret
!=
0
:
raise
Exception
(
"MaltParser parsing (
%
s) failed with exit "
"code
%
d"
%
(
' '
.
join
(
cmd
),
ret
))
return
DependencyGraph
.
load
(
output_file
.
name
)
finally
:
if
f
:
f
.
close
()
input_file
.
close
()
os
.
remove
(
input_file
.
name
)
output_file
.
close
()
os
.
remove
(
output_file
.
name
)
def
train
(
self
,
depgraphs
,
verbose
=
False
):
"""
...
...
@@ -155,16 +162,16 @@ class MaltParser(ParserI):
:param depgraphs: list of ``DependencyGraph`` objects for training input data
"""
input_file
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
'malt_train.conll'
)
f
=
None
input_file
=
tempfile
.
NamedTemporaryFile
(
prefix
=
'malt_train.conll'
,
dir
=
self
.
working_dir
,
delete
=
False
)
try
:
f
=
open
(
input_file
,
'w'
)
f
.
write
(
'
\n
'
.
join
([
dg
.
to_conll
(
10
)
for
dg
in
depgraphs
]))
input_file
.
write
(
'
\n
'
.
join
([
dg
.
to_conll
(
10
)
for
dg
in
depgraphs
]))
input_file
.
close
()
self
.
train_from_file
(
input_file
.
name
,
verbose
=
verbose
)
finally
:
if
f
:
f
.
close
()
self
.
train_from_file
(
input_file
,
verbose
=
verbose
)
input_file
.
close
()
os
.
remove
(
input_file
.
name
)
def
train_from_file
(
self
,
conll_file
,
verbose
=
False
):
"""
...
...
@@ -175,33 +182,38 @@ class MaltParser(ParserI):
if
not
self
.
_malt_bin
:
raise
Exception
(
"MaltParser location is not configured. Call config_malt() first."
)
# If conll_file is a ZipFilePathPointer, then we need to do some extra massaging
f
=
None
if
hasattr
(
conll_file
,
'zipfile'
):
zip_conll_file
=
conll_file
conll_file
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
'malt_train.conll'
)
conll_str
=
zip_conll_file
.
open
()
.
read
()
f
=
open
(
conll_file
,
'w'
)
f
.
write
(
conll_str
)
f
.
close
()
cmd
=
[
'java'
,
'-jar
%
s'
%
self
.
_malt_bin
,
'-w
%
s'
%
tempfile
.
gettempdir
(),
'-c
%
s'
%
self
.
mco
,
'-i
%
s'
%
conll_file
,
'-m learn'
]
# p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
# stderr=subprocess.STDOUT,
# stdin=subprocess.PIPE)
# (stdout, stderr) = p.communicate()
self
.
_execute
(
cmd
,
'train'
,
verbose
)
# If conll_file is a ZipFilePathPointer, then we need to do some extra
# massaging
if
isinstance
(
conll_file
,
ZipFilePathPointer
):
input_file
=
tempfile
.
NamedTemporaryFile
(
prefix
=
'malt_train.conll'
,
dir
=
self
.
working_dir
,
delete
=
False
)
try
:
conll_str
=
conll_file
.
open
()
.
read
()
conll_file
.
close
()
input_file
.
write
(
conll_str
)
input_file
.
close
()
return
self
.
train_from_file
(
input_file
.
name
,
verbose
=
verbose
)
finally
:
input_file
.
close
()
os
.
remove
(
input_file
.
name
)
cmd
=
[
'java'
,
'-jar'
,
self
.
_malt_bin
,
'-w'
,
self
.
working_dir
,
'-c'
,
self
.
mco
,
'-i'
,
conll_file
,
'-m'
,
'learn'
]
ret
=
self
.
_execute
(
cmd
,
verbose
)
if
ret
!=
0
:
raise
Exception
(
"MaltParser training (
%
s) "
"failed with exit code
%
d"
%
(
' '
.
join
(
cmd
),
ret
))
self
.
_trained
=
True
def
_execute
(
self
,
cmd
,
type
,
verbose
=
False
):
if
not
verbose
:
temp_dir
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
''
)
cmd
.
append
(
' >
%
smalt_
%
s.out 2>
%
smalt_
%
s.err'
%
((
temp_dir
,
type
)
*
2
)
)
malt_exit
=
os
.
system
(
' '
.
join
(
cmd
)
)
@staticmethod
def
_execute
(
cmd
,
verbose
=
False
)
:
output
=
None
if
verbose
else
subprocess
.
PIPE
p
=
subprocess
.
Popen
(
cmd
,
stdout
=
output
,
stderr
=
output
)
return
p
.
wait
(
)
def
demo
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment