Updated boxer.py to use the most up-to-date version of Boxer (available from the C&C SVN)

Updated boxer.py to use the most up-to-date version of Boxer (available from the C&C SVN), based on the recommendation from Johan Bos, since the most recent release version is very old. (svn/trunk@8759)

Updated boxer.py to use the most up-to-date version of Boxer (available from the C&C SVN)
Updated boxer.py to use the most up-to-date version of Boxer (available from the C&C SVN), based on the recommendation from Johan Bos, since the most recent release version is very old. (svn/trunk@8759)
da47a91c · Dan Garrette · a756baa2 · da47a91c
Commit da47a91c authored Apr 03, 2011 by Dan Garrette
Hide whitespace changes
Inline Side-by-side

Showing with 31 additions and 23 deletions

nltk/sem/boxer.py
+31 -23

No files found.
--- a/nltk/sem/boxer.py
+++ b/nltk/sem/boxer.py
@@ -38,7 +38,7 @@ class Boxer(object):
    semantic parser that produces Discourse Representation Structures (DRSs).
    """

-    def __init__(self, boxer_drs_interpreter=None, elimeq=False):
+    def __init__(self, boxer_drs_interpreter=None, elimeq=False, bin_dir=None, verbose=False):
        """
        @param boxer_drs_interpreter: A class that converts from the 
        C{AbstractBoxerDrs} object hierarchy to a different object.  The 
@@ -53,10 +53,13 @@ class Boxer(object):
        self._boxer_drs_interpreter = boxer_drs_interpreter
        
        self._elimeq = elimeq
-    
-        self._boxer_bin = None
-        self._candc_bin = None
-        self._candc_models_path = None
+
+        self.set_bin_dir(bin_dir, verbose)
+        
+    def set_bin_dir(self, bin_dir, verbose=False):
+        self._candc_bin = self._find_binary('candc', bin_dir, verbose)
+        self._candc_models_path = os.path.normpath(os.path.join(self._candc_bin[:-5], '../models'))
+        self._boxer_bin = self._find_binary('boxer', bin_dir, verbose)            

    def interpret(self, input, discourse_id=None, question=False, verbose=False):
        """
@@ -68,7 +71,10 @@ class Boxer(object):
        @return: C{drt.AbstractDrs}
        """
        discourse_ids = [discourse_id] if discourse_id is not None else None 
-        return self.batch_interpret_multisentence([[input]], discourse_ids, question, verbose)[0]
+        d, = self.batch_interpret_multisentence([[input]], discourse_ids, question, verbose)
+        if not d:
+            raise Exception('Unable to interpret: "%s"' % input)
+        return d
    
    def interpret_multisentence(self, input, discourse_id=None, question=False, verbose=False):
        """
@@ -80,7 +86,10 @@ class Boxer(object):
        @return: C{drt.AbstractDrs}
        """
        discourse_ids = [discourse_id] if discourse_id is not None else None
-        return self.batch_interpret_multisentence([input], discourse_ids, question, verbose)[0]
+        d, = self.batch_interpret_multisentence([input], discourse_ids, question, verbose)
+        if not d:
+            raise Exception('Unable to interpret: "%s"' % input)
+        return d
        
    def batch_interpret(self, inputs, discourse_ids=None, question=False, verbose=False):
        """
@@ -132,13 +141,9 @@ class Boxer(object):
        @param filename: C{str} A filename for the output file
        @return: stdout
        """
-        if self._candc_bin is None:
-            self._candc_bin = self._find_binary('candc', verbose)
-        if self._candc_models_path is None:
-            self._candc_models_path = os.path.normpath(os.path.join(self._candc_bin[:-5], '../models'))
        args = ['--models', os.path.join(self._candc_models_path, 'questions' if question else 'boxer'), 
-                '--output', filename]
-
+                '--output', filename,
+                '--candc-printer', 'boxer']
        return self._call('\n'.join(sum((["<META>'%s'" % id] + d for d,id in zip(inputs,discourse_ids)), [])), self._candc_bin, args, verbose)

    def _call_boxer(self, filename, verbose=False):
@@ -148,8 +153,6 @@ class Boxer(object):
        @param filename: C{str} A filename for the input file
        @return: stdout
        """
-        if self._boxer_bin is None:
-            self._boxer_bin = self._find_binary('boxer', verbose)
        args = ['--box', 'false', 
                '--semantics', 'drs',
                '--flat', 'false',
@@ -161,8 +164,9 @@ class Boxer(object):

        return self._call(None, self._boxer_bin, args, verbose)

-    def _find_binary(self, name, verbose=False):
+    def _find_binary(self, name, bin_dir, verbose=False):
        return nltk.internals.find_binary(name, 
+            path_to_bin=bin_dir,
            env_vars=['CANDCHOME'],
            url='http://svn.ask.it.usyd.edu.au/trac/candc/',
            binary_names=[name, name + '.exe'],
@@ -213,14 +217,16 @@ class Boxer(object):
                if discourse_id[0] == "'" and discourse_id[-1] == "'":
                    discourse_id = discourse_id[1:-1]
                drs_id = line[comma_idx+1:line.index(')')]
-                line = lines[i+4]
+                i += 1
+                line = lines[i]
                assert line.startswith('sem(%s,' % drs_id)
-                line = lines[i+8]
+                
+                i += 4
+                line = lines[i]
                assert line.endswith(').')
                drs_input = line[:-2].strip()
                parsed = self._parse_drs(drs_input, discourse_id, use_disc_id)
                drs_dict[discourse_id] = self._boxer_drs_interpreter.interpret(parsed)
-                i += 8
            i += 1
        return drs_dict
    
@@ -361,7 +367,7 @@ class BoxerOutputDrsParser(DrtParser):
        return lambda sent_index, word_indices: BoxerRel(self.discourse_id, sent_index, word_indices, var1, var2, rel, sense)

    def _handle_timex(self):
-        #timex(_G18322, date([]: +, []:'XXXX', [1004]:'04', []:'XX'))
+        #timex(_G18322, date([]: (+), []:'XXXX', [1004]:'04', []:'XX'))
        self.assertToken(self.token(), '(')
        arg = self.parse_variable()
        self.assertToken(self.token(), ',')
@@ -370,7 +376,7 @@ class BoxerOutputDrsParser(DrtParser):
        return new_conds

    def _handle_time_expression(self, arg):
-        #date([]: +, []:'XXXX', [1004]:'04', []:'XX')
+        #date([]: (+), []:'XXXX', [1004]:'04', []:'XX')
        tok = self.token()
        self.assertToken(self.token(), '(')
        if tok == 'date':
@@ -384,10 +390,12 @@ class BoxerOutputDrsParser(DrtParser):
               [lambda sent_index, word_indices: cond for cond in conds]

    def _handle_date(self, arg):
-        #[]: +, []:'XXXX', [1004]:'04', []:'XX'
+        #[]: (+), []:'XXXX', [1004]:'04', []:'XX'
        conds = []
        (sent_index, word_indices), = self._sent_and_word_indices(self._parse_index_list())
+        self.assertToken(self.token(), '(')
        pol = self.token()
+        self.assertToken(self.token(), ')')
        conds.append(BoxerPred(self.discourse_id, sent_index, word_indices, arg, 'date_pol_%s' % (pol), 'a', 0))
        self.assertToken(self.token(), ',')

@@ -1080,7 +1088,7 @@ class BoxerWhq(BoxerIndexed):
        return BoxerWhq(self.discourse_id, f(self.sent_index), self.word_indices, self.ans_types, self.drs1, self.variable, self.drs2)
        
    def __iter__(self):
-        return iter((self.ans_types, self.drs1, self.variable, self.drs2))
+        return iter(('['+','.join(self.ans_types)+']', self.drs1, self.variable, self.drs2))
        
    def _pred(self):
        return 'whq'