Commit da669e67 by Steven Bird

Merge branch 'parseri' into test

parents cdcf6701 3bab2103
...@@ -840,8 +840,11 @@ class Downloader(object): ...@@ -840,8 +840,11 @@ class Downloader(object):
for i, child_id in enumerate(collection.children): for i, child_id in enumerate(collection.children):
if child_id in self._packages: if child_id in self._packages:
collection.children[i] = self._packages[child_id] collection.children[i] = self._packages[child_id]
if child_id in self._collections: elif child_id in self._collections:
collection.children[i] = self._collections[child_id] collection.children[i] = self._collections[child_id]
else:
print('removing collection member with no package: {}'.format(child_id))
del collection.children[i]
# Fill in collection.packages for each collection. # Fill in collection.packages for each collection.
for collection in self._collections.values(): for collection in self._collections.values():
......
...@@ -212,7 +212,7 @@ class BottomUpProbabilisticChartParser(ParserI): ...@@ -212,7 +212,7 @@ class BottomUpProbabilisticChartParser(ParserI):
# Initialize the chart. # Initialize the chart.
for edge in bu_init.apply(chart, grammar): for edge in bu_init.apply(chart, grammar):
if self._trace > 1: if self._trace > 1:
print(' %-50s [%s]' % (chart.pp_edge(edge,width=2), print(' %-50s [%s]' % (chart.pretty_format_edge(edge,width=2),
edge.prob())) edge.prob()))
queue.append(edge) queue.append(edge)
...@@ -227,7 +227,7 @@ class BottomUpProbabilisticChartParser(ParserI): ...@@ -227,7 +227,7 @@ class BottomUpProbabilisticChartParser(ParserI):
# Get the best edge. # Get the best edge.
edge = queue.pop() edge = queue.pop()
if self._trace > 0: if self._trace > 0:
print(' %-50s [%s]' % (chart.pp_edge(edge,width=2), print(' %-50s [%s]' % (chart.pretty_format_edge(edge,width=2),
edge.prob())) edge.prob()))
# Apply BU & FR to it. # Apply BU & FR to it.
...@@ -294,7 +294,7 @@ class BottomUpProbabilisticChartParser(ParserI): ...@@ -294,7 +294,7 @@ class BottomUpProbabilisticChartParser(ParserI):
split = len(queue)-self.beam_size split = len(queue)-self.beam_size
if self._trace > 2: if self._trace > 2:
for edge in queue[:split]: for edge in queue[:split]:
print(' %-50s [DISCARDED]' % chart.pp_edge(edge,2)) print(' %-50s [DISCARDED]' % chart.pretty_format_edge(edge,2))
del queue[:split] del queue[:split]
class InsideChartParser(BottomUpProbabilisticChartParser): class InsideChartParser(BottomUpProbabilisticChartParser):
......
...@@ -73,7 +73,7 @@ class StanfordParser(ParserI): ...@@ -73,7 +73,7 @@ class StanfordParser(ParserI):
def __init__(self, path_to_jar=None, path_to_models_jar=None, def __init__(self, path_to_jar=None, path_to_models_jar=None,
model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz', model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz',
encoding='UTF-8', verbose=False, java_options='-mx1000m'): encoding='utf8', verbose=False, java_options='-mx1000m'):
self._stanford_jar = find_jar( self._stanford_jar = find_jar(
self._JAR, path_to_jar, self._JAR, path_to_jar,
......
...@@ -36,7 +36,7 @@ class StanfordTagger(TaggerI): ...@@ -36,7 +36,7 @@ class StanfordTagger(TaggerI):
_SEPARATOR = '' _SEPARATOR = ''
_JAR = '' _JAR = ''
def __init__(self, path_to_model, path_to_jar=None, encoding='ascii', verbose=False, java_options='-mx1000m'): def __init__(self, path_to_model, path_to_jar=None, encoding='utf8', verbose=False, java_options='-mx1000m'):
if not self._JAR: if not self._JAR:
warnings.warn('The StanfordTagger class is not meant to be ' warnings.warn('The StanfordTagger class is not meant to be '
......
...@@ -36,7 +36,7 @@ class StanfordTokenizer(TokenizerI): ...@@ -36,7 +36,7 @@ class StanfordTokenizer(TokenizerI):
_JAR = 'stanford-postagger.jar' _JAR = 'stanford-postagger.jar'
def __init__(self, path_to_jar=None, encoding='UTF-8', options=None, verbose=False, java_options='-mx1000m'): def __init__(self, path_to_jar=None, encoding='utf8', options=None, verbose=False, java_options='-mx1000m'):
self._stanford_jar = find_jar( self._stanford_jar = find_jar(
self._JAR, path_to_jar, self._JAR, path_to_jar,
env_vars=('STANFORD_POSTAGGER',), env_vars=('STANFORD_POSTAGGER',),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment