Commit 3bab2103 by Steven Bird

Merge branch 'develop' into parseri

parents 04f8a9b6 c35b192a
......@@ -840,8 +840,11 @@ class Downloader(object):
for i, child_id in enumerate(collection.children):
if child_id in self._packages:
collection.children[i] = self._packages[child_id]
if child_id in self._collections:
elif child_id in self._collections:
collection.children[i] = self._collections[child_id]
else:
print('removing collection member with no package: {}'.format(child_id))
del collection.children[i]
# Fill in collection.packages for each collection.
for collection in self._collections.values():
......
......@@ -212,7 +212,7 @@ class BottomUpProbabilisticChartParser(ParserI):
# Initialize the chart.
for edge in bu_init.apply(chart, grammar):
if self._trace > 1:
print(' %-50s [%s]' % (chart.pp_edge(edge,width=2),
print(' %-50s [%s]' % (chart.pretty_format_edge(edge,width=2),
edge.prob()))
queue.append(edge)
......@@ -227,7 +227,7 @@ class BottomUpProbabilisticChartParser(ParserI):
# Get the best edge.
edge = queue.pop()
if self._trace > 0:
print(' %-50s [%s]' % (chart.pp_edge(edge,width=2),
print(' %-50s [%s]' % (chart.pretty_format_edge(edge,width=2),
edge.prob()))
# Apply BU & FR to it.
......@@ -294,7 +294,7 @@ class BottomUpProbabilisticChartParser(ParserI):
split = len(queue)-self.beam_size
if self._trace > 2:
for edge in queue[:split]:
print(' %-50s [DISCARDED]' % chart.pp_edge(edge,2))
print(' %-50s [DISCARDED]' % chart.pretty_format_edge(edge,2))
del queue[:split]
class InsideChartParser(BottomUpProbabilisticChartParser):
......
......@@ -73,7 +73,7 @@ class StanfordParser(ParserI):
def __init__(self, path_to_jar=None, path_to_models_jar=None,
model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz',
encoding='UTF-8', verbose=False, java_options='-mx1000m'):
encoding='utf8', verbose=False, java_options='-mx1000m'):
self._stanford_jar = find_jar(
self._JAR, path_to_jar,
......
......@@ -36,7 +36,7 @@ class StanfordTagger(TaggerI):
_SEPARATOR = ''
_JAR = ''
def __init__(self, path_to_model, path_to_jar=None, encoding='ascii', verbose=False, java_options='-mx1000m'):
def __init__(self, path_to_model, path_to_jar=None, encoding='utf8', verbose=False, java_options='-mx1000m'):
if not self._JAR:
warnings.warn('The StanfordTagger class is not meant to be '
......
......@@ -36,7 +36,7 @@ class StanfordTokenizer(TokenizerI):
_JAR = 'stanford-postagger.jar'
def __init__(self, path_to_jar=None, encoding='UTF-8', options=None, verbose=False, java_options='-mx1000m'):
def __init__(self, path_to_jar=None, encoding='utf8', options=None, verbose=False, java_options='-mx1000m'):
self._stanford_jar = find_jar(
self._JAR, path_to_jar,
env_vars=('STANFORD_POSTAGGER',),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment