Commit cccab982 by Dávid

Make use of bytes literals (the 'b' prefix) for the newline and tab separators instead of encoding them at runtime.

parent e262de6a
...@@ -84,8 +84,6 @@ class HunposTagger(TaggerI): ...@@ -84,8 +84,6 @@ class HunposTagger(TaggerI):
self._hunpos = Popen([self._hunpos_bin, self._hunpos_model], self._hunpos = Popen([self._hunpos_bin, self._hunpos_model],
shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE) shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE)
self._closed = False self._closed = False
self._endl = compat.text_type("\n").encode(self._encoding)
self._tab = compat.text_type("\t").encode(self._encoding)
def __del__(self): def __del__(self):
self.close() self.close()
...@@ -109,14 +107,14 @@ class HunposTagger(TaggerI): ...@@ -109,14 +107,14 @@ class HunposTagger(TaggerI):
assert "\n" not in token, "Tokens should not contain newlines" assert "\n" not in token, "Tokens should not contain newlines"
if isinstance(token, compat.text_type): if isinstance(token, compat.text_type):
token = token.encode(self._encoding) token = token.encode(self._encoding)
self._hunpos.stdin.write(token + self._endl) self._hunpos.stdin.write(token + b"\n")
# We write a final empty line to tell hunpos that the sentence is finished: # We write a final empty line to tell hunpos that the sentence is finished:
self._hunpos.stdin.write(self._endl) self._hunpos.stdin.write(b"\n")
self._hunpos.stdin.flush() self._hunpos.stdin.flush()
tagged_tokens = [] tagged_tokens = []
for token in tokens: for token in tokens:
tagged = self._hunpos.stdout.readline().strip().split(self._tab) tagged = self._hunpos.stdout.readline().strip().split(b"\t")
tag = (tagged[1] if len(tagged) > 1 else None) tag = (tagged[1] if len(tagged) > 1 else None)
tagged_tokens.append((token, tag)) tagged_tokens.append((token, tag))
# We have to read (and dismiss) the final empty line: # We have to read (and dismiss) the final empty line:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment