Commit 82cc85bd by Avital Pekker

* Added variable last_distances to allow retrieval of the language
  distances calculated. This helps determine if no match was found.
parent f3b9bd44
......@@ -54,6 +54,8 @@ class TextCat():
_START_CHAR = "<".encode('utf8')
_END_CHAR = ">".encode('utf8')
last_distances = {}
def __init__(self):
self._corpus = CrubadanCorpusReader(nltk.data.find('corpora/crubadan'), '.*\.txt')
......@@ -133,9 +135,9 @@ class TextCat():
def guess_language(self, text):
    ''' Find the language with the min distance
    to the text and return its ISO 639-3 code.

    The mapping returned by ``self.lang_dists(text)`` is stored in
    ``self.last_distances`` so that callers can inspect the calculated
    distances afterwards (e.g. to decide whether anything really
    matched).

    :param text: the text whose language should be guessed; it is
        passed unchanged to ``self.lang_dists``.
    :return: the key of the distance mapping with the smallest value.
    :raises ValueError: if ``self.lang_dists`` returns an empty mapping
        (``min`` of an empty iterable).
    '''
    # Compute the distances exactly once: the stored mapping and the one
    # used to pick the winner must be the same object, and lang_dists
    # should not be run twice for a single guess.
    self.last_distances = self.lang_dists(text)
    return min(self.last_distances, key=self.last_distances.get)
def demo(self):
''' Demo of language guessing using a bunch of UTF-8 encoded
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment