Commit baf35701 by Ilia Kurenkov

updated entropy code

parent f064f6e5
...@@ -87,6 +87,9 @@ class NgramModel(ModelI):
assert(isinstance(pad_left, bool))
assert(isinstance(pad_right, bool))
self._lpad = ('',) * (n - 1) if pad_left else ()
self._rpad = ('',) * (n - 1) if pad_right else ()
# make sure n is greater than zero, otherwise print it
assert (n > 0), n
self._unigram_model = (n == 1)
...@@ -240,13 +243,13 @@ class NgramModel(ModelI):
:type text: list(str)
"""
H = 0.0 # entropy is conventionally denoted by "H"
text = list(self._lpad) + text + list(self._rpad)
for i in range(self._n - 1, len(text)):
    context = tuple(text[(i - self._n + 1):i])
    token = text[i]
    H += self.logprob(token, context)
return H / float(len(text) - (self._n - 1))
def perplexity(self, text):
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment