Commit 714ca96c by Steven Bird

whitespace normalisation

parent c387f272
...@@ -50,7 +50,7 @@ class CCGLexicon(object): ...@@ -50,7 +50,7 @@ class CCGLexicon(object):
self._entries = entries self._entries = entries
# Returns all the possible categories for a word # Returns all the possible categories for a word
def categories(self,word): def categories(self, word):
return self._entries[word] return self._entries[word]
# Returns the target category for the parser # Returns the target category for the parser
...@@ -89,13 +89,13 @@ def matchBrackets(string): ...@@ -89,13 +89,13 @@ def matchBrackets(string):
while rest != "" and not rest.startswith(')'): while rest != "" and not rest.startswith(')'):
if rest.startswith('('): if rest.startswith('('):
(part,rest) = matchBrackets(rest) (part, rest) = matchBrackets(rest)
inside = inside + part inside = inside + part
else: else:
inside = inside + rest[0] inside = inside + rest[0]
rest = rest[1:] rest = rest[1:]
if rest.startswith(')'): if rest.startswith(')'):
return (inside + ')',rest[1:]) return (inside + ')', rest[1:])
raise AssertionError('Unmatched bracket in string \'' + string + '\'') raise AssertionError('Unmatched bracket in string \'' + string + '\'')
# Separates the string for the next portion of the category # Separates the string for the next portion of the category
...@@ -107,7 +107,7 @@ def nextCategory(string): ...@@ -107,7 +107,7 @@ def nextCategory(string):
# Parses an application operator # Parses an application operator
def parseApplication(app): def parseApplication(app):
return Direction(app[0],app[1:]) return Direction(app[0], app[1:])
# Parses the subscripts for a primitive category # Parses the subscripts for a primitive category
def parseSubscripts(subscr): def parseSubscripts(subscr):
...@@ -116,14 +116,14 @@ def parseSubscripts(subscr): ...@@ -116,14 +116,14 @@ def parseSubscripts(subscr):
return [] return []
# Parse a primitive category # Parse a primitive category
def parsePrimitiveCategory(chunks,primitives,families,var): def parsePrimitiveCategory(chunks, primitives, families, var):
# If the primitive is the special category 'var', # If the primitive is the special category 'var',
# replace it with the correct CCGVar # replace it with the correct CCGVar
if chunks[0] == "var": if chunks[0] == "var":
if chunks[1] is None: if chunks[1] is None:
if var is None: if var is None:
var = CCGVar() var = CCGVar()
return (var,var) return (var, var)
catstr = chunks[0] catstr = chunks[0]
if catstr in families: if catstr in families:
...@@ -131,43 +131,44 @@ def parsePrimitiveCategory(chunks,primitives,families,var): ...@@ -131,43 +131,44 @@ def parsePrimitiveCategory(chunks,primitives,families,var):
if var is None: if var is None:
var = cvar var = cvar
else: else:
cat = cat.substitute([(cvar,var)]) cat = cat.substitute([(cvar, var)])
return (cat,var) return (cat, var)
if catstr in primitives: if catstr in primitives:
subscrs = parseSubscripts(chunks[1]) subscrs = parseSubscripts(chunks[1])
return (PrimitiveCategory(catstr,subscrs),var) return (PrimitiveCategory(catstr, subscrs), var)
raise AssertionError('String \'' + catstr + '\' is neither a family nor primitive category.') raise AssertionError('String \'' + catstr + '\' is neither a family nor primitive category.')
# parseCategory drops the 'var' from the tuple # parseCategory drops the 'var' from the tuple
def parseCategory(line,primitives,families): def parseCategory(line, primitives, families):
return augParseCategory(line,primitives,families)[0] return augParseCategory(line, primitives, families)[0]
# Parses a string representing a category, and returns # Parses a string representing a category, and returns
# a tuple with (possibly) the CCG variable for the category # a tuple with (possibly) the CCG variable for the category
def augParseCategory(line,primitives,families,var = None): def augParseCategory(line, primitives, families, var=None):
(str,rest) = nextCategory(line) (str, rest) = nextCategory(line)
if str.startswith('('): if str.startswith('('):
(res,var) = augParseCategory(str[1:-1],primitives,families,var) (res, var) = augParseCategory(str[1:-1], primitives, families, var)
else: else:
# print rePrim.match(str).groups() # print rePrim.match(str).groups()
(res,var) = parsePrimitiveCategory(rePrim.match(str).groups(),primitives,families,var) (res, var) = parsePrimitiveCategory(rePrim.match(str).groups(),
primitives, families, var)
while rest != "": while rest != "":
app = reApp.match(rest).groups() app = reApp.match(rest).groups()
dir = parseApplication(app[0:3]) dir = parseApplication(app[0:3])
rest = app[3] rest = app[3]
(str,rest) = nextCategory(rest) (str, rest) = nextCategory(rest)
if str.startswith('('): if str.startswith('('):
(arg,var) = augParseCategory(str[1:-1],primitives,families,var) (arg, var) = augParseCategory(str[1:-1], primitives, families, var)
else: else:
(arg,var) = parsePrimitiveCategory(rePrim.match(str).groups(),primitives,families,var) (arg, var) = parsePrimitiveCategory(rePrim.match(str).groups(), primitives, families, var)
res = FunctionalCategory(res,arg,dir) res = FunctionalCategory(res, arg, dir)
return (res,var) return (res, var)
# Takes an input string, and converts it into a lexicon for CCGs. # Takes an input string, and converts it into a lexicon for CCGs.
def parseLexicon(lex_str): def parseLexicon(lex_str):
...@@ -188,16 +189,16 @@ def parseLexicon(lex_str): ...@@ -188,16 +189,16 @@ def parseLexicon(lex_str):
else: else:
# Either a family definition, or a word definition # Either a family definition, or a word definition
(ident, sep, catstr) = reLex.match(line).groups() (ident, sep, catstr) = reLex.match(line).groups()
(cat,var) = augParseCategory(catstr,primitives,families) (cat, var) = augParseCategory(catstr, primitives, families)
if sep == '::': if sep == '::':
# Family definition # Family definition
# ie, Det :: NP/N # ie, Det :: NP/N
families[ident] = (cat,var) families[ident] = (cat, var)
else: else:
# Word definition # Word definition
# ie, which => (N\N)/(S/NP) # ie, which => (N\N)/(S/NP)
entries[ident].append(cat) entries[ident].append(cat)
return CCGLexicon(primitives[0],primitives,families,entries) return CCGLexicon(primitives[0], primitives, families, entries)
openccg_tinytiny = parseLexicon(''' openccg_tinytiny = parseLexicon('''
......
...@@ -1029,7 +1029,7 @@ class HiddenMarkovModelTrainer(object): ...@@ -1029,7 +1029,7 @@ class HiddenMarkovModelTrainer(object):
return model return model
def train_supervised(self, labelled_sequences, estimator = None): def train_supervised(self, labelled_sequences, estimator=None):
""" """
Supervised training maximising the joint probability of the symbol and Supervised training maximising the joint probability of the symbol and
state sequences. This is done via collecting frequencies of state sequences. This is done via collecting frequencies of
......
...@@ -430,7 +430,7 @@ class Tree(list): ...@@ -430,7 +430,7 @@ class Tree(list):
# Transforms # Transforms
#//////////////////////////////////////////////////////////// #////////////////////////////////////////////////////////////
def chomsky_normal_form(self, factor = "right", horzMarkov = None, vertMarkov = 0, childChar = "|", parentChar = "^"): def chomsky_normal_form(self, factor="right", horzMarkov=None, vertMarkov=0, childChar="|", parentChar="^"):
""" """
This method can modify a tree in three ways: This method can modify a tree in three ways:
......
...@@ -110,7 +110,7 @@ from __future__ import print_function ...@@ -110,7 +110,7 @@ from __future__ import print_function
from nltk.tree import Tree from nltk.tree import Tree
def chomsky_normal_form(tree, factor = "right", horzMarkov = None, vertMarkov = 0, childChar = "|", parentChar = "^"): def chomsky_normal_form(tree, factor="right", horzMarkov=None, vertMarkov=0, childChar="|", parentChar="^"):
# assume all subtrees have homogeneous children # assume all subtrees have homogeneous children
# assume all terminals have no siblings # assume all terminals have no siblings
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment