Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nltk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
nltk
Commits
714ca96c
Commit
714ca96c
authored
May 20, 2014
by
Steven Bird
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
whitespace normalisation
parent
c387f272
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
27 additions
and
26 deletions
+27
-26
nltk/ccg/lexicon.py
+24
-23
nltk/tag/hmm.py
+1
-1
nltk/tree.py
+1
-1
nltk/treetransforms.py
+1
-1
No files found.
nltk/ccg/lexicon.py
View file @
714ca96c
...
@@ -50,7 +50,7 @@ class CCGLexicon(object):
...
@@ -50,7 +50,7 @@ class CCGLexicon(object):
self
.
_entries
=
entries
self
.
_entries
=
entries
# Returns all the possible categories for a word
# Returns all the possible categories for a word
def
categories
(
self
,
word
):
def
categories
(
self
,
word
):
return
self
.
_entries
[
word
]
return
self
.
_entries
[
word
]
# Returns the target category for the parser
# Returns the target category for the parser
...
@@ -89,13 +89,13 @@ def matchBrackets(string):
...
@@ -89,13 +89,13 @@ def matchBrackets(string):
while
rest
!=
""
and
not
rest
.
startswith
(
')'
):
while
rest
!=
""
and
not
rest
.
startswith
(
')'
):
if
rest
.
startswith
(
'('
):
if
rest
.
startswith
(
'('
):
(
part
,
rest
)
=
matchBrackets
(
rest
)
(
part
,
rest
)
=
matchBrackets
(
rest
)
inside
=
inside
+
part
inside
=
inside
+
part
else
:
else
:
inside
=
inside
+
rest
[
0
]
inside
=
inside
+
rest
[
0
]
rest
=
rest
[
1
:]
rest
=
rest
[
1
:]
if
rest
.
startswith
(
')'
):
if
rest
.
startswith
(
')'
):
return
(
inside
+
')'
,
rest
[
1
:])
return
(
inside
+
')'
,
rest
[
1
:])
raise
AssertionError
(
'Unmatched bracket in string
\'
'
+
string
+
'
\'
'
)
raise
AssertionError
(
'Unmatched bracket in string
\'
'
+
string
+
'
\'
'
)
# Separates the string for the next portion of the category
# Separates the string for the next portion of the category
...
@@ -107,7 +107,7 @@ def nextCategory(string):
...
@@ -107,7 +107,7 @@ def nextCategory(string):
# Parses an application operator
# Parses an application operator
def
parseApplication
(
app
):
def
parseApplication
(
app
):
return
Direction
(
app
[
0
],
app
[
1
:])
return
Direction
(
app
[
0
],
app
[
1
:])
# Parses the subscripts for a primitive category
# Parses the subscripts for a primitive category
def
parseSubscripts
(
subscr
):
def
parseSubscripts
(
subscr
):
...
@@ -116,14 +116,14 @@ def parseSubscripts(subscr):
...
@@ -116,14 +116,14 @@ def parseSubscripts(subscr):
return
[]
return
[]
# Parse a primitive category
# Parse a primitive category
def
parsePrimitiveCategory
(
chunks
,
primitives
,
families
,
var
):
def
parsePrimitiveCategory
(
chunks
,
primitives
,
families
,
var
):
# If the primitive is the special category 'var',
# If the primitive is the special category 'var',
# replace it with the correct CCGVar
# replace it with the correct CCGVar
if
chunks
[
0
]
==
"var"
:
if
chunks
[
0
]
==
"var"
:
if
chunks
[
1
]
is
None
:
if
chunks
[
1
]
is
None
:
if
var
is
None
:
if
var
is
None
:
var
=
CCGVar
()
var
=
CCGVar
()
return
(
var
,
var
)
return
(
var
,
var
)
catstr
=
chunks
[
0
]
catstr
=
chunks
[
0
]
if
catstr
in
families
:
if
catstr
in
families
:
...
@@ -131,43 +131,44 @@ def parsePrimitiveCategory(chunks,primitives,families,var):
...
@@ -131,43 +131,44 @@ def parsePrimitiveCategory(chunks,primitives,families,var):
if
var
is
None
:
if
var
is
None
:
var
=
cvar
var
=
cvar
else
:
else
:
cat
=
cat
.
substitute
([(
cvar
,
var
)])
cat
=
cat
.
substitute
([(
cvar
,
var
)])
return
(
cat
,
var
)
return
(
cat
,
var
)
if
catstr
in
primitives
:
if
catstr
in
primitives
:
subscrs
=
parseSubscripts
(
chunks
[
1
])
subscrs
=
parseSubscripts
(
chunks
[
1
])
return
(
PrimitiveCategory
(
catstr
,
subscrs
),
var
)
return
(
PrimitiveCategory
(
catstr
,
subscrs
),
var
)
raise
AssertionError
(
'String
\'
'
+
catstr
+
'
\'
is neither a family nor primitive category.'
)
raise
AssertionError
(
'String
\'
'
+
catstr
+
'
\'
is neither a family nor primitive category.'
)
# parseCategory drops the 'var' from the tuple
# parseCategory drops the 'var' from the tuple
def
parseCategory
(
line
,
primitives
,
families
):
def
parseCategory
(
line
,
primitives
,
families
):
return
augParseCategory
(
line
,
primitives
,
families
)[
0
]
return
augParseCategory
(
line
,
primitives
,
families
)[
0
]
# Parses a string representing a category, and returns
# Parses a string representing a category, and returns
# a tuple with (possibly) the CCG variable for the category
# a tuple with (possibly) the CCG variable for the category
def
augParseCategory
(
line
,
primitives
,
families
,
var
=
None
):
def
augParseCategory
(
line
,
primitives
,
families
,
var
=
None
):
(
str
,
rest
)
=
nextCategory
(
line
)
(
str
,
rest
)
=
nextCategory
(
line
)
if
str
.
startswith
(
'('
):
if
str
.
startswith
(
'('
):
(
res
,
var
)
=
augParseCategory
(
str
[
1
:
-
1
],
primitives
,
families
,
var
)
(
res
,
var
)
=
augParseCategory
(
str
[
1
:
-
1
],
primitives
,
families
,
var
)
else
:
else
:
# print rePrim.match(str).groups()
# print rePrim.match(str).groups()
(
res
,
var
)
=
parsePrimitiveCategory
(
rePrim
.
match
(
str
)
.
groups
(),
primitives
,
families
,
var
)
(
res
,
var
)
=
parsePrimitiveCategory
(
rePrim
.
match
(
str
)
.
groups
(),
primitives
,
families
,
var
)
while
rest
!=
""
:
while
rest
!=
""
:
app
=
reApp
.
match
(
rest
)
.
groups
()
app
=
reApp
.
match
(
rest
)
.
groups
()
dir
=
parseApplication
(
app
[
0
:
3
])
dir
=
parseApplication
(
app
[
0
:
3
])
rest
=
app
[
3
]
rest
=
app
[
3
]
(
str
,
rest
)
=
nextCategory
(
rest
)
(
str
,
rest
)
=
nextCategory
(
rest
)
if
str
.
startswith
(
'('
):
if
str
.
startswith
(
'('
):
(
arg
,
var
)
=
augParseCategory
(
str
[
1
:
-
1
],
primitives
,
families
,
var
)
(
arg
,
var
)
=
augParseCategory
(
str
[
1
:
-
1
],
primitives
,
families
,
var
)
else
:
else
:
(
arg
,
var
)
=
parsePrimitiveCategory
(
rePrim
.
match
(
str
)
.
groups
(),
primitives
,
families
,
var
)
(
arg
,
var
)
=
parsePrimitiveCategory
(
rePrim
.
match
(
str
)
.
groups
(),
primitives
,
families
,
var
)
res
=
FunctionalCategory
(
res
,
arg
,
dir
)
res
=
FunctionalCategory
(
res
,
arg
,
dir
)
return
(
res
,
var
)
return
(
res
,
var
)
# Takes an input string, and converts it into a lexicon for CCGs.
# Takes an input string, and converts it into a lexicon for CCGs.
def
parseLexicon
(
lex_str
):
def
parseLexicon
(
lex_str
):
...
@@ -188,16 +189,16 @@ def parseLexicon(lex_str):
...
@@ -188,16 +189,16 @@ def parseLexicon(lex_str):
else
:
else
:
# Either a family definition, or a word definition
# Either a family definition, or a word definition
(
ident
,
sep
,
catstr
)
=
reLex
.
match
(
line
)
.
groups
()
(
ident
,
sep
,
catstr
)
=
reLex
.
match
(
line
)
.
groups
()
(
cat
,
var
)
=
augParseCategory
(
catstr
,
primitives
,
families
)
(
cat
,
var
)
=
augParseCategory
(
catstr
,
primitives
,
families
)
if
sep
==
'::'
:
if
sep
==
'::'
:
# Family definition
# Family definition
# ie, Det :: NP/N
# ie, Det :: NP/N
families
[
ident
]
=
(
cat
,
var
)
families
[
ident
]
=
(
cat
,
var
)
else
:
else
:
# Word definition
# Word definition
# ie, which => (N\N)/(S/NP)
# ie, which => (N\N)/(S/NP)
entries
[
ident
]
.
append
(
cat
)
entries
[
ident
]
.
append
(
cat
)
return
CCGLexicon
(
primitives
[
0
],
primitives
,
families
,
entries
)
return
CCGLexicon
(
primitives
[
0
],
primitives
,
families
,
entries
)
openccg_tinytiny
=
parseLexicon
(
'''
openccg_tinytiny
=
parseLexicon
(
'''
...
...
nltk/tag/hmm.py
View file @
714ca96c
...
@@ -1029,7 +1029,7 @@ class HiddenMarkovModelTrainer(object):
...
@@ -1029,7 +1029,7 @@ class HiddenMarkovModelTrainer(object):
return
model
return
model
def
train_supervised
(
self
,
labelled_sequences
,
estimator
=
None
):
def
train_supervised
(
self
,
labelled_sequences
,
estimator
=
None
):
"""
"""
Supervised training maximising the joint probability of the symbol and
Supervised training maximising the joint probability of the symbol and
state sequences. This is done via collecting frequencies of
state sequences. This is done via collecting frequencies of
...
...
nltk/tree.py
View file @
714ca96c
...
@@ -430,7 +430,7 @@ class Tree(list):
...
@@ -430,7 +430,7 @@ class Tree(list):
# Transforms
# Transforms
#////////////////////////////////////////////////////////////
#////////////////////////////////////////////////////////////
def
chomsky_normal_form
(
self
,
factor
=
"right"
,
horzMarkov
=
None
,
vertMarkov
=
0
,
childChar
=
"|"
,
parentChar
=
"^"
):
def
chomsky_normal_form
(
self
,
factor
=
"right"
,
horzMarkov
=
None
,
vertMarkov
=
0
,
childChar
=
"|"
,
parentChar
=
"^"
):
"""
"""
This method can modify a tree in three ways:
This method can modify a tree in three ways:
...
...
nltk/treetransforms.py
View file @
714ca96c
...
@@ -110,7 +110,7 @@ from __future__ import print_function
...
@@ -110,7 +110,7 @@ from __future__ import print_function
from
nltk.tree
import
Tree
from
nltk.tree
import
Tree
def
chomsky_normal_form
(
tree
,
factor
=
"right"
,
horzMarkov
=
None
,
vertMarkov
=
0
,
childChar
=
"|"
,
parentChar
=
"^"
):
def
chomsky_normal_form
(
tree
,
factor
=
"right"
,
horzMarkov
=
None
,
vertMarkov
=
0
,
childChar
=
"|"
,
parentChar
=
"^"
):
# assume all subtrees have homogeneous children
# assume all subtrees have homogeneous children
# assume all terminals have no siblings
# assume all terminals have no siblings
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment