Commit 0c61c977 by Steven Xu

restricted matching of wordnet tab files so the non-data ones are ignored; added…

Restricted the matching of WordNet tab files so that the non-data ones are ignored; added "fra" to the wn.langs() doctest.
parent 7ce4d56d
......@@ -206,7 +206,7 @@ webtext = LazyCorpusLoader(
'webtext', PlaintextCorpusReader, r'(?!README|\.).*\.txt', encoding='ISO-8859-2')
wordnet = LazyCorpusLoader(
'wordnet', WordNetCorpusReader,
- LazyCorpusLoader('omw', CorpusReader, r'(?!\.).*\.tab'))
+ LazyCorpusLoader('omw', CorpusReader, r'.*/wn-data-.*\.tab', encoding='utf8'))
wordnet_ic = LazyCorpusLoader(
'wordnet_ic', WordNetICCorpusReader, '.*\.dat')
words = LazyCorpusLoader(
......
......@@ -47,9 +47,9 @@ A synset is identified with a 3-part name of the form: word.pos.nn:
The WordNet corpus reader gives access to the Open Multilingual
WordNet, using ISO-639 language codes.
- >>> wn.langs()
+ >>> sorted(wn.langs())
[u'als', u'arb', u'cat', u'cmn', u'dan', u'eng', u'eus', u'fas',
- u'fin', u'fre', u'glg', u'heb', u'ind', u'ita', u'jpn', u'nno',
+ u'fin', u'fra', u'fre', u'glg', u'heb', u'ind', u'ita', u'jpn', u'nno',
u'nob', u'pol', u'por', u'spa', u'tha', u'zsm']
>>> wn.synsets('犬', lang='jpn')
[Synset('dog.n.01'), Synset('spy.n.01')]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment