Commit f77d83bd by Steven Bird

changed output to unicode strings

parent 68620e53
...@@ -65,24 +65,24 @@ this case, the strings are also POS tagged. ...@@ -65,24 +65,24 @@ this case, the strings are also POS tagged.
>>> from nltk.corpus import conll2002 >>> from nltk.corpus import conll2002
>>> for doc in conll2002.chunked_sents('ned.train')[27]: >>> for doc in conll2002.chunked_sents('ned.train')[27]:
... print doc ... print doc
('Het', 'Art') (u'Het', u'Art')
(ORG Hof/N van/Prep Cassatie/N) (ORG Hof/N van/Prep Cassatie/N)
('verbrak', 'V') (u'verbrak', u'V')
('het', 'Art') (u'het', u'Art')
('arrest', 'N') (u'arrest', u'N')
('zodat', 'Conj') (u'zodat', u'Conj')
('het', 'Pron') (u'het', u'Pron')
('moest', 'V') (u'moest', u'V')
('worden', 'V') (u'worden', u'V')
('overgedaan', 'V') (u'overgedaan', u'V')
('door', 'Prep') (u'door', u'Prep')
('het', 'Art') (u'het', u'Art')
('hof', 'N') (u'hof', u'N')
('van', 'Prep') (u'van', u'Prep')
('beroep', 'N') (u'beroep', u'N')
('van', 'Prep') (u'van', u'Prep')
(LOC Antwerpen/N) (LOC Antwerpen/N)
('.', 'Punc') (u'.', u'Punc')
Relation Extraction Relation Extraction
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~
...@@ -234,16 +234,16 @@ presented as something that looks more like a clause in a logical language. ...@@ -234,16 +234,16 @@ presented as something that looks more like a clause in a logical language.
... for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern = DE)] ... for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern = DE)]
>>> for r in rels[:10]: >>> for r in rels[:10]:
... print relextract.show_clause(r, relsym='DE') # doctest: +NORMALIZE_WHITESPACE ... print relextract.show_clause(r, relsym='DE') # doctest: +NORMALIZE_WHITESPACE
DE('tribunal_supremo', 'victoria') DE(u'tribunal_supremo', u'victoria')
DE('museo_de_arte', 'alcorc\xe3\xb3n') DE(u'museo_de_arte', u'alcorc\xf3n')
DE('museo_de_bellas_artes', 'a_coru\xe3\xb1a') DE(u'museo_de_bellas_artes', u'a_coru\xf1a')
DE('siria', 'l\xe3\xadbano') DE(u'siria', u'l\xedbano')
DE('uni\xe3\xb3n_europea', 'pek\xe3\xadn') DE(u'uni\xf3n_europea', u'pek\xedn')
DE('ej\xe3\xa9rcito', 'rogberi') DE(u'ej\xe9rcito', u'rogberi')
DE('juzgado_de_instrucci\xe3\xb3n_n\xe3\xbamero_1', 'san_sebasti\xe3\xa1n') DE(u'juzgado_de_instrucci\xf3n_n\xfamero_1', u'san_sebasti\xe1n')
DE('psoe', 'villanueva_de_la_serena') DE(u'psoe', u'villanueva_de_la_serena')
DE('ej\xe3\xa9rcito', 'l\xe3\xadbano') DE(u'ej\xe9rcito', u'l\xedbano')
DE('juzgado_de_lo_penal_n\xe3\xbamero_2', 'ceuta') DE(u'juzgado_de_lo_penal_n\xfamero_2', u'ceuta')
>>> vnv = """ >>> vnv = """
... ( ... (
... is/V| ... is/V|
...@@ -258,6 +258,6 @@ presented as something that looks more like a clause in a logical language. ...@@ -258,6 +258,6 @@ presented as something that looks more like a clause in a logical language.
>>> for doc in conll2002.chunked_sents('ned.train'): >>> for doc in conll2002.chunked_sents('ned.train'):
... for r in relextract.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN): ... for r in relextract.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN):
... print relextract.show_clause(r, relsym="VAN") ... print relextract.show_clause(r, relsym="VAN")
VAN("cornet_d'elzius", 'buitenlandse_handel') VAN(u"cornet_d'elzius", u'buitenlandse_handel')
VAN('johan_rottiers', 'kardinaal_van_roey_instituut') VAN(u'johan_rottiers', u'kardinaal_van_roey_instituut')
VAN('annie_lennox', 'eurythmics') VAN(u'annie_lennox', u'eurythmics')
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment