Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
notifier
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
notifier
Commits
c8cdcd6c
Commit
c8cdcd6c
authored
Dec 20, 2013
by
jsa
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
handle non-BMP unicode chars in _trunc()
parent
5d425578
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
23 additions
and
9 deletions
+23
-9
notifier/digest.py
+23
-6
notifier/tests/test_digest.py
+0
-3
No files found.
notifier/digest.py
View file @
c8cdcd6c
...
@@ -4,6 +4,7 @@ General formatting and rendering helpers for digest notifications.
...
@@ -4,6 +4,7 @@ General formatting and rendering helpers for digest notifications.
import
datetime
import
datetime
import
logging
import
logging
import
struct
from
django.conf
import
settings
from
django.conf
import
settings
from
django.template.loader
import
get_template
from
django.template.loader
import
get_template
...
@@ -34,17 +35,33 @@ def _trunc(s, length):
...
@@ -34,17 +35,33 @@ def _trunc(s, length):
Truncate the string `s` to no more than `length`, using ellipsis and
Truncate the string `s` to no more than `length`, using ellipsis and
without chopping words.
without chopping words.
>>> _trunc("one two three", 13)
This function works on both str and unicode objects. If a str
'one two three'
is passed, it may return a unicode. If a unicode is passed, it will
>>> _trunc("one two three", 12)
always return a unicode.
'one two...'
>>> _trunc(u"one two three", 13)
u'one two three'
>>> _trunc(u"one two three", 12)
u'one two...'
"""
"""
# Some Python2.7 builds do not support non-BMP unicode characters.
# To function properly on such systems, we convert to code points
# inside this function before counting / slicing characters, and
# decode again prior to concatenating the output value.
s
=
s
.
strip
()
s
=
s
.
strip
()
if
len
(
s
)
<=
length
:
u
=
s
.
encode
(
'utf-32-le'
)
pts
=
struct
.
unpack
(
'<{}L'
.
format
(
len
(
u
)
/
4
),
u
)
if
len
(
pts
)
<=
length
:
# nothing to do
# nothing to do
return
s
return
s
# truncate, taking an extra -3 off the orig string for the ellipsis itself
# truncate, taking an extra -3 off the orig string for the ellipsis itself
return
s
[:
length
-
3
]
.
rsplit
(
' '
,
1
)[
0
]
.
strip
()
+
'...'
# see above comment about non-BMP support for why this is done in such
# elaborate fashion.
uchr
=
lambda
x
:
'
\
U{0:08x}'
.
format
(
x
)
.
decode
(
'unicode-escape'
)
return
''
.
join
(
uchr
(
p
)
for
p
in
pts
[:
length
-
3
])
.
rsplit
(
' '
,
1
)[
0
]
.
strip
()
+
'...'
def
_make_text_list
(
values
):
def
_make_text_list
(
values
):
...
...
notifier/tests/test_digest.py
View file @
c8cdcd6c
...
@@ -23,7 +23,6 @@ class DigestItemTestCase(TestCase):
...
@@ -23,7 +23,6 @@ class DigestItemTestCase(TestCase):
def
test_CJK
(
self
):
def
test_CJK
(
self
):
self
.
_test_unicode_data
(
u"イんノ丂 アo丂イ co刀イムノ刀丂 cフズ"
,
u"イんノ丂 アo丂イ..."
)
self
.
_test_unicode_data
(
u"イんノ丂 アo丂イ co刀イムノ刀丂 cフズ"
,
u"イんノ丂 アo丂イ..."
)
@skip
(
"Non-BMP characters are not handled correctly"
)
def
test_non_BMP
(
self
):
def
test_non_BMP
(
self
):
self
.
_test_unicode_data
(
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥 𝕔𝕠𝕟𝕥𝕒𝕚𝕟𝕤 𝕔𝕙𝕒𝕣𝕒𝕔𝕥𝕖𝕣𝕤 𝕠𝕦𝕥𝕤𝕚𝕕𝕖 𝕥𝕙𝕖 𝔹𝕄ℙ"
,
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥..."
)
self
.
_test_unicode_data
(
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥 𝕔𝕠𝕟𝕥𝕒𝕚𝕟𝕤 𝕔𝕙𝕒𝕣𝕒𝕔𝕥𝕖𝕣𝕤 𝕠𝕦𝕥𝕤𝕚𝕕𝕖 𝕥𝕙𝕖 𝔹𝕄ℙ"
,
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥..."
)
...
@@ -48,7 +47,6 @@ class DigestThreadTestCase(TestCase):
...
@@ -48,7 +47,6 @@ class DigestThreadTestCase(TestCase):
def
test_CJK
(
self
):
def
test_CJK
(
self
):
self
.
_test_unicode_data
(
u"イんノ丂 アo丂イ co刀イムノ刀丂 cフズ"
,
u"イんノ丂 アo丂イ..."
)
self
.
_test_unicode_data
(
u"イんノ丂 アo丂イ co刀イムノ刀丂 cフズ"
,
u"イんノ丂 アo丂イ..."
)
@skip
(
"Non-BMP characters are not handled correctly"
)
def
test_non_BMP
(
self
):
def
test_non_BMP
(
self
):
self
.
_test_unicode_data
(
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥 𝕔𝕠𝕟𝕥𝕒𝕚𝕟𝕤 𝕔𝕙𝕒𝕣𝕒𝕔𝕥𝕖𝕣𝕤 𝕠𝕦𝕥𝕤𝕚𝕕𝕖 𝕥𝕙𝕖 𝔹𝕄ℙ"
,
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥..."
)
self
.
_test_unicode_data
(
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥 𝕔𝕠𝕟𝕥𝕒𝕚𝕟𝕤 𝕔𝕙𝕒𝕣𝕒𝕔𝕥𝕖𝕣𝕤 𝕠𝕦𝕥𝕤𝕚𝕕𝕖 𝕥𝕙𝕖 𝔹𝕄ℙ"
,
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥..."
)
...
@@ -91,7 +89,6 @@ class RenderDigestTestCase(TestCase):
...
@@ -91,7 +89,6 @@ class RenderDigestTestCase(TestCase):
def
test_CJK
(
self
):
def
test_CJK
(
self
):
self
.
_test_unicode_data
(
u"イんノ丂 アo丂イ co刀イムノ刀丂 cフズ"
,
u"イんノ丂 アo丂イ..."
)
self
.
_test_unicode_data
(
u"イんノ丂 アo丂イ co刀イムノ刀丂 cフズ"
,
u"イんノ丂 アo丂イ..."
)
@skip
(
"Non-BMP characters are not handled correctly"
)
def
test_non_BMP
(
self
):
def
test_non_BMP
(
self
):
self
.
_test_unicode_data
(
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥 𝕔𝕠𝕟𝕥𝕒𝕚𝕟𝕤 𝕔𝕙𝕒𝕣𝕒𝕔𝕥𝕖𝕣𝕤 𝕠𝕦𝕥𝕤𝕚𝕕𝕖 𝕥𝕙𝕖 𝔹𝕄ℙ"
,
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥..."
)
self
.
_test_unicode_data
(
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥 𝕔𝕠𝕟𝕥𝕒𝕚𝕟𝕤 𝕔𝕙𝕒𝕣𝕒𝕔𝕥𝕖𝕣𝕤 𝕠𝕦𝕥𝕤𝕚𝕕𝕖 𝕥𝕙𝕖 𝔹𝕄ℙ"
,
u"𝕋𝕙𝕚𝕤 𝕡𝕠𝕤𝕥..."
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment