Commit 4ac7b557 by Waheed Ahmed

Fixed transcripts skipping the first line when the uploaded file starts with a BOM (Byte Order Mark).

TNL-935
parent 5364ccf1
...@@ -302,7 +302,7 @@ class TestUploadtranscripts(Basetranscripts): ...@@ -302,7 +302,7 @@ class TestUploadtranscripts(Basetranscripts):
""" """
Test uploading subs containing BOM(Byte Order Mark), e.g. U+FEFF Test uploading subs containing BOM(Byte Order Mark), e.g. U+FEFF
""" """
filedate = textwrap.dedent(""" filedata = textwrap.dedent("""
1 1
00:00:10,500 --> 00:00:13,000 00:00:10,500 --> 00:00:13,000
Test ufeff characters Test ufeff characters
...@@ -313,8 +313,8 @@ class TestUploadtranscripts(Basetranscripts): ...@@ -313,8 +313,8 @@ class TestUploadtranscripts(Basetranscripts):
""").encode('utf-8-sig') """).encode('utf-8-sig')
# Verify that ufeff character is in filedata. # Verify that ufeff character is in filedata.
self.assertIn("ufeff", filedate) self.assertIn("ufeff", filedata)
self.ufeff_srt_file.write(filedate) self.ufeff_srt_file.write(filedata)
self.ufeff_srt_file.seek(0) self.ufeff_srt_file.seek(0)
link = reverse('upload_transcripts') link = reverse('upload_transcripts')
......
...@@ -398,10 +398,11 @@ def generate_sjson_for_all_speeds(item, user_filename, result_subs_dict, lang): ...@@ -398,10 +398,11 @@ def generate_sjson_for_all_speeds(item, user_filename, result_subs_dict, lang):
if not lang: if not lang:
lang = item.transcript_language lang = item.transcript_language
# Decode with utf-8-sig rather than utf-8 so that a leading BOM (Byte Order Mark, U+FEFF) is stripped.
generate_subs_from_source( generate_subs_from_source(
result_subs_dict, result_subs_dict,
os.path.splitext(user_filename)[1][1:], os.path.splitext(user_filename)[1][1:],
srt_transcripts.data.decode('utf8'), srt_transcripts.data.decode('utf-8-sig'),
item, item,
lang lang
) )
......
...@@ -489,3 +489,23 @@ class VideoEditorTest(CMSVideoBaseTest): ...@@ -489,3 +489,23 @@ class VideoEditorTest(CMSVideoBaseTest):
self.assertIn(unicode_text, self.video.captions_text) self.assertIn(unicode_text, self.video.captions_text)
self.assertEqual(self.video.caption_languages.keys(), [u'table', u'uk']) self.assertEqual(self.video.caption_languages.keys(), [u'table', u'uk'])
self.assertEqual(self.video.caption_languages.keys()[0], 'table') self.assertEqual(self.video.caption_languages.keys()[0], 'table')
def test_upload_transcript_with_BOM(self):
    """
    Scenario: A transcript file containing a BOM (Byte Order Mark) can be uploaded.
        Given a Video component has been created
        And the component is opened for editing
        And the "Advanced" tab is open
        And "chinese_transcripts_with_BOM.srt" is uploaded for the "zh" language code
        And the changes are saved
        Then the video shows its captions
        And the text "莎拉·佩林 (Sarah Palin)" is found in the captions
    """
    # The expected caption does not depend on page state, so decode it up front.
    expected_caption = "莎拉·佩林 (Sarah Palin)".decode('utf-8')

    # Create the component and upload the transcript from the Advanced tab.
    self._create_video_component()
    self.edit_component()
    self.open_advanced_tab()
    self.video.upload_translation('chinese_transcripts_with_BOM.srt', 'zh')
    self.save_unit_settings()

    # Captions must be visible and must include the expected text.
    captions_shown = self.video.is_captions_visible()
    self.assertTrue(captions_shown)
    self.assertIn(expected_caption, self.video.captions_lines())
1
00:00:16,850 --> 00:00:23,850
莎拉·佩林 (Sarah Palin) 的著作《我行我素》被乔纳森·拉班(Jonathan Raban) 评论为“400页对高尚无知的赞美”
2
00:00:24,040 --> 00:00:30,680
他是什么意思呢?拉班所指的那种思想
3
00:00:30,680 --> 00:00:35,660
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻
4
00:00:35,660 --> 00:00:42,410
他将其描述为“常识性保守派”
5
00:00:42,410 --> 00:00:47,510
即占据道德制高点的外行人能比专家更好地评价 比方说
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment