Commit 5958ffb3 by Waheed Ahmed

Merge pull request #6582 from edx/waheed/tnl935-fix-transcript-skip-first-line

Fixed transcript upload skipping the first line when the file begins with a BOM (Byte Order Mark).
parents 2db00c57 47a37228
......@@ -116,6 +116,8 @@ class TestUploadtranscripts(Basetranscripts):
"""))
self.bad_name_srt_file.seek(0)
self.ufeff_srt_file = tempfile.NamedTemporaryFile(suffix='.srt')
def test_success_video_module_source_subs_uploading(self):
self.item.data = textwrap.dedent("""
<video youtube="">
......@@ -296,12 +298,52 @@ class TestUploadtranscripts(Basetranscripts):
self.assertEqual(resp.status_code, 400)
self.assertEqual(json.loads(resp.content).get('status'), 'Undefined file extension.')
def test_subs_uploading_with_byte_order_mark(self):
    """
    Test uploading subs that begin with a BOM (Byte Order Mark, U+FEFF).

    The SRT payload is encoded with ``utf-8-sig`` so that its first bytes
    are the UTF-8 BOM. After posting it to ``upload_transcripts`` the test
    expects a 200 response and expects the stored ``.srt.sjson`` asset to
    list the text of the first cue.
    """
    import codecs  # local import: only this test needs the BOM constant

    # Cues must be separated by a blank line to form valid SubRip data.
    filedata = textwrap.dedent("""
        1
        00:00:10,500 --> 00:00:13,000
        Test ufeff characters

        2
        00:00:15,000 --> 00:00:18,000
        At the left we can see...
    """).encode('utf-8-sig')

    # Verify that the payload really starts with the BOM bytes. Checking
    # for the literal text "ufeff" would always pass, because that word
    # is part of the first cue's text.
    self.assertTrue(filedata.startswith(codecs.BOM_UTF8))

    self.ufeff_srt_file.write(filedata)
    self.ufeff_srt_file.seek(0)

    link = reverse('upload_transcripts')
    # The html5 video id is the temp file's base name without extension.
    filename = os.path.splitext(os.path.basename(self.ufeff_srt_file.name))[0]
    resp = self.client.post(link, {
        'locator': self.video_usage_key,
        'transcript-file': self.ufeff_srt_file,
        'video_list': json.dumps([{
            'type': 'html5',
            'video': filename,
            'mode': 'mp4',
        }])
    })
    self.assertEqual(resp.status_code, 200)

    content_location = StaticContent.compute_location(
        self.course.id, 'subs_{0}.srt.sjson'.format(filename))
    # Look the asset up once and reuse it for both checks.
    stored_subs = contentstore().find(content_location)
    self.assertTrue(stored_subs)

    subs_text = json.loads(stored_subs.data).get('text')
    self.assertIn("Test ufeff characters", subs_text)
def tearDown(self):
    """
    Run the parent tear-down, then close the four SRT file objects held
    on this test case.
    """
    super(TestUploadtranscripts, self).tearDown()
    # Each attribute is resolved just before it is closed, so the files
    # are closed one by one in the listed order.
    for attr_name in (
            'good_srt_file',
            'bad_data_srt_file',
            'bad_name_srt_file',
            'ufeff_srt_file',
    ):
        getattr(self, attr_name).close()
class TestDownloadtranscripts(Basetranscripts):
......
......@@ -100,7 +100,8 @@ def upload_transcripts(request):
except ValueError:
return error_response(response, 'Invalid video_list JSON.')
source_subs_filedata = request.FILES['transcript-file'].read().decode('utf8')
# Used utf-8-sig encoding type instead of utf-8 to remove BOM(Byte Order Mark), e.g. U+FEFF
source_subs_filedata = request.FILES['transcript-file'].read().decode('utf-8-sig')
source_subs_filename = request.FILES['transcript-file'].name
if '.' not in source_subs_filename:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment