Commit c28b2958 by Greg Price

Add rendered_body to discussion API endpoints

The rendering in this commit does not prevent MathJax content from being
rendered as the discussion JavaScript code does.
parent 905512b6
"""
Content rendering functionality
Note that this module is designed to imitate the front end behavior as
implemented in Markdown.Sanitizer.js.
"""
import re
import markdown
# These patterns could be more flexible about things like attributes and
# whitespace, but this is imitating Markdown.Sanitizer.js, so it uses the
# patterns defined therein.
TAG_PATTERN = re.compile(r"<[^>]*>?")
SANITIZED_TAG_PATTERN = re.compile(r"<(/?)(\w+)[^>]*>")
ALLOWED_BASIC_TAG_PATTERN = re.compile(
r"^(</?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?/?>)$"
)
ALLOWED_A_PATTERN = re.compile(
r'^(<a\shref="((https?|ftp)://|/)[-A-Za-z0-9+&@#/%?=~_|!:,.;\(\)]+"(\stitle="[^"<>]+")?\s?>|</a>)$'
)
ALLOWED_IMG_PATTERN = re.compile(
r'^(<img\ssrc="(https?://|/)[-A-Za-z0-9+&@#/%?=~_|!:,.;\(\)]+"(\swidth="\d{1,3}")?'
r'(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?/?>)$'
)
def _sanitize_tag(match):
"""Return the tag if it is allowed or the empty string otherwise"""
tag = match.group(0)
if (
ALLOWED_BASIC_TAG_PATTERN.match(tag) or
ALLOWED_A_PATTERN.match(tag) or
ALLOWED_IMG_PATTERN.match(tag)
):
return tag
else:
return ""
def _sanitize_html(source):
"""
Return source with all non-allowed tags removed, preserving the text content
"""
return TAG_PATTERN.sub(_sanitize_tag, source)
def _remove_unpaired_tags(source):
"""
Return source with all unpaired tags removed, preserving the text content
source should have already been sanitized
"""
tag_matches = list(SANITIZED_TAG_PATTERN.finditer(source))
if not tag_matches:
return source
tag_stack = []
tag_name_stack = []
text_stack = [source[:tag_matches[0].start()]]
for i, match in enumerate(tag_matches):
tag_name = match.group(2)
following_text = (
source[match.end():tag_matches[i + 1].start()] if i + 1 < len(tag_matches) else
source[match.end():]
)
if tag_name in ["p", "img", "br", "li", "hr"]: # tags that don't require closing
text_stack[-1] += match.group(0) + following_text
elif match.group(1): # end tag
if tag_name in tag_name_stack: # paired with a start tag somewhere
# pop tags until we find the matching one, keeping the non-tag text
while True:
popped_tag_name = tag_name_stack.pop()
popped_tag = tag_stack.pop()
popped_text = text_stack.pop()
if popped_tag_name == tag_name:
text_stack[-1] += popped_tag + popped_text + match.group(0)
break
else:
text_stack[-1] += popped_text
# else unpaired; drop the tag
text_stack[-1] += following_text
else: # start tag
tag_stack.append(match.group(0))
tag_name_stack.append(tag_name)
text_stack.append(following_text)
return "".join(text_stack)
def render_body(raw_body):
"""
Render raw_body to HTML.
This includes the following steps:
* Convert Markdown to HTML
* Strip non-whitelisted HTML
* Remove unbalanced HTML tags
Note that this does not prevent Markdown syntax inside a MathJax block from
being processed, which the forums JavaScript code does.
"""
rendered = markdown.markdown(raw_body)
rendered = _sanitize_html(rendered)
rendered = _remove_unpaired_tags(rendered)
return rendered
......@@ -10,6 +10,7 @@ from django.core.urlresolvers import reverse
from rest_framework import serializers
from discussion_api.render import render_body
from django_comment_common.models import (
FORUM_ROLE_ADMINISTRATOR,
FORUM_ROLE_COMMUNITY_TA,
......@@ -65,6 +66,7 @@ class _ContentSerializer(serializers.Serializer):
created_at = serializers.CharField(read_only=True)
updated_at = serializers.CharField(read_only=True)
raw_body = NonEmptyCharField(source="body")
rendered_body = serializers.SerializerMethodField("get_rendered_body")
abuse_flagged = serializers.SerializerMethodField("get_abuse_flagged")
voted = serializers.SerializerMethodField("get_voted")
vote_count = serializers.SerializerMethodField("get_vote_count")
......@@ -122,6 +124,10 @@ class _ContentSerializer(serializers.Serializer):
"""Returns the role label for the content author."""
return None if self._is_anonymous(obj) else self._get_user_label(int(obj["user_id"]))
def get_rendered_body(self, obj):
"""Returns the rendered body content."""
return render_body(obj["body"])
def get_abuse_flagged(self, obj):
"""
Returns a boolean indicating whether the requester has flagged the
......
......@@ -588,6 +588,7 @@ class GetThreadListTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTest
"type": "discussion",
"title": "Test Title",
"raw_body": "Test body",
"rendered_body": "<p>Test body</p>",
"pinned": False,
"closed": False,
"following": False,
......@@ -613,6 +614,7 @@ class GetThreadListTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTest
"type": "question",
"title": "Another Test Title",
"raw_body": "More content",
"rendered_body": "<p>More content</p>",
"pinned": False,
"closed": True,
"following": False,
......@@ -912,6 +914,7 @@ class GetCommentListTest(CommentsServiceMockMixin, ModuleStoreTestCase):
"created_at": "2015-05-11T00:00:00Z",
"updated_at": "2015-05-11T11:11:11Z",
"raw_body": "Test body",
"rendered_body": "<p>Test body</p>",
"endorsed": False,
"endorsed_by": None,
"endorsed_by_label": None,
......@@ -930,6 +933,7 @@ class GetCommentListTest(CommentsServiceMockMixin, ModuleStoreTestCase):
"created_at": "2015-05-11T22:22:22Z",
"updated_at": "2015-05-11T33:33:33Z",
"raw_body": "More content",
"rendered_body": "<p>More content</p>",
"endorsed": False,
"endorsed_by": None,
"endorsed_by_label": None,
......@@ -1145,6 +1149,7 @@ class CreateThreadTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTestC
"type": "discussion",
"title": "Test Title",
"raw_body": "Test body",
"rendered_body": "<p>Test body</p>",
"pinned": False,
"closed": False,
"following": False,
......@@ -1312,6 +1317,7 @@ class CreateCommentTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTest
"created_at": "2015-05-27T00:00:00Z",
"updated_at": "2015-05-27T00:00:00Z",
"raw_body": "Test body",
"rendered_body": "<p>Test body</p>",
"endorsed": False,
"endorsed_by": None,
"endorsed_by_label": None,
......@@ -1506,6 +1512,7 @@ class UpdateThreadTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTestC
"type": "discussion",
"title": "Original Title",
"raw_body": "Edited body",
"rendered_body": "<p>Edited body</p>",
"pinned": False,
"closed": False,
"following": False,
......@@ -1769,6 +1776,7 @@ class UpdateCommentTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTest
"created_at": "2015-06-03T00:00:00Z",
"updated_at": "2015-06-03T00:00:00Z",
"raw_body": "Edited body",
"rendered_body": "<p>Edited body</p>",
"endorsed": False,
"endorsed_by": None,
"endorsed_by_label": None,
......
"""
Tests for content rendering
"""
from unittest import TestCase
import ddt
from discussion_api.render import render_body
def _add_p_tags(raw_body):
"""Return raw_body surrounded by p tags"""
return "<p>{raw_body}</p>".format(raw_body=raw_body)
@ddt.ddt
class RenderBodyTest(TestCase):
"""Tests for render_body"""
def test_empty(self):
self.assertEqual(render_body(""), "")
@ddt.data(
("*", "em"),
("**", "strong"),
("`", "code"),
)
@ddt.unpack
def test_markdown_inline(self, delimiter, tag):
self.assertEqual(
render_body("{delimiter}some text{delimiter}".format(delimiter=delimiter)),
"<p><{tag}>some text</{tag}></p>".format(tag=tag)
)
@ddt.data(
"b", "blockquote", "code", "del", "dd", "dl", "dt", "em", "h1", "h2", "h3", "i", "kbd",
"li", "ol", "p", "pre", "s", "sup", "sub", "strong", "strike", "ul"
)
def test_openclose_tag(self, tag):
raw_body = "<{tag}>some text</{tag}>".format(tag=tag)
is_inline_tag = tag in ["b", "code", "del", "em", "i", "kbd", "s", "sup", "sub", "strong", "strike"]
rendered_body = _add_p_tags(raw_body) if is_inline_tag else raw_body
self.assertEqual(render_body(raw_body), rendered_body)
@ddt.data("br", "hr")
def test_selfclosing_tag(self, tag):
raw_body = "<{tag}>".format(tag=tag)
is_inline_tag = tag == "br"
rendered_body = _add_p_tags(raw_body) if is_inline_tag else raw_body
self.assertEqual(render_body(raw_body), rendered_body)
@ddt.data("http", "https", "ftp")
def test_allowed_a_tag(self, protocol):
raw_body = '<a href="{protocol}://foo" title="bar">baz</a>'.format(protocol=protocol)
self.assertEqual(render_body(raw_body), _add_p_tags(raw_body))
def test_disallowed_a_tag(self):
raw_body = '<a href="gopher://foo">link content</a>'
self.assertEqual(render_body(raw_body), "<p>link content</p>")
@ddt.data("http", "https")
def test_allowed_img_tag(self, protocol):
raw_body = '<img src="{protocol}://foo" width="111" height="222" alt="bar" title="baz">'.format(
protocol=protocol
)
self.assertEqual(render_body(raw_body), _add_p_tags(raw_body))
def test_disallowed_img_tag(self):
raw_body = '<img src="gopher://foo">'
self.assertEqual(render_body(raw_body), "<p></p>")
def test_script_tag(self):
raw_body = '<script type="text/javascript">alert("evil script");</script>'
self.assertEqual(render_body(raw_body), 'alert("evil script");')
@ddt.data("p", "br", "li", "hr") # img is tested above
def test_allowed_unpaired_tags(self, tag):
raw_body = "foo<{tag}>bar".format(tag=tag)
self.assertEqual(render_body(raw_body), _add_p_tags(raw_body))
def test_unpaired_start_tag(self):
self.assertEqual(render_body("foo<i>bar"), "<p>foobar</p>")
def test_unpaired_end_tag(self):
self.assertEqual(render_body("foo</i>bar"), "<p>foobar</p>")
def test_interleaved_tags(self):
self.assertEqual(
render_body("foo<i>bar<b>baz</i>quux</b>greg"),
"<p>foo<i>barbaz</i>quuxgreg</p>"
)
......@@ -184,6 +184,7 @@ class ThreadSerializerSerializationTest(SerializerTestMixin, ModuleStoreTestCase
"type": "discussion",
"title": "Test Title",
"raw_body": "Test body",
"rendered_body": "<p>Test body</p>",
"pinned": True,
"closed": False,
"following": False,
......@@ -281,6 +282,7 @@ class CommentSerializerTest(SerializerTestMixin, ModuleStoreTestCase):
"created_at": "2015-04-28T00:00:00Z",
"updated_at": "2015-04-28T11:11:11Z",
"raw_body": "Test body",
"rendered_body": "<p>Test body</p>",
"endorsed": False,
"endorsed_by": None,
"endorsed_by_label": None,
......
......@@ -191,6 +191,7 @@ class ThreadViewSetListTest(DiscussionAPIViewTestMixin, ModuleStoreTestCase):
"type": "discussion",
"title": "Test Title",
"raw_body": "Test body",
"rendered_body": "<p>Test body</p>",
"pinned": False,
"closed": False,
"following": False,
......@@ -280,6 +281,7 @@ class ThreadViewSetCreateTest(DiscussionAPIViewTestMixin, ModuleStoreTestCase):
"type": "discussion",
"title": "Test Title",
"raw_body": "Test body",
"rendered_body": "<p>Test body</p>",
"pinned": False,
"closed": False,
"following": False,
......@@ -369,6 +371,7 @@ class ThreadViewSetPartialUpdateTest(DiscussionAPIViewTestMixin, ModuleStoreTest
"type": "discussion",
"title": "Original Title",
"raw_body": "Edited body",
"rendered_body": "<p>Edited body</p>",
"pinned": False,
"closed": False,
"following": False,
......@@ -513,6 +516,7 @@ class CommentViewSetListTest(DiscussionAPIViewTestMixin, ModuleStoreTestCase):
"created_at": "2015-05-11T00:00:00Z",
"updated_at": "2015-05-11T11:11:11Z",
"raw_body": "Test body",
"rendered_body": "<p>Test body</p>",
"endorsed": False,
"endorsed_by": None,
"endorsed_by_label": None,
......@@ -665,6 +669,7 @@ class CommentViewSetCreateTest(DiscussionAPIViewTestMixin, ModuleStoreTestCase):
"created_at": "2015-05-27T00:00:00Z",
"updated_at": "2015-05-27T00:00:00Z",
"raw_body": "Test body",
"rendered_body": "<p>Test body</p>",
"endorsed": False,
"endorsed_by": None,
"endorsed_by_label": None,
......@@ -747,6 +752,7 @@ class CommentViewSetPartialUpdateTest(DiscussionAPIViewTestMixin, ModuleStoreTes
"created_at": "2015-06-03T00:00:00Z",
"updated_at": "2015-06-03T00:00:00Z",
"raw_body": "Edited body",
"rendered_body": "<p>Edited body</p>",
"endorsed": False,
"endorsed_by": None,
"endorsed_by_label": None,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment