Use html rather than xml parsing for course updates

1708dc40 · Don Mitchell · 7377f4bd · 1708dc40
Commit 1708dc40 authored Jan 08, 2013 by Don Mitchell
Hide whitespace changes
Inline Side-by-side

Showing with 14 additions and 14 deletions

cms/djangoapps/contentstore/course_info_model.py
+14 -14

No files found.
--- a/cms/djangoapps/contentstore/course_info_model.py
+++ b/cms/djangoapps/contentstore/course_info_model.py
 from xmodule.modulestore.exceptions import ItemNotFoundError
 from xmodule.modulestore import Location
 from xmodule.modulestore.django import modulestore
-from lxml import etree
+from lxml import html
 import re
 from django.http import HttpResponseBadRequest
 import logging
@@ -24,9 +24,9 @@ def get_course_updates(location):
    # purely to handle free formed updates not done via editor. Actually kills them, but at least doesn't break.
    try:
-        course_html_parsed = etree.fromstring(course_updates.definition['data'])
+        course_html_parsed = html.fromstring(course_updates.definition['data'])
-    except etree.XMLSyntaxError:
+    except:
-        course_html_parsed = etree.fromstring("<ol></ol>")
+        course_html_parsed = html.fromstring("<ol></ol>")
    # Confirm that root is <ol>, iterate over <li>, pull out <h2> subs and then rest of val
    course_upd_collection = []
@@ -39,7 +39,7 @@ def get_course_updates(location):
                # could enforce that update[0].tag == 'h2'
                content = update[0].tail
            else:
-                content = "\n".join([etree.tostring(ele) for ele in update[1:]])
+                content = "\n".join([html.tostring(ele) for ele in update[1:]])
            # make the id on the client be 1..len w/ 1 being the oldest and len being the newest
            course_upd_collection.append({"id" : location_base + "/" + str(len(course_html_parsed) - idx),
@@ -61,12 +61,12 @@ def update_course_updates(location, update, passed_id=None):
    # purely to handle free formed updates not done via editor. Actually kills them, but at least doesn't break.
    try:
-        course_html_parsed = etree.fromstring(course_updates.definition['data'])
+        course_html_parsed = html.fromstring(course_updates.definition['data'])
-    except etree.XMLSyntaxError:
+    except:
-        course_html_parsed = etree.fromstring("<ol></ol>")
+        course_html_parsed = html.fromstring("<ol></ol>")
    # No try/catch b/c failure generates an error back to client
-    new_html_parsed = etree.fromstring('<li><h2>' + update['date'] + '</h2>' + update['content'] + '</li>')
+    new_html_parsed = html.fromstring('<li><h2>' + update['date'] + '</h2>' + update['content'] + '</li>')
    # Confirm that root is <ol>, iterate over <li>, pull out <h2> subs and then rest of val
    if course_html_parsed.tag == 'ol':
@@ -82,7 +82,7 @@ def update_course_updates(location, update, passed_id=None):
            passed_id = course_updates.location.url() + "/" + str(idx)
        # update db record
-        course_updates.definition['data'] = etree.tostring(course_html_parsed)
+        course_updates.definition['data'] = html.tostring(course_html_parsed)
        modulestore('direct').update_item(location, course_updates.definition['data'])
        return {"id" : passed_id,
@@ -105,9 +105,9 @@ def delete_course_update(location, update, passed_id):
    # TODO use delete_blank_text parser throughout and cache as a static var in a class
    # purely to handle free formed updates not done via editor. Actually kills them, but at least doesn't break.
    try:
-        course_html_parsed = etree.fromstring(course_updates.definition['data'])
+        course_html_parsed = html.fromstring(course_updates.definition['data'])
-    except etree.XMLSyntaxError:
+    except:
-        course_html_parsed = etree.fromstring("<ol></ol>")
+        course_html_parsed = html.fromstring("<ol></ol>")
    if course_html_parsed.tag == 'ol':
        # ??? Should this use the id in the json or in the url or does it matter?
@@ -118,7 +118,7 @@ def delete_course_update(location, update, passed_id):
            course_html_parsed.remove(element_to_delete)
        # update db record
-        course_updates.definition['data'] = etree.tostring(course_html_parsed)
+        course_updates.definition['data'] = html.tostring(course_html_parsed)
        store = modulestore('direct')
        store.update_item(location, course_updates.definition['data'])