Commit d65985b4 by Jay Zoldak

Use beautiful soup to massage the HTML for easier comparison.

Strip off the data-id attribute for HTML diff of courseware

Strip data-id attributes so they are not compared in the HTML
parent 8934f54d
......@@ -17,6 +17,9 @@ def check_for_errors():
@step(u'I verify all the content of each course')
def i_verify_all_the_content_of_each_course(step):
all_possible_courses = get_courses()
logger.debug('Courses found:')
for c in all_possible_courses:
logger.debug(c.id)
ids = [c.id for c in all_possible_courses]
# Get a list of all the registered courses
......@@ -110,7 +113,7 @@ def browse_course(course_id):
#logger.debug(msg)
# Save the HTML to a file for later comparison
world.save_the_html()
world.save_the_course_content('/tmp/%s' % course_id)
assert num_tabs == num_rendered_tabs, msg
......@@ -146,4 +149,4 @@ def validate_course(current_course, ids):
try:
ids.index(current_course)
except:
assert False, "invalid course id"
assert False, "invalid course id %s" % current_course
......@@ -6,9 +6,12 @@ from lettuce.django import django_url
from django.conf import settings
from django.contrib.auth.models import User
from student.models import CourseEnrollment
import time
from urllib import quote_plus
from nose.tools import assert_equals
from bs4 import BeautifulSoup
import time
import re
import os.path
from logging import getLogger
logger = getLogger(__name__)
......@@ -109,6 +112,48 @@ def save_the_html(path='/tmp'):
f.write(html)
f.close
@world.absorb
def save_the_course_content(path='/tmp'):
    """
    Save a normalized copy of the current browser page under `path`.

    The page HTML is parsed with BeautifulSoup, every `data-id` attribute
    is removed (both real tag attributes and occurrences embedded in text
    nodes), and the result is prettified so that each tag sits on its own
    line for easier diffing.

    The output file is named after the URL-quoted portion of the current
    browser URL that follows 'courseware/', with an '.html' suffix.

    `path` is the directory to write into; it is created if it does not
    already exist.
    """
    html = world.browser.html.encode('ascii', 'ignore')
    soup = BeautifulSoup(html)

    # NOTE: the <head> is currently kept in the output. Dropping it
    # (soup.head.decompose()) so only the body is compared was left
    # disabled in the original code.

    # for now, remove the data-id attributes, because they are
    # causing mismatches between cms-master and master
    for tag in soup.find_all(attrs={'data-id': re.compile('.*')}):
        del tag['data-id']

    # we also need to remove them from unrendered problems,
    # where they are contained in the text of divs instead of
    # in attributes of tags.
    # Order matters: first handle occurrences followed by a space
    # (another attribute follows), then the remaining ones where
    # data-id was the last attribute.
    text_patterns = (
        re.compile(' data-id=".*?" '),
        re.compile(' data-id=".*?"'),
    )
    for pattern in text_patterns:
        for node in soup.find_all(text=pattern):
            original_text = unicode(node.string)
            node.string.replace_with(pattern.sub(' ', original_text))

    # prettify the html so it will compare better, with
    # each HTML tag on its own line
    output = soup.prettify()

    # use string slicing to grab everything after 'courseware/' in the URL
    # (11 == len('courseware/'))
    url = world.browser.url
    section_url = url[url.find('courseware/') + 11:]

    if not os.path.exists(path):
        os.makedirs(path)

    filename = '%s.html' % (quote_plus(section_url))
    # The context manager guarantees the file is flushed and closed;
    # the previous code referenced `f.close` without calling it.
    with open(os.path.join(path, filename), 'w') as f:
        f.write(output)
########### DEBUGGING ##############
@step(u'I save a screenshot to "(.*)"')
def save_screenshot_to(step, filename):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment