Commit de06dda3 by Mike Chen

added capaxmlconverter app to convert xml into json format

parent 7f6f04df
###
### One-off script for importing courseware from XML format
###
from django.core.management.base import BaseCommand, CommandError
import json
from capaconverter import CapaXMLConverter
class Command(BaseCommand):
    """Management command that prints CAPA problem XML files as JSON.

    Each file is run through ``CapaXMLConverter.convert_xml_file`` and the
    result is printed with two-space indentation.
    """

    help = (
        'Convert CAPA problem XML files to JSON and print the result. '
        'Pass one or more XML file paths; with no path a sample problem '
        'file is converted.'
    )

    # Fallback used when no path is supplied. This is the developer-local
    # sample file the command was originally hard-wired to, kept so that a
    # bare invocation behaves exactly as before.
    DEFAULT_XML_FILE = "/Users/ccp/code/mitx_all/data/6.002x/problems/HW3ID1.xml"

    def handle(self, *args, **options):
        """Convert every XML file named in ``args`` and print it as JSON.

        ``args`` holds zero or more XML file paths; when it is empty,
        ``DEFAULT_XML_FILE`` is converted instead. ``options`` is accepted
        for the command interface but not used.
        """
        self.converter = CapaXMLConverter()
        for xml_file in (args or (self.DEFAULT_XML_FILE,)):
            # A single parenthesised argument prints identically whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print(json.dumps(self.converter.convert_xml_file(xml_file), indent=2))
#!/usr/bin/env python
from django.utils import unittest
import logging
import os.path
from __init__ import CapaXMLConverter
import json
import sys
from lxml import etree
class CapaXMLConverterTestCase(unittest.TestCase):
    """Tests for CapaXMLConverter using inline snippets and sample problem files."""

    def setUp(self):
        """Build a converter with DEBUG logging and collect the sample XML files.

        The samples are every ``*.xml`` file in the ``problems`` folder that
        sits next to this test module.
        """
        self.converter = CapaXMLConverter()
        self.converter.logger.setLevel(logging.DEBUG)
        self.problems_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), "problems")
        logging.info("Testing problems from folder %s", self.problems_folder)
        # Build a real list: len() is taken below, which a lazy map/filter
        # object would not support.
        self.problem_files = [
            os.path.join(self.problems_folder, filename)
            for filename in os.listdir(self.problems_folder)
            if filename.endswith(".xml")
        ]
        logging.info("Found %d lon-CAPA XML files. ", len(self.problem_files))

    def test_center(self):
        """Check how ``picky_center_element_format`` treats ``<center>`` contents."""
        # A lone image is reported as a single image element.
        xml = '<center><img src="/aa" /></center>'
        elements = self.converter.picky_center_element_format(etree.fromstring(xml))
        self.assertEqual(elements, [{'url': '/aa', '_tag_': 'img', 'type': 'image'}])

        # Text trailing the image becomes the image's title.
        xml = '<center><img src="/aa" />title</center>'
        elements = self.converter.picky_center_element_format(etree.fromstring(xml))
        self.assertEqual(elements, [{'url': '/aa', '_tag_': 'img', 'type': 'image', 'title': 'title'}])

        # Any further child element makes the helper return None.
        xml = '<center><img src="/aa" />title<input /></center>'
        elements = self.converter.picky_center_element_format(etree.fromstring(xml))
        self.assertIsNone(elements)

    def test_iterator(self):
        """``iterate_element`` yields seven items for the sample ``<text>`` block."""
        # Raw string: the LaTeX-style "\(" / "\Omega" sequences are not valid
        # Python escapes, so the value is unchanged while the literal stays
        # warning-free on newer interpreters.
        xml = r"""<text>In this problem we will investigate a fun idea called "duality."
<br />
Consider the series circuit in the diagram shown.
<center>
<img src="/static/images/circuits/duality.gif" />
</center>
We are given device parameters \(V=$V\)V, \(R_1=$R1\Omega\), and \(R_2=$R2\Omega\).
All of the unknown voltages and currents are labeled in associated reference
directions. Solve this circuit for the unknowns and enter them into
the boxes given.
<br />
The value (in Volts) of \(v_1\) is: </text>"""
        elements = list(self.converter.iterate_element(etree.fromstring(xml)))
        self.assertEqual(7, len(elements))

    def test_xmls(self):
        """Convert every sample problem and write the result next to it as ``.json``.

        A conversion error is re-raised after printing the offending file name
        so the failing sample is easy to identify.
        """
        for filepath in self.problem_files:
            try:
                out = self.converter.convert_xml_file(filepath)
            except Exception:
                print("Failed to convert file %s" % filepath)
                raise
            # Swap only the extension; str.replace would also rewrite any
            # ".xml" occurring earlier in the path (e.g. in a folder name).
            json_path = os.path.splitext(filepath)[0] + ".json"
            # The context manager closes the file even if json.dump raises.
            with open(json_path, "w") as f:
                json.dump(out, f, indent=2)
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
\ No newline at end of file
###
### One-off script for importing courseware from XML format
###
from django.core.management.base import BaseCommand, CommandError
import json
from lxml import etree
class CapaXMLConverter(object):
    """Convert a CAPA problem XML file into a JSON-serialisable dict."""

    # Response-group tag -> (group 'type', key of the group's item list,
    # 'type' given to each item built from a <choice> inside that group).
    _GROUP_TAGS = {
        'multiplechoiceresponse': ('multiple_choice', 'choices', 'choice'),
        'truefalseresponse': ('true_false', 'statements', 'statement'),
    }

    def convert_from_xml(self, filename):
        """Parse ``filename`` and return its content as a dict.

        The result has the shape ``{'scripts': [], 'contents': [...]}``.
        ``contents`` receives, in document order:

        * ``{'type': 'paragraph', 'text': ...}`` for text closed by an
          ``<endouttext>`` outside any choice;
        * ``{'type': 'multiple_choice', 'choices': [...]}`` or
          ``{'type': 'true_false', 'statements': [...]}`` for each response
          group, whose items are ``{'type', 'text', 'correct'}`` dicts built
          from the group's ``<choice>`` elements.

        ``scripts`` is always returned empty by this method. Text that is
        never followed by an ``<endouttext>`` is not emitted.

        A ``<choice>`` that appears outside a recognised response group is
        not treated as a choice; its text is emitted as an ordinary
        paragraph instead of failing with a ``KeyError``/``TypeError``.
        """
        out = {'scripts': [], 'contents': []}
        text = ''          # text gathered since the last <endouttext>
        group = None       # response group currently open, if any
        items_key = None   # key of ``group`` that holds its items
        item_type = None   # 'type' assigned to items of ``group``
        response = None    # choice/statement currently open, if any

        # Binary mode: the XML parser does its own decoding of the bytes.
        with open(filename, "rb") as f:
            for event, element in etree.iterparse(f, events=("start", "end")):
                tag = element.tag

                if event == "start":
                    # <br> contributes a paragraph break; any other element
                    # contributes its leading text.
                    # NOTE(review): this relies on element.text already being
                    # populated when the "start" event is consumed - confirm
                    # against the parser's documentation for large inputs.
                    if tag == "br":
                        text += '\n\n'
                    elif element.text:
                        text += element.text

                    if tag in self._GROUP_TAGS:
                        group_type, items_key, item_type = self._GROUP_TAGS[tag]
                        group = {'type': group_type, items_key: []}
                    elif tag == "choice" and group is not None:
                        response = {
                            'type': item_type,
                            'text': '',
                            'correct': element.get('correct') == "true",
                        }

                elif event == "end":
                    # The tail is added before the tag is handled, so text
                    # directly following <endouttext> lands in the chunk
                    # being flushed (and is then subject to strip()).
                    if element.tail:
                        text += element.tail

                    if tag == "endouttext":
                        if response is not None:
                            response['text'] += text.strip()
                        else:
                            out['contents'].append({'type': 'paragraph', 'text': text.strip()})
                        text = ''
                    elif tag in self._GROUP_TAGS:
                        out['contents'].append(group)
                        group = None
                    elif tag == "choice":
                        if group is not None and response is not None:
                            group[items_key].append(response)
                        response = None
        return out
class Command(BaseCommand):
    """Management command that prints CAPA problem XML files as JSON.

    Each file is run through ``CapaXMLConverter.convert_from_xml`` and the
    result is printed with two-space indentation.
    """

    help = (
        'Convert CAPA problem XML files to JSON and print the result. '
        'Pass one or more XML file paths; with no path a sample problem '
        'file is converted.'
    )

    # Fallback used when no path is supplied. This is the developer-local
    # sample file the command was originally hard-wired to, kept so that a
    # bare invocation behaves exactly as before.
    DEFAULT_XML_FILE = "/Users/ccp/code/mitx_all/data/6.002x/problems/multichoice.xml"

    def handle(self, *args, **options):
        """Convert every XML file named in ``args`` and print it as JSON.

        ``args`` holds zero or more XML file paths; when it is empty,
        ``DEFAULT_XML_FILE`` is converted instead. ``options`` is accepted
        for the command interface but not used.
        """
        self.converter = CapaXMLConverter()
        for xml_file in (args or (self.DEFAULT_XML_FILE,)):
            # A single parenthesised argument prints identically whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print(json.dumps(self.converter.convert_from_xml(xml_file), indent=2))
...@@ -311,6 +311,7 @@ INSTALLED_APPS = ( ...@@ -311,6 +311,7 @@ INSTALLED_APPS = (
'contentstore', 'contentstore',
'github_sync', 'github_sync',
'student', # misleading name due to sharing with lms 'student', # misleading name due to sharing with lms
'capaconverter',
# For asset pipelining # For asset pipelining
'pipeline', 'pipeline',
......
- scripts: scripts:
- type: 'script' - type: 'script'
language: 'python' language: 'python'
code: 'print "Hello world!"' code: 'print "Hello world!"'
- contents: contents:
- type: 'text' - type: 'text'
text: 'This is a sample paragraph. The linebreaks here should matter..?' text: 'This is a sample paragraph. The linebreaks here matter.\n\n'
- type: 'linebreaks' - type: 'multiple_choice'
count: 2
- type: 'multiple_choice'
randomize: true randomize: true
choices: choices:
- type: 'choice' - type: 'choice'
...@@ -19,7 +17,7 @@ ...@@ -19,7 +17,7 @@
- type: 'choice' - type: 'choice'
text: 'Choice C' text: 'Choice C'
correct: true correct: true
- type: 'true_false' - type: 'true_false'
statements: statements:
- type: 'statement' - type: 'statement'
text: 'Sun revolves around Earth. ' text: 'Sun revolves around Earth. '
...@@ -27,12 +25,12 @@ ...@@ -27,12 +25,12 @@
- type: 'statement' - type: 'statement'
text: 'This is a true statement. ' text: 'This is a true statement. '
correct: true correct: true
- type: 'string' - type: 'string'
answer: 'banana' answer: 'banana'
- type: 'numerical' - type: 'numerical'
tolerance: '5%' tolerance: '5%'
answer: 6 answer: 6
- type: 'formula' - type: 'formula'
answer: '-A*(RF/RS)' answer: '-A*(RF/RS)'
samples: 10 samples: 10
tolerance: '5%' tolerance: '5%'
...@@ -52,7 +50,7 @@ ...@@ -52,7 +50,7 @@
- type: 'variable' - type: 'variable'
symbol: 'T' symbol: 'T'
range: '1-3' range: '1-3'
- type: 'custom' - type: 'custom'
script: script:
- type: 'script' - type: 'script'
language: 'python' language: 'python'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment