#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# File:   formula.py
# Date:   04-May-12 (creation)
# Author: I. Chuang <ichuang@mit.edu>
#
# Flexible python representation of a symbolic mathematical formula.
# Accepts Presentation MathML, Content MathML (and could also do OpenMath).
# Provides sympy representation.

import logging
import operator
import os
import re
import string
import sys
import unicodedata
import urllib
from copy import deepcopy
from functools import reduce
from xml.sax.saxutils import escape, unescape

import sympy
from sympy.printing.latex import LatexPrinter
from sympy.printing.str import StrPrinter
from sympy import latex, sympify
from sympy.physics.quantum.qubit import *
from sympy.physics.quantum.state import *
from lxml import etree
import requests

log = logging.getLogger(__name__)

log.warning("Dark code. Needs review before enabling in prod.")

os.environ['PYTHONIOENCODING'] = 'utf-8'

# Text type shim so the module parses and runs on both Python 2 and 3.
try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3
_string_types = (bytes, _text_type)

# Strips the "{http://...}" namespace prefix lxml puts in front of tag names.
_NAMESPACE_RE = re.compile('{http://[^}]+}')


def _gettag(element):
    """Return the tag name of an lxml element without its namespace prefix.

    Nodes whose ``tag`` is not a string (lxml uses a function object for
    comments and processing instructions) yield ``''`` so that callers simply
    treat them as "no known tag" instead of crashing inside ``re.sub``.
    """
    tag = element.tag
    if not isinstance(tag, _string_types):
        return ''
    return _NAMESPACE_RE.sub('', tag)


#-----------------------------------------------------------------------------

class dot(sympy.operations.LatticeOp):  # my dot product
    """Symbolic dot product, declared as a sympy ``LatticeOp``."""
    zero = sympy.Symbol('dotzero')
    identity = sympy.Symbol('dotidentity')


def _print_dot(self, expr):
    r"""Printer hook: render a ``dot`` node as ``{((a) \cdot (b))}``."""
    # Raw string: '\c' is not a valid escape sequence in a normal literal.
    return r'{((%s) \cdot (%s))}' % (expr.args[0], expr.args[1])


LatexPrinter._print_dot = _print_dot

#-----------------------------------------------------------------------------
# unit vectors (for 8.02)


def _print_hat(self, expr):
    r"""Printer hook: render ``hat(X)`` as ``\hat{x}`` (argument lowercased)."""
    return '\\hat{%s}' % str(expr.args[0]).lower()


LatexPrinter._print_hat = _print_hat
StrPrinter._print_hat = _print_hat

#-----------------------------------------------------------------------------
# helper routines


def to_latex(x):
    """Return the LaTeX form of ``x`` wrapped in ``[mathjax]...[/mathjax]<br>``.

    Returns ``''`` when ``x`` is None.
    """
    if x is None:
        return ''
    xs = latex(x)
    xs = xs.replace(r'\XI', 'XI')  # workaround for strange greek
    # startswith() rather than xs[0] so an empty latex string cannot raise.
    if xs.startswith('$'):
        return '[mathjax]%s[/mathjax]<br>' % (xs[1:-1])  # for sympy v6
    return '[mathjax]%s[/mathjax]<br>' % (xs)  # for sympy v7


def my_evalf(expr, chop=False):
    """Best-effort numeric evaluation.

    Calls ``evalf(chop=chop)`` on ``expr``, or on every item when ``expr`` is a
    list.  If evaluation fails for any reason, ``expr`` is returned unchanged.
    """
    if isinstance(expr, list):
        try:
            return [x.evalf(chop=chop) for x in expr]
        except Exception:  # deliberate best-effort: hand back the input
            return expr
    try:
        return expr.evalf(chop=chop)
    except Exception:  # deliberate best-effort: hand back the input
        return expr


#-----------------------------------------------------------------------------
# my version of sympify to import expression into sympy

def my_sympify(expr, normphase=False, matrix=False, abcsym=False,
               do_qubit=False, symtab=None):
    """Parse ``expr`` with sympy using a customised symbol table.

    Parameters:
        expr: expression (typically a string) handed to ``sympy.sympify``.
        normphase: if the result is a list whose first entry is a number,
            multiply every entry by ``exp(-I*arg(first))`` to remove the
            overall phase.
        matrix: if the result is a rectangular list of lists, return it as a
            ``sympy.Matrix``.
        abcsym: treat every ASCII lowercase letter not already in the symbol
            table as a real symbol.
        do_qubit: add ``qubit``, ``Ket``, ``dot`` and ``bit`` to the table.
        symtab: optional symbol table to use instead of the built-in one.
            It is copied, so the caller's dict is never modified.

    Returns:
        The sympified expression (possibly a list or a Matrix).

    NOTE(review): ``sympy.sympify`` evaluates strings with ``eval``; do not
    feed it untrusted input without sanitising it first.
    """
    if symtab:
        # Copy: the updates below must not leak into the caller's dict.
        varset = dict(symtab)
    else:
        varset = {
            'p': sympy.Symbol('p'),
            'g': sympy.Symbol('g'),
            'e': sympy.E,  # for exp
            'i': sympy.I,  # lowercase i is also sqrt(-1)
            'Q': sympy.Symbol('Q'),  # otherwise it is a sympy "ask key"
            'ZZ': sympy.Symbol('ZZ'),  # otherwise it is the PythonIntegerRing
            'XI': sympy.Symbol('XI'),  # otherwise it is the capital \XI
            'hat': sympy.Function('hat'),  # for unit vectors (8.02)
        }
    if do_qubit:  # turn qubit(...) into Qubit instance
        varset.update({
            'qubit': sympy.physics.quantum.qubit.Qubit,
            'Ket': sympy.physics.quantum.state.Ket,
            'dot': dot,
            'bit': sympy.Function('bit'),
        })
    if abcsym:
        # Consider all lowercase letters as real symbols, in the parsing.
        # ascii_lowercase is locale independent (string.lowercase is not, and
        # does not exist on Python 3).
        for letter in string.ascii_lowercase:
            if letter not in varset:  # exclude those already done
                varset[letter] = sympy.Symbol(letter, real=True)

    sexpr = sympify(expr, locals=varset)

    # Remove overall phase if sexpr is a (non-empty) list.
    if normphase and isinstance(sexpr, list) and sexpr:
        if sexpr[0].is_number:
            ophase = sympy.sympify('exp(-I*arg(%s))' % sexpr[0])
            sexpr = [sympy.Mul(x, ophase) for x in sexpr]

    def to_matrix(x):
        """Return Matrix(x) if x is a non-empty rectangular list of lists."""
        if not isinstance(x, list) or not x:
            return x
        if not all(isinstance(row, list) for row in x):
            return x
        rdim = len(x[0])
        if any(len(row) != rdim for row in x):
            return x
        return sympy.Matrix(x)

    if matrix:
        sexpr = to_matrix(sexpr)
    return sexpr


#-----------------------------------------------------------------------------
# Content MathML operator helpers (hoisted to module level so they are built
# once instead of on every recursive make_sympy call)

def _op_plus(*args):
    """Left-to-right sum of ``args``."""
    return reduce(operator.add, args)


def _op_times(*args):
    """Left-to-right product of ``args``."""
    return reduce(operator.mul, args)


def _op_minus(*args):
    """Unary negation (one argument) or binary subtraction (two arguments)."""
    if len(args) == 1:
        return -args[0]
    if not len(args) == 2:
        raise Exception('minus given wrong number of arguments!')
    return args[0] - args[1]


# Maps Content MathML operator tag names to the callables implementing them.
_OPDICT = {
    'plus': _op_plus,
    # operator.div only exists on Python 2; keep it there, use truediv on 3.
    'divide': getattr(operator, 'div', operator.truediv),
    'times': _op_times,
    'minus': _op_minus,
    'root': sympy.sqrt,
    'power': sympy.Pow,
    'sin': sympy.sin,
    'cos': sympy.cos,
}


def _parse_presentation_symbol(xml):
    """Flatten <msub>, <msup>, <mi> and <mn> into a plain symbol name.

    Subscripts are joined with ``_`` and superscripts with ``^``.
    Raises Exception for any other tag.
    """
    tag = _gettag(xml)
    if tag == 'mn' or tag == 'mi':
        return xml.text
    if tag == 'msub':
        return '_'.join([_parse_presentation_symbol(y) for y in xml])
    if tag == 'msup':
        return '^'.join([_parse_presentation_symbol(y) for y in xml])
    raise Exception('[parsePresentationMathMLSymbol] unknown tag %s' % tag)


#-----------------------------------------------------------------------------
# class for symbolic mathematical formulas

class formula(object):
    """
    Representation of a mathematical formula object.  Accepts mathml math
    expression for constructing, and can produce sympy translation.  The
    formula may or may not include an assignment (=).
    """

    def __init__(self, expr, asciimath='', options=None):
        """Store the expression text and reset the cached conversions.

        Parameters:
            expr: formula text (MathML or a plain expression); it is stripped.
            asciimath: the ASCIIMath input, forwarded to the converter.
            options: optional container/string tested with ``in``; when it
                contains ``'imaginary'`` the symbol ``i`` becomes sqrt(-1).
        """
        self.expr = expr.strip()
        self.asciimath = asciimath
        self.the_cmathml = None
        self.the_sympy = None
        self.options = options

    def is_presentation_mathml(self):
        """True if the expression text contains an ``<mstyle`` element."""
        return '<mstyle' in self.expr

    def is_mathml(self):
        """True if the expression text contains a ``<math `` element."""
        return '<math ' in self.expr

    def fix_greek_in_mathml(self, xml):
        """Spell out Greek letters in <mi>/<ci> elements, recursively.

        A single Greek character is replaced by the last word of its Unicode
        name (lowercased for SMALL letters), e.g. the character named
        "GREEK SMALL LETTER BETA" becomes ``beta``.  The tree is modified in
        place and also returned.
        """
        for k in xml:
            tag = _gettag(k)
            # Skip elements without text; converting None to text would write
            # the literal string 'None' into the document.
            if (tag == 'mi' or tag == 'ci') and k.text is not None:
                usym = _text_type(k.text)
                try:
                    udata = unicodedata.name(usym)
                except (TypeError, ValueError):
                    # not a single character, or a character without a name
                    udata = None
                if udata and 'GREEK' in udata:  # eg "GREEK SMALL LETTER BETA"
                    usym = udata.split(' ')[-1]
                    if 'SMALL' in udata:
                        usym = usym.lower()
                k.text = usym
            self.fix_greek_in_mathml(k)
        return xml

    def preprocess_pmathml(self, xml):
        r"""
        Pre-process presentation MathML from ASCIIMathML to make it more
        acceptable for SnuggleTeX, and also to accommodate some sympy
        conventions (eg hat(i) for \hat{i}).

        ``xml`` may be MathML text or an already parsed lxml element.  The
        processed tree is stored in ``self.xml`` and returned.
        """
        if isinstance(xml, _string_types):
            xml = etree.fromstring(xml)  # TODO: wrap in try

        # convert greek utf letters to greek spelled out in ascii
        xml = self.fix_greek_in_mathml(xml)

        # f and g are processed as functions by asciimathml, eg "f-2" turns
        # into "<mrow><mi>f</mi><mo>-</mo></mrow><mn>2</mn>".
        # This is really terrible for turning into cmathml.  Undo this here.
        def fix_pmathml(parent):
            # Iterate over a snapshot: the loop inserts/removes children.
            for k in list(parent):
                if (_gettag(k) == 'mrow' and len(k) == 2
                        and _gettag(k[0]) == 'mi' and k[0].text in ['f', 'g']
                        and _gettag(k[1]) == 'mo'):
                    idx = parent.index(k)
                    # drop the <mrow> container, keep its two children
                    parent.insert(idx, deepcopy(k[0]))
                    parent.insert(idx + 1, deepcopy(k[1]))
                    parent.remove(k)
                fix_pmathml(k)

        fix_pmathml(xml)

        # hat i is turned into <mover><mi>i</mi><mo>^</mo></mover> ;
        # mangle this into <mi>hat(i)</mi>.
        # hat i is also sometimes turned into
        # <mover><mrow> <mi>j</mi> </mrow><mo>^</mo></mover>
        def fix_hat(parent):
            for k in list(parent):
                # Compare the text directly: str() on a non-ASCII operator
                # raises UnicodeEncodeError on Python 2.
                if (_gettag(k) == 'mover' and len(k) == 2
                        and _gettag(k[1]) == 'mo' and k[1].text == '^'):
                    target = None
                    if _gettag(k[0]) == 'mi':
                        target = k[0]
                    elif (_gettag(k[0]) == 'mrow' and len(k[0]) > 0
                          and _gettag(k[0][0]) == 'mi'):
                        target = k[0][0]
                    if target is not None:
                        newk = etree.Element('mi')
                        newk.text = 'hat(%s)' % target.text
                        parent.replace(k, newk)
                fix_hat(k)

        fix_hat(xml)

        self.xml = xml
        return self.xml

    def get_content_mathml(self):
        """Return (and cache) the Content MathML for this formula.

        The presentation MathML is pre-processed, stored serialised in
        ``self.the_pmathml`` and sent to ``GetContentMathML``.  If the
        pre-processing fails, an HTML error string is returned instead.
        """
        if self.the_cmathml:
            return self.the_cmathml

        # pre-process the presentation mathml before sending it to snuggletex
        # to convert to content mathml
        try:
            xml = self.preprocess_pmathml(self.expr)
        except Exception:
            log.exception("Cannot process pmathml")
            return "<html>Error! Cannot process pmathml</html>"
        pmathml = etree.tostring(xml, pretty_print=True)
        self.the_pmathml = pmathml

        # convert to cmathml
        self.the_cmathml = self.GetContentMathML(self.asciimath, pmathml)
        return self.the_cmathml

    cmathml = property(get_content_mathml, None, None,
                       'content MathML representation')

    def make_sympy(self, xml=None):
        """
        Return sympy expression for the math formula.
        The math formula is converted to Content MathML then that is parsed.

        Called without ``xml`` it converts the whole formula and caches the
        result in ``self.the_sympy``; called with an lxml element it converts
        just that Content MathML node (used for the recursion).

        Raises Exception for unknown tags/operators or when an operator
        cannot be applied to its arguments.

        NOTE(review): numbers and ``hat(...)`` symbols go through
        ``sympy.sympify``, which uses ``eval`` on its string input.
        """
        if xml is None:  # root
            if self.the_sympy is not None:
                return self.the_sympy
            if not self.is_mathml():
                return my_sympify(self.expr)
            if self.is_presentation_mathml():
                cmml = None  # keeps the error message below well-defined
                try:
                    cmml = self.cmathml
                    xml = etree.fromstring(cmml)
                except Exception as err:
                    raise Exception(
                        'Err %s while converting cmathml to xml; cmml=%s'
                        % (err, cmml))
            else:
                xml = etree.fromstring(self.expr)
            xml = self.fix_greek_in_mathml(xml)
            self.the_sympy = self.make_sympy(xml[0])
            return self.the_sympy

        # parser tree for Content MathML
        tag = _gettag(xml)

        # first do compound objects

        if tag == 'apply':  # apply operator
            opstr = _gettag(xml[0])
            if opstr not in _OPDICT:
                raise Exception('[formula]: unknown operator tag %s' % (opstr))
            op = _OPDICT[opstr]
            args = [self.make_sympy(x) for x in xml[1:]]
            try:
                return op(*args)
            except Exception as err:
                # keep the failing call around for post-mortem inspection
                self.args = args
                self.op = op
                raise Exception(
                    '[formula] error=%s failed to apply %s to args=%s'
                    % (err, opstr, args))

        elif tag == 'list':  # square bracket list
            if len(xml) > 0 and _gettag(xml[0]) == 'matrix':
                return self.make_sympy(xml[0])
            return [self.make_sympy(x) for x in xml]

        elif tag == 'matrix':
            return sympy.Matrix([self.make_sympy(x) for x in xml])

        elif tag == 'vector':
            return [self.make_sympy(x) for x in xml]

        # atoms are below

        elif tag == 'cn':  # number
            return sympy.sympify(xml.text)

        elif tag == 'ci':  # variable (symbol)
            if len(xml) > 0 and _gettag(xml[0]) in ('msub', 'msup'):
                # subscript or superscript
                usym = _parse_presentation_symbol(xml[0])
                return sympy.Symbol(str(usym))
            usym = _text_type(xml.text)
            if 'hat' in usym:
                return my_sympify(usym)
            if usym == 'i':
                log.debug("options=%s", self.options)
                # options defaults to None, which does not support "in"
                if 'imaginary' in (self.options or ''):  # i = sqrt(-1)
                    return sympy.I
            return sympy.Symbol(str(usym))

        else:  # unknown tag
            raise Exception('[formula] unknown tag %s' % tag)

    sympy = property(make_sympy, None, None, 'sympy representation')

    def GetContentMathML(self, asciimath, mathml):
        """POST the formula to the conversion service; return Content MathML.

        The response text is scanned line by line: lines between one
        containing 'conversion to Content MathML' and one containing
        '<h3>Maxima Input Form</h3>' are collected, the first two collected
        lines are dropped, and the rest is unescaped and wrapped in a
        ``<math>`` element.

        NOTE(review): URL is empty in this copy of the file, so the request
        cannot succeed until a real endpoint is configured.  The request also
        has no timeout and the HTTP status is not checked.
        """
        URL = ''

        payload = {
            'asciiMathInput': asciimath,
            'asciiMathML': mathml,
        }
        headers = {'User-Agent': "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv: Gecko/20080311 Firefox/"}
        r = requests.post(URL, data=payload, headers=headers)
        r.encoding = 'utf-8'
        ret = r.text

        collecting = False
        cmathml = []
        for line in ret.split('\n'):
            if 'conversion to Content MathML' in line:
                collecting = True
                continue
            if collecting:
                if '<h3>Maxima Input Form</h3>' in line:
                    collecting = False
                    continue
                cmathml.append(line)
        cmathml = '\n'.join(cmathml[2:])
        cmathml = ('<math xmlns="http://www.w3.org/1998/Math/MathML">\n'
                   + unescape(cmathml) + '\n</math>')
        return cmathml


#-----------------------------------------------------------------------------

def test1():
    """Content MathML fixture: 1 + 2."""
    xmlstr = '''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <apply>
    <plus/>
    <cn>1</cn>
    <cn>2</cn>
  </apply>
</math>
    '''
    return formula(xmlstr)


def test2():
    """Content MathML fixture: 1 + 2*alpha (Greek letter as a character)."""
    xmlstr = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <apply>
    <plus/>
    <cn>1</cn>
    <apply>
      <times/>
      <cn>2</cn>
      <ci>α</ci>
    </apply>
  </apply>
</math>
    '''
    return formula(xmlstr)


def test3():
    """Content MathML fixture: 1 / (2 + gamma)."""
    xmlstr = '''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <apply>
    <divide/>
    <cn>1</cn>
    <apply>
      <plus/>
      <cn>2</cn>
      <ci>γ</ci>
    </apply>
  </apply>
</math>
    '''
    return formula(xmlstr)


def test4():
    """Presentation MathML fixture: 1 + 2/alpha."""
    xmlstr = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <mstyle displaystyle="true">
    <mn>1</mn>
    <mo>+</mo>
    <mfrac>
      <mn>2</mn>
      <mi>α</mi>
    </mfrac>
  </mstyle>
</math>
    '''
    return formula(xmlstr)


def test5():  # sum of two matrices
    """Presentation MathML fixture: cos(theta)*[[1,0],[0,1]] + [[0,1],[1,0]]."""
    xmlstr = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <mstyle displaystyle="true">
    <mrow>
      <mi>cos</mi>
      <mrow>
        <mo>(</mo>
        <mi>θ</mi>
        <mo>)</mo>
      </mrow>
    </mrow>
    <mo>⋅</mo>
    <mrow>
      <mo>[</mo>
      <mtable>
        <mtr>
          <mtd>
            <mn>1</mn>
          </mtd>
          <mtd>
            <mn>0</mn>
          </mtd>
        </mtr>
        <mtr>
          <mtd>
            <mn>0</mn>
          </mtd>
          <mtd>
            <mn>1</mn>
          </mtd>
        </mtr>
      </mtable>
      <mo>]</mo>
    </mrow>
    <mo>+</mo>
    <mrow>
      <mo>[</mo>
      <mtable>
        <mtr>
          <mtd>
            <mn>0</mn>
          </mtd>
          <mtd>
            <mn>1</mn>
          </mtd>
        </mtr>
        <mtr>
          <mtd>
            <mn>1</mn>
          </mtd>
          <mtd>
            <mn>0</mn>
          </mtd>
        </mtr>
      </mtable>
      <mo>]</mo>
    </mrow>
  </mstyle>
</math>
    '''
    return formula(xmlstr)


def test6():  # imaginary numbers
    """Presentation MathML fixture: 1 + i, with i treated as sqrt(-1)."""
    xmlstr = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <mstyle displaystyle="true">
    <mn>1</mn>
    <mo>+</mo>
    <mi>i</mi>
  </mstyle>
</math>
    '''
    return formula(xmlstr, options='imaginaryi')