#!/usr/bin/env python2 # -*- coding: utf-8 -*- # I, Danny Milosavljevic, place this file in the public domain. # FIXME fix bug with "-∇x" import sys import scanners import StringIO import exceptions import data #functions = ["ln", "log", "exp", "cos", "sin", "tan", "cot", "csc", "sec", "cosh", "sinh"] # TODO inverse. # TODO complex conjugate and other suffix operators. # TODO right-associative operators (^). unary_prefix_operators = set([u"¬", u"√", u"Σ", u"∫", u"∇", u"∂"]) # + functions) unary_suffix_operators = set([u"!"]) # TODO [u"¹", u"²", u"³", u"⁴"] opening_braces = set([u"(", u"[", u"{"]) closing_braces = set([u")", u"]", u"}"]) # TODO ∫ has a LOT of arguments: the differential variable, the bounds # so in the end it has ∫(dx,(a,b),body) ; dx can be considered part of the body. # likewise for sum, product, ... which even have an iteration variable. # TODO support "," for tuples. # note: don't put unary and binary operators in the same line: operator_precedence = [ #["("], ["¬"], # unary. FIXME use properly. ["√"], # questionable... ["^"], ["!"], # questionable.... ["∇"], # very questionable... # TODO differential ["1/"], # reciprocal (internal use, that's why the operator looks so stupid). ["⨯"], # verified to come before "∙" by Ewa Weinmüller. ["∙"], # very questionable... ["⋅", "/"], # from here upwards: monom. ["*"], # convolution... very questionable... maybe use "∗" (U+2217) instead. #["%"], # ??? ["Σ"], # questionable... ["∫"], # questionable... ["0-"], # inverse addition element (internal use, that's why the operator looks so stupid). ["+", "-"], ["=", "≠"], ["≤", "<", "≥", ">"], ["&"], # precedence like SQL92, Python. Unlike half of Ruby. # TODO XOR ["|"], # TODO | abs neg ² etc. # TODO case selection. # TODO fourier transform operator? # TODO Laplace transform (scripted L) # TODO matrix and vector literals. # TODO trigonometric functions. # TODO quantors, ...: # extra quantors: ∄ # TODO compositing operator # precedence levels: # ¬, ∀, ∃ # |, ∨ or # &, ∧ and # <-, ⇒ left, right implication # ⇔(material equivalence), ≡(iff), ↔(propositional logic) # TODO: ⊕ xor: (A ∨ B) ∧ ¬(A ∧ B) # ⊤ tautology. # ⊥ contradiction. # definition: := ≡ :⇔ # ⊢ inference; A → B ⊢ ¬B → ¬A # TODO explicit reciprocation. # TODO floor, ceiling. # TODO fractional part, sign. # TODO complex conjugate. # TODO norm. ] term_operator_precedence_index = [index for index, entry in enumerate(operator_precedence) if entry[0] == "⋅"][0] operator_chars = set() for entry in operator_precedence: for x_entry in entry: operator_chars.add(x_entry.decode("utf-8")) def create_inversion(operator, operand): if operator == "/": return data.Reciprocation(operand) # FIXME elif operator == "√": #return data. elif operator == "-": return data.Negation(operand) elif operator == u"≠": return data.Notation(operand) #elif operator == "<": # TODO reduce these? #return data.LessRelation(operand) else: print "OP", operator raise scanners.ParseError("unknown operator '%s'" % operator) operator_class = { u"^": data.Power, u"⨯": data.CrossProduct, u"∙": data.InnerProduct, u"⋅": data.Product, u"+": data.Addition, u"=": data.Equation, u"&": data.Andation, u"|": data.Oration, u"¬": data.Notation, u"∫": data.Antiderivative, u"∇": data.Gradient, # VectorDifferentiation, u"≤": data.LessOrEqualRelation, u"<": data.LessRelation, u"≠": data.NotEqualRelation, u">": data.GreaterRelation, u"≥": data.GreaterOrEqualRelation, u"*": data.Convolution, u"!": data.Factorial, } def create_operation(operator, operands): global operator_class global unary_prefix_operators #operator = operator.encode("utf-8") if operator == u"√": return data.Power([operands[0], data.Reciprocation(2)]) elif operator in unary_prefix_operators: assert(len(operands) == 1) return operator_class[operator](operands[0]) elif operator in unary_suffix_operators: assert(len(operands) == 1) return operator_class[operator](operands[0]) elif operator in operator_class: return operator_class[operator](operands) else: print >>sys.stderr, "error: unknown operator:", operator raise scanners.ParseError("unknown operator '%s'" % operator) class ExpressionParser(scanners.Scanner): def parse(self): self.consume() return self.expression() def match_operator(self, possible_operators): if self.input is not None and self.input in possible_operators: return self.consume() #self.err("|".join(possible_operators), result) return None def symbol(self): # TODO allow leading delta. # TODO allow trailing "'"s. import StringIO IO = StringIO.StringIO() first_char = self.input if not (first_char and first_char not in operator_chars and first_char not in opening_braces and first_char not in closing_braces): raise scanners.ParseError("expected , but got '%s'" % first_char.encode("utf-8")) while self.input and self.input not in operator_chars and self.input not in opening_braces and self.input not in closing_braces and not scanners.whitespace_P(self.input) and ord(self.input) > 32: # hardcoded UTF-8, so sue me IO.write(self.consume()) while self.input and scanners.combining_P(self.input): IO.write(self.consume()) while scanners.whitespace_P(self.input): self.consume() return IO.getvalue().encode("utf-8") def expression(self): return self.operation(len(operator_precedence) - 1) def operation(self, operator_precedence_index): #if operator_precedence_index < 0: # assert(operator_precedence_index == 0) # return self.value() possible_operators = map(lambda x: x.decode("utf-8"), operator_precedence[operator_precedence_index]) if possible_operators[0] in unary_prefix_operators: return self.unary_prefix_operation(operator_precedence_index) elif possible_operators[0] in unary_suffix_operators: return self.unary_suffix_operation(operator_precedence_index) else: return self.binary_operation(operator_precedence_index) def unary_prefix_operation(self, operator_precedence_index): global operator_precedence possible_operators = map(lambda x: x.decode("utf-8"), operator_precedence[operator_precedence_index]) operator = self.match_operator(possible_operators) if operator is None: return self.operation(operator_precedence_index - 1) if operator == u"∇" and (self.input in operator_chars): # ∇⨯ # (∇⨯B detection is going to hurt since ∇a also exists and is an unary operator) return operator.encode("utf-8") # FIXME stop encoding/decode all the damn time. #return self.binary_operation(operator_precedence_index) b = self.child_operand(operator_precedence_index) primary_operator = possible_operators[0] return create_operation(primary_operator, [b]) def unary_suffix_operation(self, operator_precedence_index): global operator_precedence possible_operators = map(lambda x: x.decode("utf-8"), operator_precedence[operator_precedence_index]) b = self.child_operand(operator_precedence_index) operator = self.match_operator(possible_operators) if operator is None: return b else: primary_operator = possible_operators[0] return create_operation(primary_operator, [b]) def child_operand(self, operator_precedence_index): """ operator_precedence_index is the precedence of the parent """ if operator_precedence_index > 0: return self.operation(operator_precedence_index - 1) else: return self.value() def binary_operation(self, operator_precedence_index): global operator_precedence possible_operators = map(lambda x: x.decode("utf-8"), operator_precedence[operator_precedence_index]) #lower_operators = operator_precedence[operator_precedence_index - 1] if operator_precedence_index > 0 else None operands = [] operator = possible_operators[0] while True: b = self.child_operand(operator_precedence_index) if operator != possible_operators[0]: b = create_inversion(operator, b) operands.append(b) operator = self.match_operator(possible_operators) if operator is None: break if len(operands) == 1: return operands[0] else: primary_operator = possible_operators[0] return create_operation(primary_operator, operands) def value(self): # bottom-out. if self.input == "(": self.consume() a = self.expression() self.consume(")") return a elif self.input == "[": self.consume() a = self.expression() self.consume("]") return a elif self.input == "-": # eew... self.consume() argument = self.operation(term_operator_precedence_index) #a = self.value() return data.negate(argument) symbol_1 = self.symbol() if self.input in opening_braces or (self.input and self.input not in operator_chars and self.input not in closing_braces): # function arg follows? if self.input in opening_braces: argument = self.value() # don't ask me why mathematicians do that... else: argument = self.operation(term_operator_precedence_index) #argument = self.value() # FIXME or just a term? return data.FunctionApplication([symbol_1, argument]) return symbol_1 # convenience function, don't overdo it... def parse_expression(text): expression = ExpressionParser(StringIO.StringIO(text)).parse() return expression if __name__ == "__main__": print parse_expression("-1") print parse_expression("∇A") print parse_expression("∇⨯A") print parse_expression("∇∙A") print parse_expression("∫f(x)⋅dx") print parse_expression("√1⋅√2") print parse_expression("sin ω⋅t+2⋅3⋅f(x)/2+√5") print parse_expression("1≤2&2<3") print parse_expression("1*5")