#!/usr/bin/env python
# -*- coding: utf-8 -*-
# I, Danny Milosavljevic, place this file in the public domain.

"""Write parsed expressions out as LaTeX source text.

The module provides a code-point -> LaTeX macro table, the `uncombine`
generator that turns a base character followed by combining marks into
prefix LaTeX macros, and `LaTeXFormatter`, which plugs both into
`formatters.Formatter`.

Running the file as a script executes a small self-test (Python 2 only).
"""

# TODO: special-case ^\frac{1}{2} so a \sqrt is used instead.
# TODO: special-case fractions so that all the denominator parts are collected
#       and all numerator parts are collected in a term and just one fraction
#       is printed.

import formatters
import sys
import scanners

# Maps a Unicode code point to the LaTeX text emitted for it.  Most values end
# in a space so that a following letter is not glued onto the macro name.
unicode_to_LaTeX_table = {
    # ambiguous: 0x2192: "\\to ", # FIXME?
    178: "^2",
    179: "^3",
    0x2074: "^4",
    945: "\\alpha ",
    946: "\\beta ",
    947: "\\gamma ",
    948: "\\delta ",
    949: "\\varepsilon ",  # FIXME
    0x3f5: "\\epsilon ",
    950: "\\zeta ",
    951: "\\eta ",
    952: "\\theta ",
    953: "\\iota ",
    954: "\\kappa ",
    955: "\\lambda ",
    956: "\\mu ",
    957: "\\nu ",
    958: "\\xi ",
    959: "\\omicron ",
    960: "\\pi ",
    961: "\\rho ",
    963: "\\sigma ",
    964: "\\tau ",
    965: "\\upsilon ",
    966: "\\varphi ",
    967: "\\chi ",
    968: "\\psi ",
    969: "\\omega ",
    # big letters:
    913: "\\Alpha ",
    914: "\\Beta ",
    915: "\\Gamma ",
    916: "\\Delta ",
    917: "\\Epsilon ",
    918: "\\Zeta ",
    919: "\\Eta ",
    920: "\\Theta ",
    921: "\\Iota ",
    922: "\\Kappa ",
    923: "\\Lambda ",
    924: "\\Mu ",
    925: "\\Nu ",
    926: "\\Xi ",
    927: "\\Omicron ",
    928: "\\Pi ",
    929: "\\Rho ",
    931: "\\Sigma ",
    932: "\\Tau ",
    933: "\\Upsilon ",
    934: "\\Phi ",
    935: "\\Chi ",
    936: "\\Psi ",
    937: "\\Omega ",
    981: "\\phi ",
    # Maths:
    8800: "\\neq ",
    172: "\\neg ",
    8745: "\\cap ",
    8743: "\\bigwedge ",
    8746: "\\cup ",
    8744: "\\bigvee ",
    8834: "\\subset ",
    8838: "\\subseteq ",
    8614: "\\mapsto ",
    8869: "\\perp ",
    8729: "\\bullet ",
    0x22C5: "\\cdot ",
    10799: "\\times ",
    8706: "\\partial ",
    0x221A: "\\sqrt ",
    0x00F7: "\\frac ",
    0x2044: "\\frac ",
    0x2200: "\\forall ",
    0x2203: "\\exists ",
    0x2206: "\\nabla^2 ",  # \cdot \\nabla ", # FIXME laplace.
    # \Delta
    0x2207: "\\nabla ",
    # 0x2207: "\\nabla_",
    0x222B: "\\int ",
    # U+222E is CONTOUR INTEGRAL: use \oint, and keep the trailing space so a
    # following letter does not become part of the macro name.
    0x222E: "\\oint ",
    0x2026: "\\ldots ",
    0x00B1: "\\pm ",
    0x221E: "\\infty ",
    0x2190: "\\leftarrow ",
    0x2191: "\\uparrow ",
    0x2192: "\\rightarrow ",
    0x2193: "\\downarrow ",
    0x21D0: "\\Leftarrow ",
    0x21D2: "\\Rightarrow ",
    0x21D4: "\\Leftrightarrow ",
    # more arrows: http://www.alanwood.net/unicode/arrows.html
    0x2208: "\\in ",
    # circle
    0xB0: "^{\\circ} ",  # backslash escaped explicitly; "\c" is not a valid escape
    0x2265: "\\geq ",
    0x2264: "\\leq ",
    0x03d1: "\\vartheta ",
    #0x03d5: "\\varphi ",
    0x03d6: "\\varpi ",
    0x03f1: "\\varrho ",
    0x2211: "\\sum ",
    #0x2329: "\\langle ",
    0x27e8: "\\langle ",
    0x27e9: "\\rangle ",
    0x2218: "\\circ ",
    0x223C: "\\sim ",
    0x2297: "\\otimes ",
    # TODO http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt
    0x0127: "\\hbar ",
    0x29E0: "\\quabla ",  # Box ", # glyph'003 in msam. # \\quabla.
    0x212B: "\\AA ",
    0x2248: "\\approx ",
}

# Code points that `uncombine` turns into a LaTeX prefix.  These are looked up
# before unicode_to_LaTeX_table.
_stacking_to_LaTeX_table = {
    0x20D7: "\\vec ",       # COMBINING RIGHT ARROW ABOVE
    0x0307: "\\dot ",       # COMBINING DOT ABOVE
    0x0308: "\\ddot ",      # COMBINING DIAERESIS
    0x0304: "\\overline ",  # COMBINING MACRON
    ord("_"): "_{",         # opens a subscript group, closed at the very end
}


def uncombine(text):
    """Yield LaTeX fragments for `text`, resolving Unicode stacking.

    A base character followed by combining marks is emitted with the marks
    first, last mark outermost, e.g. "x" + U+20D7 + U+0307 gives
    "\\dot ", "\\vec ", "x".  Every "_" opens a "_{" group; all groups that
    were opened are closed with "}" after the last character, so
    "x_ijk_l" ends in "x_{ijk_{l}}".

    text -- a sequence of single characters (each item is passed to ord()).

    Characters are classified with scanners.combining_P; presumably that
    reports whether a character is a combining mark -- verify in scanners.
    """
    def encode(item):
        # Stacking marks and "_" first, then the general table, else verbatim.
        code = ord(item)
        stacked = _stacking_to_LaTeX_table.get(code)
        if stacked is not None:
            return stacked
        return unicode_to_LaTeX_table.get(code) or item

    cluster = []  # current base character followed by its combining marks
    open_groups = 0  # number of "_{" emitted that still need a "}"
    for character in text:
        if cluster and not scanners.combining_P(character):
            # A new base character starts: flush the finished cluster with
            # the marks in front of the base character.
            for item in reversed(cluster):
                piece = encode(item)
                if piece.endswith("{"):
                    open_groups += 1
                yield piece
            cluster = []
        cluster.append(character)
    # Flush whatever is left.  Groups opened here are counted as well, so a
    # trailing "_" still produces balanced braces.
    for item in reversed(cluster):
        piece = encode(item)
        if piece.endswith("{"):
            open_groups += 1
        yield piece
    for _ in range(open_groups):
        yield "}"


class LaTeXFormatter(formatters.Formatter):
    """Formatter that writes expressions as LaTeX.

    Only the hooks below are defined here; self.format is not defined in this
    class and is presumably inherited from formatters.Formatter.
    """

    # Symbol names that are written as a macro ("\\" + name).
    known_LaTeX_functions = [
        u"cos", u"sin", u"arccos", u"arcsin", u"tan", u"arctan", u"cot",
        u"cosh", u"sinh", u"coth", u"tanh", u"det", u"lim", u"liminf",
        u"limsup", u"ln", u"log", u"artanh", u"arsinh", u"arcosh", u"exp",
        u"max", u"diag",
    ]

    def format_reciprocation(self, expression, IO):
        """Write 1/x as \\frac{1}{x}, where x is expression.operands[0]."""
        IO.write("\\frac{1}{")
        self.format(expression.operands[0], IO)
        IO.write("}")

    def format_opening_brace(self, IO):
        """Write the opening parenthesis."""
        IO.write(r"\left(")  # TODO different parentheses?

    def format_closing_brace(self, IO):
        """Write the closing parenthesis."""
        IO.write(r"\right)")

    def format_symbol(self, expression, IO):
        """Write a symbol: known function names as macros, others uncombined.

        str(expression) is decoded as UTF-8 before the lookup.
        """
        name = str(expression)
        x_name = name.decode("utf-8")
        if x_name in self.__class__.known_LaTeX_functions:
            IO.write("\\" + name)
        else:
            for item in uncombine(x_name):
                IO.write(item)


if __name__ == "__main__":
    import StringIO
    import parsers
    import codecs

    def parse_expression(text):
        """Parse UTF-8 encoded `text` with parsers.ExpressionParser."""
        # Superscript digits are rewritten as explicit powers before parsing.
        text = text.replace("²", "^2")
        text = text.replace("³", "^3")
        return parsers.ExpressionParser(StringIO.StringIO(text)).parse()

    def test_expression(expression_text, expected_text = None):
        """Assert that formatting expression_text gives expected_text.

        When expected_text is omitted or empty, the input itself is expected.
        """
        expression = parse_expression(expression_text)
        IO = StringIO.StringIO()
        formatter_1 = LaTeXFormatter()
        formatter_1.format(expression, IO)
        text = IO.getvalue()
        if not expected_text:
            expected_text = expression_text
        if text != expected_text:
            print >>sys.stderr, "error: output was different than expected: %r" % text
        assert(text == expected_text)

    def print_expression(expression):
        """Format `expression` to stdout, followed by a newline."""
        formatter_1 = LaTeXFormatter()
        formatter_1.format(expression, sys.stdout)
        sys.stdout.write("\n")
        sys.stdout.flush()

    def test_uncombine(text, expected_text):
        """Assert that uncombine() of UTF-8 `text` joins to expected_text."""
        IO = StringIO.StringIO()
        for item in uncombine(text.decode("utf-8")):
            IO.write(item.encode("utf-8"))
        got_text = IO.getvalue()
        if got_text != expected_text:
            print >>sys.stderr, "error: test failed: expected %r but got %r" % (expected_text, got_text)
        assert(got_text == expected_text)

    test_expression("5+3⋅2", '5+3\\cdot 2')
    test_expression("5⋅3+2", '5\\cdot 3+2')
    test_expression("5⋅(3+2)", '5\\cdot (3+2)')
    test_expression("5+3⋅2≤0", '5+3\\cdot 2\\leq 0')
    #sys.stdout = codecs.open("/dev/stdout", "w", "UTF-8")
    expression = parse_expression("5+3⋅2≤0")
    print_expression(parse_expression("5⋅3"))
    print_expression(parse_expression("-1"))
    print_expression(parse_expression("∇A"))
    print_expression(parse_expression("∇⨯A"))
    print_expression(parse_expression("∇∙A"))
    print_expression(parse_expression("∫f(x)⋅dx"))
    print_expression(parse_expression("√1⋅√2"))
    test_expression("sin ω⋅t+2⋅3⋅f(x)/2+√5", r'\sin(\omega \cdot t)+2\cdot 3\cdot f(x)\cdot \frac{1}{2}+5^\frac{1}{2}')
    print(parse_expression("2^3^5"))
    print_expression(parse_expression("2^3^5"))
    print_expression(parse_expression("a_ijk"))
    print_expression(parse_expression("3⋅x⃗̈+2⋅x⃗̇+3⋅x⃗+4=0"))
    # The inputs below contain combining marks after the "x":
    # U+20D7 (vec), U+0307 (dot), U+0304 (overline).
    test_uncombine("x", "x")
    test_uncombine("xyz", "xyz")
    test_uncombine("x⃗", "\\vec x")
    test_uncombine("x⃗̇", "\\dot \\vec x")
    test_uncombine("x̄", "\\overline x")
    test_uncombine("x⃗̇_ijk_l", "\\dot \\vec x_{ijk_{l}}")
    # A trailing "_" must still yield balanced braces.
    test_uncombine("x_", "x_{}")