| /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
| /* |
| * This file is part of the Collabora Office project. |
| * |
| * This Source Code Form is subject to the terms of the Mozilla Public |
| * License, v. 2.0. If a copy of the MPL was not distributed with this |
| * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| * |
| * This file incorporates work covered by the following license notice: |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed |
| * with this work for additional information regarding copyright |
| * ownership. The ASF licenses this file to you under the Apache |
| * License, Version 2.0 (the "License"); you may not use this file |
| * except in compliance with the License. You may obtain a copy of |
| * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
| */ |
| |
| #include <com/sun/star/i18n/UnicodeType.hpp> |
| #include <com/sun/star/i18n/KParseTokens.hpp> |
| #include <com/sun/star/i18n/KParseType.hpp> |
| #include <i18nlangtag/lang.h> |
| #include <tools/lineend.hxx> |
| #include <comphelper/configuration.hxx> |
| #include <unotools/syslocale.hxx> |
| #include <osl/diagnose.h> |
| #include <rtl/character.hxx> |
| #include <parse5.hxx> |
| #include <strings.hrc> |
| #include <smmod.hxx> |
| #include <symbol.hxx> |
| #include <cfgitem.hxx> |
| #include <starmathdatabase.hxx> |
| |
| #include <stack> |
| #include <unordered_set> |
| |
| using namespace ::com::sun::star::i18n; |
| |
| //Definition of math keywords |
| constexpr SmTokenTableEntry aTokenTable[] |
| = { { u"abs"_ustr, TABS, '\0', TG::UnOper, 13 }, |
| { u"acute"_ustr, TACUTE, MS_ACUTE, TG::Attribute, 5 }, |
| { u"aleph"_ustr, TALEPH, MS_ALEPH, TG::Standalone, 5 }, |
| { u"alignb"_ustr, TALIGNC, '\0', TG::Align, 0 }, |
| { u"alignc"_ustr, TALIGNC, '\0', TG::Align, 0 }, |
| { u"alignl"_ustr, TALIGNL, '\0', TG::Align, 0 }, |
| { u"alignm"_ustr, TALIGNC, '\0', TG::Align, 0 }, |
| { u"alignr"_ustr, TALIGNR, '\0', TG::Align, 0 }, |
| { u"alignt"_ustr, TALIGNC, '\0', TG::Align, 0 }, |
| { u"and"_ustr, TAND, MS_AND, TG::Product, 0 }, |
| { u"approx"_ustr, TAPPROX, MS_APPROX, TG::Relation, 0 }, |
| { u"arccos"_ustr, TACOS, '\0', TG::Function, 5 }, |
| { u"arccot"_ustr, TACOT, '\0', TG::Function, 5 }, |
| { u"arcosh"_ustr, TACOSH, '\0', TG::Function, 5 }, |
| { u"arcoth"_ustr, TACOTH, '\0', TG::Function, 5 }, |
| { u"arcsin"_ustr, TASIN, '\0', TG::Function, 5 }, |
| { u"arctan"_ustr, TATAN, '\0', TG::Function, 5 }, |
| { u"arsinh"_ustr, TASINH, '\0', TG::Function, 5 }, |
| { u"artanh"_ustr, TATANH, '\0', TG::Function, 5 }, |
| { u"backepsilon"_ustr, TBACKEPSILON, MS_BACKEPSILON, TG::Standalone, 5 }, |
| { u"bar"_ustr, TBAR, MS_BAR, TG::Attribute, 5 }, |
| { u"binom"_ustr, TBINOM, '\0', TG::NONE, 5 }, |
| { u"bold"_ustr, TBOLD, '\0', TG::FontAttr, 5 }, |
| { u"boper"_ustr, TBOPER, '\0', TG::Product, 0 }, |
| { u"breve"_ustr, TBREVE, MS_BREVE, TG::Attribute, 5 }, |
| { u"bslash"_ustr, TBACKSLASH, MS_BACKSLASH, TG::Product, 0 }, |
| { u"cdot"_ustr, TCDOT, MS_CDOT, TG::Product, 0 }, |
| { u"check"_ustr, TCHECK, MS_CHECK, TG::Attribute, 5 }, |
| { u"circ"_ustr, TCIRC, MS_CIRC, TG::Standalone, 5 }, |
| { u"circle"_ustr, TCIRCLE, MS_CIRCLE, TG::Attribute, 5 }, |
| { u"color"_ustr, TCOLOR, '\0', TG::FontAttr, 5 }, |
| { u"coprod"_ustr, TCOPROD, MS_COPROD, TG::Oper, 5 }, |
| { u"cos"_ustr, TCOS, '\0', TG::Function, 5 }, |
| { u"cosh"_ustr, TCOSH, '\0', TG::Function, 5 }, |
| { u"cot"_ustr, TCOT, '\0', TG::Function, 5 }, |
| { u"coth"_ustr, TCOTH, '\0', TG::Function, 5 }, |
| { u"csub"_ustr, TCSUB, '\0', TG::Power, 0 }, |
| { u"csup"_ustr, TCSUP, '\0', TG::Power, 0 }, |
| { u"dddot"_ustr, TDDDOT, MS_DDDOT, TG::Attribute, 5 }, |
| { u"ddot"_ustr, TDDOT, MS_DDOT, TG::Attribute, 5 }, |
| { u"def"_ustr, TDEF, MS_DEF, TG::Relation, 0 }, |
| { u"div"_ustr, TDIV, MS_DIV, TG::Product, 0 }, |
| { u"divides"_ustr, TDIVIDES, MS_LINE, TG::Relation, 0 }, |
| { u"dlarrow"_ustr, TDLARROW, MS_DLARROW, TG::Standalone, 5 }, |
| { u"dlrarrow"_ustr, TDLRARROW, MS_DLRARROW, TG::Standalone, 5 }, |
| { u"dot"_ustr, TDOT, MS_DOT, TG::Attribute, 5 }, |
| { u"dotsaxis"_ustr, TDOTSAXIS, MS_DOTSAXIS, TG::Standalone, 5 }, // 5 to continue expression |
| { u"dotsdiag"_ustr, TDOTSDIAG, MS_DOTSUP, TG::Standalone, 5 }, |
| { u"dotsdown"_ustr, TDOTSDOWN, MS_DOTSDOWN, TG::Standalone, 5 }, |
| { u"dotslow"_ustr, TDOTSLOW, MS_DOTSLOW, TG::Standalone, 5 }, |
| { u"dotsup"_ustr, TDOTSUP, MS_DOTSUP, TG::Standalone, 5 }, |
| { u"dotsvert"_ustr, TDOTSVERT, MS_DOTSVERT, TG::Standalone, 5 }, |
| { u"downarrow"_ustr, TDOWNARROW, MS_DOWNARROW, TG::Standalone, 5 }, |
| { u"drarrow"_ustr, TDRARROW, MS_DRARROW, TG::Standalone, 5 }, |
| { u"emptyset"_ustr, TEMPTYSET, MS_EMPTYSET, TG::Standalone, 5 }, |
| { u"equiv"_ustr, TEQUIV, MS_EQUIV, TG::Relation, 0 }, |
| { u"evaluate"_ustr, TEVALUATE, '\0', TG::NONE, 0 }, |
| { u"exists"_ustr, TEXISTS, MS_EXISTS, TG::Standalone, 5 }, |
| { u"exp"_ustr, TEXP, '\0', TG::Function, 5 }, |
| { u"fact"_ustr, TFACT, MS_FACT, TG::UnOper, 5 }, |
| { u"fixed"_ustr, TFIXED, '\0', TG::Font, 0 }, |
| { u"font"_ustr, TFONT, '\0', TG::FontAttr, 5 }, |
| { u"forall"_ustr, TFORALL, MS_FORALL, TG::Standalone, 5 }, |
| { u"fourier"_ustr, TFOURIER, MS_FOURIER, TG::Standalone, 5 }, |
| { u"frac"_ustr, TFRAC, '\0', TG::NONE, 5 }, |
| { u"from"_ustr, TFROM, '\0', TG::Limit, 0 }, |
| { u"func"_ustr, TFUNC, '\0', TG::Function, 5 }, |
| { u"ge"_ustr, TGE, MS_GE, TG::Relation, 0 }, |
| { u"geslant"_ustr, TGESLANT, MS_GESLANT, TG::Relation, 0 }, |
| { u"gg"_ustr, TGG, MS_GG, TG::Relation, 0 }, |
| { u"grave"_ustr, TGRAVE, MS_GRAVE, TG::Attribute, 5 }, |
| { u"gt"_ustr, TGT, MS_GT, TG::Relation, 0 }, |
| { u"hadd"_ustr, THADD, MS_HADD, TG::Oper, 5 }, |
| { u"harpoon"_ustr, THARPOON, MS_HARPOON, TG::Attribute, 5 }, |
| { u"hat"_ustr, THAT, MS_HAT, TG::Attribute, 5 }, |
| { u"hbar"_ustr, THBAR, MS_HBAR, TG::Standalone, 5 }, |
| { u"hex"_ustr, THEX, '\0', TG::NONE, 5 }, |
| { u"iiint"_ustr, TIIINT, MS_IIINT, TG::Oper, 5 }, |
| { u"iint"_ustr, TIINT, MS_IINT, TG::Oper, 5 }, |
| { u"im"_ustr, TIM, MS_IM, TG::Standalone, 5 }, |
| { u"in"_ustr, TIN, MS_IN, TG::Relation, 0 }, |
| { u"infinity"_ustr, TINFINITY, MS_INFINITY, TG::Standalone, 5 }, |
| { u"infty"_ustr, TINFINITY, MS_INFINITY, TG::Standalone, 5 }, |
| { u"int"_ustr, TINT, MS_INT, TG::Oper, 5 }, |
| { u"intd"_ustr, TINTD, MS_INT, TG::Oper, 5 }, |
| { u"intersection"_ustr, TINTERSECT, MS_INTERSECT, TG::Product, 0 }, |
| { u"it"_ustr, TIT, '\0', TG::Product, 0 }, |
| { u"ital"_ustr, TITALIC, '\0', TG::FontAttr, 5 }, |
| { u"italic"_ustr, TITALIC, '\0', TG::FontAttr, 5 }, |
| { u"lambdabar"_ustr, TLAMBDABAR, MS_LAMBDABAR, TG::Standalone, 5 }, |
| { u"langle"_ustr, TLANGLE, MS_LMATHANGLE, TG::LBrace, 5 }, |
| { u"laplace"_ustr, TLAPLACE, MS_LAPLACE, TG::Standalone, 5 }, |
| { u"lbrace"_ustr, TLBRACE, MS_LBRACE, TG::LBrace, 5 }, |
| { u"lceil"_ustr, TLCEIL, MS_LCEIL, TG::LBrace, 5 }, |
| { u"ldbracket"_ustr, TLDBRACKET, MS_LDBRACKET, TG::LBrace, 5 }, |
| { u"ldline"_ustr, TLDLINE, MS_DVERTLINE, TG::LBrace, 5 }, |
| { u"le"_ustr, TLE, MS_LE, TG::Relation, 0 }, |
| { u"left"_ustr, TLEFT, '\0', TG::NONE, 5 }, |
| { u"leftarrow"_ustr, TLEFTARROW, MS_LEFTARROW, TG::Standalone, 5 }, |
| { u"leslant"_ustr, TLESLANT, MS_LESLANT, TG::Relation, 0 }, |
| { u"lfloor"_ustr, TLFLOOR, MS_LFLOOR, TG::LBrace, 5 }, |
| { u"lim"_ustr, TLIM, '\0', TG::Oper, 5 }, |
| { u"liminf"_ustr, TLIMINF, '\0', TG::Oper, 5 }, |
| { u"limsup"_ustr, TLIMSUP, '\0', TG::Oper, 5 }, |
| { u"lint"_ustr, TLINT, MS_LINT, TG::Oper, 5 }, |
| { u"ll"_ustr, TLL, MS_LL, TG::Relation, 0 }, |
| { u"lline"_ustr, TLLINE, MS_VERTLINE, TG::LBrace, 5 }, |
| { u"llint"_ustr, TLLINT, MS_LLINT, TG::Oper, 5 }, |
| { u"lllint"_ustr, TLLLINT, MS_LLLINT, TG::Oper, 5 }, |
| { u"ln"_ustr, TLN, '\0', TG::Function, 5 }, |
| { u"log"_ustr, TLOG, '\0', TG::Function, 5 }, |
| { u"lrline"_ustr, TLRLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5 }, |
| { u"lrdline"_ustr, TLRDLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5 }, |
| { u"lsub"_ustr, TLSUB, '\0', TG::Power, 0 }, |
| { u"lsup"_ustr, TLSUP, '\0', TG::Power, 0 }, |
| { u"lt"_ustr, TLT, MS_LT, TG::Relation, 0 }, |
| { u"maj"_ustr, TSUM, MS_MAJ, TG::Oper, 5 }, |
| { u"matrix"_ustr, TMATRIX, '\0', TG::NONE, 5 }, |
| { u"minusplus"_ustr, TMINUSPLUS, MS_MINUSPLUS, TG::UnOper | TG::Sum, 5 }, |
| { u"mline"_ustr, TMLINE, MS_VERTLINE, TG::NONE, 0 }, //! not in TG::RBrace, Level 0 |
| { u"nabla"_ustr, TNABLA, MS_NABLA, TG::Standalone, 5 }, |
| { u"nbold"_ustr, TNBOLD, '\0', TG::FontAttr, 5 }, |
| { u"ndivides"_ustr, TNDIVIDES, MS_NDIVIDES, TG::Relation, 0 }, |
| { u"neg"_ustr, TNEG, MS_NEG, TG::UnOper, 5 }, |
| { u"neq"_ustr, TNEQ, MS_NEQ, TG::Relation, 0 }, |
| { u"newline"_ustr, TNEWLINE, '\0', TG::NONE, 0 }, |
| { u"ni"_ustr, TNI, MS_NI, TG::Relation, 0 }, |
| { u"nitalic"_ustr, TNITALIC, '\0', TG::FontAttr, 5 }, |
| { u"none"_ustr, TNONE, '\0', TG::LBrace | TG::RBrace, 0 }, |
| { u"nospace"_ustr, TNOSPACE, '\0', TG::Standalone, 5 }, |
| { u"notexists"_ustr, TNOTEXISTS, MS_NOTEXISTS, TG::Standalone, 5 }, |
| { u"notin"_ustr, TNOTIN, MS_NOTIN, TG::Relation, 0 }, |
| { u"nprec"_ustr, TNOTPRECEDES, MS_NOTPRECEDES, TG::Relation, 0 }, |
| { u"nroot"_ustr, TNROOT, MS_SQRT, TG::UnOper, 5 }, |
| { u"nsubset"_ustr, TNSUBSET, MS_NSUBSET, TG::Relation, 0 }, |
| { u"nsubseteq"_ustr, TNSUBSETEQ, MS_NSUBSETEQ, TG::Relation, 0 }, |
| { u"nsucc"_ustr, TNOTSUCCEEDS, MS_NOTSUCCEEDS, TG::Relation, 0 }, |
| { u"nsupset"_ustr, TNSUPSET, MS_NSUPSET, TG::Relation, 0 }, |
| { u"nsupseteq"_ustr, TNSUPSETEQ, MS_NSUPSETEQ, TG::Relation, 0 }, |
| { u"odivide"_ustr, TODIVIDE, MS_ODIVIDE, TG::Product, 0 }, |
| { u"odot"_ustr, TODOT, MS_ODOT, TG::Product, 0 }, |
| { u"ominus"_ustr, TOMINUS, MS_OMINUS, TG::Sum, 0 }, |
| { u"oper"_ustr, TOPER, '\0', TG::Oper, 5 }, |
| { u"oplus"_ustr, TOPLUS, MS_OPLUS, TG::Sum, 0 }, |
| { u"or"_ustr, TOR, MS_OR, TG::Sum, 0 }, |
| { u"ortho"_ustr, TORTHO, MS_ORTHO, TG::Relation, 0 }, |
| { u"otimes"_ustr, TOTIMES, MS_OTIMES, TG::Product, 0 }, |
| { u"over"_ustr, TOVER, '\0', TG::Product, 0 }, |
| { u"overbrace"_ustr, TOVERBRACE, MS_OVERBRACE, TG::Product, 5 }, |
| { u"overline"_ustr, TOVERLINE, '\0', TG::Attribute, 5 }, |
| { u"overstrike"_ustr, TOVERSTRIKE, '\0', TG::Attribute, 5 }, |
| { u"owns"_ustr, TNI, MS_NI, TG::Relation, 0 }, |
| { u"parallel"_ustr, TPARALLEL, MS_DLINE, TG::Relation, 0 }, |
| { u"partial"_ustr, TPARTIAL, MS_PARTIAL, TG::Standalone, 5 }, |
| { u"phantom"_ustr, TPHANTOM, '\0', TG::FontAttr, 5 }, |
| { u"plusminus"_ustr, TPLUSMINUS, MS_PLUSMINUS, TG::UnOper | TG::Sum, 5 }, |
| { u"prec"_ustr, TPRECEDES, MS_PRECEDES, TG::Relation, 0 }, |
| { u"preccurlyeq"_ustr, TPRECEDESEQUAL, MS_PRECEDESEQUAL, TG::Relation, 0 }, |
| { u"precsim"_ustr, TPRECEDESEQUIV, MS_PRECEDESEQUIV, TG::Relation, 0 }, |
| { u"prod"_ustr, TPROD, MS_PROD, TG::Oper, 5 }, |
| { u"prop"_ustr, TPROP, MS_PROP, TG::Relation, 0 }, |
| { u"rangle"_ustr, TRANGLE, MS_RMATHANGLE, TG::RBrace, 0 }, //! 0 to terminate expression |
| { u"rbrace"_ustr, TRBRACE, MS_RBRACE, TG::RBrace, 0 }, |
| { u"rceil"_ustr, TRCEIL, MS_RCEIL, TG::RBrace, 0 }, |
| { u"rdbracket"_ustr, TRDBRACKET, MS_RDBRACKET, TG::RBrace, 0 }, |
| { u"rdline"_ustr, TRDLINE, MS_DVERTLINE, TG::RBrace, 0 }, |
| { u"re"_ustr, TRE, MS_RE, TG::Standalone, 5 }, |
| { u"rfloor"_ustr, TRFLOOR, MS_RFLOOR, TG::RBrace, 0 }, //! 0 to terminate expression |
| { u"right"_ustr, TRIGHT, '\0', TG::NONE, 0 }, |
| { u"rightarrow"_ustr, TRIGHTARROW, MS_RIGHTARROW, TG::Standalone, 5 }, |
| { u"rline"_ustr, TRLINE, MS_VERTLINE, TG::RBrace, 0 }, //! 0 to terminate expression |
| { u"rsub"_ustr, TRSUB, '\0', TG::Power, 0 }, |
| { u"rsup"_ustr, TRSUP, '\0', TG::Power, 0 }, |
| { u"sans"_ustr, TSANS, '\0', TG::Font, 0 }, |
| { u"serif"_ustr, TSERIF, '\0', TG::Font, 0 }, |
| { u"setC"_ustr, TSETC, MS_SETC, TG::Standalone, 5 }, |
| { u"setminus"_ustr, TSETMINUS, MS_BACKSLASH, TG::Product, 0 }, |
| { u"setN"_ustr, TSETN, MS_SETN, TG::Standalone, 5 }, |
| { u"setQ"_ustr, TSETQ, MS_SETQ, TG::Standalone, 5 }, |
| { u"setquotient"_ustr, TSETQUOTIENT, MS_SLASH, TG::Product, 0 }, |
| { u"setR"_ustr, TSETR, MS_SETR, TG::Standalone, 5 }, |
| { u"setZ"_ustr, TSETZ, MS_SETZ, TG::Standalone, 5 }, |
| { u"sim"_ustr, TSIM, MS_SIM, TG::Relation, 0 }, |
| { u"simeq"_ustr, TSIMEQ, MS_SIMEQ, TG::Relation, 0 }, |
| { u"sin"_ustr, TSIN, '\0', TG::Function, 5 }, |
| { u"sinh"_ustr, TSINH, '\0', TG::Function, 5 }, |
| { u"size"_ustr, TSIZE, '\0', TG::FontAttr, 5 }, |
| { u"slash"_ustr, TSLASH, MS_SLASH, TG::Product, 0 }, |
| { u"sqrt"_ustr, TSQRT, MS_SQRT, TG::UnOper, 5 }, |
| { u"stack"_ustr, TSTACK, '\0', TG::NONE, 5 }, |
| { u"sub"_ustr, TRSUB, '\0', TG::Power, 0 }, |
| { u"subset"_ustr, TSUBSET, MS_SUBSET, TG::Relation, 0 }, |
| { u"subseteq"_ustr, TSUBSETEQ, MS_SUBSETEQ, TG::Relation, 0 }, |
| { u"succ"_ustr, TSUCCEEDS, MS_SUCCEEDS, TG::Relation, 0 }, |
| { u"succcurlyeq"_ustr, TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TG::Relation, 0 }, |
| { u"succsim"_ustr, TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TG::Relation, 0 }, |
| { u"sum"_ustr, TSUM, MS_SUM, TG::Oper, 5 }, |
| { u"sup"_ustr, TRSUP, '\0', TG::Power, 0 }, |
| { u"supset"_ustr, TSUPSET, MS_SUPSET, TG::Relation, 0 }, |
| { u"supseteq"_ustr, TSUPSETEQ, MS_SUPSETEQ, TG::Relation, 0 }, |
| { u"tan"_ustr, TTAN, '\0', TG::Function, 5 }, |
| { u"tanh"_ustr, TTANH, '\0', TG::Function, 5 }, |
| { u"tilde"_ustr, TTILDE, MS_TILDE, TG::Attribute, 5 }, |
| { u"times"_ustr, TTIMES, MS_TIMES, TG::Product, 0 }, |
| { u"to"_ustr, TTO, '\0', TG::Limit, 0 }, |
| { u"toward"_ustr, TTOWARD, MS_RIGHTARROW, TG::Relation, 0 }, |
| { u"transl"_ustr, TTRANSL, MS_TRANSL, TG::Relation, 0 }, |
| { u"transr"_ustr, TTRANSR, MS_TRANSR, TG::Relation, 0 }, |
| { u"underbrace"_ustr, TUNDERBRACE, MS_UNDERBRACE, TG::Product, 5 }, |
| { u"underline"_ustr, TUNDERLINE, '\0', TG::Attribute, 5 }, |
| { u"union"_ustr, TUNION, MS_UNION, TG::Sum, 0 }, |
| { u"uoper"_ustr, TUOPER, '\0', TG::UnOper, 5 }, |
| { u"uparrow"_ustr, TUPARROW, MS_UPARROW, TG::Standalone, 5 }, |
| { u"vec"_ustr, TVEC, MS_VEC, TG::Attribute, 5 }, |
| { u"widebslash"_ustr, TWIDEBACKSLASH, MS_BACKSLASH, TG::Product, 0 }, |
| { u"wideharpoon"_ustr, TWIDEHARPOON, MS_HARPOON, TG::Attribute, 5 }, |
| { u"widehat"_ustr, TWIDEHAT, MS_HAT, TG::Attribute, 5 }, |
| { u"wideslash"_ustr, TWIDESLASH, MS_SLASH, TG::Product, 0 }, |
| { u"widetilde"_ustr, TWIDETILDE, MS_TILDE, TG::Attribute, 5 }, |
| { u"widevec"_ustr, TWIDEVEC, MS_VEC, TG::Attribute, 5 }, |
| { u"wp"_ustr, TWP, MS_WP, TG::Standalone, 5 }, |
| { u"جا"_ustr, TSIN, '\0', TG::Function, 5 }, |
| { u"جاز"_ustr, TSINH, '\0', TG::Function, 5 }, |
| { u"جتا"_ustr, TCOS, '\0', TG::Function, 5 }, |
| { u"جتاز"_ustr, TCOSH, '\0', TG::Function, 5 }, |
| { u"ØØ§"_ustr, TSIN, '\0', TG::Function, 5 }, |
| { u"ØØ§Ø²"_ustr, TSINH, '\0', TG::Function, 5 }, |
| { u"ØØªØ§"_ustr, TCOS, '\0', TG::Function, 5 }, |
| { u"ØØªØ§Ø²"_ustr, TCOSH, '\0', TG::Function, 5 }, |
| { u"ØØ¯"_ustr, THADD, MS_HADD, TG::Oper, 5 }, |
| { u"طا"_ustr, TTAN, '\0', TG::Function, 5 }, |
| { u"طاز"_ustr, TTANH, '\0', TG::Function, 5 }, |
| { u"طتا"_ustr, TCOT, '\0', TG::Function, 5 }, |
| { u"طتاز"_ustr, TCOTH, '\0', TG::Function, 5 }, |
| { u"ظا"_ustr, TTAN, '\0', TG::Function, 5 }, |
| { u"ظاز"_ustr, TTANH, '\0', TG::Function, 5 }, |
| { u"ظتا"_ustr, TCOT, '\0', TG::Function, 5 }, |
| { u"ظتاز"_ustr, TCOTH, '\0', TG::Function, 5 }, |
| { u"قا"_ustr, TSEC, '\0', TG::Function, 5 }, |
| { u"قاز"_ustr, TSECH, '\0', TG::Function, 5 }, |
| { u"قتا"_ustr, TCSC, '\0', TG::Function, 5 }, |
| { u"قتاز"_ustr, TCSCH, '\0', TG::Function, 5 }, |
| { u"لو"_ustr, TLOG, '\0', TG::Function, 5 }, |
| { u"مجـ"_ustr, TSUM, MS_MAJ, TG::Oper, 5 }, |
| { u"نها"_ustr, TNAHA, '\0', TG::Oper, 5 }, |
| { u"ٯا"_ustr, TSEC, '\0', TG::Function, 5 }, |
| { u"ٯاز"_ustr, TSECH, '\0', TG::Function, 5 }, |
| { u"ٯتا"_ustr, TCSC, '\0', TG::Function, 5 }, |
| { u"ٯتاز"_ustr, TCSCH, '\0', TG::Function, 5 } }; |
| |
| // First character may be any alphabetic |
| const sal_Int32 coStartFlags = KParseTokens::ANY_LETTER | KParseTokens::IGNORE_LEADING_WS; |
| |
| // Continuing characters may be any alphabetic |
| const sal_Int32 coContFlags = (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS) |
| | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING; |
| // First character for numbers, may be any numeric or dot |
| const sal_Int32 coNumStartFlags |
| = KParseTokens::ASC_DIGIT | KParseTokens::ASC_DOT | KParseTokens::IGNORE_LEADING_WS; |
| // Continuing characters for numbers, may be any numeric or dot or comma. |
| // tdf#127873: additionally accept ',' comma group separator as too many |
| // existing documents unwittingly may have used that as decimal separator |
| // in such locales (though it never was as this is always the en-US locale |
| // and the group separator is only parsed away). |
| const sal_Int32 coNumContFlags = (coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS) |
| | KParseTokens::GROUP_SEPARATOR_IN_NUMBER; |
| // First character for numbers hexadecimal |
| const sal_Int32 coNum16StartFlags |
| = KParseTokens::ASC_DIGIT | KParseTokens::ASC_UPALPHA | KParseTokens::IGNORE_LEADING_WS; |
| |
| // Continuing characters for numbers hexadecimal |
| const sal_Int32 coNum16ContFlags = (coNum16StartFlags & ~KParseTokens::IGNORE_LEADING_WS); |
| // user-defined char continuing characters may be any alphanumeric or dot. |
| const sal_Int32 coUserDefinedCharContFlags = KParseTokens::ANY_LETTER_OR_NUMBER |
| | KParseTokens::ASC_DOT |
| | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING; |
| |
| //Checks if keyword is in the list. |
| static inline bool findCompare(const SmTokenTableEntry& lhs, const OUString& s) |
| { |
| return s.compareToIgnoreAsciiCase(lhs.aIdent) > 0; |
| } |
| |
| //Returns the SmTokenTableEntry for a keyword |
| const SmTokenTableEntry* GetTokenTableEntry(const OUString& rName) |
| { |
| if (rName.isEmpty()) |
| return nullptr; //avoid null pointer exceptions |
| //Looks for the first keyword after or equal to rName in alphabetical order. |
| auto findIter |
| = std::lower_bound(std::begin(aTokenTable), std::end(aTokenTable), rName, findCompare); |
| if (findIter != std::end(aTokenTable) && rName.equalsIgnoreAsciiCase(findIter->aIdent)) |
| return &*findIter; //check is equal |
| return nullptr; //not found |
| } |
| |
| OUString encloseOrEscapeLiteral(const OUString& string, bool force) |
| { |
| if (force) |
| return "\"" + string + "\""; |
| OUStringBuffer result; |
| const std::unordered_set<sal_Unicode> DelimiterTable1{ |
| //keeping " as first entry is important to not get into recursive replacement |
| ' ', '\t', '\n', '\r', '+', '-', '*', '/', '=', '^', |
| '_', '#', '%', '>', '<', '&', '|', '~', '`' |
| }; |
| const std::unordered_set<sal_Unicode> DelimiterTable2{ |
| //keeping " as first entry is important to not get into recursive replacement |
| '{', '}', '(', ')', '[', ']', |
| }; |
| for (sal_Int32 i = 0; i < string.getLength(); i++) |
| { |
| if (string[i] == '"') |
| result.append("\"\\\"\""); |
| else if (DelimiterTable1.find(string[i]) != DelimiterTable1.end()) |
| result.append("\"" + OUStringChar(string[i]) + "\""); |
| else if (DelimiterTable2.find(string[i]) != DelimiterTable2.end()) |
| result.append("\\" + OUStringChar(string[i])); |
| else |
| result.append(string[i]); |
| } |
| |
| OUString resultString = result.makeStringAndClear(); |
| const SmTokenTableEntry* tkn = GetTokenTableEntry(resultString); |
| // excluding function and operator as they take arguments and can't treat them as literal or else arguments are not displayed correctly |
| if (tkn && tkn->nGroup != TG::Function && tkn->nGroup != TG::Oper) |
| { |
| resultString = "\"" + resultString + "\""; |
| } |
| return resultString; |
| } |
| |
| static bool IsDelimiter(const OUString& rTxt, sal_Int32 nPos) |
| { // returns 'true' iff cChar is '\0' or a delimiter |
| |
| assert(nPos <= rTxt.getLength()); //index out of range |
| if (nPos == rTxt.getLength()) |
| return true; //This is EOF |
| sal_Unicode cChar = rTxt[nPos]; |
| |
| // check if 'cChar' is in the delimiter table |
| static constexpr sal_Unicode aDelimiterTable[] = { |
| ' ', '{', '}', '(', ')', '\t', '\n', '\r', '+', '-', '*', '/', '=', '[', |
| ']', '^', '_', '#', '%', '>', '<', '&', '|', '\\', '"', '~', '`' |
| }; //reordered by usage (by eye) for nanoseconds saving. |
| |
| //checks the array |
| for (auto const& cDelimiter : aDelimiterTable) |
| { |
| if (cDelimiter == cChar) |
| return true; |
| } |
| |
| //special chars support |
| sal_Int16 nTypJp = SmModule::get()->GetSysLocale().GetCharClass().getType(rTxt, nPos); |
| return (nTypJp == css::i18n::UnicodeType::SPACE_SEPARATOR |
| || nTypJp == css::i18n::UnicodeType::CONTROL); |
| } |
| |
| // checks number used as arguments in Math formulas (e.g. 'size' command) |
| // Format: no negative numbers, must start with a digit, no exponent notation, ... |
| static bool lcl_IsNumber(const OUString& rText) |
| { |
| bool bPoint = false; |
| const sal_Unicode* pBuffer = rText.getStr(); |
| for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++) |
| { |
| const sal_Unicode cChar = *pBuffer; |
| if (cChar == '.') |
| { |
| if (bPoint) |
| return false; |
| else |
| bPoint = true; |
| } |
| else if (!rtl::isAsciiDigit(cChar)) |
| return false; |
| } |
| return true; |
| } |
| // checks number used as arguments in Math formulas (e.g. 'size' command) |
| // Format: no negative numbers, must start with a digit, no exponent notation, ... |
| static bool lcl_IsNotWholeNumber(const OUString& rText) |
| { |
| const sal_Unicode* pBuffer = rText.getStr(); |
| for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++) |
| if (!rtl::isAsciiDigit(*pBuffer)) |
| return true; |
| return false; |
| } |
| // checks hex number used as arguments in Math formulas (e.g. 'hex' command) |
| // Format: no negative numbers, must start with a digit, no exponent notation, ... |
| static bool lcl_IsNotWholeNumber16(const OUString& rText) |
| { |
| const sal_Unicode* pBuffer = rText.getStr(); |
| for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++) |
| if (!rtl::isAsciiCanonicHexDigit(*pBuffer)) |
| return true; |
| return false; |
| } |
| |
| //Text replace onto m_aBufferString |
| void SmParser5::Replace(sal_Int32 nPos, sal_Int32 nLen, std::u16string_view aText) |
| { |
| assert(nPos + nLen <= m_aBufferString.getLength()); //checks if length allows text replace |
| |
| m_aBufferString = m_aBufferString.replaceAt(nPos, nLen, aText); //replace and reindex |
| sal_Int32 nChg = aText.size() - nLen; |
| m_nBufferIndex = m_nBufferIndex + nChg; |
| m_nTokenIndex = m_nTokenIndex + nChg; |
| } |
| |
| void SmParser5::NextToken() //Central part of the parser |
| { |
| sal_Int32 nBufLen = m_aBufferString.getLength(); |
| ParseResult aRes; |
| sal_Int32 nRealStart; |
| bool bCont; |
| do |
| { |
| // skip white spaces |
| while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex)) |
| ++m_nBufferIndex; |
| |
| // Try to parse a number in a locale-independent manner using |
| // '.' as decimal separator. |
| // See https://bz.apache.org/ooo/show_bug.cgi?id=45779 |
| aRes |
| = m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER, m_aBufferString, m_nBufferIndex, |
| coNumStartFlags, u""_ustr, coNumContFlags, u""_ustr); |
| |
| if (aRes.TokenType == 0) |
| { |
| // Try again with the default token parsing. |
| aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, u""_ustr, |
| coContFlags, u""_ustr); |
| } |
| |
| nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace; |
| m_nBufferIndex = nRealStart; |
| |
| bCont = false; |
| if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart]) |
| { |
| // keep data needed for tokens row and col entry up to date |
| ++m_nRow; |
| m_nBufferIndex = m_nColOff = nRealStart + 1; |
| bCont = true; |
| } |
| else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) |
| { |
| if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart)) |
| { |
| //SkipComment |
| m_nBufferIndex = nRealStart + 2; |
| while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex]) |
| ++m_nBufferIndex; |
| bCont = true; |
| } |
| } |
| |
| } while (bCont); |
| |
| // set index of current token |
| m_nTokenIndex = m_nBufferIndex; |
| sal_uInt32 nCol = nRealStart - m_nColOff; |
| |
| bool bHandled = true; |
| if (nRealStart >= nBufLen) |
| { |
| m_aCurToken.eType = TEND; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText.clear(); |
| } |
| else if (aRes.TokenType & KParseType::ANY_NUMBER) |
| { |
| assert(aRes.EndPos > 0); |
| if (m_aBufferString[aRes.EndPos - 1] == ',' && aRes.EndPos < nBufLen |
| && m_pSysCC->getType(m_aBufferString, aRes.EndPos) != UnicodeType::SPACE_SEPARATOR) |
| { |
| // Comma followed by a non-space char is unlikely for decimal/thousands separator. |
| --aRes.EndPos; |
| } |
| sal_Int32 n = aRes.EndPos - nRealStart; |
| assert(n >= 0); |
| m_aCurToken.eType = TNUMBER; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = m_aBufferString.copy(nRealStart, n); |
| |
| SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos), "starmath", |
| "identifier really finished? (compatibility!)"); |
| } |
| else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING) |
| { |
| m_aCurToken.eType = TTEXT; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = aRes.DequotedNameOrString; |
| nCol++; |
| } |
| else if (aRes.TokenType & KParseType::IDENTNAME) |
| { |
| sal_Int32 n = aRes.EndPos - nRealStart; |
| assert(n >= 0); |
| OUString aName(m_aBufferString.copy(nRealStart, n)); |
| const SmTokenTableEntry* pEntry = GetTokenTableEntry(aName); |
| |
| if (pEntry) |
| { |
| m_aCurToken.eType = pEntry->eType; |
| m_aCurToken.setChar(pEntry->cMathChar); |
| m_aCurToken.nGroup = pEntry->nGroup; |
| m_aCurToken.nLevel = pEntry->nLevel; |
| m_aCurToken.aText = pEntry->aIdent; |
| } |
| else |
| { |
| m_aCurToken.eType = TIDENT; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = aName; |
| |
| SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos), "starmath", |
| "identifier really finished? (compatibility!)"); |
| } |
| } |
| else if (aRes.TokenType == 0 && '_' == m_aBufferString[nRealStart]) |
| { |
| m_aCurToken.eType = TRSUB; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::Power; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "_"; |
| |
| aRes.EndPos = nRealStart + 1; |
| } |
| else if (aRes.TokenType & KParseType::BOOLEAN) |
| { |
| sal_Int32& rnEndPos = aRes.EndPos; |
| if (rnEndPos - nRealStart <= 2) |
| { |
| sal_Unicode ch = m_aBufferString[nRealStart]; |
| switch (ch) |
| { |
| case '<': |
| { |
| if (m_aBufferString.match("<<", nRealStart)) |
| { |
| m_aCurToken.eType = TLL; |
| m_aCurToken.setChar(MS_LL); |
| m_aCurToken.nGroup = TG::Relation; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "<<"; |
| |
| rnEndPos = nRealStart + 2; |
| } |
| else if (m_aBufferString.match("<=", nRealStart)) |
| { |
| m_aCurToken.eType = TLE; |
| m_aCurToken.setChar(MS_LE); |
| m_aCurToken.nGroup = TG::Relation; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "<="; |
| |
| rnEndPos = nRealStart + 2; |
| } |
| else if (m_aBufferString.match("<-", nRealStart)) |
| { |
| m_aCurToken.eType = TLEFTARROW; |
| m_aCurToken.setChar(MS_LEFTARROW); |
| m_aCurToken.nGroup = TG::Standalone; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "<-"; |
| |
| rnEndPos = nRealStart + 2; |
| } |
| else if (m_aBufferString.match("<>", nRealStart)) |
| { |
| m_aCurToken.eType = TNEQ; |
| m_aCurToken.setChar(MS_NEQ); |
| m_aCurToken.nGroup = TG::Relation; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "<>"; |
| |
| rnEndPos = nRealStart + 2; |
| } |
| else if (m_aBufferString.match("<?>", nRealStart)) |
| { |
| m_aCurToken.eType = TPLACE; |
| m_aCurToken.setChar(MS_PLACE); |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "<?>"; |
| |
| rnEndPos = nRealStart + 3; |
| } |
| else |
| { |
| m_aCurToken.eType = TLT; |
| m_aCurToken.setChar(MS_LT); |
| m_aCurToken.nGroup = TG::Relation; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "<"; |
| } |
| } |
| break; |
| case '>': |
| { |
| if (m_aBufferString.match(">=", nRealStart)) |
| { |
| m_aCurToken.eType = TGE; |
| m_aCurToken.setChar(MS_GE); |
| m_aCurToken.nGroup = TG::Relation; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = ">="; |
| |
| rnEndPos = nRealStart + 2; |
| } |
| else if (m_aBufferString.match(">>", nRealStart)) |
| { |
| m_aCurToken.eType = TGG; |
| m_aCurToken.setChar(MS_GG); |
| m_aCurToken.nGroup = TG::Relation; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = ">>"; |
| |
| rnEndPos = nRealStart + 2; |
| } |
| else |
| { |
| m_aCurToken.eType = TGT; |
| m_aCurToken.setChar(MS_GT); |
| m_aCurToken.nGroup = TG::Relation; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = ">"; |
| } |
| } |
| break; |
| default: |
| bHandled = false; |
| } |
| } |
| } |
| else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) |
| { |
| sal_Int32& rnEndPos = aRes.EndPos; |
| if (rnEndPos - nRealStart == 1) |
| { |
| sal_Unicode ch = m_aBufferString[nRealStart]; |
| switch (ch) |
| { |
| case '%': |
| { |
| //! modifies aRes.EndPos |
| |
| OSL_ENSURE(rnEndPos >= nBufLen || '%' != m_aBufferString[rnEndPos], |
| "unexpected comment start"); |
| |
| // get identifier of user-defined character |
| ParseResult aTmpRes = m_pSysCC->parseAnyToken( |
| m_aBufferString, rnEndPos, KParseTokens::ANY_LETTER, u""_ustr, |
| coUserDefinedCharContFlags, u""_ustr); |
| |
| sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace; |
| |
| // default setting for the case that no identifier |
| // i.e. a valid symbol-name is following the '%' |
| // character |
| m_aCurToken.eType = TTEXT; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "%"; |
| |
| if (aTmpRes.TokenType & KParseType::IDENTNAME) |
| { |
| sal_Int32 n = aTmpRes.EndPos - nTmpStart; |
| m_aCurToken.eType = TSPECIAL; |
| m_aCurToken.aText = m_aBufferString.copy(nTmpStart - 1, n + 1); |
| |
| OSL_ENSURE(aTmpRes.EndPos > rnEndPos, "empty identifier"); |
| if (aTmpRes.EndPos > rnEndPos) |
| rnEndPos = aTmpRes.EndPos; |
| else |
| ++rnEndPos; |
| } |
| |
| // if no symbol-name was found we start-over with |
| // finding the next token right after the '%' sign. |
| // I.e. we leave rnEndPos unmodified. |
| } |
| break; |
| case '[': |
| { |
| m_aCurToken.eType = TLBRACKET; |
| m_aCurToken.setChar(MS_LBRACKET); |
| m_aCurToken.nGroup = TG::LBrace; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "["; |
| } |
| break; |
| case '\\': |
| { |
| m_aCurToken.eType = TESCAPE; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "\\"; |
| } |
| break; |
| case ']': |
| { |
| m_aCurToken.eType = TRBRACKET; |
| m_aCurToken.setChar(MS_RBRACKET); |
| m_aCurToken.nGroup = TG::RBrace; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "]"; |
| } |
| break; |
| case '^': |
| { |
| m_aCurToken.eType = TRSUP; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::Power; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "^"; |
| } |
| break; |
| case '`': |
| { |
| m_aCurToken.eType = TSBLANK; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::Blank; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "`"; |
| } |
| break; |
| case '{': |
| { |
| m_aCurToken.eType = TLGROUP; |
| m_aCurToken.setChar(MS_LBRACE); |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "{"; |
| } |
| break; |
| case '|': |
| { |
| m_aCurToken.eType = TOR; |
| m_aCurToken.setChar(MS_OR); |
| m_aCurToken.nGroup = TG::Sum; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "|"; |
| } |
| break; |
| case '}': |
| { |
| m_aCurToken.eType = TRGROUP; |
| m_aCurToken.setChar(MS_RBRACE); |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "}"; |
| } |
| break; |
| case '~': |
| { |
| m_aCurToken.eType = TBLANK; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::Blank; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "~"; |
| } |
| break; |
| case '#': |
| { |
| if (m_aBufferString.match("##", nRealStart)) |
| { |
| m_aCurToken.eType = TDPOUND; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "##"; |
| |
| rnEndPos = nRealStart + 2; |
| } |
| else |
| { |
| m_aCurToken.eType = TPOUND; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "#"; |
| } |
| } |
| break; |
| case '&': |
| { |
| m_aCurToken.eType = TAND; |
| m_aCurToken.setChar(MS_AND); |
| m_aCurToken.nGroup = TG::Product; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "&"; |
| } |
| break; |
| case '(': |
| { |
| m_aCurToken.eType = TLPARENT; |
| m_aCurToken.setChar(MS_LPARENT); |
| m_aCurToken.nGroup = TG::LBrace; |
| m_aCurToken.nLevel = 5; //! 0 to continue expression |
| m_aCurToken.aText = "("; |
| } |
| break; |
| case ')': |
| { |
| m_aCurToken.eType = TRPARENT; |
| m_aCurToken.setChar(MS_RPARENT); |
| m_aCurToken.nGroup = TG::RBrace; |
| m_aCurToken.nLevel = 0; //! 0 to terminate expression |
| m_aCurToken.aText = ")"; |
| } |
| break; |
| case '*': |
| { |
| m_aCurToken.eType = TMULTIPLY; |
| m_aCurToken.setChar(MS_MULTIPLY); |
| m_aCurToken.nGroup = TG::Product; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "*"; |
| } |
| break; |
| case '+': |
| { |
| if (m_aBufferString.match("+-", nRealStart)) |
| { |
| m_aCurToken.eType = TPLUSMINUS; |
| m_aCurToken.setChar(MS_PLUSMINUS); |
| m_aCurToken.nGroup = TG::UnOper | TG::Sum; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "+-"; |
| |
| rnEndPos = nRealStart + 2; |
| } |
| else |
| { |
| m_aCurToken.eType = TPLUS; |
| m_aCurToken.setChar(MS_PLUS); |
| m_aCurToken.nGroup = TG::UnOper | TG::Sum; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "+"; |
| } |
| } |
| break; |
| case '-': |
| { |
| if (m_aBufferString.match("-+", nRealStart)) |
| { |
| m_aCurToken.eType = TMINUSPLUS; |
| m_aCurToken.setChar(MS_MINUSPLUS); |
| m_aCurToken.nGroup = TG::UnOper | TG::Sum; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "-+"; |
| |
| rnEndPos = nRealStart + 2; |
| } |
| else if (m_aBufferString.match("->", nRealStart)) |
| { |
| m_aCurToken.eType = TRIGHTARROW; |
| m_aCurToken.setChar(MS_RIGHTARROW); |
| m_aCurToken.nGroup = TG::Standalone; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "->"; |
| |
| rnEndPos = nRealStart + 2; |
| } |
| else |
| { |
| m_aCurToken.eType = TMINUS; |
| m_aCurToken.setChar(MS_MINUS); |
| m_aCurToken.nGroup = TG::UnOper | TG::Sum; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "-"; |
| } |
| } |
| break; |
| case '.': |
| { |
| // Only one character? Then it can't be a number. |
| if (m_nBufferIndex < m_aBufferString.getLength() - 1) |
| { |
| // for compatibility with SO5.2 |
| // texts like .34 ...56 ... h ...78..90 |
| // will be treated as numbers |
| m_aCurToken.eType = TNUMBER; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 5; |
| |
| sal_Int32 nTxtStart = m_nBufferIndex; |
| sal_Unicode cChar; |
| // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only |
| do |
| { |
| cChar = m_aBufferString[++m_nBufferIndex]; |
| } while ((cChar == '.' || rtl::isAsciiDigit(cChar)) |
| && (m_nBufferIndex < m_aBufferString.getLength() - 1)); |
| |
| m_aCurToken.aText |
| = m_aBufferString.copy(nTxtStart, m_nBufferIndex - nTxtStart); |
| aRes.EndPos = m_nBufferIndex; |
| } |
| else |
| bHandled = false; |
| } |
| break; |
| case '/': |
| { |
| m_aCurToken.eType = TDIVIDEBY; |
| m_aCurToken.setChar(MS_SLASH); |
| m_aCurToken.nGroup = TG::Product; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "/"; |
| } |
| break; |
| case '=': |
| { |
| m_aCurToken.eType = TASSIGN; |
| m_aCurToken.setChar(MS_ASSIGN); |
| m_aCurToken.nGroup = TG::Relation; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "="; |
| } |
| break; |
| default: |
| bHandled = false; |
| } |
| } |
| } |
| else |
| bHandled = false; |
| |
| if (!bHandled) |
| { |
| m_aCurToken.eType = TCHARACTER; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 5; |
| |
| // tdf#129372: we may have to deal with surrogate pairs |
| // (see https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates) |
| // in this case, we must read 2 sal_Unicode instead of 1 |
| int nOffset(rtl::isSurrogate(m_aBufferString[nRealStart]) ? 2 : 1); |
| m_aCurToken.aText = m_aBufferString.copy(nRealStart, nOffset); |
| |
| aRes.EndPos = nRealStart + nOffset; |
| } |
| m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength()); |
| |
| if (TEND != m_aCurToken.eType) |
| m_nBufferIndex = aRes.EndPos; |
| } |
| |
| void SmParser5::NextTokenColor(SmTokenType dvipload) |
| { |
| sal_Int32 nBufLen = m_aBufferString.getLength(); |
| ParseResult aRes; |
| sal_Int32 nRealStart; |
| bool bCont; |
| |
| do |
| { |
| // skip white spaces |
| while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex)) |
| ++m_nBufferIndex; |
| //parse, there are few options, so less strict. |
| aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, u""_ustr, |
| coContFlags, u""_ustr); |
| nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace; |
| m_nBufferIndex = nRealStart; |
| bCont = false; |
| if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart]) |
| { |
| // keep data needed for tokens row and col entry up to date |
| ++m_nRow; |
| m_nBufferIndex = m_nColOff = nRealStart + 1; |
| bCont = true; |
| } |
| else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) |
| { |
| if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart)) |
| { |
| //SkipComment |
| m_nBufferIndex = nRealStart + 2; |
| while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex]) |
| ++m_nBufferIndex; |
| bCont = true; |
| } |
| } |
| } while (bCont); |
| |
| // set index of current token |
| m_nTokenIndex = m_nBufferIndex; |
| sal_uInt32 nCol = nRealStart - m_nColOff; |
| |
| if (nRealStart >= nBufLen) |
| m_aCurToken.eType = TEND; |
| else if (aRes.TokenType & KParseType::IDENTNAME) |
| { |
| sal_Int32 n = aRes.EndPos - nRealStart; |
| assert(n >= 0); |
| OUString aName(m_aBufferString.copy(nRealStart, n)); |
| switch (dvipload) |
| { |
| case TCOLOR: |
| m_aCurToken = starmathdatabase::Identify_ColorName_Parser(aName); |
| break; |
| case TDVIPSNAMESCOL: |
| m_aCurToken = starmathdatabase::Identify_ColorName_DVIPSNAMES(aName); |
| break; |
| default: |
| m_aCurToken = starmathdatabase::Identify_ColorName_Parser(aName); |
| break; |
| } |
| } |
| else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) |
| { |
| if (m_aBufferString[nRealStart] == '#' && !m_aBufferString.match("##", nRealStart)) |
| { |
| m_aCurToken.eType = THEX; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::Color; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "hex"; |
| } |
| } |
| else |
| m_aCurToken.eType = TNONE; |
| |
| m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength()); |
| if (TEND != m_aCurToken.eType) |
| m_nBufferIndex = aRes.EndPos; |
| } |
| |
| void SmParser5::NextTokenFontSize() |
| { |
| sal_Int32 nBufLen = m_aBufferString.getLength(); |
| ParseResult aRes; |
| sal_Int32 nRealStart; |
| bool bCont; |
| bool hex = false; |
| |
| do |
| { |
| // skip white spaces |
| while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex)) |
| ++m_nBufferIndex; |
| //hexadecimal parser |
| aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coNum16StartFlags, |
| u"."_ustr, coNum16ContFlags, u".,"_ustr); |
| if (aRes.TokenType == 0) |
| { |
| // Try again with the default token parsing. |
| aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, u""_ustr, |
| coContFlags, u""_ustr); |
| } |
| else |
| hex = true; |
| nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace; |
| m_nBufferIndex = nRealStart; |
| bCont = false; |
| if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart]) |
| { |
| // keep data needed for tokens row and col entry up to date |
| ++m_nRow; |
| m_nBufferIndex = m_nColOff = nRealStart + 1; |
| bCont = true; |
| } |
| else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) |
| { |
| if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart)) |
| { |
| //SkipComment |
| m_nBufferIndex = nRealStart + 2; |
| while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex]) |
| ++m_nBufferIndex; |
| bCont = true; |
| } |
| } |
| } while (bCont); |
| |
| // set index of current token |
| m_nTokenIndex = m_nBufferIndex; |
| sal_uInt32 nCol = nRealStart - m_nColOff; |
| |
| if (nRealStart >= nBufLen) |
| m_aCurToken.eType = TEND; |
| else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) |
| { |
| if (aRes.EndPos - nRealStart == 1) |
| { |
| switch (m_aBufferString[nRealStart]) |
| { |
| case '*': |
| m_aCurToken.eType = TMULTIPLY; |
| m_aCurToken.setChar(MS_MULTIPLY); |
| m_aCurToken.nGroup = TG::Product; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "*"; |
| break; |
| case '+': |
| m_aCurToken.eType = TPLUS; |
| m_aCurToken.setChar(MS_PLUS); |
| m_aCurToken.nGroup = TG::UnOper | TG::Sum; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "+"; |
| break; |
| case '-': |
| m_aCurToken.eType = TMINUS; |
| m_aCurToken.setChar(MS_MINUS); |
| m_aCurToken.nGroup = TG::UnOper | TG::Sum; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = "-"; |
| break; |
| case '/': |
| m_aCurToken.eType = TDIVIDEBY; |
| m_aCurToken.setChar(MS_SLASH); |
| m_aCurToken.nGroup = TG::Product; |
| m_aCurToken.nLevel = 0; |
| m_aCurToken.aText = "/"; |
| break; |
| default: |
| m_aCurToken.eType = TNONE; |
| break; |
| } |
| } |
| else |
| m_aCurToken.eType = TNONE; |
| } |
| else if (hex) |
| { |
| assert(aRes.EndPos > 0); |
| sal_Int32 n = aRes.EndPos - nRealStart; |
| assert(n >= 0); |
| m_aCurToken.eType = THEX; |
| m_aCurToken.cMathChar = u""_ustr; |
| m_aCurToken.nGroup = TG::NONE; |
| m_aCurToken.nLevel = 5; |
| m_aCurToken.aText = m_aBufferString.copy(nRealStart, n); |
| } |
| else |
| m_aCurToken.eType = TNONE; |
| |
| m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength()); |
| if (TEND != m_aCurToken.eType) |
| m_nBufferIndex = aRes.EndPos; |
| } |
| |
| namespace |
| { |
| SmNodeArray buildNodeArray(std::vector<std::unique_ptr<SmNode>>& rSubNodes) |
| { |
| SmNodeArray aSubArray(rSubNodes.size()); |
| for (size_t i = 0; i < rSubNodes.size(); ++i) |
| aSubArray[i] = rSubNodes[i].release(); |
| return aSubArray; |
| } |
| } //end namespace |
| |
| // grammar |
| /*************************************************************************************************/ |
| |
| std::unique_ptr<SmTableNode> SmParser5::DoTable() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| std::vector<std::unique_ptr<SmNode>> aLineArray; |
| aLineArray.push_back(DoLine()); |
| while (m_aCurToken.eType == TNEWLINE) |
| { |
| NextToken(); |
| aLineArray.push_back(DoLine()); |
| } |
| assert(m_aCurToken.eType == TEND); |
| std::unique_ptr<SmTableNode> xSNode(new SmTableNode(m_aCurToken)); |
| xSNode->SetSelection(m_aCurESelection); |
| xSNode->SetSubNodes(buildNodeArray(aLineArray)); |
| return xSNode; |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoAlign(bool bUseExtraSpaces) |
| // parse alignment info (if any), then go on with rest of expression |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| std::unique_ptr<SmStructureNode> xSNode; |
| |
| if (TokenInGroup(TG::Align)) |
| { |
| xSNode.reset(new SmAlignNode(m_aCurToken)); |
| xSNode->SetSelection(m_aCurESelection); |
| |
| NextToken(); |
| |
| // allow for just one align statement in 5.0 |
| if (TokenInGroup(TG::Align)) |
| return DoError(SmParseError::DoubleAlign); |
| } |
| |
| auto pNode = DoExpression(bUseExtraSpaces); |
| |
| if (xSNode) |
| { |
| xSNode->SetSubNode(0, pNode.release()); |
| return xSNode; |
| } |
| return pNode; |
| } |
| |
| // Postcondition: m_aCurToken.eType == TEND || m_aCurToken.eType == TNEWLINE |
| std::unique_ptr<SmNode> SmParser5::DoLine() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| std::vector<std::unique_ptr<SmNode>> ExpressionArray; |
| |
| // start with single expression that may have an alignment statement |
| // (and go on with expressions that must not have alignment |
| // statements in 'while' loop below. See also 'Expression()'.) |
| if (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE) |
| ExpressionArray.push_back(DoAlign()); |
| |
| while (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE) |
| ExpressionArray.push_back(DoExpression()); |
| |
| //If there's no expression, add an empty one. |
| //this is to avoid a formula tree without any caret |
| //positions, in visual formula editor. |
| if (ExpressionArray.empty()) |
| { |
| SmToken aTok; |
| aTok.eType = TNEWLINE; |
| ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok))); |
| } |
| |
| auto xSNode = std::make_unique<SmLineNode>(m_aCurToken); |
| xSNode->SetSelection(m_aCurESelection); |
| xSNode->SetSubNodes(buildNodeArray(ExpressionArray)); |
| return xSNode; |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoExpression(bool bUseExtraSpaces) |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| std::vector<std::unique_ptr<SmNode>> RelationArray; |
| RelationArray.push_back(DoRelation()); |
| while (m_aCurToken.nLevel >= 4) |
| RelationArray.push_back(DoRelation()); |
| |
| if (RelationArray.size() > 1) |
| { |
| std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken)); |
| xSNode->SetSubNodes(buildNodeArray(RelationArray)); |
| xSNode->SetUseExtraSpaces(bUseExtraSpaces); |
| return xSNode; |
| } |
| else |
| { |
| // This expression has only one node so just push this node. |
| return std::move(RelationArray[0]); |
| } |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoRelation() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| int nDepthLimit = m_nParseDepth; |
| |
| auto xFirst = DoSum(); |
| while (TokenInGroup(TG::Relation)) |
| { |
| std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken)); |
| xSNode->SetSelection(m_aCurESelection); |
| auto xSecond = DoOpSubSup(); |
| auto xThird = DoSum(); |
| xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird)); |
| xFirst = std::move(xSNode); |
| |
| ++m_nParseDepth; |
| DepthProtect bDepthGuard(m_nParseDepth); |
| } |
| |
| m_nParseDepth = nDepthLimit; |
| |
| return xFirst; |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoSum() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| int nDepthLimit = m_nParseDepth; |
| |
| auto xFirst = DoProduct(); |
| while (TokenInGroup(TG::Sum)) |
| { |
| std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken)); |
| xSNode->SetSelection(m_aCurESelection); |
| auto xSecond = DoOpSubSup(); |
| auto xThird = DoProduct(); |
| xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird)); |
| xFirst = std::move(xSNode); |
| |
| ++m_nParseDepth; |
| DepthProtect bDepthGuard(m_nParseDepth); |
| } |
| |
| m_nParseDepth = nDepthLimit; |
| |
| return xFirst; |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoProduct() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| auto xFirst = DoPower(); |
| |
| int nDepthLimit = 0; |
| |
| while (TokenInGroup(TG::Product)) |
| { |
| //this linear loop builds a recursive structure, if it gets |
| //too deep then later processing, e.g. releasing the tree, |
| //can exhaust stack |
| if (m_nParseDepth + nDepthLimit > DEPTH_LIMIT) |
| throw std::range_error("parser depth limit"); |
| |
| std::unique_ptr<SmStructureNode> xSNode; |
| std::unique_ptr<SmNode> xOper; |
| |
| SmTokenType eType = m_aCurToken.eType; |
| switch (eType) |
| { |
| case TOVER: |
| xSNode.reset(new SmBinVerNode(m_aCurToken)); |
| xSNode->SetSelection(m_aCurESelection); |
| xOper.reset(new SmRectangleNode(m_aCurToken)); |
| xOper->SetSelection(m_aCurESelection); |
| NextToken(); |
| break; |
| |
| case TBOPER: |
| xSNode.reset(new SmBinHorNode(m_aCurToken)); |
| |
| NextToken(); |
| |
| //Let the glyph node know it's a binary operation |
| m_aCurToken.eType = TBOPER; |
| m_aCurToken.nGroup = TG::Product; |
| xOper = DoGlyphSpecial(); |
| break; |
| |
| case TOVERBRACE: |
| case TUNDERBRACE: |
| xSNode.reset(new SmVerticalBraceNode(m_aCurToken)); |
| xSNode->SetSelection(m_aCurESelection); |
| xOper.reset(new SmMathSymbolNode(m_aCurToken)); |
| xOper->SetSelection(m_aCurESelection); |
| |
| NextToken(); |
| break; |
| |
| case TWIDEBACKSLASH: |
| case TWIDESLASH: |
| { |
| SmBinDiagonalNode* pSTmp = new SmBinDiagonalNode(m_aCurToken); |
| pSTmp->SetAscending(eType == TWIDESLASH); |
| xSNode.reset(pSTmp); |
| |
| xOper.reset(new SmPolyLineNode(m_aCurToken)); |
| xOper->SetSelection(m_aCurESelection); |
| NextToken(); |
| |
| break; |
| } |
| |
| default: |
| xSNode.reset(new SmBinHorNode(m_aCurToken)); |
| xSNode->SetSelection(m_aCurESelection); |
| |
| xOper = DoOpSubSup(); |
| } |
| |
| auto xArg = DoPower(); |
| xSNode->SetSubNodesBinMo(std::move(xFirst), std::move(xOper), std::move(xArg)); |
| xFirst = std::move(xSNode); |
| ++nDepthLimit; |
| } |
| return xFirst; |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoSubSup(TG nActiveGroup, std::unique_ptr<SmNode> xGivenNode) |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| assert(nActiveGroup == TG::Power || nActiveGroup == TG::Limit); |
| assert(m_aCurToken.nGroup == nActiveGroup); |
| |
| std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken)); |
| pNode->SetSelection(m_aCurESelection); |
| //! Of course 'm_aCurToken' is just the first sub-/supscript token. |
| //! It should be of no further interest. The positions of the |
| //! sub-/supscripts will be identified by the corresponding subnodes |
| //! index in the 'aSubNodes' array (enum value from 'SmSubSup'). |
| |
| pNode->SetUseLimits(nActiveGroup == TG::Limit); |
| |
| // initialize subnodes array |
| std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES); |
| aSubNodes[0] = std::move(xGivenNode); |
| |
| // process all sub-/supscripts |
| int nIndex = 0; |
| while (TokenInGroup(nActiveGroup)) |
| { |
| SmTokenType eType(m_aCurToken.eType); |
| |
| switch (eType) |
| { |
| case TRSUB: |
| nIndex = static_cast<int>(RSUB); |
| break; |
| case TRSUP: |
| nIndex = static_cast<int>(RSUP); |
| break; |
| case TFROM: |
| case TCSUB: |
| nIndex = static_cast<int>(CSUB); |
| break; |
| case TTO: |
| case TCSUP: |
| nIndex = static_cast<int>(CSUP); |
| break; |
| case TLSUB: |
| nIndex = static_cast<int>(LSUB); |
| break; |
| case TLSUP: |
| nIndex = static_cast<int>(LSUP); |
| break; |
| default: |
| SAL_WARN("starmath", "unknown case"); |
| } |
| nIndex++; |
| assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES); |
| |
| std::unique_ptr<SmNode> xENode; |
| if (aSubNodes[nIndex]) // if already occupied at earlier iteration |
| { |
| // forget the earlier one, remember an error instead |
| aSubNodes[nIndex].reset(); |
| xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token. |
| } |
| else |
| { |
| // skip sub-/supscript token |
| NextToken(); |
| } |
| |
| // get sub-/supscript node |
| // (even when we saw a double-sub/supscript error in the above |
| // in order to minimize mess and continue parsing.) |
| std::unique_ptr<SmNode> xSNode; |
| if (eType == TFROM || eType == TTO) |
| { |
| // parse limits in old 4.0 and 5.0 style |
| xSNode = DoRelation(); |
| } |
| else |
| xSNode = DoTerm(true); |
| |
| aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode); |
| } |
| |
| pNode->SetSubNodes(buildNodeArray(aSubNodes)); |
| return pNode; |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoSubSupEvaluate(std::unique_ptr<SmNode> xGivenNode) |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken)); |
| pNode->SetSelection(m_aCurESelection); |
| pNode->SetUseLimits(true); |
| |
| // initialize subnodes array |
| std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES); |
| aSubNodes[0] = std::move(xGivenNode); |
| |
| // process all sub-/supscripts |
| int nIndex = 0; |
| while (TokenInGroup(TG::Limit)) |
| { |
| SmTokenType eType(m_aCurToken.eType); |
| |
| switch (eType) |
| { |
| case TFROM: |
| nIndex = static_cast<int>(RSUB); |
| break; |
| case TTO: |
| nIndex = static_cast<int>(RSUP); |
| break; |
| default: |
| SAL_WARN("starmath", "unknown case"); |
| } |
| nIndex++; |
| assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES); |
| |
| std::unique_ptr<SmNode> xENode; |
| if (aSubNodes[nIndex]) // if already occupied at earlier iteration |
| { |
| // forget the earlier one, remember an error instead |
| aSubNodes[nIndex].reset(); |
| xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token. |
| } |
| else |
| NextToken(); // skip sub-/supscript token |
| |
| // get sub-/supscript node |
| std::unique_ptr<SmNode> xSNode; |
| xSNode = DoTerm(true); |
| |
| aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode); |
| } |
| |
| pNode->SetSubNodes(buildNodeArray(aSubNodes)); |
| return pNode; |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoOpSubSup() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| // get operator symbol |
| auto xNode = std::make_unique<SmMathSymbolNode>(m_aCurToken); |
| xNode->SetSelection(m_aCurESelection); |
| // skip operator token |
| NextToken(); |
| // get sub- supscripts if any |
| if (m_aCurToken.nGroup == TG::Power) |
| return DoSubSup(TG::Power, std::move(xNode)); |
| return xNode; |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoPower() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| // get body for sub- supscripts on top of stack |
| std::unique_ptr<SmNode> xNode(DoTerm(false)); |
| |
| if (m_aCurToken.nGroup == TG::Power) |
| return DoSubSup(TG::Power, std::move(xNode)); |
| return xNode; |
| } |
| |
| std::unique_ptr<SmBlankNode> SmParser5::DoBlank() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| assert(TokenInGroup(TG::Blank)); |
| std::unique_ptr<SmBlankNode> pBlankNode(new SmBlankNode(m_aCurToken)); |
| pBlankNode->SetSelection(m_aCurESelection); |
| |
| do |
| { |
| pBlankNode->IncreaseBy(m_aCurToken); |
| NextToken(); |
| } while (TokenInGroup(TG::Blank)); |
| |
| // Ignore trailing spaces, if corresponding option is set |
| if (m_aCurToken.eType == TNEWLINE |
| || (m_aCurToken.eType == TEND && !comphelper::IsFuzzing() |
| && SmModule::get()->GetConfig()->IsIgnoreSpacesRight())) |
| { |
| pBlankNode->Clear(); |
| } |
| return pBlankNode; |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoTerm(bool bGroupNumberIdent) |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| switch (m_aCurToken.eType) |
| { |
| case TESCAPE: |
| return DoEscape(); |
| |
| case TNOSPACE: |
| case TLGROUP: |
| { |
| bool bNoSpace = m_aCurToken.eType == TNOSPACE; |
| if (bNoSpace) |
| NextToken(); |
| if (m_aCurToken.eType != TLGROUP) |
| return DoTerm(false); // nospace is no longer concerned |
| |
| NextToken(); |
| |
| // allow for empty group |
| if (m_aCurToken.eType == TRGROUP) |
| { |
| std::unique_ptr<SmStructureNode> xSNode(new SmExpressionNode(m_aCurToken)); |
| xSNode->SetSelection(m_aCurESelection); |
| xSNode->SetSubNodes(nullptr, nullptr); |
| |
| NextToken(); |
| return std::unique_ptr<SmNode>(xSNode.release()); |
| } |
| |
| auto pNode = DoAlign(!bNoSpace); |
| if (m_aCurToken.eType == TRGROUP) |
| { |
| NextToken(); |
| return pNode; |
| } |
| auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken); |
| xSNode->SetSelection(m_aCurESelection); |
| std::unique_ptr<SmNode> xError(DoError(SmParseError::RgroupExpected)); |
| xSNode->SetSubNodes(std::move(pNode), std::move(xError)); |
| return std::unique_ptr<SmNode>(xSNode.release()); |
| } |
| |
| case TLEFT: |
| return DoBrace(); |
| case TEVALUATE: |
| return DoEvaluate(); |
| |
| case TBLANK: |
| case TSBLANK: |
| return DoBlank(); |
| |
| case TTEXT: |
| { |
| auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_TEXT); |
| pNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| return std::unique_ptr<SmNode>(pNode.release()); |
| } |
| case TCHARACTER: |
| { |
| auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_VARIABLE); |
| pNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| return std::unique_ptr<SmNode>(pNode.release()); |
| } |
| case TIDENT: |
| case TNUMBER: |
| { |
| auto pTextNode = std::make_unique<SmTextNode>( |
| m_aCurToken, m_aCurToken.eType == TNUMBER ? FNT_NUMBER : FNT_VARIABLE); |
| pTextNode->SetSelection(m_aCurESelection); |
| if (!bGroupNumberIdent) |
| { |
| NextToken(); |
| return std::unique_ptr<SmNode>(pTextNode.release()); |
| } |
| std::vector<std::unique_ptr<SmNode>> aNodes; |
| // Some people want to be able to write "x_2n" for "x_{2n}" |
| // although e.g. LaTeX or AsciiMath interpret that as "x_2 n". |
| // The tokenizer skips whitespaces so we need some additional |
| // work to distinguish from "x_2 n". |
| // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and |
| // https://bugs.libreoffice.org/show_bug.cgi?id=55853 |
| sal_Int32 nBufLen = m_aBufferString.getLength(); |
| |
| // We need to be careful to call NextToken() only after having |
| // tested for a whitespace separator (otherwise it will be |
| // skipped!) |
| bool moveToNextToken = true; |
| while (m_nBufferIndex < nBufLen |
| && m_pSysCC->getType(m_aBufferString, m_nBufferIndex) |
| != UnicodeType::SPACE_SEPARATOR) |
| { |
| NextToken(); |
| if (m_aCurToken.eType != TNUMBER && m_aCurToken.eType != TIDENT) |
| { |
| // Neither a number nor an identifier. We just moved to |
| // the next token, so no need to do that again. |
| moveToNextToken = false; |
| break; |
| } |
| aNodes.emplace_back(std::unique_ptr<SmNode>(new SmTextNode( |
| m_aCurToken, m_aCurToken.eType == TNUMBER ? FNT_NUMBER : FNT_VARIABLE))); |
| } |
| if (moveToNextToken) |
| NextToken(); |
| if (aNodes.empty()) |
| return std::unique_ptr<SmNode>(pTextNode.release()); |
| // We have several concatenated identifiers and numbers. |
| // Let's group them into one SmExpressionNode. |
| aNodes.insert(aNodes.begin(), std::move(pTextNode)); |
| std::unique_ptr<SmExpressionNode> xNode(new SmExpressionNode(SmToken())); |
| xNode->SetSubNodes(buildNodeArray(aNodes)); |
| return std::unique_ptr<SmNode>(xNode.release()); |
| } |
| case TLEFTARROW: |
| case TRIGHTARROW: |
| case TUPARROW: |
| case TDOWNARROW: |
| case TCIRC: |
| case TDRARROW: |
| case TDLARROW: |
| case TDLRARROW: |
| case TEXISTS: |
| case TNOTEXISTS: |
| case TFORALL: |
| case TPARTIAL: |
| case TNABLA: |
| case TLAPLACE: |
| case TFOURIER: |
| case TTOWARD: |
| case TDOTSAXIS: |
| case TDOTSDIAG: |
| case TDOTSDOWN: |
| case TDOTSLOW: |
| case TDOTSUP: |
| case TDOTSVERT: |
| { |
| auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken); |
| pNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| return std::unique_ptr<SmNode>(pNode.release()); |
| } |
| |
| case TSETN: |
| case TSETZ: |
| case TSETQ: |
| case TSETR: |
| case TSETC: |
| case THBAR: |
| case TLAMBDABAR: |
| case TBACKEPSILON: |
| case TALEPH: |
| case TIM: |
| case TRE: |
| case TWP: |
| case TEMPTYSET: |
| case TINFINITY: |
| { |
| auto pNode = std::make_unique<SmMathIdentifierNode>(m_aCurToken); |
| pNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| return std::unique_ptr<SmNode>(pNode.release()); |
| } |
| |
| case TPLACE: |
| { |
| auto pNode = std::make_unique<SmPlaceNode>(m_aCurToken); |
| pNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| return std::unique_ptr<SmNode>(pNode.release()); |
| } |
| |
| case TSPECIAL: |
| return DoSpecial(); |
| |
| case TBINOM: |
| return DoBinom(); |
| |
| case TFRAC: |
| return DoFrac(); |
| |
| case TSTACK: |
| return DoStack(); |
| |
| case TMATRIX: |
| return DoMatrix(); |
| |
| case THEX: |
| NextTokenFontSize(); |
| if (m_aCurToken.eType == THEX) |
| { |
| auto pTextNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_NUMBER); |
| pTextNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| return pTextNode; |
| } |
| else |
| return DoError(SmParseError::NumberExpected); |
| default: |
| if (TokenInGroup(TG::LBrace)) |
| return DoBrace(); |
| if (TokenInGroup(TG::Oper)) |
| return DoOperator(); |
| if (TokenInGroup(TG::UnOper)) |
| return DoUnOper(); |
| if (TokenInGroup(TG::Attribute) || TokenInGroup(TG::FontAttr)) |
| { |
| std::stack<std::unique_ptr<SmStructureNode>, |
| std::vector<std::unique_ptr<SmStructureNode>>> |
| aStack; |
| bool bIsAttr; |
| for (;;) |
| { |
| bIsAttr = TokenInGroup(TG::Attribute); |
| if (!bIsAttr && !TokenInGroup(TG::FontAttr)) |
| break; |
| aStack.push(bIsAttr ? DoAttribute() : DoFontAttribute()); |
| } |
| |
| auto xFirstNode = DoPower(); |
| while (!aStack.empty()) |
| { |
| std::unique_ptr<SmStructureNode> xNode = std::move(aStack.top()); |
| aStack.pop(); |
| xNode->SetSubNodes(nullptr, std::move(xFirstNode)); |
| xFirstNode = std::move(xNode); |
| } |
| return xFirstNode; |
| } |
| if (TokenInGroup(TG::Function)) |
| return DoFunction(); |
| return DoError(SmParseError::UnexpectedChar); |
| } |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoEscape() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| NextToken(); |
| |
| switch (m_aCurToken.eType) |
| { |
| case TLPARENT: |
| case TRPARENT: |
| case TLBRACKET: |
| case TRBRACKET: |
| case TLDBRACKET: |
| case TRDBRACKET: |
| case TLBRACE: |
| case TLGROUP: |
| case TRBRACE: |
| case TRGROUP: |
| case TLANGLE: |
| case TRANGLE: |
| case TLCEIL: |
| case TRCEIL: |
| case TLFLOOR: |
| case TRFLOOR: |
| case TLLINE: |
| case TRLINE: |
| case TLDLINE: |
| case TRDLINE: |
| { |
| auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken); |
| pNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| return std::unique_ptr<SmNode>(pNode.release()); |
| } |
| default: |
| return DoError(SmParseError::UnexpectedToken); |
| } |
| } |
| |
| std::unique_ptr<SmOperNode> SmParser5::DoOperator() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| assert(TokenInGroup(TG::Oper)); |
| |
| auto xSNode = std::make_unique<SmOperNode>(m_aCurToken); |
| xSNode->SetSelection(m_aCurESelection); |
| |
| // get operator |
| auto xOperator = DoOper(); |
| |
| if (m_aCurToken.nGroup == TG::Limit || m_aCurToken.nGroup == TG::Power) |
| xOperator = DoSubSup(m_aCurToken.nGroup, std::move(xOperator)); |
| |
| // get argument |
| auto xArg = DoPower(); |
| |
| xSNode->SetSubNodes(std::move(xOperator), std::move(xArg)); |
| return xSNode; |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoOper() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| SmTokenType eType(m_aCurToken.eType); |
| std::unique_ptr<SmNode> pNode; |
| |
| switch (eType) |
| { |
| case TSUM: |
| case TPROD: |
| case TCOPROD: |
| case TINT: |
| case TINTD: |
| case TIINT: |
| case TIIINT: |
| case TLINT: |
| case TLLINT: |
| case TLLLINT: |
| pNode.reset(new SmMathSymbolNode(m_aCurToken)); |
| pNode->SetSelection(m_aCurESelection); |
| break; |
| |
| case TLIM: |
| case TLIMSUP: |
| case TLIMINF: |
| case THADD: |
| case TNAHA: |
| if (eType == TLIMSUP) |
| m_aCurToken.aText = u"lim sup"_ustr; |
| else if (eType == TLIMINF) |
| m_aCurToken.aText = u"lim inf"_ustr; |
| else if (eType == TNAHA) |
| m_aCurToken.aText = u"نها"_ustr; |
| else if (eType == THADD) |
| m_aCurToken.aText = OUString(&MS_HADD, 1); |
| else |
| m_aCurToken.aText = u"lim"_ustr; |
| pNode.reset(new SmTextNode(m_aCurToken, FNT_TEXT)); |
| pNode->SetSelection(m_aCurESelection); |
| break; |
| |
| case TOPER: |
| NextToken(); |
| OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token"); |
| m_aCurToken.eType = TOPER; |
| pNode.reset(new SmGlyphSpecialNode(m_aCurToken)); |
| pNode->SetSelection(m_aCurESelection); |
| break; |
| |
| default: |
| assert(false && "unknown case"); |
| } |
| |
| NextToken(); |
| return pNode; |
| } |
| |
| std::unique_ptr<SmStructureNode> SmParser5::DoUnOper() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| assert(TokenInGroup(TG::UnOper)); |
| |
| SmToken aNodeToken = m_aCurToken; |
| ESelection aESelection = m_aCurESelection; |
| SmTokenType eType = m_aCurToken.eType; |
| bool bIsPostfix = eType == TFACT; |
| |
| std::unique_ptr<SmStructureNode> xSNode; |
| std::unique_ptr<SmNode> xOper; |
| std::unique_ptr<SmNode> xExtra; |
| std::unique_ptr<SmNode> xArg; |
| |
| switch (eType) |
| { |
| case TABS: |
| case TSQRT: |
| NextToken(); |
| break; |
| |
| case TNROOT: |
| NextToken(); |
| xExtra = DoPower(); |
| break; |
| |
| case TUOPER: |
| NextToken(); |
| //Let the glyph know what it is... |
| m_aCurToken.eType = TUOPER; |
| m_aCurToken.nGroup = TG::UnOper; |
| xOper = DoGlyphSpecial(); |
| break; |
| |
| case TPLUS: |
| case TMINUS: |
| case TPLUSMINUS: |
| case TMINUSPLUS: |
| case TNEG: |
| case TFACT: |
| xOper = DoOpSubSup(); |
| break; |
| |
| default: |
| assert(false); |
| } |
| |
| // get argument |
| xArg = DoPower(); |
| |
| if (eType == TABS) |
| { |
| xSNode.reset(new SmBraceNode(aNodeToken)); |
| xSNode->SetSelection(aESelection); |
| xSNode->SetScaleMode(SmScaleMode::Height); |
| |
| // build nodes for left & right lines |
| // (text, group, level of the used token are of no interest here) |
| // we'll use row & column of the keyword for abs |
| aNodeToken.eType = TABS; |
| |
| aNodeToken.setChar(MS_VERTLINE); |
| std::unique_ptr<SmNode> xLeft(new SmMathSymbolNode(aNodeToken)); |
| xLeft->SetSelection(aESelection); |
| std::unique_ptr<SmNode> xRight(new SmMathSymbolNode(aNodeToken)); |
| xRight->SetSelection(aESelection); |
| |
| xSNode->SetSubNodes(std::move(xLeft), std::move(xArg), std::move(xRight)); |
| } |
| else if (eType == TSQRT || eType == TNROOT) |
| { |
| xSNode.reset(new SmRootNode(aNodeToken)); |
| xSNode->SetSelection(aESelection); |
| xOper.reset(new SmRootSymbolNode(aNodeToken)); |
| xOper->SetSelection(aESelection); |
| xSNode->SetSubNodes(std::move(xExtra), std::move(xOper), std::move(xArg)); |
| } |
| else |
| { |
| xSNode.reset(new SmUnHorNode(aNodeToken)); |
| xSNode->SetSelection(aESelection); |
| if (bIsPostfix) |
| xSNode->SetSubNodes(std::move(xArg), std::move(xOper)); |
| else |
| { |
| // prefix operator |
| xSNode->SetSubNodes(std::move(xOper), std::move(xArg)); |
| } |
| } |
| return xSNode; |
| } |
| |
| std::unique_ptr<SmStructureNode> SmParser5::DoAttribute() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| assert(TokenInGroup(TG::Attribute)); |
| |
| auto xSNode = std::make_unique<SmAttributeNode>(m_aCurToken); |
| xSNode->SetSelection(m_aCurESelection); |
| std::unique_ptr<SmNode> xAttr; |
| SmScaleMode eScaleMode = SmScaleMode::None; |
| |
| // get appropriate node for the attribute itself |
| switch (m_aCurToken.eType) |
| { |
| case TUNDERLINE: |
| case TOVERLINE: |
| case TOVERSTRIKE: |
| xAttr.reset(new SmRectangleNode(m_aCurToken)); |
| xAttr->SetSelection(m_aCurESelection); |
| eScaleMode = SmScaleMode::Width; |
| break; |
| |
| case TWIDEVEC: |
| case TWIDEHARPOON: |
| case TWIDEHAT: |
| case TWIDETILDE: |
| xAttr.reset(new SmMathSymbolNode(m_aCurToken)); |
| xAttr->SetSelection(m_aCurESelection); |
| eScaleMode = SmScaleMode::Width; |
| break; |
| |
| default: |
| xAttr.reset(new SmMathSymbolNode(m_aCurToken)); |
| xAttr->SetSelection(m_aCurESelection); |
| } |
| |
| NextToken(); |
| |
| xSNode->SetSubNodes(std::move(xAttr), nullptr); // the body will be filled later |
| xSNode->SetScaleMode(eScaleMode); |
| return xSNode; |
| } |
| |
| std::unique_ptr<SmStructureNode> SmParser5::DoFontAttribute() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| assert(TokenInGroup(TG::FontAttr)); |
| |
| switch (m_aCurToken.eType) |
| { |
| case TITALIC: |
| case TNITALIC: |
| case TBOLD: |
| case TNBOLD: |
| case TPHANTOM: |
| { |
| auto pNode = std::make_unique<SmFontNode>(m_aCurToken); |
| pNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| return pNode; |
| } |
| |
| case TSIZE: |
| return DoFontSize(); |
| |
| case TFONT: |
| return DoFont(); |
| |
| case TCOLOR: |
| return DoColor(); |
| |
| default: |
| assert(false); |
| return {}; |
| } |
| } |
| |
| std::unique_ptr<SmStructureNode> SmParser5::DoColor() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| assert(m_aCurToken.eType == TCOLOR); |
| sal_Int32 nBufferIndex = m_nBufferIndex; |
| NextTokenColor(TCOLOR); |
| SmToken aToken; |
| ESelection aESelection; |
| |
| if (m_aCurToken.eType == TDVIPSNAMESCOL) |
| NextTokenColor(TDVIPSNAMESCOL); |
| if (m_aCurToken.eType == TERROR) |
| return DoError(SmParseError::ColorExpected); |
| if (TokenInGroup(TG::Color)) |
| { |
| aToken = m_aCurToken; |
| aESelection = m_aCurESelection; |
| if (m_aCurToken.eType == TRGB) //loads r, g and b |
| { |
| sal_uInt32 nr, ng, nb, nc; |
| NextTokenFontSize(); |
| if (lcl_IsNotWholeNumber(m_aCurToken.aText)) |
| return DoError(SmParseError::ColorExpected); |
| nr = m_aCurToken.aText.toUInt32(); |
| if (nr > 255) |
| return DoError(SmParseError::ColorExpected); |
| NextTokenFontSize(); |
| if (lcl_IsNotWholeNumber(m_aCurToken.aText)) |
| return DoError(SmParseError::ColorExpected); |
| ng = m_aCurToken.aText.toUInt32(); |
| if (ng > 255) |
| return DoError(SmParseError::ColorExpected); |
| NextTokenFontSize(); |
| if (lcl_IsNotWholeNumber(m_aCurToken.aText)) |
| return DoError(SmParseError::ColorExpected); |
| nb = m_aCurToken.aText.toUInt32(); |
| if (nb > 255) |
| return DoError(SmParseError::ColorExpected); |
| nc = nb | ng << 8 | nr << 16 | sal_uInt32(0) << 24; |
| aToken.cMathChar = OUString::number(nc, 16); |
| } |
| else if (m_aCurToken.eType == TRGBA) //loads r, g and b |
| { |
| sal_uInt32 nr, na, ng, nb, nc; |
| NextTokenFontSize(); |
| if (lcl_IsNotWholeNumber(m_aCurToken.aText)) |
| return DoError(SmParseError::ColorExpected); |
| nr = m_aCurToken.aText.toUInt32(); |
| if (nr > 255) |
| return DoError(SmParseError::ColorExpected); |
| NextTokenFontSize(); |
| if (lcl_IsNotWholeNumber(m_aCurToken.aText)) |
| return DoError(SmParseError::ColorExpected); |
| ng = m_aCurToken.aText.toUInt32(); |
| if (ng > 255) |
| return DoError(SmParseError::ColorExpected); |
| NextTokenFontSize(); |
| if (lcl_IsNotWholeNumber(m_aCurToken.aText)) |
| return DoError(SmParseError::ColorExpected); |
| nb = m_aCurToken.aText.toUInt32(); |
| if (nb > 255) |
| return DoError(SmParseError::ColorExpected); |
| NextTokenFontSize(); |
| if (lcl_IsNotWholeNumber(m_aCurToken.aText)) |
| return DoError(SmParseError::ColorExpected); |
| na = m_aCurToken.aText.toUInt32(); |
| if (na > 255) |
| return DoError(SmParseError::ColorExpected); |
| nc = nb | ng << 8 | nr << 16 | na << 24; |
| aToken.cMathChar = OUString::number(nc, 16); |
| } |
| else if (m_aCurToken.eType == THEX) //loads hex code |
| { |
| sal_uInt32 nc; |
| NextTokenFontSize(); |
| if (lcl_IsNotWholeNumber16(m_aCurToken.aText)) |
| return DoError(SmParseError::ColorExpected); |
| nc = m_aCurToken.aText.toUInt32(16); |
| aToken.cMathChar = OUString::number(nc, 16); |
| } |
| aToken.aText = m_aBufferString.subView(nBufferIndex, m_nBufferIndex - nBufferIndex); |
| NextToken(); |
| } |
| else |
| return DoError(SmParseError::ColorExpected); |
| |
| std::unique_ptr<SmStructureNode> xNode; |
| xNode.reset(new SmFontNode(aToken)); |
| xNode->SetSelection(aESelection); |
| return xNode; |
| } |
| |
| std::unique_ptr<SmStructureNode> SmParser5::DoFont() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| assert(m_aCurToken.eType == TFONT); |
| |
| std::unique_ptr<SmStructureNode> xNode; |
| // last font rules, get that one |
| SmToken aToken; |
| ESelection aESelection = m_aCurESelection; |
| do |
| { |
| NextToken(); |
| |
| if (TokenInGroup(TG::Font)) |
| { |
| aToken = m_aCurToken; |
| NextToken(); |
| } |
| else |
| { |
| return DoError(SmParseError::FontExpected); |
| } |
| } while (m_aCurToken.eType == TFONT); |
| |
| xNode.reset(new SmFontNode(aToken)); |
| xNode->SetSelection(aESelection); |
| return xNode; |
| } |
| |
| std::unique_ptr<SmStructureNode> SmParser5::DoFontSize() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| std::unique_ptr<SmFontNode> pFontNode(new SmFontNode(m_aCurToken)); |
| pFontNode->SetSelection(m_aCurESelection); |
| NextTokenFontSize(); |
| FontSizeType Type; |
| |
| switch (m_aCurToken.eType) |
| { |
| case THEX: |
| Type = FontSizeType::ABSOLUT; |
| break; |
| case TPLUS: |
| Type = FontSizeType::PLUS; |
| break; |
| case TMINUS: |
| Type = FontSizeType::MINUS; |
| break; |
| case TMULTIPLY: |
| Type = FontSizeType::MULTIPLY; |
| break; |
| case TDIVIDEBY: |
| Type = FontSizeType::DIVIDE; |
| break; |
| |
| default: |
| return DoError(SmParseError::SizeExpected); |
| } |
| |
| if (Type != FontSizeType::ABSOLUT) |
| { |
| NextTokenFontSize(); |
| if (m_aCurToken.eType != THEX) |
| return DoError(SmParseError::SizeExpected); |
| } |
| |
| // get number argument |
| double fValue(1); |
| if (lcl_IsNumber(m_aCurToken.aText)) |
| { |
| fValue = m_aCurToken.aText.toDouble(); |
| } |
| else |
| return DoError(SmParseError::SizeExpected); |
| |
| pFontNode->SetSizeParameter(fValue, Type); |
| NextToken(); |
| return pFontNode; |
| } |
| |
| std::unique_ptr<SmStructureNode> SmParser5::DoBrace() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| assert(m_aCurToken.eType == TLEFT || TokenInGroup(TG::LBrace)); |
| |
| std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken)); |
| xSNode->SetSelection(m_aCurESelection); |
| std::unique_ptr<SmNode> pBody, pLeft, pRight; |
| SmScaleMode eScaleMode = SmScaleMode::None; |
| SmParseError eError = SmParseError::None; |
| |
| if (m_aCurToken.eType == TLEFT) |
| { |
| NextToken(); |
| |
| eScaleMode = SmScaleMode::Height; |
| |
| // check for left bracket |
| if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace)) |
| { |
| pLeft.reset(new SmMathSymbolNode(m_aCurToken)); |
| pLeft->SetSelection(m_aCurESelection); |
| |
| NextToken(); |
| pBody = DoBracebody(true); |
| |
| if (m_aCurToken.eType == TRIGHT) |
| { |
| NextToken(); |
| |
| // check for right bracket |
| if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace)) |
| { |
| pRight.reset(new SmMathSymbolNode(m_aCurToken)); |
| pRight->SetSelection(m_aCurESelection); |
| NextToken(); |
| } |
| else |
| eError = SmParseError::RbraceExpected; |
| } |
| else |
| eError = SmParseError::RightExpected; |
| } |
| else |
| eError = SmParseError::LbraceExpected; |
| } |
| else |
| { |
| assert(TokenInGroup(TG::LBrace)); |
| |
| pLeft.reset(new SmMathSymbolNode(m_aCurToken)); |
| pLeft->SetSelection(m_aCurESelection); |
| |
| NextToken(); |
| pBody = DoBracebody(false); |
| |
| SmTokenType eExpectedType = TUNKNOWN; |
| switch (pLeft->GetToken().eType) |
| { |
| case TLPARENT: |
| eExpectedType = TRPARENT; |
| break; |
| case TLBRACKET: |
| eExpectedType = TRBRACKET; |
| break; |
| case TLBRACE: |
| eExpectedType = TRBRACE; |
| break; |
| case TLDBRACKET: |
| eExpectedType = TRDBRACKET; |
| break; |
| case TLLINE: |
| eExpectedType = TRLINE; |
| break; |
| case TLDLINE: |
| eExpectedType = TRDLINE; |
| break; |
| case TLANGLE: |
| eExpectedType = TRANGLE; |
| break; |
| case TLFLOOR: |
| eExpectedType = TRFLOOR; |
| break; |
| case TLCEIL: |
| eExpectedType = TRCEIL; |
| break; |
| case TLRLINE: |
| eExpectedType = TLRLINE; |
| break; |
| case TLRDLINE: |
| eExpectedType = TLRDLINE; |
| break; |
| default: |
| SAL_WARN("starmath", "unknown case"); |
| } |
| |
| if (m_aCurToken.eType == eExpectedType) |
| { |
| pRight.reset(new SmMathSymbolNode(m_aCurToken)); |
| pRight->SetSelection(m_aCurESelection); |
| NextToken(); |
| } |
| else |
| eError = SmParseError::ParentMismatch; |
| } |
| |
| if (eError == SmParseError::None) |
| { |
| assert(pLeft); |
| assert(pRight); |
| xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight)); |
| xSNode->SetScaleMode(eScaleMode); |
| return xSNode; |
| } |
| return DoError(eError); |
| } |
| |
| std::unique_ptr<SmBracebodyNode> SmParser5::DoBracebody(bool bIsLeftRight) |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| auto pBody = std::make_unique<SmBracebodyNode>(m_aCurToken); |
| pBody->SetSelection(m_aCurESelection); |
| |
| std::vector<std::unique_ptr<SmNode>> aNodes; |
| // get body if any |
| if (bIsLeftRight) |
| { |
| do |
| { |
| if (m_aCurToken.eType == TMLINE) |
| { |
| SmMathSymbolNode* pTempNode = new SmMathSymbolNode(m_aCurToken); |
| pTempNode->SetSelection(m_aCurESelection); |
| aNodes.emplace_back(std::unique_ptr<SmMathSymbolNode>(pTempNode)); |
| NextToken(); |
| } |
| else if (m_aCurToken.eType != TRIGHT) |
| { |
| aNodes.push_back(DoAlign()); |
| if (m_aCurToken.eType != TMLINE && m_aCurToken.eType != TRIGHT) |
| aNodes.emplace_back(DoError(SmParseError::RightExpected)); |
| } |
| } while (m_aCurToken.eType != TEND && m_aCurToken.eType != TRIGHT); |
| } |
| else |
| { |
| do |
| { |
| if (m_aCurToken.eType == TMLINE) |
| { |
| SmMathSymbolNode* pTempNode = new SmMathSymbolNode(m_aCurToken); |
| pTempNode->SetSelection(m_aCurESelection); |
| aNodes.emplace_back(std::unique_ptr<SmMathSymbolNode>(pTempNode)); |
| NextToken(); |
| } |
| else if (!TokenInGroup(TG::RBrace)) |
| { |
| aNodes.push_back(DoAlign()); |
| if (m_aCurToken.eType != TMLINE && !TokenInGroup(TG::RBrace)) |
| aNodes.emplace_back(DoError(SmParseError::RbraceExpected)); |
| } |
| } while (m_aCurToken.eType != TEND && !TokenInGroup(TG::RBrace)); |
| } |
| |
| pBody->SetSubNodes(buildNodeArray(aNodes)); |
| pBody->SetScaleMode(bIsLeftRight ? SmScaleMode::Height : SmScaleMode::None); |
| return pBody; |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::DoEvaluate() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| // Create node |
| std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken)); |
| xSNode->SetSelection(m_aCurESelection); |
| SmToken aToken(TRLINE, MS_VERTLINE, u"evaluate"_ustr, TG::RBrace, 5); |
| |
| // Parse body && left none |
| NextToken(); |
| std::unique_ptr<SmNode> pBody = DoPower(); |
| SmToken bToken(TNONE, '\0', u""_ustr, TG::LBrace, 5); |
| std::unique_ptr<SmNode> pLeft; |
| pLeft.reset(new SmMathSymbolNode(bToken)); |
| |
| // Mount nodes |
| std::unique_ptr<SmNode> pRight; |
| pRight.reset(new SmMathSymbolNode(aToken)); |
| xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight)); |
| xSNode->SetScaleMode(SmScaleMode::Height); // scalable line |
| |
| // Parse from to |
| if (m_aCurToken.nGroup == TG::Limit) |
| { |
| std::unique_ptr<SmNode> rSNode; |
| rSNode = DoSubSupEvaluate(std::move(xSNode)); |
| rSNode->GetToken().eType = TEVALUATE; |
| return rSNode; |
| } |
| |
| return xSNode; |
| } |
| |
| std::unique_ptr<SmTextNode> SmParser5::DoFunction() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| if (m_aCurToken.eType == TFUNC) |
| { |
| NextToken(); // skip "FUNC"-statement |
| m_aCurToken.eType = TFUNC; |
| m_aCurToken.nGroup = TG::Function; |
| } |
| auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_FUNCTION); |
| pNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| return pNode; |
| } |
| |
| std::unique_ptr<SmTableNode> SmParser5::DoBinom() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| auto xSNode = std::make_unique<SmTableNode>(m_aCurToken); |
| xSNode->SetSelection(m_aCurESelection); |
| |
| NextToken(); |
| |
| auto xFirst = DoSum(); |
| auto xSecond = DoSum(); |
| xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond)); |
| return xSNode; |
| } |
| |
| std::unique_ptr<SmBinVerNode> SmParser5::DoFrac() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| std::unique_ptr<SmBinVerNode> xSNode = std::make_unique<SmBinVerNode>(m_aCurToken); |
| xSNode->SetSelection(m_aCurESelection); |
| std::unique_ptr<SmNode> xOper = std::make_unique<SmRectangleNode>(m_aCurToken); |
| xOper->SetSelection(m_aCurESelection); |
| |
| NextToken(); |
| |
| auto xFirst = DoSum(); |
| auto xSecond = DoSum(); |
| xSNode->SetSubNodes(std::move(xFirst), std::move(xOper), std::move(xSecond)); |
| return xSNode; |
| } |
| |
| std::unique_ptr<SmStructureNode> SmParser5::DoStack() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| std::unique_ptr<SmStructureNode> xSNode(new SmTableNode(m_aCurToken)); |
| xSNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| if (m_aCurToken.eType != TLGROUP) |
| return DoError(SmParseError::LgroupExpected); |
| std::vector<std::unique_ptr<SmNode>> aExprArr; |
| do |
| { |
| NextToken(); |
| aExprArr.push_back(DoAlign()); |
| } while (m_aCurToken.eType == TPOUND); |
| |
| if (m_aCurToken.eType == TRGROUP) |
| NextToken(); |
| else |
| aExprArr.emplace_back(DoError(SmParseError::RgroupExpected)); |
| |
| xSNode->SetSubNodes(buildNodeArray(aExprArr)); |
| return xSNode; |
| } |
| |
| std::unique_ptr<SmStructureNode> SmParser5::DoMatrix() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| std::unique_ptr<SmMatrixNode> xMNode(new SmMatrixNode(m_aCurToken)); |
| xMNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| if (m_aCurToken.eType != TLGROUP) |
| return DoError(SmParseError::LgroupExpected); |
| |
| std::vector<std::unique_ptr<SmNode>> aExprArr; |
| do |
| { |
| NextToken(); |
| aExprArr.push_back(DoAlign()); |
| } while (m_aCurToken.eType == TPOUND); |
| |
| size_t nCol = aExprArr.size(); |
| size_t nRow = 1; |
| while (m_aCurToken.eType == TDPOUND) |
| { |
| NextToken(); |
| for (size_t i = 0; i < nCol; i++) |
| { |
| auto xNode = DoAlign(); |
| if (i < (nCol - 1)) |
| { |
| if (m_aCurToken.eType == TPOUND) |
| NextToken(); |
| else |
| xNode = DoError(SmParseError::PoundExpected); |
| } |
| aExprArr.emplace_back(std::move(xNode)); |
| } |
| ++nRow; |
| } |
| |
| if (m_aCurToken.eType == TRGROUP) |
| NextToken(); |
| else |
| { |
| std::unique_ptr<SmNode> xENode(DoError(SmParseError::RgroupExpected)); |
| if (aExprArr.empty()) |
| nRow = nCol = 1; |
| else |
| aExprArr.pop_back(); |
| aExprArr.emplace_back(std::move(xENode)); |
| } |
| |
| xMNode->SetSubNodes(buildNodeArray(aExprArr)); |
| xMNode->SetRowCol(static_cast<sal_uInt16>(nRow), static_cast<sal_uInt16>(nCol)); |
| return std::unique_ptr<SmStructureNode>(xMNode.release()); |
| } |
| |
| std::unique_ptr<SmSpecialNode> SmParser5::DoSpecial() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| bool bReplace = false; |
| OUString& rName = m_aCurToken.aText; |
| OUString aNewName; |
| |
| // conversion of symbol names for 6.0 (XML) file format |
| // (name change on import / export. |
| // UI uses localized names XML file format does not.) |
| if (rName.startsWith("%")) |
| { |
| if (IsImportSymbolNames()) |
| { |
| const SmSym* pSym |
| = SmModule::get()->GetSymbolManager().GetSymbolByExportName(rName.subView(1)); |
| if (pSym) |
| { |
| aNewName = pSym->GetUiName(); |
| bReplace = true; |
| } |
| } |
| else if (IsExportSymbolNames()) |
| { |
| const SmSym* pSym |
| = SmModule::get()->GetSymbolManager().GetSymbolByUiName(rName.subView(1)); |
| if (pSym) |
| { |
| aNewName = pSym->GetExportName(); |
| bReplace = true; |
| } |
| } |
| } |
| if (!aNewName.isEmpty()) |
| aNewName = "%" + aNewName; |
| |
| if (bReplace && !aNewName.isEmpty() && rName != aNewName) |
| { |
| Replace(GetTokenIndex(), rName.getLength(), aNewName); |
| rName = aNewName; |
| } |
| |
| // add symbol name to list of used symbols |
| const OUString aSymbolName(m_aCurToken.aText.copy(1)); |
| if (!aSymbolName.isEmpty()) |
| m_aUsedSymbols.insert(aSymbolName); |
| |
| auto pNode = std::make_unique<SmSpecialNode>(m_aCurToken); |
| pNode->SetSelection(m_aCurESelection); |
| NextToken(); |
| return pNode; |
| } |
| |
| std::unique_ptr<SmGlyphSpecialNode> SmParser5::DoGlyphSpecial() |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| auto pNode = std::make_unique<SmGlyphSpecialNode>(m_aCurToken); |
| NextToken(); |
| return pNode; |
| } |
| |
| std::unique_ptr<SmExpressionNode> SmParser5::DoError(SmParseError eError) |
| { |
| DepthProtect aDepthGuard(m_nParseDepth); |
| |
| // Generate error node |
| m_aCurToken.eType = TERROR; |
| // Identify error message |
| m_aCurToken.cMathChar = SmResId(RID_ERR_IDENT) + starmathdatabase::getParseErrorDesc(eError); |
| auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken); |
| SmErrorNode* pErr(new SmErrorNode(m_aCurToken)); |
| pErr->SetSelection(m_aCurESelection); |
| xSNode->SetSubNode(0, pErr); |
| |
| // Append error to the error list |
| SmErrorDesc aErrDesc(eError, xSNode.get(), m_aCurToken.cMathChar); |
| m_aErrDescList.push_back(aErrDesc); |
| |
| NextToken(); |
| |
| return xSNode; |
| } |
| |
| // end grammar |
| |
| SmParser5::SmParser5() |
| : m_nCurError(0) |
| , m_nBufferIndex(0) |
| , m_nTokenIndex(0) |
| , m_nRow(0) |
| , m_nColOff(0) |
| , m_bImportSymNames(false) |
| , m_bExportSymNames(false) |
| , m_nParseDepth(0) |
| , m_aNumCC(LanguageTag(LANGUAGE_ENGLISH_US)) |
| , m_pSysCC(&SmModule::get()->GetSysLocale().GetCharClass()) |
| { |
| } |
| |
| SmParser5::~SmParser5() {} |
| |
| std::unique_ptr<SmTableNode> SmParser5::Parse(const OUString& rBuffer) |
| { |
| m_aUsedSymbols.clear(); |
| |
| m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF); |
| m_nBufferIndex = 0; |
| m_nTokenIndex = 0; |
| m_nRow = 0; |
| m_nColOff = 0; |
| m_nCurError = -1; |
| |
| m_aErrDescList.clear(); |
| |
| NextToken(); |
| return DoTable(); |
| } |
| |
| std::unique_ptr<SmNode> SmParser5::ParseExpression(const OUString& rBuffer) |
| { |
| m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF); |
| m_nBufferIndex = 0; |
| m_nTokenIndex = 0; |
| m_nRow = 0; |
| m_nColOff = 0; |
| m_nCurError = -1; |
| |
| m_aErrDescList.clear(); |
| |
| NextToken(); |
| return DoExpression(); |
| } |
| |
| const SmErrorDesc* SmParser5::NextError() |
| { |
| if (!m_aErrDescList.empty()) |
| if (m_nCurError > 0) |
| return &m_aErrDescList[--m_nCurError]; |
| else |
| { |
| m_nCurError = 0; |
| return &m_aErrDescList[m_nCurError]; |
| } |
| else |
| return nullptr; |
| } |
| |
| const SmErrorDesc* SmParser5::PrevError() |
| { |
| if (!m_aErrDescList.empty()) |
| if (m_nCurError < static_cast<int>(m_aErrDescList.size() - 1)) |
| return &m_aErrDescList[++m_nCurError]; |
| else |
| { |
| m_nCurError = static_cast<int>(m_aErrDescList.size() - 1); |
| return &m_aErrDescList[m_nCurError]; |
| } |
| else |
| return nullptr; |
| } |
| |
| const SmErrorDesc* SmParser5::GetError() const |
| { |
| if (m_aErrDescList.empty()) |
| return nullptr; |
| return &m_aErrDescList.front(); |
| } |
| |
| /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |