size_t ATMSP<T>::parse(ATMSB<T> &bc, const std::string &exps, const std::string &vars) {

	// Compiles the expression exps into the bytecode bc. vars is a comma
	// separated list of the variable names; they are stored in varLst in the
	// order given. Blanks in both strings are ignored.
	// Returns noErr on success, otherwise the error code that was handed to
	// longjmp(): funErr for an empty expression, memErr if a variable could
	// not be stored, parErr for unbalanced parentheses, nanErr if a constant
	// expression evaluates with bc.fltErr set, or whatever code reaches
	// errJmp while expression() runs (presumably its recursive descent
	// reports errors the same way -- not visible from here).

	// Build the blank-free working copies BEFORE arming the jump target.
	// longjmp() does not unwind the stack: strings created after setjmp()
	// would never be destroyed on an error return (leak, and formally
	// undefined behavior). Created and finished up here they are untouched
	// between setjmp() and any longjmp(), so the error return below destroys
	// them like any normal return does
	std::string es(exps), vs(vars);
	for (std::string::size_type p = es.find(' '); p != std::string::npos; p = es.find(' ', p))
		es.erase(p, 1);
	for (std::string::size_type p = vs.find(' '); p != std::string::npos; p = vs.find(' ', p))
		vs.erase(p, 1);

	// Always force recursion break on errors. Every longjmp(errJmp, ...)
	// issued from here on resumes at this line with the error code
	size_t eLevel = noErr;
	if ( (eLevel=setjmp(errJmp)) != noErr ) return eLevel;

	// Nothing left to parse?
	if ( es.empty() ) longjmp(errJmp, funErr);

	// cp points into es. It is only valid while this call is running
	cp = const_cast<char *>(es.c_str());

	// Split comma separated variables into varLst
	// One instance can be parsed repeatedly. So clear() is vital here
	std::string::size_type pos, lastPos;
	varLst.clear();
	pos = vs.find_first_of(',', lastPos = vs.find_first_not_of(',', 0));
	while ( std::string::npos != pos || std::string::npos != lastPos ) {
		if ( !varLst.push(vs.substr(lastPos, pos-lastPos)) ) longjmp(errJmp, memErr);
		pos = vs.find_first_of(',', lastPos = vs.find_first_not_of(',', pos));
	}

	// Static parenthesis check. "Abuse" free opCnt/varCnt as open/close-counters
	// A ')' without a preceding '(' fails at once, a surplus '(' at the end
	opCnt = varCnt = 0;
	for (size_t i=0; i<es.size(); i++) {
		if ( es[i] == '(' )
			opCnt++;
		else if ( es[i] == ')' ) {
			varCnt++;
			if ( varCnt > opCnt ) longjmp(errJmp, parErr);
		}
	}
	if ( opCnt != varCnt ) longjmp(errJmp, parErr);

	// Reset all our counters and indices
	// opCnt  = Operator count. For bytecode and memory checks
	// varCnt = Variable count. For check if we have a constant expression
	// valInd = All num, var and con values are mapped into the bytecode-val-array
	// numInd = Numerical numbers array index
	opCnt = varCnt = valInd = numInd = 0;

	// Run it once for parsing and generating the bytecode
	expression(bc);
	bc.opCnt = opCnt;

	// No vars in expression? Evaluate at compile time then and replace the
	// bytecode by a single push of the precomputed result
	if ( !varCnt ) {
		bc.num[0] = bc.run();
		if ( bc.fltErr ) longjmp(errJmp, nanErr);
		bc.val[0] = &bc.num[0];
		bc.fun[0] = &ATMSB<T>::ppush;
		bc.opCnt = 1;
	}

	return noErr;
}
/// Smoke test: parses s with the variables x, y, z, evaluates it with
/// x=1, y=2, z=3 and prints the result. A non-zero rounds repeats the
/// evaluation that many additional times (for benchmarking).
/// A parse error is reported on stderr and terminates the program.
static void test(const char *s, size_t rounds=0) {

   // The bytecode has to use the SAME basic type as the parser
   ATMSB<PTYPE> byteCode;

   // The parser gets its own scope on purpose: it shows that only the
   // bytecode, NOT the parser-instance, is needed once parsing is done
   {
      ATMSP<PTYPE> parser;
      const size_t parseErr = parser.parse(byteCode, s, "x, y, z");
      if ( parseErr != 0 ) {
         std::cerr << parser.errMessage(parseErr) << std::endl;
         exit(parseErr);
      }
   }

   // Variable slots follow the order of the list given to parse(): x, y, z
   byteCode.var[0] = 1.0;
   byteCode.var[1] = 2.0;
   byteCode.var[2] = 3.0;

   // One evaluation always, plus the requested number of extra rounds
   PTYPE res = byteCode.run();
   for (size_t done = 0; done < rounds; ++done) {
      res = byteCode.run();
   }
   std::cout << std::setprecision(25) << ">>> Result = " <<  res << std::endl;

   // The bytecode's built-in flag is the cheap NaN/inf check. It covers
   // the common cases like x/0, sqrt(-3), asin(123) ...
   if ( byteCode.fltErr ) {
      std::cerr << "Float error fetched by ATMSP default-check" << std::endl;
   }

   // Second, more paranoid check on the result itself for the rare float
   // or complex errors the built-in flag may miss. Not compiled for MPFR
   #if !defined(MPFR)
   if ( hasNumErr(res) ) {
      std::cerr << "Float error fetched by extra-check" << std::endl;
   }
   #endif
}
/** Benchmark
 *
 * Parses every expression in list with the variables x, y, z (set to
 * 1, 2, 3), evaluates each one loops times in 10 timed passes and prints
 * the fastest pass together with the expression, one line per expression.
 * A parse error is reported on stderr and terminates the program.
 **/
static void bench(const size_t loops) {

    PTYPE res;

    // Parser/bytecode instances, reused for all expressions
    ATMSP<PTYPE> pa;
    ATMSB<PTYPE> bc;

    std::cout << "Each expression is evaluated " << loops << " x times:\n";

    // Set locale for better import in OO spreadsheet. This is the same for
    // every expression, so it is done once here instead of inside the loop.
    // It stays AFTER the headline so that line is formatted as before
    std::locale locale("");
    std::cout.imbue(locale);

    // Sweep over all expressions
    for (size_t i=0; i<list.size(); i++) {

        // Error check for parsing/bytecode generation
        size_t err;
        if ( (err=pa.parse(bc, list[i], "x, y, z")) ) {
            std::cerr << list[i] << " failed: " << pa.errMessage(err) << std::endl;
            exit(err);
        }

        // Some settings out of inner loops
        bc.var[0] = 1.0;
        bc.var[1] = 2.0;
        bc.var[2] = 3.0;
        double minSec = 10.0e10;

        // Get fastest run out of a few
        for (size_t j=0; j<10; j++) {
            TIMERS t;
            for (size_t k=0; k<loops; k++) res = bc.run();
            const double sec = t.stop();
            if ( sec < minSec ) minSec = sec;
        }

        // Tab and ";" separate the time from the expression for OO import.
        // The leading ' is emitted in front of the expression text
        std::cout << minSec << "\t;\'" << list[i] << std::endl;
    }
}
//-------------------------------------------------------------------------------------------------
/// Benchmarks the expression sExpr with the ATMSP parser.
///
/// The expression is parsed once with the variables a, b, c, x, y, z, w and
/// then evaluated iCount times between StartTimer() and StopTimer(). On a
/// parse error the parser's message is passed to StopTimerAndReport() and
/// nothing is evaluated.
///
/// @param sExpr   Expression to benchmark
/// @param iCount  Number of timed evaluations
/// @return m_fTime1 (presumably set by StopTimer()/StopTimerAndReport() --
///         confirm in the base class)
double BenchATMSP::DoBenchmark(const std::string& sExpr, long iCount)
{
   ATMSB<double> bc;
   ATMSP<double> p;

   // parse() returns a size_t error code; keep its full width
   const std::size_t err = p.parse(bc, sExpr, "a, b, c, x, y, z, w");

   if (err)
   {
      StopTimerAndReport(p.errMessage(err));
   }
   else
   {
      // Variable values, in the order of the list given to parse():
      // a, b, c, x, y, z, w
      bc.var[0] = 1.1;
      bc.var[1] = 2.2;
      bc.var[2] = 3.3;
      bc.var[3] = 2.123456;
      bc.var[4] = 3.123456;
      bc.var[5] = 4.123456;
      bc.var[6] = 5.123456;

      //Prime the I and D caches for the expression
      {
         double d0 = 0.0;
         double d1 = 0.0;

         for (std::size_t i = 0; i < priming_rounds; ++i)
         {
            if (i & 1)
               d0 += bc.run();
            else
               d1 += bc.run();
         }

         // The sums are consumed here, presumably so the compiler cannot
         // discard the priming runs as dead code
         if (
               (d0 == std::numeric_limits<double>::infinity()) &&
               (d1 == std::numeric_limits<double>::infinity())
            )
         {
            printf("\n");
         }
      }

      // Perform benchmark then return results
      double fRes (0);
      double fSum (0);

      fRes = bc.run();

      StartTimer();

      // The counter has the type of iCount: an int counter would overflow
      // (undefined behavior) for counts above INT_MAX where long is wider
      for (long j = 0; j < iCount; ++j)
      {
         fSum += bc.run();

         // Vary the inputs between evaluations
         std::swap(bc.var[0], bc.var[1]);
         std::swap(bc.var[3], bc.var[4]);
      }

      StopTimer(fRes, fSum, iCount);
   }

   return m_fTime1;
}