/* Shifts the logic representation of x by the integer amount given in
   shift and stores the outcome in dest. `right` selects the nominal
   direction; a negative shift amount reverses it.
   Returns 1 on success. If shift is NaN or not an integer, the value of
   _seterror is returned; if x cannot be converted by _cvtlogic, 0 is
   returned. */
char
_doshift(
  floatnum dest,
  cfloatnum x,
  cfloatnum shift,
  char right)
{
  t_longint bits;
  int count;

  if (float_isnan(shift))
    return _seterror(dest, NoOperand);
  if (!float_isinteger(shift))
    return _seterror(dest, OutOfDomain);
  if (!_cvtlogic(&bits, x))
    return 0;
  if (float_iszero(shift))
  {
    /* shifting by nothing: hand back the operand unchanged */
    float_copy(dest, x, EXACT);
    return 1;
  }
  count = float_asinteger(shift);
  /* shift is known to be non-zero here, so a 0 from the conversion is
     replaced by a count of 3*LOGICRANGE carrying the sign of shift
     (presumably the conversion yields 0 when shift does not fit an int) */
  if (count == 0)
    count = (3*LOGICRANGE) * float_getsign(shift);
  /* normalise the direction: from here on positive means "right" */
  if (!right)
    count = -count;
  if (count > 0)
    _shr(&bits, count);
  else
    _shl(&bits, -count);
  _logic2floatnum(dest, &bits);
  return 1;
}
/* Computes the rising Pochhammer symbol x*(x+1)*...*(x+n-1) for n >= 0
   in place, by plain multiplication. The cost grows with n, so callers
   should keep n small (something like n <= 100). "su" = "small" and
   "unsigned" n.

   The product is split recursively, using
     pochhammer(x, n) == pochhammer(x, p) * pochhammer(x+p, n-p)
   with p = floor(n/2). Both halves are then of similar size, which keeps
   the early multiplications cheap when x has only a few digits and lets
   Karatsuba multiplication work on balanced operands.

   Returns 0 as soon as one of the partial products fails, 1 otherwise. */
static char
_pochhammer_su(
  floatnum x,
  int n,
  int digits)
{
  floatstruct upper;
  int half;
  char ok;

  if (n == 0)
  {
    /* empty product */
    float_copy(x, &c1, EXACT);
    return 1;
  }
  if (n == 1)
    /* the product consists of x alone */
    return 1;

  half = n >> 1;
  float_create(&upper);
  /* first factor of the upper half is x + floor(n/2) */
  float_addi(&upper, x, half, digits+2);
  ok = _pochhammer_su(x, half, digits)
       && _pochhammer_su(&upper, n - half, digits)
       && float_mul(x, x, &upper, digits+2);
  float_free(&upper);
  return ok;
}
/* Raises base to the integer power exponent and stores the result in
   power, evaluated to `digits` places.

   Errors (reported through _seterror, whose value is returned):
     InvalidPrecision  digits <= 0 or digits > maxdigits
     NoOperand         base is NaN
     OutOfDomain       0^0
     ZeroDivide        0 raised to a negative exponent
     Underflow         the result is too small to be represented
     Overflow          the result is too big to be represented
   Returns 1 on success; 0^positive yields the value of _setzero. */
char
float_raisei(
  floatnum power,
  cfloatnum base,
  int exponent,
  int digits)
{
  if (digits <= 0 || digits > maxdigits)
    return _seterror(power, InvalidPrecision);
  if (float_isnan(base))
    return _seterror(power, NoOperand);
  if (float_iszero(base))
  {
    if (exponent == 0)
      return _seterror(power, OutOfDomain);
    if (exponent < 0)
      return _seterror(power, ZeroDivide);
    return _setzero(power);
  }
  /* work with 14 extra digits, capped at the maximum precision */
  digits += 14;
  if (digits > maxdigits)
    digits = maxdigits;
  float_copy(power, base, digits);
  if (!_raisei(power, exponent, digits)
      || !float_isvalidexp(float_getexponent(power)))
  {
    /* The result left the representable range. Which way depends on both
       operands: |base| < 1 (negative decimal exponent) shrinks under a
       non-negative power but grows under a negative one, and |base| >= 1
       behaves the other way round. Looking at the base alone would
       mislabel every failure with a negative exponent. */
    if ((float_getexponent(base) < 0) != (exponent < 0))
      return _seterror(power, Underflow);
    return _seterror(power, Overflow);
  }
  return 1;
}
/* Evaluates ln(Gamma(x)) in place for all those x that are big enough
   for the asymptotic series to be applied directly.
   Returns 0 if the result overflows, 1 otherwise.
   Relative error for a 100-digit calculation < 5e-100 */
static char
_lngammabigx(
  floatnum x,
  int digits)
{
  floatstruct leading;
  floatstruct lnxminus1;
  char result;

  float_create(&leading);
  float_create(&lnxminus1);

  /* the closed-form part is
     (ln x - 1) * (x - 0.5) - 0.5 + ln(sqrt(2*pi));
     start with the factor ln x - 1 */
  float_copy(&lnxminus1, x, digits+1);
  _ln(&lnxminus1, digits+1);
  float_sub(&lnxminus1, &lnxminus1, &c1, digits+2);
  /* ... and the factor x - 0.5 */
  float_sub(&leading, x, &c1Div2, digits+2);

  result = 0;
  if (float_mul(&leading, &leading, &lnxminus1, digits+2))
  {
    /* the product did not overflow: replace x by the series
       contribution, then add the product and the constant terms */
    lngammaasymptotic(x, digits);
    float_add(x, &leading, x, digits+3);
    float_add(x, x, &cLnSqrt2PiMinusHalf, digits+3);
    result = 1;
  }
  float_free(&lnxminus1);
  float_free(&leading);
  return result;
}
/* Converts x into the logic (bit pattern) representation *longint.
   Negative values are encoded by converting -x-1 (x+1 in magnitude)
   and inverting all bits afterwards.
   Returns 0 if x has more than MATHPRECISION integer digits or needs
   more than LOGICRANGE bits, 1 otherwise. */
char
_floatnum2logic(
  t_longint* longint,
  cfloatnum x)
{
  floatstruct intpart;
  int intdigits;

  /* number of digits in front of the decimal point */
  intdigits = float_getexponent(x) + 1;
  if (!float_iszero(x) && intdigits > 0)
  {
    if (intdigits > MATHPRECISION)
      return 0;
    float_create(&intpart);
    /* _floatnum2longint rounds, so cut off the fraction beforehand */
    float_copy(&intpart, x, intdigits);
    if (float_getsign(x) < 0)
      float_add(&intpart, &intpart, &c1, EXACT);
    _floatnum2longint(longint, &intpart);
    float_free(&intpart);
    if (_bitlength(longint) > LOGICRANGE)
      return 0;
  }
  else
  {
    /* no integer digits at all: the bit pattern starts out as 0 */
    longint->length = 1;
    longint->value[0] = 0;
  }
  _zeroextend(longint);
  if (float_getsign(x) < 0)
    _not(longint);
  return 1;
}
/* Core of the ln(Gamma(x)) evaluation for arguments of either sign.
   For x > 0 the work is delegated to _lngamma_prim_xgt0. Otherwise the
   positive-argument routine is run on 1-x, its result is negated, and
   revfactor receives sin(pi*x) (as computed by _sinpix) divided by
   pi times the factor returned for 1-x.
   If _sinpix yields zero, *infinity is set to 1 and revfactor is built
   from +1 or -1 (depending on whether the original x is odd) divided by
   the factor returned for 1-x, without the pi.
   Returns the result of the positive-argument routine (0 on failure). */
static char
_lngamma_prim(
  floatnum x,
  floatnum revfactor,
  int* infinity,
  int digits)
{
  floatstruct posfactor;
  char isodd;
  char result;

  *infinity = 0;
  if (float_getsign(x) > 0)
    return _lngamma_prim_xgt0(x, revfactor, digits);

  /* keep the original argument in revfactor, continue with 1 - x */
  float_copy(revfactor, x, digits + 2);
  float_sub(x, &c1, x, digits+2);
  float_create(&posfactor);
  result = _lngamma_prim_xgt0(x, &posfactor, digits);
  if (result)
  {
    float_neg(x);
    /* parity has to be taken before _sinpix overwrites revfactor */
    isodd = float_isodd(revfactor);
    _sinpix(revfactor, digits);
    if (!float_iszero(revfactor))
      float_mul(&posfactor, &posfactor, &cPi, digits+2);
    else
    {
      *infinity = 1;
      float_setinteger(revfactor, isodd? -1 : 1);
    }
    /* the division is carried out in both cases */
    float_div(revfactor, revfactor, &posfactor, digits+2);
  }
  float_free(&posfactor);
  return result;
}
/* Gamma function for arguments x with x - 0.5 an integer (TODO confirm:
   the code takes the integer value of x - 0.5 without checking).
   Arguments with a decimal exponent >= 2 are passed on to _gamma.
   Otherwise the result is built from sqrt(pi) and a short Pochhammer
   product:
     x >= 0.5:  pochhammer(0.5, x-0.5) * sqrt(pi)
     x <  0.5:  sqrt(pi) / pochhammer(x, 0.5-x)
   Returns 0 if the Pochhammer product fails, else the result of the
   final multiplication/division. */
char
_gamma0_5(
  floatnum x,
  int digits)
{
  floatstruct diff;
  int steps;

  if (float_getexponent(x) >= 2)
    return _gamma(x, digits);

  /* steps = x - 0.5 as a machine integer */
  float_create(&diff);
  float_sub(&diff, x, &c1Div2, EXACT);
  steps = float_asinteger(&diff);
  float_free(&diff);

  if (steps < 0)
  {
    /* climb from x up to 0.5 and divide sqrt(pi) by the product */
    if (!_pochhammer_su(x, -steps, digits))
      return 0;
    return float_div(x, &cSqrtPi, x, digits);
  }
  /* climb from 0.5 up to x and scale by sqrt(pi) */
  float_copy(x, &c1Div2, EXACT);
  if (!_pochhammer_su(x, steps, digits))
    return 0;
  return float_mul(x, x, &cSqrtPi, digits);
}
/* Adds the higher-order terms of an alternating power series in x to x,
   in place: term k is x^(2k+1) * (-1)^k / (k! * (2k+1)), k = 1, 2, ...
   (presumably the erf series without its constant factor - confirm
   against the caller). The working precision shrinks along with the
   summands; summation stops once a summand no longer affects the first
   `digits` places. Always returns 1. */
char
erfseries(
  floatnum x,
  int digits)
{
  floatstruct xsquared, term, power;
  int k, prec, xexp;

  xexp = float_getexponent(x);
  prec = digits + 2*xexp + 2;
  if (prec <= 0 || float_iszero(x))
    /* for tiny arguments the result is approx. == x */
    return 1;

  float_create(&xsquared);
  float_create(&term);
  float_create(&power);
  float_mul(&xsquared, x, x, prec + 1);
  prec = digits + float_getexponent(&xsquared) + 1;
  float_copy(&power, x, prec + 1);
  for (k = 1; prec > 0; ++k)
  {
    /* power = previous power * x^2 / (-k) */
    float_mul(&power, &power, &xsquared, prec + 1);
    float_divi(&power, &power, -k, prec + 1);
    /* summand = power / (2k+1) */
    float_divi(&term, &power, 2*k + 1, prec);
    float_add(x, x, &term, digits + 3);
    /* the next summand is smaller, so fewer digits are needed */
    prec = digits + float_getexponent(&term) + xexp + 2;
  }
  float_free(&power);
  float_free(&term);
  float_free(&xsquared);
  return 1;
}
MyFloat MyFloat::operator+(const MyFloat& rhs) const{ int exponent_difference = 0; int borrow = 0; int i; MyFloat float_copy(*this); MyFloat rhs_copy(rhs); float_copy.mantissa |= 1<<23; //restoreS leading bit for both mantissas rhs_copy.mantissa |= 1<<23; //restore leading bit if(float_copy.mantissa == rhs_copy.mantissa) // case if the number are the same, but the sign is opposite, just return 0 for 7 + -7, return 0; if(float_copy.exponent == rhs_copy.exponent) if(float_copy.sign != rhs_copy.sign) return 0; exponent_difference = rhs_copy.exponent - float_copy.exponent; // find difference between exponents if (exponent_difference == 0) { } else if(exponent_difference > 0) //rhs = 5*10^6 this = 32*10^4 = 2 { float_copy.mantissa = float_copy.mantissa >> (exponent_difference -1); // right shift, since rhs is bigger than this borrow = float_copy.mantissa & 1; float_copy.mantissa = float_copy.mantissa >> 1; float_copy.exponent += exponent_difference; //restore the same exponent }
/* Replaces x by the value of the asymptotic (Bernoulli number based)
   series of the Binet function at x.
   Returns 1 on success. If the tabulated Bernoulli numbers are exhausted
   before the summands have become small enough, x is set to NaN and 0 is
   returned. */
char
binetasymptotic(floatnum x,
                int digits)
{
  floatstruct invsqr;
  floatstruct total;
  floatstruct term;
  floatstruct power;
  int idx, prec;
  char converged;

  if (float_getexponent(x) >= digits)
  {
    /* for very big x, ln(gamma(x)) is dominated by x*ln x, and the
       Binet function adds nothing substantial to the final result */
    float_setzero(x);
    return 1;
  }
  float_create(&invsqr);
  float_create(&total);
  float_create(&term);
  float_create(&power);

  float_copy(&power, &c1, EXACT);
  float_setzero(&total);
  /* first summand (before the final division by x) is 1/12 */
  float_div(&term, &c1, &c12, digits+1);
  prec = digits - 2*float_getexponent(x)+3;
  idx = 1;
  if (prec <= 0)
    /* the sum reduces to its first summand */
    float_move(&total, &term);
  else
  {
    /* invsqr = 1/x^2, the ratio between consecutive powers */
    float_mul(&invsqr, x, x, prec);
    float_reciprocal(&invsqr, prec);
    while (float_getexponent(&term) > -digits-1
           && ++idx <= MAXBERNOULLIIDX)
    {
      prec = digits + float_getexponent(&term) + 3;
      /* book the previous summand, then build the next one:
         num[idx-1] * x^(-2(idx-1)) / (den[idx-1] * 2idx * (2idx-1)) */
      float_add(&total, &total, &term, digits+1);
      float_mul(&power, &invsqr, &power, prec);
      float_muli(&term, &cBernoulliDen[idx-1], 2*idx*(2*idx-1), prec);
      float_div(&term, &power, &term, prec);
      float_mul(&term, &term, &cBernoulliNum[idx-1], prec);
    }
  }
  converged = idx <= MAXBERNOULLIIDX;
  if (converged)
    float_div(x, &total, x, digits);
  else
    /* x was not big enough for the asymptotic series to converge
       sufficiently */
    float_setnan(x);
  float_free(&power);
  float_free(&term);
  float_free(&total);
  float_free(&invsqr);
  return converged;
}
/* Gamma function for an integer argument, evaluated in place.
   Arguments with a decimal exponent >= 2 go to _gammagtminus20; smaller
   ones are computed as the product 1*2*...*(integer-1) by means of
   _pochhammer_su. Returns the result of the routine used. */
char
_gammaint(
  floatnum integer,
  int digits)
{
  int value;

  if (float_getexponent(integer) >= 2)
    return _gammagtminus20(integer, digits);

  value = float_asinteger(integer);
  /* restart from 1 and multiply value-1 consecutive integers */
  float_copy(integer, &c1, EXACT);
  return _pochhammer_su(integer, value-1, digits);
}
/* Positive-argument part of the ln(Gamma) evaluation.
   x is advanced by the offset that _ofs reports (presumably far enough
   for the asymptotic series to apply - confirm against _ofs), and the
   product x*(x+1)*...*(x+offset-1) that this skips is handed back in
   revfactor. x is then replaced by the result of _lngammabigx at the
   shifted argument. Returns the result of _lngammabigx; the result of
   the Pochhammer product is not checked. */
static char
_lngamma_prim_xgt0(
  floatnum x,
  floatnum revfactor,
  int digits)
{
  int shift;

  shift = _ofs(x, digits);
  float_copy(revfactor, x, digits+1);
  _pochhammer_su(revfactor, shift, digits);
  float_addi(x, x, shift, digits+2);
  return _lngammabigx(x, digits);
}
/* Sums the asymptotic series
     1 - 1/(2x^2) + 1*3/(2x^2)^2 - 1*3*5/(2x^2)^3 + ...
   and stores the sum in x (presumably the series part of the erfc
   expansion for large x - confirm against the caller).

   The series is divergent: summands shrink at first and grow later on.
   Summation stops when a summand no longer affects the first `digits`
   places (success), or when the summands start growing before that
   point is reached (failure).

   Returns 1 if the required precision was reached, 0 otherwise. When x
   is so big that already the second summand is insignificant, x is set
   to 1 and 1 is returned. */
char
erfcasymptotic(
  floatnum x,
  int digits)
{
  floatstruct smd, fct;
  int i, workprec, newprec;

  workprec = digits - 2 * float_getexponent(x) + 1;
  if (workprec <= 0)
  {
    /* all summands but the first are negligible. The temporaries are
       deliberately created only after this exit, so that no
       float_create is left without its matching float_free */
    float_copy(x, &c1, EXACT);
    return 1;
  }
  float_create(&smd);
  float_create(&fct);
  /* fct = -1/(2*x*x), the x-dependent part of the ratio of
     consecutive summands */
  float_mul(&fct, x, x, digits + 1);
  float_div(&fct, &c1Div2, &fct, digits);
  float_neg(&fct);
  /* the first summand is 1; x is reused as accumulator */
  float_copy(&smd, &c1, EXACT);
  float_setzero(x);
  newprec = digits;
  workprec = newprec;
  i = 1;
  /* carry on while the summands are significant (newprec > 0) and
     have not started growing (newprec <= workprec) */
  while (newprec > 0 && newprec <= workprec)
  {
    workprec = newprec;
    float_add(x, x, &smd, digits + 4);
    /* next summand = current summand * i * fct, i = 1, 3, 5, ... */
    float_muli(&smd, &smd, i, workprec + 1);
    float_mul(&smd, &smd, &fct, workprec + 2);
    newprec = digits + float_getexponent(&smd) + 1;
    i += 2;
  }
  float_free(&fct);
  float_free(&smd);
  /* a growing summand ended the loop <=> newprec > workprec */
  return newprec <= workprec;
}
/* Series expansion of cos x - 1 (alternating != 0) or cosh x - 1
   (alternating == 0), evaluated in place. Intended for small x,
   |x| <= 0.01.
   Returns 0 if an underflow occurs (x is set to zero then), else 1.
   The relative error seems to be less than 5e-100 for a 100-digit
   calculation with |x| < 0.01 */
char
cosminus1series(
  floatnum x,
  int digits,
  char alternating)
{
  floatstruct total, term;
  int sqrexp, mulprec, addprec, k;

  /* Turn x into (-/+)x*x/2. The exponent is handled separately, so
     that the squaring itself works on a value with exponent 0 */
  sqrexp = 2 * float_getexponent(x);
  float_setexponent(x, 0);
  float_mul(x, x, x, digits+1);
  float_mul(x, x, &c1Div2, digits+1);
  float_setsign(x, alternating? -1 : 1);
  sqrexp += float_getexponent(x);
  if (float_iszero(x) || sqrexp < EXPMIN)
  {
    /* underflow; reported as success only if the combined exponent
       is 0 */
    float_setzero(x);
    return sqrexp == 0;
  }
  float_setexponent(x, sqrexp);

  mulprec = digits + sqrexp + 2;
  if (mulprec <= 0)
    /* for very small x, cos/cosh(x) - 1 = (-/+)0.5*x*x */
    return 1;
  /* additions keep the initial precision throughout */
  addprec = mulprec;
  float_create(&total);
  float_create(&term);
  float_copy(&term, x, mulprec);
  float_setzero(&total);
  for (k = 2; mulprec > 0; ++k)
  {
    /* next summand = previous summand * x / (k*(2k-1)), with x now
       holding (-/+)x*x/2 */
    float_mul(&term, &term, x, mulprec+1);
    float_divi(&term, &term, k*(2*k-1), mulprec);
    float_add(&total, &total, &term, addprec);
    mulprec = digits + float_getexponent(&term);
  }
  /* finally add the first summand */
  float_add(x, x, &total, digits+1);
  float_free(&total);
  float_free(&term);
  return 1;
}
/* Computes base^exponent for a floatnum exponent and stores the result
   in power, evaluated to `digits` places.

   Errors:
     NoOperand         base or exponent is NaN
     InvalidPrecision  digits <= 0 or digits > MATHPRECISION
     OutOfDomain       0^0, or a negative base with a non-integer
                       exponent
     ZeroDivide        0 raised to a negative exponent
     Overflow/Underflow  _raise failed
   Returns 1 on success; on error the value of _seterror/_setnan. */
char
float_raise(
  floatnum power,
  cfloatnum base,
  cfloatnum exponent,
  int digits)
{
  signed char resultsign;
  signed char expsign;

  if (float_isnan(exponent) || float_isnan(base))
    return _seterror(power, NoOperand);
  if (digits <= 0 || digits > MATHPRECISION)
    return _seterror(power, InvalidPrecision);
  if (float_iszero(base))
  {
    expsign = float_getsign(exponent);
    if (expsign == 0)
      return _seterror(power, OutOfDomain);
    if (expsign == -1)
      return _seterror(power, ZeroDivide);
    return _setzero(power);
  }
  resultsign = float_getsign(base);
  if (resultsign < 0)
  {
    /* a negative base is admissible for integer exponents only */
    if (!float_isinteger(exponent))
      return _seterror(power, OutOfDomain);
    /* an even units digit in the exponent makes the result positive */
    if ((float_getdigit(exponent, float_getexponent(exponent)) & 1) == 0)
      resultsign = 1;
  }
  /* raise the absolute value, the sign is re-attached at the end */
  float_copy(power, base, digits+1);
  float_abs(power);
  if (!_raise(power, exponent, digits))
  {
    /* Overflow is flagged first; Underflow is flagged on top when the
       base's decimal exponent and the sign of the exponent differ in
       sign */
    float_seterror(Overflow);
    if (float_getexponent(base) * float_getsign(exponent) < 0)
      float_seterror(Underflow);
    return _setnan(power);
  }
  float_setsign(power, resultsign);
  return 1;
}
/* Splits off a power of `base` from x: an exponent is estimated from an
   approximate log10 of x, x is divided (or, for x with a negative
   decimal exponent, multiplied) by the corresponding power of base, and
   _checkbounds contributes a final correction.
   Returns the extracted exponent; x is left holding the scaled value.
   The parameter scale is unused. */
static int
_extractexp(
  floatnum x,
  int scale,
  signed char base)
{
  floatstruct basepower;
  floatstruct basevalue;
  int estimate;
  int powerexp;
  int prec;
  int bitsperdigit;

  (void)scale;

  bitsperdigit = lgbase(base);
  prec = DECPRECISION + 3;
  /* 3.321928095 = log2(10): turn the decimal logarithm into a binary
     one, then into a number of base-`base` digits */
  estimate = (int)(aprxlog10fn(x) * 3.321928095f);
  if (float_getexponent(x) < 0)
    estimate -= 3;
  estimate /= bitsperdigit;
  if (estimate != 0)
  {
    float_create(&basevalue);
    float_setinteger(&basevalue, base);
    float_create(&basepower);
    float_copy(&basepower, &basevalue, EXACT);
    /* base^|estimate|; the decimal exponent of the power is delivered
       separately in powerexp */
    _raiseposi(&basepower, &powerexp,
               estimate < 0? -estimate : estimate, prec);
    if (float_getexponent(x) >= 0)
    {
      float_addexp(x, -powerexp);
      float_div(x, x, &basepower, prec);
    }
    else
    {
      float_addexp(x, powerexp);
      float_mul(x, x, &basepower, prec);
    }
    float_free(&basepower);
    float_free(&basevalue);
  }
  estimate += _checkbounds(x, prec, base);
  return estimate;
}
static Error _outfixphex( p_otokens tokens, floatnum x, p_number_desc n, int scale) { t_longint l; Error result; float_copy(x, x, DECPRECISION+1); result = _fixp2longint(n, &l, x, scale); if (result != Success) return result; if (l.length == 0) return IOConversionUnderflow; _setscale(n, &l, scale); return desc2str(tokens, n, scale); }
/* Gamma function evaluated in place through exp(ln(Gamma)).
   x is advanced by the offset that _ofs reports, ln(Gamma) is taken at
   the shifted argument and exponentiated, and the result is divided by
   the product x*(x+1)*...*(x+offset-1) that the shift skipped.
   (The name suggests the routine is meant for x > -20 - confirm with
   the callers.)
   Returns 1 on success; on failure x is set to NaN and 0 is returned.
   The result of the Pochhammer product is not checked. */
char
_gammagtminus20(
  floatnum x,
  int digits)
{
  floatstruct skipped;
  int shift;
  char result;

  float_create(&skipped);
  shift = _ofs(x, digits+1);
  float_copy(&skipped, x, digits+1);
  _pochhammer_su(&skipped, shift, digits);
  float_addi(x, x, shift, digits+2);

  /* each step runs only if its predecessor succeeded */
  result = 0;
  if (_lngammabigx(x, digits))
    if (_exp(x, digits))
      if (float_div(x, x, &skipped, digits+1))
        result = 1;

  float_free(&skipped);
  if (!result)
    float_setnan(x);
  return result;
}
/* Pochhammer symbol for an integer n, evaluated in place in x.
   pre: n is an integer
   The expensive Gamma function is avoided when a few multiplications
   do the same: for |n| < 50 and moderate x the work is done by
   _pochhammer_si, otherwise by _pochhammer_g.
   Integer x are checked up front for a factor 0 (result 0) or a
   division by 0 (ZeroDivide).
   Note: n is negated temporarily through a cast that drops its const
   qualifier; it is restored before the function continues. */
static char
_pochhammer_i(
  floatnum x,
  cfloatnum n,
  int digits)
{
  int count;
  signed char verdict;

  if (float_iszero(n))
    /* empty product */
    return float_copy(x, &c1, EXACT);

  if (float_isinteger(x))
  {
    /* -1 = "no special case found" */
    verdict = -1;
    /* compare x against -n */
    float_neg((floatnum)n);
    if (float_getsign(x) <= 0 && float_cmp(x, n) > 0)
      /* x and x+n have opposite signs, meaning 0 is among the
         factors */
      verdict = _setzero(x);
    else if (float_getsign(x) > 0 && float_cmp(x, n) <= 0)
      /* x and x+n have opposite signs, meaning at one point a
         division by 0 is necessary */
      verdict = _seterror(x, ZeroDivide);
    /* undo the negation */
    float_neg((floatnum)n);
    if (verdict >= 0)
      return verdict;
  }

  if (float_getexponent(x) < EXPMAX/100)
  {
    count = float_asinteger(n);
    if (count != 0 && count < 50 && count > -50)
      return _pochhammer_si(x, count, digits+2);
  }
  return _pochhammer_g(x, n, digits);
}
/* Replaces x by the sum over k = 1, 2, ... of
     exp(-k*k*alpha*alpha) / (k*k*alpha*alpha + x)
   x should be the square of the parameter to erfc.

   alpha and the coefficients exp(-k*k*alpha*alpha) are kept in
   file-level variables between calls and are rebuilt only when a call
   asks for more digits than the cached erfcdigits. Always returns 1.

   NOTE(review): workprec receives its first value at the end of the
   first loop pass. The code relies on erfccoeff[0] being available on
   entry to the loop, so that the lazy evaluation branch (which reads
   workprec and erfccoeff[i-2]) is not taken for i == 1. */
char
erfcsum(
  floatnum x, /* should be the square of the parameter to erfc */
  int digits)
{
  int i, workprec;
  floatstruct total, term;
  floatnum coeff;

  if (digits > erfcdigits)
  {
    /* the intermediate results of the last evaluation cannot be
       re-used; drop all exp(-k*k*alpha*alpha) to mark them absent */
    for (i = MAXERFCIDX - 1; i >= 0; --i)
      float_free(&erfccoeff[i]);
    /* remember the precision now in effect */
    erfcdigits = digits;
    /* Choose a new alpha appropriate for the desired precision:
       pi / sqrt((digits + 4) * ln 10). It need not be of high
       precision, any alpha near this one would do */
    float_muli(&erfcalpha, &cLn10, digits + 4, 3);
    float_sqrt(&erfcalpha, 3);
    float_div(&erfcalpha, &cPi, &erfcalpha, 3);
    float_mul(&erfcalphasqr, &erfcalpha, &erfcalpha, EXACT);
    /* The exp(-k*k*alpha*alpha) are evaluated iteratively later on;
       set up the iteration here */
    float_copy(&erfct2, &erfcalphasqr, EXACT);
    float_neg(&erfct2);
    /* exp(-alpha*alpha), which is also the first coefficient */
    _exp(&erfct2, digits + 3);
    float_copy(&erfccoeff[0], &erfct2, EXACT);
    /* exp(-2*alpha*alpha) */
    float_mul(&erfct3, &erfct2, &erfct2, digits + 3);
  }
  float_create(&total);
  float_create(&term);
  float_setzero(&total);
  for (i = 1; i < MAXERFCIDX; ++i)
  {
    coeff = erfccoeff + (i-1);
    if (float_isnan(coeff))
    {
      /* exp(-i*i*alpha*alpha) is not available yet: derive it from
         the coefficient of the previous summand */
      float_mul(&erfct2, &erfct2, &erfct3, workprec + 3);
      float_mul(coeff, &erfct2, &erfccoeff[i-2], workprec + 3);
    }
    /* the coefficients decay rapidly in the end; save time by
       adapting the working precision */
    workprec = digits + float_getexponent(coeff) + 1;
    if (workprec <= 0)
      break;
    /* summand: exp(-i*i*alpha*alpha)/(i*i*alpha*alpha+x) */
    float_muli(&term, &erfcalphasqr, i*i, workprec);
    float_add(&term, x, &term, workprec + 2);
    float_div(&term, coeff, &term, workprec + 1);
    /* accumulate */
    float_add(&total, &total, &term, digits + 3);
  }
  float_move(x, &total);
  float_free(&term);
  return 1;
}
/* Taylor series of arctan x (alternating != 0) or arctanh x
   (alternating == 0) at x == 0, evaluated in place.
   For small |x| < 0.01 the series converges very fast, yielding 4 or
   more digits of the result with every summand. The working precision
   is adjusted such that the relative error for 100-digit arguments is
   around 5.0e-100, i.e. the error is 1 in the 100-th place (or less) */
void
arctanseries(
  floatnum x,
  int digits,
  char alternating)
{
  floatstruct xsquared;
  floatstruct power;
  floatstruct term;
  floatstruct total;
  int xlog;
  int sqrexp;
  int mulprec;
  int addprec;
  int k;

  /* upper limit of log(x) and log(result) */
  xlog = float_getexponent(x)+1;

  /* From the second on, the summands of the series are bounded by
     x^(2*i-1)/3, so their sum is bounded by (x^3/(1-x*x))/3. For
     x < sqrt(1/3) approx.= 0.5, this is less than 0.5*x^3.
     Summation is necessary only if the first <digits> places of the
     result (roughly x) are touched. Ignoring the effect of a possible
     carry, this is the case only if
     x*x >= 2*10^(-digits) > 10^(-digits)
     Example: for x = 9e-51, a 100-digit result covers the decimal
     places from 1e-51 to 1e-150. x^3/3 is roughly 3e-151, and so is
     the sum of the series. So the sum can be ignored here, but it
     could not be for x = 9e-50 */
  if (float_iszero(x) || 2*xlog < -digits)
    /* for very tiny arguments arctan/arctanh x is approx.== x */
    return;

  float_create(&xsquared);
  float_create(&power);
  float_create(&term);
  float_create(&total);

  /* The working precision follows the decreasing summands, which
     saves time when multiplying. Unfortunately, no error bound is
     given for the operations of bc_num. Tests show that the last
     digit of an incomplete multiplication is usually off by up to
     5 ULP's. */
  mulprec = digits + 2*xlog + 1;
  /* the precision of the addition must not decrease, of course */
  addprec = mulprec;

  /* xsquared = (-/+)x*x is the ratio between consecutive powers */
  float_mul(&xsquared, x, x, mulprec);
  float_setsign(&xsquared, alternating? -1 : 1);
  sqrexp = float_getexponent(&xsquared);
  float_copy(&power, x, mulprec);
  float_setzero(&total);
  k = 3;
  while (mulprec > 0)
  {
    /* x^k */
    float_mul(&power, &power, &xsquared, mulprec+1);
    /* x^k/k */
    float_divi(&term, &power, k, mulprec);
    /* the addition virtually does not introduce errors */
    float_add(&total, &total, &term, addprec);
    /* lower the working precision along with the decreasing powers */
    mulprec = digits - xlog + float_getexponent(&term) + sqrexp + 3;
    k += 2;
  }
  /* add the first summand */
  float_add(x, x, &total, digits+1);

  float_free(&xsquared);
  float_free(&power);
  float_free(&term);
  float_free(&total);
}
void floatmath_init() { int i, save; floatnum_init(); save = float_setprecision(MAXDIGITS); float_create(&c1); float_setinteger(&c1, 1); float_create(&c2); float_setinteger(&c2, 2); float_create(&c3); float_setinteger(&c3, 3); float_create(&c12); float_setinteger(&c12, 12); float_create(&c16); float_setinteger(&c16, 16); float_create(&cMinus1); float_setinteger(&cMinus1, -1); float_create(&cMinus20); float_setinteger(&cMinus20, -20); float_create(&c1Div2); float_setscientific(&c1Div2, ".5", NULLTERMINATED); float_create(&cExp); float_setscientific(&cExp, sExp, NULLTERMINATED); float_create(&cLn2); float_setscientific(&cLn2, sLn2, NULLTERMINATED); float_create(&cLn3); float_setscientific(&cLn3, sLn3, NULLTERMINATED); float_create(&cLn7); float_setscientific(&cLn7, sLn7, NULLTERMINATED); float_create(&cLn10); float_setscientific(&cLn10, sLn10, NULLTERMINATED); float_create(&cPhi); float_setscientific(&cPhi, sPhi, NULLTERMINATED); float_create(&cPi); float_setscientific(&cPi, sPi, NULLTERMINATED); float_create(&cPiDiv2); float_setscientific(&cPiDiv2, sPiDiv2, NULLTERMINATED); float_create(&cPiDiv4); float_setscientific(&cPiDiv4, sPiDiv4, NULLTERMINATED); float_create(&c2Pi); float_setscientific(&c2Pi, s2Pi, NULLTERMINATED); float_create(&c1DivPi); float_setscientific(&c1DivPi, s1DivPi, NULLTERMINATED); float_create(&cSqrtPi); float_setscientific(&cSqrtPi, sSqrtPi, NULLTERMINATED); float_create(&cLnSqrt2PiMinusHalf); float_setscientific(&cLnSqrt2PiMinusHalf, sLnSqrt2PiMinusHalf, NULLTERMINATED); float_create(&c1DivSqrtPi); float_setscientific(&c1DivSqrtPi, s1DivSqrtPi, NULLTERMINATED); float_create(&c2DivSqrtPi); float_setscientific(&c2DivSqrtPi, s2DivSqrtPi, NULLTERMINATED); float_create(&cMinus0_4); float_setscientific(&cMinus0_4, "-.4", NULLTERMINATED); for (i = -1; ++i < MAXBERNOULLIIDX;) { float_create(&cBernoulliNum[i]); float_create(&cBernoulliDen[i]); float_setscientific(&cBernoulliNum[i], sBernoulli[2*i], NULLTERMINATED); float_setscientific(&cBernoulliDen[i], 
sBernoulli[2*i+1], NULLTERMINATED); } float_create(&cUnsignedBound); float_copy(&cUnsignedBound, &c1, EXACT); for (i = -1; ++i < 2*(int)sizeof(unsigned);) float_mul(&cUnsignedBound, &c16, &cUnsignedBound, EXACT); for (i = -1; ++i < MAXERFCIDX;) float_create(&erfccoeff[i]); float_create(&erfcalpha); float_create(&erfcalphasqr); float_create(&erfct2); float_create(&erfct3); float_setprecision(save); }