Пример #1
0
/* Natural (base e) logarithm of a double-double value.

   Returns DD_C_ZERO when the argument is exactly one.  For a leading
   component that is zero or negative, reports the problem through
   dd_error() and returns DD_C_NAN. */
double2
dd_log(const double2 a)
{
    /* The Taylor series of log has no factorial in its denominators
       and so converges far more slowly than the series of exp.  The
       logarithm is therefore obtained as the root of

           f(x) = exp(x) - a

       by a Newton step,

           x' = x - f(x)/f'(x) = x + a * exp(-x) - 1,

       started from the double-precision log of the leading component.
       A single step is taken, since each Newton step roughly doubles
       the number of correct digits. */
    double2 guess;
    double2 correction;

    if (dd_is_one(a)) {
        return DD_C_ZERO;
    }

    if (a.x[0] <= 0.0) {
        dd_error("(dd_log): Non-positive argument.");
        return DD_C_NAN;
    }

    /* Starting point: ordinary double-precision logarithm. */
    guess = dd_create_d(log(a.x[0]));

    /* correction = a * exp(-guess) - 1 */
    correction = dd_sub(dd_mul(a, dd_exp(dd_neg(guess))), DD_C_ONE);

    return dd_add(guess, correction);
}
Пример #2
0
/*
 * Compute ldexpl(a+b, scale) with a single rounding error. It is assumed
 * that the result will be subnormal, and care is taken to ensure that
 * double rounding does not occur.
 *
 * a, b:   the two addends, combined by dd_add() into a hi/lo pair.
 * scale:  the binary exponent applied to the adjusted sum.
 * Returns the scaled sum as a long double.
 */
static inline long double
add_and_denormalize(long double a, long double b, int scale)
{
	struct dd sum;
	int bits_lost;
	union IEEEl2bits u;

	sum = dd_add(a, b);

	/*
	 * If we are losing at least two bits of accuracy to denormalization,
	 * then the first lost bit becomes a round bit, and we adjust the
	 * lowest bit of sum.hi to make it a sticky bit summarizing all the
	 * bits in sum.lo. With the sticky bit adjusted, the hardware will
	 * break any ties in the correct direction.
	 *
	 * If we are losing only one bit to denormalization, however, we must
	 * break the ties manually.
	 */
	if (sum.lo != 0) {
		u.e = sum.hi;
		bits_lost = -u.bits.exp - scale + 1;
		if ((bits_lost != 1) ^ (int)(u.bits.manl & 1))
			sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
	}
	/*
	 * Scale with the long double routine.  The double-precision ldexp()
	 * would first narrow sum.hi to double, rounding it a second time and
	 * discarding the low bit that was just adjusted above.
	 */
	return (ldexpl(sum.hi, scale));
}
Пример #3
0
/*
 * Compute a+b, with a small tweak:  The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding.  This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent.  For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *     J. Coonen.  An Implementation Guide to a Proposed Standard for
 *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
 */
static inline long double add_adjusted(long double a, long double b)
{
	struct dd sum;
	union ldshape u;

	sum = dd_add(a, b);
	if (sum.lo != 0) {
		u.f = sum.hi;
		if (!LASTBIT(u))
			sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
	}
	return (sum.hi);
}
Пример #4
0
/*
 * Compute a+b, with a small tweak:  The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding.  This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent.  For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *     J. Coonen.  An Implementation Guide to a Proposed Standard for
 *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
 */
static inline long double
add_adjusted(long double a, long double b)
{
	struct dd sum;
	union IEEEl2bits u;

	sum = dd_add(a, b);
	if (sum.lo != 0) {
		u.e = sum.hi;
		if ((u.bits.manl & 1) == 0)
			sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
	}
	return (sum.hi);
}
Пример #5
0
/*
 * Compute a+b, with a small tweak:  The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding.  This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent.  For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *     J. Coonen.  An Implementation Guide to a Proposed Standard for
 *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
 */
static inline long double
add_adjusted(long double a, long double b)
{
	struct dd sum;
	union ieee_ext_u u;

	sum = dd_add(a, b);
	if (sum.lo != 0) {
		u.extu_ld = sum.hi;
		if ((u.extu_ext.ext_fracl & 1) == 0)
			sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
	}
	return (sum.hi);
}
Пример #6
0
/* Evaluate the polynomial c[0] + c[1]*x + ... + c[n]*x^n at x using
   Horner's scheme.  c must provide n + 1 coefficients. */
double2
polyeval(const double2 *c, int n, const double2 x)
{
    double2 acc = c[n];
    int k = n;

    /* Walk the coefficients from c[n-1] down to c[0]. */
    while (k-- > 0) {
        acc = dd_add(dd_mul(acc, x), c[k]);
    }

    return acc;
}
Пример #7
0
/*
 * Compute a+b, with a small tweak:  The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding.  This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent.  For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *     J. Coonen.  An Implementation Guide to a Proposed Standard for
 *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
 */
static inline double
add_adjusted(double a, double b)
{
	struct dd sum;
	u_int64_t hibits, lobits;

	sum = dd_add(a, b);
	/* A nonzero low part means the high part alone is not the exact sum. */
	if (sum.lo != 0) {
		/* Work on the raw 64-bit representation of the high part. */
		EXTRACT_WORD64(hibits, sum.hi);
		/* Only a clear low bit needs to be turned into a sticky bit. */
		if ((hibits & 1) == 0) {
			/*
			 * Intended effect:
			 *     hibits += (int)copysign(1.0, sum.hi * sum.lo)
			 * The top bit of (hibits ^ lobits) is set exactly when the
			 * two parts differ in sign.
			 * NOTE(review): the shift by 62 also keeps bit 62, so this
			 * presumably relies on that bit being equal in both words;
			 * confirm against the callers' scaling.
			 */
			EXTRACT_WORD64(lobits, sum.lo);
			hibits += 1 - ((hibits ^ lobits) >> 62);
			INSERT_WORD64(sum.hi, hibits);
		}
Пример #8
0
/*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
 *
 * We use scaling to avoid overflow/underflow, along with the
 * canonical precision-doubling technique adapted from:
 *
 *	Dekker, T.  A Floating-Point Technique for Extending the
 *	Available Precision.  Numer. Math. 18, 224-242 (1971).
 *
 * Outline: zeroes, infinities and NaNs are dispatched first.  The
 * remaining operands are split into fraction and exponent with frexpl(),
 * the product and the sum are formed as hi/lo pairs by dd_mul() and
 * dd_add() with the rounding mode forced to FE_TONEAREST, and the
 * exponent ex + ey is reapplied at the end.  The caller's rounding mode
 * is saved in oround and put back with fesetround(oround) on the
 * cancellation and directed-rounding paths.
 */
long double
fmal(long double x, long double y, long double z)
{
	long double xs, ys, zs, adj;
	struct dd xy, r;
	int oround;
	int ex, ey, ez;
	int spread;

	/*
	 * Handle special cases. The order of operations and the particular
	 * return values here are crucial in handling special cases involving
	 * infinities, NaNs, overflows, and signed zeroes correctly.
	 */
	if (x == 0.0 || y == 0.0)
		return (x * y + z);
	if (z == 0.0)
		return (x * y);
	if (!isfinite(x) || !isfinite(y))
		return (x * y + z);
	if (!isfinite(z))
		return (z);

	/* Split each operand into a fraction (xs, ys, zs) and an exponent. */
	xs = frexpl(x, &ex);
	ys = frexpl(y, &ey);
	zs = frexpl(z, &ez);
	oround = fegetround();
	/* Exponent gap between the product x * y and the addend z. */
	spread = ex + ey - ez;

	/*
	 * If x * y and z are many orders of magnitude apart, the scaling
	 * will overflow, so we handle these cases specially.  Rounding
	 * modes other than FE_TONEAREST are painful.
	 */
	if (spread < -LDBL_MANT_DIG) {
		/* The product is tiny next to z: the result is z, possibly nudged. */
		feraiseexcept(FE_INEXACT);
		if (!isnormal(z))
			feraiseexcept(FE_UNDERFLOW);
		switch (oround) {
		case FE_TONEAREST:
			return (z);
		case FE_TOWARDZERO:
			/* (x > 0) ^ (y < 0) ^ (z < 0): relational operators bind tighter than ^. */
			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
				return (z);
			else
				return (nextafterl(z, 0));
		case FE_DOWNWARD:
			/* (x > 0) ^ (y < 0) is true when the product is positive. */
			if (x > 0.0 ^ y < 0.0)
				return (z);
			else
				return (nextafterl(z, -INFINITY));
		default:	/* FE_UPWARD */
			if (x > 0.0 ^ y < 0.0)
				return (nextafterl(z, INFINITY));
			else
				return (z);
		}
	}
	/*
	 * Bring z's fraction onto the product's scale.  When z is far
	 * smaller than the product, a tiny value of the same sign is
	 * substituted for it instead.
	 */
	if (spread <= LDBL_MANT_DIG * 2)
		zs = ldexpl(zs, -spread);
	else
		zs = copysignl(LDBL_MIN, zs);

	fesetround(FE_TONEAREST);
	/* work around clang bug 8100 */
	volatile long double vxs = xs;

	/*
	 * Basic approach for round-to-nearest:
	 *
	 *     (xy.hi, xy.lo) = x * y		(exact)
	 *     (r.hi, r.lo)   = xy.hi + z	(exact)
	 *     adj = xy.lo + r.lo		(inexact; low bit is sticky)
	 *     result = r.hi + adj		(correctly rounded)
	 */
	xy = dd_mul(vxs, ys);
	r = dd_add(xy.hi, zs);

	/* From here on, spread is the exponent to reapply to the scaled result. */
	spread = ex + ey;

	if (r.hi == 0.0) {
		/*
		 * When the addends cancel to 0, ensure that the result has
		 * the correct sign.
		 */
		fesetround(oround);
		volatile long double vzs = zs; /* XXX gcc CSE bug workaround */
		return (xy.hi + vzs + ldexpl(xy.lo, spread));
	}

	if (oround != FE_TONEAREST) {
		/*
		 * There is no need to worry about double rounding in directed
		 * rounding modes.
		 */
		fesetround(oround);
		/* work around clang bug 8100 */
		volatile long double vrlo = r.lo;
		adj = vrlo + xy.lo;
		return (ldexpl(r.hi + adj, spread));
	}

	/*
	 * Round-to-nearest: fold the two low parts into one term whose last
	 * bit is sticky, then either scale directly or hand the final scaling
	 * to add_and_denormalize().
	 * NOTE(review): -16383 is presumably the exponent threshold below
	 * which the long double result is subnormal; confirm for the target
	 * format.
	 */
	adj = add_adjusted(r.lo, xy.lo);
	if (spread + ilogbl(r.hi) > -16383)
		return (ldexpl(r.hi + adj, spread));
	else
		return (add_and_denormalize(r.hi, adj, spread));
}
Пример #9
0
dd_MatrixPtr dd_BlockElimination(dd_MatrixPtr M, dd_colset delset, dd_ErrorType *error)
/* Eliminate the variables (columns) in delset by
   block elimination with the dd_DoubleDescription algorithm.

   Given (where y is to be eliminated):
   c1 + A1 x + B1 y >= 0
   c2 + A2 x + B2 y =  0

   1. First construct the dual system:  z1^T B1 + z2^T B2 = 0, z1 >= 0.
   2. Compute the generators of the dual.
   3. Then take the linear combination of the original system with each generator.
   4. Remove redundant inequalities (not performed by this function; the
      returned system may still contain redundant rows).

   M       input inequality system.
   delset  set of column indices to eliminate.
   error   receives dd_NoError on success, otherwise the error reported
           while computing the generators of the dual system.

   Returns the newly created projected matrix, or NULL when the dual
   generators could not be computed.
*/
{
  dd_MatrixPtr Mdual=NULL, Mproj=NULL, Gdual=NULL;
  dd_rowrange i,h,m,mproj,mdual,linsize;
  dd_colrange j,k,d,dproj,ddual,delsize;
  dd_colindex delindex;
  mytype temp,prod;
  dd_PolyhedraPtr dualpoly;
  dd_ErrorType err=dd_NoError;
  dd_boolean localdebug=dd_FALSE;

  *error=dd_NoError;
  m= M->rowsize;
  d= M->colsize;
  /* NOTE(review): the allocation result is not checked. */
  delindex=(long*)calloc(d+1,sizeof(long));
  dd_init(temp);
  dd_init(prod);

  k=0; delsize=0;
  for (j=1; j<=d; j++){
    if (set_member(j, delset)){
      k++;  delsize++;
      delindex[k]=j;  /* stores the kth deletion column index */
    }
  }
  if (localdebug) dd_WriteMatrix(stdout, M);

  linsize=set_card(M->linset);
  ddual=m+1;
  mdual=delsize + m - linsize;  /* number of equalities + dimension of z1 */

  /* 1. Set up the dual matrix. */
  Mdual=dd_CreateMatrix(mdual, ddual);
  Mdual->representation=dd_Inequality;
  for (i = 1; i <= delsize; i++){
    set_addelem(Mdual->linset,i);  /* equality */
    for (j = 1; j <= m; j++) {
      dd_set(Mdual->matrix[i-1][j], M->matrix[j-1][delindex[i]-1]);
    }
  }

  k=0;
  for (i = 1; i <= m; i++){
    if (!set_member(i, M->linset)){
      /* set nonnegativity for the dual variable associated with
         each non-linearity inequality. */
      k++;
      dd_set(Mdual->matrix[delsize+k-1][i], dd_one);
    }
  }

  /* 2. Compute the generators of the dual system. */
  dualpoly=dd_DDMatrix2Poly(Mdual, &err);
  if (err!=dd_NoError || dualpoly==NULL){
    /* Report the failure to the caller instead of reading generators
       from a polyhedron that was not computed successfully. */
    *error=err;
    goto _L99;
  }
  Gdual=dd_CopyGenerators(dualpoly);

  /* 3. Take the linear combination of the original system with each generator.  */
  dproj=d-delsize;
  mproj=Gdual->rowsize;
  Mproj=dd_CreateMatrix(mproj, dproj);
  Mproj->representation=dd_Inequality;
  set_copy(Mproj->linset, Gdual->linset);

  for (i=1; i<=mproj; i++){
    k=0;
    for (j=1; j<=d; j++){
      if (!set_member(j, delset)){
        k++;  /* new index of the variable x_j  */
        dd_set(prod, dd_purezero);
        for (h = 1; h <= m; h++){
          dd_mul(temp,M->matrix[h-1][j-1],Gdual->matrix[i-1][h]);
          dd_add(prod,prod,temp);
        }
        dd_set(Mproj->matrix[i-1][k-1],prod);
      }
    }
  }
  if (localdebug) printf("Size of the projection system: %ld x %ld\n", mproj, dproj);

_L99:
  /* Common cleanup for the success and failure paths. */
  if (dualpoly!=NULL) dd_FreePolyhedra(dualpoly);
  free(delindex);
  dd_clear(temp);
  dd_clear(prod);
  dd_FreeMatrix(Mdual);
  if (Gdual!=NULL) dd_FreeMatrix(Gdual);
  return Mproj;
}
Пример #10
0
/* Exponential.  Computes exp(a) in double-double precision.

   Returns DD_C_ZERO when the leading component is <= -709 and DD_C_INF
   when it is >= 709.  The exact arguments 0 and 1 are answered directly
   with DD_C_ONE and DD_C_E. */
double2
dd_exp(const double2 a)
{
    /* Strategy:  We first reduce the size of x by noting that

            exp(kr + m * log(2)) = 2^m * exp(r)^k

       where m and k are integers.  By choosing m appropriately
       we can make |kr| <= log(2) / 2 = 0.347.  Then exp(r) is
       evaluated using the familiar Taylor series.  Reducing the
       argument substantially speeds up the convergence.       */

    const double k = 512.0;
    const double inv_k = 1.0 / k;
    double m;
    double2 r, s, t, p;
    int i = 0;
    int j;

    if (a.x[0] <= -709.0) {
        return DD_C_ZERO;
    }

    if (a.x[0] >= 709.0) {
        return DD_C_INF;
    }

    /* exp(0) = 1 */
    if (dd_is_zero(a)) {
        return DD_C_ONE;
    }

    if (dd_is_one(a)) {
        return DD_C_E;
    }

    /* a = k*r + m*log(2), with m the nearest integer to a / log(2). */
    m = floor(a.x[0] / DD_C_LOG2.x[0] + 0.5);
    r = dd_mul_pwr2(dd_sub(a, dd_mul_dd_d(DD_C_LOG2, m)), inv_k);

    /* s accumulates exp(r) - 1 = r + r^2/2 + r^3 * inv_fact[0] + ... */
    p = dd_sqr(r);
    s = dd_add(r, dd_mul_pwr2(p, 0.5));
    p = dd_mul(p, r);
    t = dd_mul(p, inv_fact[0]);
    do {
        s = dd_add(s, t);
        p = dd_mul(p, r);
        ++i;
        t = dd_mul(p, inv_fact[i]);
    } while (fabs(dd_to_double(t)) > inv_k * DD_C_EPS && i < 5);

    s = dd_add(s, t);

    /* Undo the division by k = 2^9: with s = e - 1, squaring e gives
       (1 + s)^2 - 1 = 2*s + s^2, applied nine times. */
    for (j = 0; j < 9; ++j) {
        s = dd_add(dd_mul_pwr2(s, 2.0), dd_sqr(s));
    }
    s = dd_add(s, DD_C_ONE);

    return dd_ldexp(s, DD_STATIC_CAST(int, m));
}
Пример #11
0
/*
 * Render a Mandelbrot image as a binary PPM (P6) on stdout.
 *
 * Arguments: width height centerx centery magnification.
 * NOTE: argv[3] and argv[4] are accepted but not read; the centre is the
 * fixed double-double point assigned to centerx/centery below.
 *
 * A first pass computes one sample per pixel.  A second pass copies
 * interior pixels whose four neighbours have the same colour and
 * supersamples every other pixel on a 5 x 5 grid.  Progress is reported
 * on stderr.
 *
 * Returns 0 on success and 1 on a usage error, invalid size or
 * magnification, allocation failure or short write.
 */
int main(int argc, char **argv) {
    if (argc != 6) {
        fprintf(stderr, "usage: %s width height centerx centery magnification", argv[0]);
        return 1;
    }

    /* Validate the image size before it is used as a divisor and as an
       allocation size. */
    const unsigned int max_uint = (unsigned int)-1;
    const long width_arg = strtol(argv[1], NULL, 10);
    const long height_arg = strtol(argv[2], NULL, 10);
    if (width_arg <= 0 || height_arg <= 0
     || (unsigned long)width_arg > max_uint
     || (unsigned long)height_arg > max_uint) {
        fprintf(stderr, "error: width and height must be positive integers\n");
        return 1;
    }
    unsigned int width = (unsigned int)width_arg;
    unsigned int height = (unsigned int)height_arg;
    /* All pixel indices below are computed in unsigned int, so the byte
       count width*height*3 has to fit in that type. */
    if (width > max_uint / 3u / height) {
        fprintf(stderr, "error: image size %u x %u is too large\n", width, height);
        return 1;
    }
    const size_t nbytes = (size_t)width * height * 3;

    /* The magnification is a divisor in the coordinate setup. */
    const double magnification = strtod(argv[5], NULL);
    if (!(magnification > 0.0)) {
        fprintf(stderr, "error: magnification must be a positive number\n");
        return 1;
    }

    unsigned char *tmpimage = malloc(nbytes);
    unsigned char *finalimage = malloc(nbytes);
    if (tmpimage == NULL || finalimage == NULL) {
        fprintf(stderr, "error: cannot allocate image buffers\n");
        free(tmpimage);
        free(finalimage);
        return 1;
    }

    DoubleDouble temp1;
    unsigned int x, y;
    DoubleDouble centerx, centery;
    centerx = dd_new(-0.7436438870371587, -3.628952515063387E-17);
    centery = dd_new(0.13182590420531198, -1.2892807754956678E-17);
    logLogBailout = log(log(bailout));
    DoubleDouble magn = dd_new(magnification, 0);
    // x0d = 4 / magn / width;
    x0d = dd_ui_div(4, magn);
    x0d = dd_div_ui(x0d, width);
    // x2 = -2 / magn + centerx;
    x2 = dd_si_div(-2, magn);
    x2 = dd_add(x2, centerx);
    // y1d = -4 / magn / width;
    y1d = dd_si_div(-4, magn);
    y1d = dd_div_ui(y1d, width);
    // y2 = 2 / magn * height / width + centery;
    y2 = dd_ui_div(2, magn);
    temp1 = dd_new(height, 0);
    temp1 = dd_div_ui(temp1, width);
    y2 = dd_mul(y2, temp1);
    y2 = dd_add(y2, centery);

    /* Pass 1: one sample per pixel into tmpimage. */
    unsigned int idx;
    unsigned int imgidx = 0;
    unsigned long lastit;
    double zxd, zyd;
    bool inside;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            fprintf(stderr, "\rR: %f %%", (float)imgidx/(width*height*3)*100);
            calculate_pixel(x, y, &lastit, &zxd, &zyd, &inside);

            if (inside) {
                tmpimage[imgidx++] = 0;
                tmpimage[imgidx++] = 0;
                tmpimage[imgidx++] = 0;
            } else {
                idx = getcoloridx(lastit, zxd, zyd);
                tmpimage[imgidx++] = colors[idx][0];
                tmpimage[imgidx++] = colors[idx][1];
                tmpimage[imgidx++] = colors[idx][2];
            }
        }
    }

    /* Pass 2: antialias into finalimage. */
    imgidx = 0;
    int finalidx = 0;
    int aafactor = 5;
    int aareach = aafactor / 2;
    int aaarea = aafactor * aafactor;
    double aafactorinv = 1.0/aafactor;
    int xi, yi;
    unsigned int val1, val2, val3;
    double dx, dy;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            fprintf(stderr, "\rAA: %f %%", (float)imgidx/(width*height*3)*100);
            val1 = tmpimage[imgidx++];
            val2 = tmpimage[imgidx++];
            val3 = tmpimage[imgidx++];
            // if pixel is neither at the border nor are its four neighbors
            // different, copy value and continue without antialiasing it
            if (x != 0 && y != 0 && x != width -1 && y != height -1
             && tmpimage[(y+1)*width*3+x*3+0] == val1
             && tmpimage[(y+1)*width*3+x*3+1] == val2
             && tmpimage[(y+1)*width*3+x*3+2] == val3
             && tmpimage[(y-1)*width*3+x*3+0] == val1
             && tmpimage[(y-1)*width*3+x*3+1] == val2
             && tmpimage[(y-1)*width*3+x*3+2] == val3
             && tmpimage[y*width*3+(x+1)*3+0] == val1
             && tmpimage[y*width*3+(x+1)*3+1] == val2
             && tmpimage[y*width*3+(x+1)*3+2] == val3
             && tmpimage[y*width*3+(x-1)*3+0] == val1
             && tmpimage[y*width*3+(x-1)*3+1] == val2
             && tmpimage[y*width*3+(x-1)*3+2] == val3) {
                finalimage[finalidx++] = val1;
                finalimage[finalidx++] = val2;
                finalimage[finalidx++] = val3;
                continue;
            }

            // otherwise do antialiasing: the centre sample is already in
            // val1..val3; samples inside the set contribute black (0)
            for (xi = -aareach; xi <= aareach; xi++) {
                dx = xi*aafactorinv;
                for (yi = -aareach; yi <= aareach; yi++) {
                    dy = yi*aafactorinv;
                    if ((xi | yi) != 0) {
                        calculate_pixel(x+dx, y+dy, &lastit, &zxd, &zyd, &inside);
                        if (!inside) {
                            idx = getcoloridx(lastit, zxd, zyd);
                            val1 += colors[idx][0];
                            val2 += colors[idx][1];
                            val3 += colors[idx][2];
                        }
                    }
                }
            }
            finalimage[finalidx++] = val1/aaarea;
            finalimage[finalidx++] = val2/aaarea;
            finalimage[finalidx++] = val3/aaarea;
        }
    }

    // write out image
    int status = 0;
    printf("P6 %u %u 255\n", width, height);
    if (fwrite(finalimage, 1, nbytes, stdout) != nbytes) {
        fprintf(stderr, "\nerror: short write on stdout");
        status = 1;
    }
    fprintf(stderr, "\n");

    free(tmpimage);
    free(finalimage);
    return status;
}
Пример #12
0
inline void calculate_pixel(double x, double y, unsigned long *lastit, double *zxd, double *zyd, bool *inside) {
    DoubleDouble px, py, zx, zy, xx, yy;
    //px = x*x0d + x2;
    px = dd_mul_d(x0d, x);
    px = dd_add(px, x2);
    //py = y*y1d + y2;
    py = dd_mul_d(y1d, y);
    py = dd_add(py, y2);
    // no Main bulb or Cardoid check to be faster
    zx = dd_new(px.hi, px.lo);
    zy = dd_new(py.hi, py.lo);
    unsigned long i;
    *inside = true;
    int check = 3;
    int whenupdate = 10;
    double hx, hy, d;
    hx = 0;
    hy = 0;
    //for (i = 1; i <= maxiter; i++) {
    for (i = 1; i <= 50000; i++) {
        //xx = zx * zx;
        xx = dd_sqr(zx);
        //yy = zy * zy;
        yy = dd_sqr(zy);
        //if (xx + yy > bailout) {
        if (xx.hi + yy.hi > bailout) {
            *inside = false;
            break;
        }
        // iterate
        //zy = 2 * zx * zy + py;
        //zx = dd_mul_ui(zx, 2);
        //zy = dd_mul(zx, zy);
        zy = dd_add(dd_mul2(zx, zy), py);
        //zx = xx - yy + px;
        zx = dd_add(dd_sub(xx, yy), px);

        // period checking
        d = zx.hi - hx;
        if (d > 0.0 ? d < eps : d > -eps) {
            d = zy.hi - hy;
            if (d > 0.0 ? d < eps : d > -eps) {
                // Period found.
                break;
            }
        }
        if ((i & check) == 0) {
            if (--whenupdate == 0) {
                whenupdate = 10;
                check <<= 1;
                check++;
            }
            // period = 0;
            hx = zx.hi;
            hy = zy.hi;
        }
    }

    *lastit = i;
    *zxd = zx.hi;
    *zyd = zy.hi;
}
Пример #13
0
/*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
 *
 * We use scaling to avoid overflow/underflow, along with the
 * canonical precision-doubling technique adapted from:
 *
 *      Dekker, T.  A Floating-Point Technique for Extending the
 *      Available Precision.  Numer. Math. 18, 224-242 (1971).
 *
 * Outline: non-finite and zero operands are dispatched first.  The
 * remaining operands are split into fraction and exponent with frexpl(),
 * the product and the sum are formed as hi/lo pairs by dd_mul() and
 * dd_add() with the rounding mode forced to FE_TONEAREST, and the
 * exponent ex + ey is reapplied at the end with scalbnl().  The caller's
 * rounding mode is saved in oround and put back with fesetround(oround)
 * on the cancellation and directed-rounding paths.
 */
long double fmal(long double x, long double y, long double z)
{
	#pragma STDC FENV_ACCESS ON
	long double xs, ys, zs, adj;
	struct dd xy, r;
	int oround;
	int ex, ey, ez;
	int spread;

	/*
	 * Handle special cases. The order of operations and the particular
	 * return values here are crucial in handling special cases involving
	 * infinities, NaNs, overflows, and signed zeroes correctly.
	 */
	if (!isfinite(x) || !isfinite(y))
		return (x * y + z);
	if (!isfinite(z))
		return (z);
	if (x == 0.0 || y == 0.0)
		return (x * y + z);
	if (z == 0.0)
		return (x * y);

	/* Split each operand into a fraction (xs, ys, zs) and an exponent. */
	xs = frexpl(x, &ex);
	ys = frexpl(y, &ey);
	zs = frexpl(z, &ez);
	oround = fegetround();
	/* Exponent gap between the product x * y and the addend z. */
	spread = ex + ey - ez;

	/*
	 * If x * y and z are many orders of magnitude apart, the scaling
	 * will overflow, so we handle these cases specially.  Rounding
	 * modes other than FE_TONEAREST are painful.
	 */
	if (spread < -LDBL_MANT_DIG) {
		/* The product is tiny next to z: the result is z, possibly nudged. */
#ifdef FE_INEXACT
		feraiseexcept(FE_INEXACT);
#endif
#ifdef FE_UNDERFLOW
		if (!isnormal(z))
			feraiseexcept(FE_UNDERFLOW);
#endif
		/* Each directed mode is handled only if the platform defines it. */
		switch (oround) {
		default: /* FE_TONEAREST */
			return (z);
#ifdef FE_TOWARDZERO
		case FE_TOWARDZERO:
			/* (x > 0) ^ (y < 0) ^ (z < 0): relational operators bind tighter than ^. */
			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
				return (z);
			else
				return (nextafterl(z, 0));
#endif
#ifdef FE_DOWNWARD
		case FE_DOWNWARD:
			/* (x > 0) ^ (y < 0) is true when the product is positive. */
			if (x > 0.0 ^ y < 0.0)
				return (z);
			else
				return (nextafterl(z, -INFINITY));
#endif
#ifdef FE_UPWARD
		case FE_UPWARD:
			if (x > 0.0 ^ y < 0.0)
				return (nextafterl(z, INFINITY));
			else
				return (z);
#endif
		}
	}
	/*
	 * Bring z's fraction onto the product's scale.  When z is far
	 * smaller than the product, a tiny value of the same sign is
	 * substituted for it instead.
	 */
	if (spread <= LDBL_MANT_DIG * 2)
		zs = scalbnl(zs, -spread);
	else
		zs = copysignl(LDBL_MIN, zs);

	fesetround(FE_TONEAREST);

	/*
	 * Basic approach for round-to-nearest:
	 *
	 *     (xy.hi, xy.lo) = x * y           (exact)
	 *     (r.hi, r.lo)   = xy.hi + z       (exact)
	 *     adj = xy.lo + r.lo               (inexact; low bit is sticky)
	 *     result = r.hi + adj              (correctly rounded)
	 */
	xy = dd_mul(xs, ys);
	r = dd_add(xy.hi, zs);

	/* From here on, spread is the exponent to reapply to the scaled result. */
	spread = ex + ey;

	if (r.hi == 0.0) {
		/*
		 * When the addends cancel to 0, ensure that the result has
		 * the correct sign.
		 */
		fesetround(oround);
		volatile long double vzs = zs; /* XXX gcc CSE bug workaround */
		return xy.hi + vzs + scalbnl(xy.lo, spread);
	}

	if (oround != FE_TONEAREST) {
		/*
		 * There is no need to worry about double rounding in directed
		 * rounding modes.
		 * But underflow may not be raised correctly, example in downward rounding:
		 * fmal(0x1.0000000001p-16000L, 0x1.0000000001p-400L, -0x1p-16440L)
		 */
		long double ret;
#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
		/*
		 * Remember whether FE_INEXACT was already set, then clear it so
		 * that the test after the final operations reflects only them.
		 */
		int e = fetestexcept(FE_INEXACT);
		feclearexcept(FE_INEXACT);
#endif
		fesetround(oround);
		adj = r.lo + xy.lo;
		ret = scalbnl(r.hi + adj, spread);
#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
		/*
		 * An inexact result with an exponent below -16382 gets
		 * FE_UNDERFLOW raised explicitly; otherwise a previously set
		 * FE_INEXACT flag is re-raised.
		 */
		if (ilogbl(ret) < -16382 && fetestexcept(FE_INEXACT))
			feraiseexcept(FE_UNDERFLOW);
		else if (e)
			feraiseexcept(FE_INEXACT);
#endif
		return ret;
	}

	/*
	 * Round-to-nearest: fold the two low parts into one term whose last
	 * bit is sticky, then either scale directly or hand the final scaling
	 * to add_and_denormalize().
	 * NOTE(review): -16383 is presumably the exponent threshold below
	 * which the long double result is subnormal; confirm for the target
	 * format.
	 */
	adj = add_adjusted(r.lo, xy.lo);
	if (spread + ilogbl(r.hi) > -16383)
		return scalbnl(r.hi + adj, spread);
	else
		return add_and_denormalize(r.hi, adj, spread);
}