Beispiel #1
0
int
main()
{
#if N & 1
	long double	value = 0;
#else
	double		value = 0;
#endif
#if N < 5
	int		exp = 0;
#endif

#if N == 1
	return ldexpl(value, exp) != 0;
#endif
#if N == 2
	return ldexp(value, exp) != 0;
#endif
#if N == 3
	return frexpl(value, &exp) != 0;
#endif
#if N == 4
	return frexp(value, &exp) != 0;
#endif
#if N == 5
	return isnan(value);
#endif
#if N == 6
	return isnan(value);
#endif
#if N == 7
	return copysign(1.0, value) < 0;
#endif
#if N == 8
	return signbit(value);
#endif
}
Beispiel #2
0
/* Tests along the real and imaginary axes. */
void
test_axes(void)
{
	static const long double nums[] = {
		-2, -1, -0.5, 0.5, 1, 2
	};
	long double complex z;
	int i;

	for (i = 0; i < sizeof(nums) / sizeof(nums[0]); i++) {
		/* Real axis */
		z = CMPLXL(nums[i], 0.0);
		if (fabs(nums[i]) <= 1) {
			testall_tol(cacosh, z, CMPLXL(0.0, acos(nums[i])), 1);
			testall_tol(cacos, z, CMPLXL(acosl(nums[i]), -0.0), 1);
			testall_tol(casin, z, CMPLXL(asinl(nums[i]), 0.0), 1);
			testall_tol(catanh, z, CMPLXL(atanh(nums[i]), 0.0), 1);
		} else {
			testall_tol(cacosh, z,
				    CMPLXL(acosh(fabs(nums[i])),
					   (nums[i] < 0) ? pi : 0), 1);
			testall_tol(cacos, z,
				    CMPLXL((nums[i] < 0) ? pi : 0,
					   -acosh(fabs(nums[i]))), 1);
			testall_tol(casin, z,
				    CMPLXL(copysign(pi / 2, nums[i]),
					   acosh(fabs(nums[i]))), 1);
			testall_tol(catanh, z,
				    CMPLXL(atanh(1 / nums[i]), pi / 2), 1);
		}
		testall_tol(casinh, z, CMPLXL(asinh(nums[i]), 0.0), 1);
		testall_tol(catan, z, CMPLXL(atan(nums[i]), 0), 1);

		/* TODO: Test the imaginary axis. */
	}
}
Beispiel #3
0
double CGaussian::Deviance(const CDataset& kData, const Bag& kBag,
                           const double* kFuncEstimate) {
  double loss = 0.0;
  double weight = 0.0;

  unsigned long num_rows_in_set = kData.get_size_of_set();
#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : loss, weight) num_threads(get_num_threads())
  for (unsigned long i = 0; i < num_rows_in_set; i++) {
    const double tmp =
        (kData.y_ptr()[i] - kData.offset_ptr()[i] - kFuncEstimate[i]);
    loss += kData.weight_ptr()[i] * tmp * tmp;
    weight += kData.weight_ptr()[i];
  }

  // TODO: Check if weights are all zero for validation set
  if ((weight == 0.0) && (loss == 0.0)) {
    return nan("");
  } else if (weight == 0.0) {
    return copysign(HUGE_VAL, loss);
  }

  return loss / weight;
}
Beispiel #4
0
static Handle powerOf(TaskData *mdTaskData, Handle args)
{
    double x = real_arg1(args), y = real_arg2(args);
    /* Some of the special cases are defined and don't seem to match
       the C pow function (at least as implemented in MS C). */
    /* Maybe handle all this in ML? */
    if (isnan(x))
    {
        if (y == 0.0) return real_result(mdTaskData, 1.0);
        else return real_result(mdTaskData, notANumber);
    }
    else if (isnan(y)) return real_result(mdTaskData, y); /* i.e. nan. */
    else if (x == 0.0 && y < 0.0)
    {
        /* This case is not handled correctly in Solaris. It always
           returns -infinity. */
        int iy = (int)floor(y);
        /* If x is -0.0 and y is an odd integer the result is -infinity. */
        if (copysign(1.0, x) < 0.0 && (double)iy == y && (iy & 1))
            return real_result(mdTaskData, negInf); /* -infinity. */
        else return real_result(mdTaskData, posInf); /* +infinity. */
    }
    return real_result(mdTaskData, pow(x, y));
}
Beispiel #5
0
/**
    Purpose
    -------
    SLAEX3 finds the roots of the secular equation, as defined by the
    values in D, W, and RHO, between 1 and K.  It makes the
    appropriate calls to SLAED4 and then updates the eigenvectors by
    multiplying the matrix of eigenvectors of the pair of eigensystems
    being combined by the matrix of eigenvectors of the K-by-K system
    which is solved here.

    It is used in the last step when only a part of the eigenvectors
    is required.
    It compute only the required part of the eigenvectors and the rest
    is not used.

    This code makes very mild assumptions about floating point
    arithmetic. It will work on machines with a guard digit in
    add/subtract, or on those binary machines without guard digits
    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
    It could conceivably fail on hexadecimal or decimal machines
    without guard digits, but we know of none.

    Arguments
    ---------
    @param[in]
    ngpu    INTEGER
            Number of GPUs to use. ngpu > 0.

    @param[in]
    k       INTEGER
            The number of terms in the rational function to be solved by
            SLAED4.  K >= 0.

    @param[in]
    n       INTEGER
            The number of rows and columns in the Q matrix.
            N >= K (deflation may result in N > K).

    @param[in]
    n1      INTEGER
            The location of the last eigenvalue in the leading submatrix.
            min(1,N) <= N1 <= N/2.

    @param[out]
    d       REAL array, dimension (N)
            D(I) contains the updated eigenvalues for
            1 <= I <= K.

    @param[out]
    Q       REAL array, dimension (LDQ,N)
            Initially the first K columns are used as workspace.
            On output the columns ??? to ??? contain
            the updated eigenvectors.

    @param[in]
    ldq     INTEGER
            The leading dimension of the array Q.  LDQ >= max(1,N).

    @param[in]
    rho     REAL
            The value of the parameter in the rank one update equation.
            RHO >= 0 required.

    @param[in,out]
    dlamda  REAL array, dimension (K)
            The first K elements of this array contain the old roots
            of the deflated updating problem.  These are the poles
            of the secular equation. May be changed on output by
            having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
            Cray-2, or Cray C-90, as described above.

    @param[in]
    Q2      REAL array, dimension (LDQ2, N)
            The first K columns of this matrix contain the non-deflated
            eigenvectors for the split problem.

    @param[in]
    indx    INTEGER array, dimension (N)
            The permutation used to arrange the columns of the deflated
            Q matrix into three groups (see SLAED2).
            The rows of the eigenvectors found by SLAED4 must be likewise
            permuted before the matrix multiply can take place.

    @param[in]
    ctot    INTEGER array, dimension (4)
            A count of the total number of the various types of columns
            in Q, as described in INDX.  The fourth column type is any
            column which has been deflated.

    @param[in,out]
    w       REAL array, dimension (K)
            The first K elements of this array contain the components
            of the deflation-adjusted updating vector. Destroyed on
            output.

    @param
    s       (workspace) REAL array, dimension (N1 + 1)*K
            Will contain the eigenvectors of the repaired matrix which
            will be multiplied by the previously accumulated eigenvectors
            to update the system.

    @param[out]
    indxq   INTEGER array, dimension (N)
            On exit, the permutation which will reintegrate the
            subproblems back into sorted order,
            i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.
    
    @param
    dwork   (devices workspaces) REAL array of arrays,
            dimension NRGPU.
            if NRGPU = 1 the dimension of the first workspace
            should be (3*N*N/2+3*N)
            otherwise the NRGPU workspaces should have the size
            ceil((N-N1) * (N-N1) / floor(ngpu/2)) +
            NB * ((N-N1) + (N-N1) / floor(ngpu/2))
    
    @param
    queues  (device queues) magma_queue_t array,
            dimension (MagmaMaxGPUs,2)

    @param[in]
    range   magma_range_t
      -     = MagmaRangeAll: all eigenvalues will be found.
      -     = MagmaRangeV:   all eigenvalues in the half-open interval (VL,VU]
                             will be found.
      -     = MagmaRangeI:   the IL-th through IU-th eigenvalues will be found.
            TODO verify range, vl, vu, il, iu -- copied from slaex1.

    @param[in]
    vl      REAL
    @param[in]
    vu      REAL
            if RANGE=MagmaRangeV, the lower and upper bounds of the interval to
            be searched for eigenvalues. VL < VU.
            Not referenced if RANGE = MagmaRangeAll or MagmaRangeI.

    @param[in]
    il      INTEGER
    @param[in]
    iu      INTEGER
            if RANGE=MagmaRangeI, the indices (in ascending order) of the
            smallest and largest eigenvalues to be returned.
            1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
            Not referenced if RANGE = MagmaRangeAll or MagmaRangeV.

    @param[out]
    info    INTEGER
      -     = 0:  successful exit.
      -     < 0:  if INFO = -i, the i-th argument had an illegal value.
      -     > 0:  if INFO = 1, an eigenvalue did not converge

    Further Details
    ---------------
    Based on contributions by
    Jeff Rutter, Computer Science Division, University of California
    at Berkeley, USA
    Modified by Francoise Tisseur, University of Tennessee.

    @ingroup magma_ssyev_aux
    ********************************************************************/
extern "C" magma_int_t
magma_slaex3_m(
    magma_int_t ngpu,
    magma_int_t k, magma_int_t n, magma_int_t n1, float *d,
    float *Q, magma_int_t ldq, float rho,
    float *dlamda, float *Q2, magma_int_t *indx,
    magma_int_t *ctot, float *w, float *s, magma_int_t *indxq,
    magmaFloat_ptr dwork[],
    magma_queue_t queues[MagmaMaxGPUs][2],
    magma_range_t range, float vl, float vu, magma_int_t il, magma_int_t iu,
    magma_int_t *info )
{
#define Q(i_,j_) (Q + (i_) + (j_)*ldq)

#define dQ2(id)    (dwork[id])
#define dS(id, ii) (dwork[id] + n2*n2_loc + (ii)*(n2*nb))
#define dQ(id, ii) (dwork[id] + n2*n2_loc +    2*(n2*nb) + (ii)*(n2_loc*nb))

    if (ngpu == 1) {
        magma_setdevice(0);
        magma_slaex3(k, n, n1, d, Q, ldq, rho,
                     dlamda, Q2, indx, ctot, w, s, indxq,
                     *dwork, range, vl, vu, il, iu, info );
        return *info;
    }
    float d_one  = 1.;
    float d_zero = 0.;
    magma_int_t ione = 1;
    magma_int_t ineg_one = -1;

    magma_int_t iil, iiu, rk;
    magma_int_t n1_loc, n2_loc, ib, nb, ib2, igpu;
    magma_int_t ni_loc[MagmaMaxGPUs];

    magma_int_t i, ind, iq2, j, n12, n2, n23, tmp;
    float temp;
    magma_int_t alleig, valeig, indeig;

    alleig = (range == MagmaRangeAll);
    valeig = (range == MagmaRangeV);
    indeig = (range == MagmaRangeI);

    *info = 0;

    if (k < 0)
        *info=-1;
    else if (n < k)
        *info=-2;
    else if (ldq < max(1,n))
        *info=-6;
    else if (! (alleig || valeig || indeig))
        *info = -15;
    else {
        if (valeig) {
            if (n > 0 && vu <= vl)
                *info = -17;
        }
        else if (indeig) {
            if (il < 1 || il > max(1,n))
                *info = -18;
            else if (iu < min(n,il) || iu > n)
                *info = -19;
        }
    }

    if (*info != 0) {
        magma_xerbla(__func__, -(*info));
        return *info;
    }

    // Quick return if possible
    if (k == 0)
        return *info;

    magma_device_t orig_dev;
    magma_getdevice( &orig_dev );
    magma_queue_t orig_stream;
    magmablasGetKernelStream( &orig_stream );
    
    /*
     Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
     be computed with high relative accuracy (barring over/underflow).
     This is a problem on machines without a guard digit in
     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
     The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
     which on any of these machines zeros out the bottommost
     bit of DLAMDA(I) if it is 1; this makes the subsequent
     subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
     occurs. On binary machines with a guard digit (almost all
     machines) it does not change DLAMDA(I) at all. On hexadecimal
     and decimal machines with a guard digit, it slightly
     changes the bottommost bits of DLAMDA(I). It does not account
     for hexadecimal or decimal machines without guard digits
     (we know of none). We use a subroutine call to compute
     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
     this code.*/

//#define CHECK_CPU
#ifdef CHECK_CPU
    float *hwS[2][MagmaMaxGPUs], *hwQ[2][MagmaMaxGPUs], *hwQ2[MagmaMaxGPUs];
    #define hQ2(id) (hwQ2[id])
    #define hS(id, ii) (hwS[ii][id])
    #define hQ(id, ii) (hwQ[ii][id])
#endif
    n2 = n - n1;

    n12 = ctot[0] + ctot[1];
    n23 = ctot[1] + ctot[2];

    iq2 = n1 * n12;
    //lq2 = iq2 + n2 * n23;

    n1_loc = (n1-1) / (ngpu/2) + 1;
    n2_loc = (n2-1) / (ngpu/2) + 1;

    nb = magma_get_slaex3_m_nb();

    if (n1 >= magma_get_slaex3_m_k()) {
#ifdef CHECK_CPU
        for (igpu = 0; igpu < ngpu; ++igpu) {
            magma_smalloc_pinned( &(hwS[0][igpu]), n2*nb );
            magma_smalloc_pinned( &(hwS[1][igpu]), n2*nb );
            magma_smalloc_pinned( &(hwQ2[igpu]), n2*n2_loc );
            magma_smalloc_pinned( &(hwQ[0][igpu]), n2_loc*nb );
            magma_smalloc_pinned( &(hwQ[1][igpu]), n2_loc*nb );
        }
#endif
        for (igpu = 0; igpu < ngpu-1; igpu += 2) {
            ni_loc[igpu] = min(n1_loc, n1 - igpu/2 * n1_loc);
#ifdef CHECK_CPU
            lapackf77_slacpy("A", &ni_loc[igpu], &n12, Q2+n1_loc*(igpu/2), &n1, hQ2(igpu), &n1_loc);
#endif
            magma_setdevice(igpu);
            magma_ssetmatrix_async( ni_loc[igpu], n12,
                                    Q2+n1_loc*(igpu/2), n1,
                                    dQ2(igpu),          n1_loc, queues[igpu][0] );
            ni_loc[igpu+1] = min(n2_loc, n2 - igpu/2 * n2_loc);
#ifdef CHECK_CPU
            lapackf77_slacpy("A", &ni_loc[igpu+1], &n23, Q2+iq2+n2_loc*(igpu/2), &n2, hQ2(igpu+1), &n2_loc);
#endif
            magma_setdevice(igpu+1);
            magma_ssetmatrix_async( ni_loc[igpu+1], n23,
                                    Q2+iq2+n2_loc*(igpu/2), n2,
                                    dQ2(igpu+1),            n2_loc, queues[igpu+1][0] );
        }
    }

    //

#ifdef _OPENMP
    /////////////////////////////////////////////////////////////////////////////////
    //openmp implementation
    /////////////////////////////////////////////////////////////////////////////////
    magma_timer_t time=0;
    timer_start( time );

#pragma omp parallel private(i, j, tmp, temp)
    {
        magma_int_t id = omp_get_thread_num();
        magma_int_t tot = omp_get_num_threads();

        magma_int_t ib = (  id   * k) / tot; //start index of local loop
        magma_int_t ie = ((id+1) * k) / tot; //end index of local loop
        magma_int_t ik = ie - ib;           //number of local indices

        for (i = ib; i < ie; ++i)
            dlamda[i]=lapackf77_slamc3(&dlamda[i], &dlamda[i]) - dlamda[i];

        for (j = ib; j < ie; ++j) {
            magma_int_t tmpp=j+1;
            magma_int_t iinfo = 0;
            lapackf77_slaed4(&k, &tmpp, dlamda, w, Q(0,j), &rho, &d[j], &iinfo);
            // If the zero finder fails, the computation is terminated.
            if (iinfo != 0) {
#pragma omp critical (info)
                *info = iinfo;
                break;
            }
        }

#pragma omp barrier

        if (*info == 0) {
#pragma omp single
            {
                //Prepare the INDXQ sorting permutation.
                magma_int_t nk = n - k;
                lapackf77_slamrg( &k, &nk, d, &ione, &ineg_one, indxq);

                //compute the lower and upper bound of the non-deflated eigenvectors
                if (valeig)
                    magma_svrange(k, d, &iil, &iiu, vl, vu);
                else if (indeig)
                    magma_sirange(k, indxq, &iil, &iiu, il, iu);
                else {
                    iil = 1;
                    iiu = k;
                }
                rk = iiu - iil + 1;
            }

            if (k == 2) {
#pragma omp single
                {
                    for (j = 0; j < k; ++j) {
                        w[0] = *Q(0,j);
                        w[1] = *Q(1,j);

                        i = indx[0] - 1;
                        *Q(0,j) = w[i];
                        i = indx[1] - 1;
                        *Q(1,j) = w[i];
                    }
                }
            }
            else if (k != 1) {
                // Compute updated W.
                blasf77_scopy( &ik, &w[ib], &ione, &s[ib], &ione);

                // Initialize W(I) = Q(I,I)
                tmp = ldq + 1;
                blasf77_scopy( &ik, Q(ib,ib), &tmp, &w[ib], &ione);

                for (j = 0; j < k; ++j) {
                    magma_int_t i_tmp = min(j, ie);
                    for (i = ib; i < i_tmp; ++i)
                        w[i] = w[i] * ( *Q(i, j) / ( dlamda[i] - dlamda[j] ) );
                    i_tmp = max(j+1, ib);
                    for (i = i_tmp; i < ie; ++i)
                        w[i] = w[i] * ( *Q(i, j) / ( dlamda[i] - dlamda[j] ) );
                }

                for (i = ib; i < ie; ++i)
                    w[i] = copysign( sqrt( -w[i] ), s[i]);

#pragma omp barrier

                //reduce the number of used threads to have enough S workspace
                tot = min(n1, omp_get_num_threads());

                if (id < tot) {
                    ib = (  id   * rk) / tot + iil - 1;
                    ie = ((id+1) * rk) / tot + iil - 1;
                    ik = ie - ib;
                }
                else {
                    ib = -1;
                    ie = -1;
                    ik = -1;
                }

                // Compute eigenvectors of the modified rank-1 modification.
                for (j = ib; j < ie; ++j) {
                    for (i = 0; i < k; ++i)
                        s[id*k + i] = w[i] / *Q(i,j);
                    temp = magma_cblas_snrm2( k, s+id*k, 1 );
                    for (i = 0; i < k; ++i) {
                        magma_int_t iii = indx[i] - 1;
                        *Q(i,j) = s[id*k + iii] / temp;
                    }
                }
            }
        }
    }
    if (*info != 0)
        return *info;

    timer_stop( time );
    timer_printf( "eigenvalues/vector D+zzT = %6.2f\n", time );

#else
    /////////////////////////////////////////////////////////////////////////////////
    // Non openmp implementation
    /////////////////////////////////////////////////////////////////////////////////
    magma_timer_t time=0;
    timer_start( time );

    for (i = 0; i < k; ++i)
        dlamda[i]=lapackf77_slamc3(&dlamda[i], &dlamda[i]) - dlamda[i];

    for (j = 0; j < k; ++j) {
        magma_int_t tmpp=j+1;
        magma_int_t iinfo = 0;
        lapackf77_slaed4(&k, &tmpp, dlamda, w, Q(0,j), &rho, &d[j], &iinfo);
        // If the zero finder fails, the computation is terminated.
        if (iinfo != 0)
            *info=iinfo;
    }
    if (*info != 0)
        return *info;

    //Prepare the INDXQ sorting permutation.
    magma_int_t nk = n - k;
    lapackf77_slamrg( &k, &nk, d, &ione, &ineg_one, indxq);

    //compute the lower and upper bound of the non-deflated eigenvectors
    if (valeig)
        magma_svrange(k, d, &iil, &iiu, vl, vu);
    else if (indeig)
        magma_sirange(k, indxq, &iil, &iiu, il, iu);
    else {
        iil = 1;
        iiu = k;
    }
    rk = iiu - iil + 1;

    if (k == 2) {
        for (j = 0; j < k; ++j) {
            w[0] = *Q(0,j);
            w[1] = *Q(1,j);

            i = indx[0] - 1;
            *Q(0,j) = w[i];
            i = indx[1] - 1;
            *Q(1,j) = w[i];
        }
    }
    else if (k != 1) {
        // Compute updated W.
        blasf77_scopy( &k, w, &ione, s, &ione);

        // Initialize W(I) = Q(I,I)
        tmp = ldq + 1;
        blasf77_scopy( &k, Q, &tmp, w, &ione);

        for (j = 0; j < k; ++j) {
            for (i = 0; i < j; ++i)
                w[i] = w[i] * ( *Q(i, j) / ( dlamda[i] - dlamda[j] ) );
            for (i = j+1; i < k; ++i)
                w[i] = w[i] * ( *Q(i, j) / ( dlamda[i] - dlamda[j] ) );
        }

        for (i = 0; i < k; ++i)
            w[i] = copysign( sqrt( -w[i] ), s[i]);

        // Compute eigenvectors of the modified rank-1 modification.
        for (j = iil-1; j < iiu; ++j) {
            for (i = 0; i < k; ++i)
                s[i] = w[i] / *Q(i,j);
            temp = magma_cblas_snrm2( k, s, 1 );
            for (i = 0; i < k; ++i) {
                magma_int_t iii = indx[i] - 1;
                *Q(i,j) = s[iii] / temp;
            }
        }
    }

    timer_stop( time );
    timer_printf( "eigenvalues/vector D+zzT = %6.2f\n", time );

#endif //_OPENMP

    // Compute the updated eigenvectors.

    timer_start( time );

    if (rk > 0) {
        if (n1 < magma_get_slaex3_m_k()) {
            // stay on the CPU
            if ( n23 != 0 ) {
                lapackf77_slacpy("A", &n23, &rk, Q(ctot[0],iil-1), &ldq, s, &n23);
                blasf77_sgemm("N", "N", &n2, &rk, &n23, &d_one, &Q2[iq2], &n2,
                              s, &n23, &d_zero, Q(n1,iil-1), &ldq );
            }
            else
                lapackf77_slaset("A", &n2, &rk, &d_zero, &d_zero, Q(n1,iil-1), &ldq);

            if ( n12 != 0 ) {
                lapackf77_slacpy("A", &n12, &rk, Q(0,iil-1), &ldq, s, &n12);
                blasf77_sgemm("N", "N", &n1, &rk, &n12, &d_one, Q2, &n1,
                              s, &n12, &d_zero, Q(0,iil-1), &ldq);
            }
            else
                lapackf77_slaset("A", &n1, &rk, &d_zero, &d_zero, Q(0,iil-1), &ldq);
        }
        else {
            //use the gpus
            ib = min(nb, rk);
            for (igpu = 0; igpu < ngpu-1; igpu += 2) {
                if (n23 != 0) {
                    magma_setdevice(igpu+1);
                    magma_ssetmatrix_async( n23, ib,
                                            Q(ctot[0],iil-1), ldq,
                                            dS(igpu+1,0),     n23, queues[igpu+1][0] );
                }
                if (n12 != 0) {
                    magma_setdevice(igpu);
                    magma_ssetmatrix_async( n12, ib,
                                            Q(0,iil-1), ldq,
                                            dS(igpu,0), n12, queues[igpu][0] );
                }
            }

            for (i = 0; i < rk; i += nb) {
                ib = min(nb, rk - i);
                ind = (i/nb)%2;
                if (i+nb < rk) {
                    ib2 = min(nb, rk - i - nb);
                    for (igpu = 0; igpu < ngpu-1; igpu += 2) {
                        if (n23 != 0) {
                            magma_setdevice(igpu+1);
                            magma_ssetmatrix_async( n23, ib2,
                                                    Q(ctot[0],iil-1+i+nb), ldq,
                                                    dS(igpu+1,(ind+1)%2),  n23, queues[igpu+1][(ind+1)%2] );
                        }
                        if (n12 != 0) {
                            magma_setdevice(igpu);
                            magma_ssetmatrix_async( n12, ib2,
                                                    Q(0,iil-1+i+nb),    ldq,
                                                    dS(igpu,(ind+1)%2), n12, queues[igpu][(ind+1)%2] );
                        }
                    }
                }

                // Ensure that the data is copied on gpu since we will overwrite it.
                for (igpu = 0; igpu < ngpu-1; igpu += 2) {
                    if (n23 != 0) {
#ifdef CHECK_CPU
                        lapackf77_slacpy("A", &n23, &ib, Q(ctot[0],iil-1+i), &ldq, hS(igpu+1,ind), &n23);
#endif
                        magma_setdevice(igpu+1);
                        magma_queue_sync( queues[igpu+1][ind] );
                    }
                    if (n12 != 0) {
#ifdef CHECK_CPU
                        lapackf77_slacpy("A", &n12, &ib, Q(0,iil-1+i), &ldq, hS(igpu,ind), &n12);
#endif
                        magma_setdevice(igpu);
                        magma_queue_sync( queues[igpu][ind] );
                    }
                }
                for (igpu = 0; igpu < ngpu-1; igpu += 2) {
                    if (n23 != 0) {
#ifdef CHECK_CPU
                        blasf77_sgemm("N", "N", &ni_loc[igpu+1], &ib, &n23, &d_one, hQ2(igpu+1), &n2_loc,
                                      hS(igpu+1,ind), &n23, &d_zero, hQ(igpu+1, ind), &n2_loc);
#endif
                        magma_setdevice(igpu+1);
                        magmablasSetKernelStream(queues[igpu+1][ind]);
                        magma_sgemm(MagmaNoTrans, MagmaNoTrans, ni_loc[igpu+1], ib, n23, d_one, dQ2(igpu+1), n2_loc,
                                    dS(igpu+1, ind), n23, d_zero, dQ(igpu+1, ind), n2_loc);
#ifdef CHECK_CPU
                        printf("norm Q %d: %f\n", igpu+1, cpu_gpu_sdiff(ni_loc[igpu+1], ib, hQ(igpu+1, ind), n2_loc, dQ(igpu+1, ind), n2_loc));
#endif
                    }
                    if (n12 != 0) {
#ifdef CHECK_CPU
                        blasf77_sgemm("N", "N", &ni_loc[igpu], &ib, &n12, &d_one, hQ2(igpu), &n1_loc,
                                      hS(igpu,ind%2), &n12, &d_zero, hQ(igpu, ind%2), &n1_loc);
#endif
                        magma_setdevice(igpu);
                        magmablasSetKernelStream(queues[igpu][ind]);
                        magma_sgemm(MagmaNoTrans, MagmaNoTrans, ni_loc[igpu], ib, n12, d_one, dQ2(igpu), n1_loc,
                                    dS(igpu, ind), n12, d_zero, dQ(igpu, ind), n1_loc);
#ifdef CHECK_CPU
                        printf("norm Q %d: %f\n", igpu, cpu_gpu_sdiff(ni_loc[igpu], ib, hQ(igpu, ind), n1_loc, dQ(igpu, ind), n1_loc));
#endif
                    }
                }
                for (igpu = 0; igpu < ngpu-1; igpu += 2) {
                    if (n23 != 0) {
                        magma_setdevice(igpu+1);
                        magma_sgetmatrix( ni_loc[igpu+1], ib, dQ(igpu+1, ind), n2_loc,
                                          Q(n1+n2_loc*(igpu/2),iil-1+i), ldq );
//                        magma_sgetmatrix_async( ni_loc[igpu+1], ib, dQ(igpu+1, ind), n2_loc,
//                                                Q(n1+n2_loc*(igpu/2),iil-1+i), ldq, queues[igpu+1][ind] );
                    }
                    if (n12 != 0) {
                        magma_setdevice(igpu);
                        magma_sgetmatrix( ni_loc[igpu], ib, dQ(igpu, ind), n1_loc,
                                          Q(n1_loc*(igpu/2),iil-1+i), ldq );
//                        magma_sgetmatrix_async( ni_loc[igpu], ib, dQ(igpu, ind), n1_loc,
//                                                Q(n1_loc*(igpu/2),iil-1+i), ldq, queues[igpu][ind] );
                    }
                }
            }
            for (igpu = 0; igpu < ngpu; ++igpu) {
#ifdef CHECK_CPU
                magma_free_pinned( hwS[1][igpu] );
                magma_free_pinned( hwS[0][igpu] );
                magma_free_pinned( hwQ2[igpu] );
                magma_free_pinned( hwQ[1][igpu] );
                magma_free_pinned( hwQ[0][igpu] );
#endif
                magma_setdevice(igpu);
                magma_queue_sync( queues[igpu][0] );
                magma_queue_sync( queues[igpu][1] );
            }
            if ( n23 == 0 )
                lapackf77_slaset("A", &n2, &rk, &d_zero, &d_zero, Q(n1,iil-1), &ldq);

            if ( n12 == 0 )
                lapackf77_slaset("A", &n1, &rk, &d_zero, &d_zero, Q(0,iil-1), &ldq);
        }
    }
    timer_stop( time );
    timer_printf( "gemms = %6.2f\n", time );

    magma_setdevice( orig_dev );
    magmablasSetKernelStream( orig_stream );
    
    return *info;
} /* magma_slaed3_m */
Beispiel #6
0
double FPEnvironmentImpl::copySignImpl(double target, double source)
{
	return (float) copysign(target, source);
}
Beispiel #7
0
EXPORT_C double
fma(double x, double y, double z)
{
	#ifndef __SYMBIAN32__
	static const double split = 0x1p27 + 1.0;
	#else
	static const double split = 134217729;
	#endif //__SYMBIAN32__
	double xs, ys, zs;
	double c, cc, hx, hy, p, q, tx, ty;
	double r, rr, s;
	int oround;
	int ex, ey, ez;
	int spread;

	if (z == 0.0)
		return (x * y);
	if (x == 0.0 || y == 0.0)
		return (x * y + z);

	/* Results of frexp() are undefined for these cases. */
	if (!isfinite(x) || !isfinite(y) || !isfinite(z))
		return (x * y + z);

	xs = frexp(x, &ex);
	ys = frexp(y, &ey);
	zs = frexp(z, &ez);
	oround = fegetround();
	spread = ex + ey - ez;

	/*
	 * If x * y and z are many orders of magnitude apart, the scaling
	 * will overflow, so we handle these cases specially.  Rounding
	 * modes other than FE_TONEAREST are painful.
	 */
	if (spread > DBL_MANT_DIG * 2) {
		fenv_t env;
		feraiseexcept(FE_INEXACT);
		switch(oround) {
		case FE_TONEAREST:
			return (x * y);
		case FE_TOWARDZERO:
			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
				return (x * y);
			feholdexcept(&env);
			r = x * y;
			if (!fetestexcept(FE_INEXACT))
				r = nextafter(r, 0);
			feupdateenv(&env);
			return (r);
		case FE_DOWNWARD:
			if (z > 0.0)
				return (x * y);
			feholdexcept(&env);
			r = x * y;
			if (!fetestexcept(FE_INEXACT))
				r = nextafter(r, -INFINITY);
			feupdateenv(&env);
			return (r);
		default:	/* FE_UPWARD */
			if (z < 0.0)
				return (x * y);
			feholdexcept(&env);
			r = x * y;
			if (!fetestexcept(FE_INEXACT))
				r = nextafter(r, INFINITY);
			feupdateenv(&env);
			return (r);
		}
	}
	if (spread < -DBL_MANT_DIG) {
		feraiseexcept(FE_INEXACT);
		if (!isnormal(z))
			feraiseexcept(FE_UNDERFLOW);
		switch (oround) {
		case FE_TONEAREST:
			return (z);
		case FE_TOWARDZERO:
			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
				return (z);
			else
				return (nextafter(z, 0));
		case FE_DOWNWARD:
			if (x > 0.0 ^ y < 0.0)
				return (z);
			else
				return (nextafter(z, -INFINITY));
		default:	/* FE_UPWARD */
			if (x > 0.0 ^ y < 0.0)
				return (nextafter(z, INFINITY));
			else
				return (z);
		}
	}

	/*
	 * Use Dekker's algorithm to perform the multiplication and
	 * subsequent addition in twice the machine precision.
	 * Arrange so that x * y = c + cc, and x * y + z = r + rr.
	 */
	fesetround(FE_TONEAREST);

	p = xs * split;
	hx = xs - p;
	hx += p;
	tx = xs - hx;

	p = ys * split;
	hy = ys - p;
	hy += p;
	ty = ys - hy;

	p = hx * hy;
	q = hx * ty + tx * hy;
	c = p + q;
	cc = p - c + q + tx * ty;

	zs = ldexp(zs, -spread);
	r = c + zs;
	s = r - c;
	rr = (c - (r - s)) + (zs - s) + cc;

	spread = ex + ey;
	if (spread + ilogb(r) > -1023) {
		fesetround(oround);
		r = r + rr;
	} else {
		/*
		 * The result is subnormal, so we round before scaling to
		 * avoid double rounding.
		 */
		#ifndef __SYMBIAN32__
		p = ldexp(copysign(0x1p-1022, r), -spread);
		#else

		p = ldexp(copysign(0, r), -spread);


		#endif //__SYMBIAN32__
		c = r + p;
		s = c - r;
		cc = (r - (c - s)) + (p - s) + rr;
		fesetround(oround);
		r = (c + cc) - p;
	}
	return (ldexp(r, spread));
}
Beispiel #8
0
PyObject*
_PyCode_ConstantKey(PyObject *op)
{
    PyObject *key;

    /* Py_None and Py_Ellipsis are singleton */
    if (op == Py_None || op == Py_Ellipsis
       || PyLong_CheckExact(op)
       || PyBool_Check(op)
       || PyBytes_CheckExact(op)
       || PyUnicode_CheckExact(op)
          /* code_richcompare() uses _PyCode_ConstantKey() internally */
       || PyCode_Check(op)) {
        key = PyTuple_Pack(2, Py_TYPE(op), op);
    }
    else if (PyFloat_CheckExact(op)) {
        double d = PyFloat_AS_DOUBLE(op);
        /* all we need is to make the tuple different in either the 0.0
         * or -0.0 case from all others, just to avoid the "coercion".
         */
        if (d == 0.0 && copysign(1.0, d) < 0.0)
            key = PyTuple_Pack(3, Py_TYPE(op), op, Py_None);
        else
            key = PyTuple_Pack(2, Py_TYPE(op), op);
    }
    else if (PyComplex_CheckExact(op)) {
        Py_complex z;
        int real_negzero, imag_negzero;
        /* For the complex case we must make complex(x, 0.)
           different from complex(x, -0.) and complex(0., y)
           different from complex(-0., y), for any x and y.
           All four complex zeros must be distinguished.*/
        z = PyComplex_AsCComplex(op);
        real_negzero = z.real == 0.0 && copysign(1.0, z.real) < 0.0;
        imag_negzero = z.imag == 0.0 && copysign(1.0, z.imag) < 0.0;
        /* use True, False and None singleton as tags for the real and imag
         * sign, to make tuples different */
        if (real_negzero && imag_negzero) {
            key = PyTuple_Pack(3, Py_TYPE(op), op, Py_True);
        }
        else if (imag_negzero) {
            key = PyTuple_Pack(3, Py_TYPE(op), op, Py_False);
        }
        else if (real_negzero) {
            key = PyTuple_Pack(3, Py_TYPE(op), op, Py_None);
        }
        else {
            key = PyTuple_Pack(2, Py_TYPE(op), op);
        }
    }
    else if (PyTuple_CheckExact(op)) {
        Py_ssize_t i, len;
        PyObject *tuple;

        len = PyTuple_GET_SIZE(op);
        tuple = PyTuple_New(len);
        if (tuple == NULL)
            return NULL;

        for (i=0; i < len; i++) {
            PyObject *item, *item_key;

            item = PyTuple_GET_ITEM(op, i);
            item_key = _PyCode_ConstantKey(item);
            if (item_key == NULL) {
                Py_DECREF(tuple);
                return NULL;
            }

            PyTuple_SET_ITEM(tuple, i, item_key);
        }

        key = PyTuple_Pack(3, Py_TYPE(op), op, tuple);
        Py_DECREF(tuple);
    }
    else if (PyFrozenSet_CheckExact(op)) {
        Py_ssize_t pos = 0;
        PyObject *item;
        Py_hash_t hash;
        Py_ssize_t i, len;
        PyObject *tuple, *set;

        len = PySet_GET_SIZE(op);
        tuple = PyTuple_New(len);
        if (tuple == NULL)
            return NULL;

        i = 0;
        while (_PySet_NextEntry(op, &pos, &item, &hash)) {
            PyObject *item_key;

            item_key = _PyCode_ConstantKey(item);
            if (item_key == NULL) {
                Py_DECREF(tuple);
                return NULL;
            }

            assert(i < len);
            PyTuple_SET_ITEM(tuple, i, item_key);
            i++;
        }
        set = PyFrozenSet_New(tuple);
        Py_DECREF(tuple);
        if (set == NULL)
            return NULL;

        key = PyTuple_Pack(3, Py_TYPE(op), op, set);
        Py_DECREF(set);
        return key;
    }
    else {
        /* for other types, use the object identifier as a unique identifier
         * to ensure that they are seen as unequal. */
        PyObject *obj_id = PyLong_FromVoidPtr(op);
        if (obj_id == NULL)
            return NULL;

        key = PyTuple_Pack(3, Py_TYPE(op), op, obj_id);
        Py_DECREF(obj_id);
    }
    return key;
}
Beispiel #9
0
double complex
csqrt(double complex z)
{
	double complex result;
	double a, b;
	double t;
	int scale;

	a = creal(z);
	b = cimag(z);

	/* Handle special cases. */
	if (z == 0)
		return (cpack(0, b));
	if (isinf(b))
		return (cpack(INFINITY, b));
	if (isnan(a)) {
		t = (b - b) / (b - b);	/* raise invalid if b is not a NaN */
		return (cpack(a, t));	/* return NaN + NaN i */
	}
	if (isinf(a)) {
		/*
		 * csqrt(inf + NaN i)  = inf +  NaN i
		 * csqrt(inf + y i)    = inf +  0 i
		 * csqrt(-inf + NaN i) = NaN +- inf i
		 * csqrt(-inf + y i)   = 0   +  inf i
		 */
		if (signbit(a))
			return (cpack(fabs(b - b), copysign(a, b)));
		else
			return (cpack(a, copysign(b - b, b)));
	}
	/*
	 * The remaining special case (b is NaN) is handled just fine by
	 * the normal code path below.
	 */

	/* Scale to avoid overflow. */
	if (fabs(a) >= THRESH || fabs(b) >= THRESH) {
		a *= 0.25;
		b *= 0.25;
		scale = 1;
	} else {
		scale = 0;
	}

	/* Algorithm 312, CACM vol 10, Oct 1967. */
	if (a >= 0) {
		t = sqrt((a + hypot(a, b)) * 0.5);
		result = cpack(t, b / (2 * t));
	} else {
		t = sqrt((-a + hypot(a, b)) * 0.5);
		result = cpack(fabs(b) / (2 * t), copysign(t, b));
	}

	/* Rescale. */
	if (scale)
		return (result * 2);
	else
		return (result);
}
Beispiel #10
0
#if __MINGW32__

#include <math.h>
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <wchar.h>
#include <float.h>
#include <assert.h>

static double zero = 0;
double Port::nan = copysign(NAN, 1.0);
double Port::infinity = 1 / zero;
double Port::dbl_max = 1.7976931348623157e308;
double Port::dbl_min = 5e-324;
longdouble Port::ldbl_max = LDBL_MAX;

struct PortInitializer
{
    PortInitializer();
};

static PortInitializer portinitializer;

PortInitializer::PortInitializer()
{
    assert(!signbit(Port::nan));
void test2(double x, double y)
{
  if (-tan(x-y) != tan(y-x))
    link_error ();

  if (-sin(x-y) != sin(y-x))
    link_error ();

  if (cos(-x*y) != cos(x*y))
    link_error ();

  if (cos(x*-y) != cos(x*y))
    link_error ();

  if (cos(-x/y) != cos(x/y))
    link_error ();

  if (cos(x/-y) != cos(x/y))
    link_error ();

  if (cos(-fabs(tan(x/-y))) != cos(tan(x/y)))
    link_error ();

  if (cos(y<10 ? -x : y) != cos(y<10 ? x : y))
    link_error ();

  if (cos(y<10 ? x : -y) != cos(y<10 ? x : y))
    link_error ();

  if (cos(y<10 ? -fabs(x) : tan(x<20 ? -x : -fabs(y)))
      != cos(y<10 ? x : tan(x<20 ? x : y)))
    link_error ();

  if (cos((y*=3, -x)) != cos((y*=3,x)))
    link_error ();

  if (cos((y*=2, -fabs(tan(x/-y)))) != cos((y*=2,tan(x/y))))
    link_error ();

  if (cos(copysign(x,y)) != cos(x))
    link_error ();

  if (cos(copysign(-fabs(x),y*=2)) != cos((y*=2,x)))
    link_error ();

  if (hypot (x, 0) != fabs(x))
    link_error ();

  if (hypot (0, x) != fabs(x))
    link_error ();

  if (hypot (x, x) != fabs(x) * __builtin_sqrt(2))
    link_error ();

  if (hypot (-x, y) != hypot (x, y))
    link_error ();

  if (hypot (x, -y) != hypot (x, y))
    link_error ();

  if (hypot (-x, -y) != hypot (x, y))
    link_error ();

  if (hypot (fabs(x), y) != hypot (x, y))
    link_error ();

  if (hypot (x, fabs(y)) != hypot (x, y))
    link_error ();

  if (hypot (fabs(x), fabs(y)) != hypot (x, y))
    link_error ();

  if (hypot (-fabs(-x), -fabs(fabs(fabs(-y)))) != hypot (x, y))
    link_error ();

  if (hypot (-x, 0) != fabs(x))
    link_error ();

  if (hypot (-x, x) != fabs(x) * __builtin_sqrt(2))
    link_error ();

  if (hypot (pure(x), -pure(x)) != fabs(pure(x)) * __builtin_sqrt(2))
    link_error ();

  if (hypot (tan(-x), tan(-fabs(y))) != hypot (tan(x), tan(y)))
    link_error ();

  if (fmin (fmax(x,y),y) != y)
    link_error ();

  if (fmin (fmax(y,x),y) != y)
    link_error ();

  if (fmin (x,fmax(x,y)) != x)
    link_error ();
  
  if (fmin (x,fmax(y,x)) != x)
    link_error ();
  
  if (fmax (fmin(x,y),y) != y)
    link_error ();

  if (fmax (fmin(y,x),y) != y)
    link_error ();

  if (fmax (x,fmin(x,y)) != x)
    link_error ();
  
  if (fmax (x,fmin(y,x)) != x)
    link_error ();

  if ((__complex__ double) x != -(__complex__ double) (-x))
    link_error ();

  if (x*1i != -(-x*1i))
    link_error ();

  if (x+(x-y)*1i != -(-x+(y-x)*1i))
    link_error ();

  if (x+(x-y)*1i != -(-x-(x-y)*1i))
    link_error ();

  if (ccos(tan(x)+sin(y)*1i) != ccos(-tan(-x)+-sin(-y)*1i))
    link_error ();

  if (ccos(tan(x)+sin(x-y)*1i) != ccos(-tan(-x)-sin(y-x)*1i))
    link_error ();

  if (-5+x*1i != -~(5+x*1i))
    link_error ();

  if (tan(x)+tan(y)*1i != -~(tan(-x)+tan(y)*1i))
    link_error ();
}
Beispiel #12
0
bool _Stl_is_neg_nan(double x)    { return isnan(x) && ( copysign(1., x) < 0 );  }
Beispiel #13
0
// bool _Stl_is_neg_inf(double x)    { return _class(x) == FP_MINUS_INF; }
bool _Stl_is_neg_inf(double x)    { return _Stl_is_inf(x) && ( copysign(1., x) < 0 );  }
Beispiel #14
0
inline bool _Stl_is_neg_nan(double x)    { return isnan(x) && copysign(1., x) < 0 ; } 
Beispiel #15
0
int
main (void)
{
  int result = 0;

  float i = INFINITY;
  float m = FLT_MAX;
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafterf (m, i) != i)
    {
      puts ("nextafterf+ failed");
      ++result;
    }
  if (fetestexcept (FE_OVERFLOW) == 0)
    {
      puts ("nextafterf+ did not overflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafterf (-m, -i) != -i)
    {
      puts ("nextafterf- failed");
      ++result;
    }
  if (fetestexcept (FE_OVERFLOW) == 0)
    {
      puts ("nextafterf- did not overflow");
      ++result;
    }

  i = 0;
  m = FLT_MIN;
  feclearexcept (FE_ALL_EXCEPT);
  i = nextafterf (m, i);
  if (i < 0 || i >= FLT_MIN)
    {
      puts ("nextafterf+ failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterf+ did not underflow");
      ++result;
    }
  i = 0;
  feclearexcept (FE_ALL_EXCEPT);
  i = nextafterf (-m, -i);
  if (i > 0 || i <= -FLT_MIN)
    {
      puts ("nextafterf- failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterf- did not underflow");
      ++result;
    }
  i = -INFINITY;
  feclearexcept (FE_ALL_EXCEPT);
  m = nextafterf (zero, inf);
  if (m < 0.0 || m >= FLT_MIN)
    {
      puts ("nextafterf+ failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterf+ did not underflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafterf (m, i) != 0.0)
    {
      puts ("nextafterf+ failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterf+ did not underflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  m = nextafterf (copysignf (zero, -1.0), -inf);
  if (m > 0.0 || m <= -FLT_MIN)
    {
      puts ("nextafterf- failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterf- did not underflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafterf (m, -i) != 0.0)
    {
      puts ("nextafterf- failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterf- did not underflow");
      ++result;
    }

  double di = INFINITY;
  double dm = DBL_MAX;
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafter (dm, di) != di)
    {
      puts ("nextafter+ failed");
      ++result;
    }
  if (fetestexcept (FE_OVERFLOW) == 0)
    {
      puts ("nextafter+ did not overflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafter (-dm, -di) != -di)
    {
      puts ("nextafter failed");
      ++result;
    }
  if (fetestexcept (FE_OVERFLOW) == 0)
    {
      puts ("nextafter- did not overflow");
      ++result;
    }

  di = 0;
  dm = DBL_MIN;
  feclearexcept (FE_ALL_EXCEPT);
  di = nextafter (dm, di);
  if (di < 0 || di >= DBL_MIN)
    {
      puts ("nextafter+ failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafter+ did not underflow");
      ++result;
    }
  di = 0;
  feclearexcept (FE_ALL_EXCEPT);
  di = nextafter (-dm, -di);
  if (di > 0 || di <= -DBL_MIN)
    {
      puts ("nextafter- failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafter- did not underflow");
      ++result;
    }
  di = -INFINITY;
  feclearexcept (FE_ALL_EXCEPT);
  dm = nextafter (zero, inf);
  if (dm < 0.0 || dm >= DBL_MIN)
    {
      puts ("nextafter+ failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafter+ did not underflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafter (dm, di) != 0.0)
    {
      puts ("nextafter+ failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafter+ did not underflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  dm = nextafter (copysign (zero, -1.0), -inf);
  if (dm > 0.0 || dm <= -DBL_MIN)
    {
      puts ("nextafter- failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafter- did not underflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafter (dm, -di) != 0.0)
    {
      puts ("nextafter- failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafter- did not underflow");
      ++result;
    }

#ifndef NO_LONG_DOUBLE
  long double li = INFINITY;
  long double lm = LDBL_MAX;
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafterl (lm, li) != li)
    {
      puts ("nextafterl+ failed");
      ++result;
    }
  if (fetestexcept (FE_OVERFLOW) == 0)
    {
      puts ("nextafterl+ did not overflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafterl (-lm, -li) != -li)
    {
      puts ("nextafterl failed");
      ++result;
    }
  if (fetestexcept (FE_OVERFLOW) == 0)
    {
      puts ("nextafterl- did not overflow");
      ++result;
    }

  li = 0;
  lm = LDBL_MIN;
  feclearexcept (FE_ALL_EXCEPT);
  li = nextafterl (lm, li);
  if (li < 0 || li >= LDBL_MIN)
    {
      puts ("nextafterl+ failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterl+ did not underflow");
      ++result;
    }
  li = 0;
  feclearexcept (FE_ALL_EXCEPT);
  li = nextafterl (-lm, -li);
  if (li > 0 || li <= -LDBL_MIN)
    {
      puts ("nextafterl- failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterl- did not underflow");
      ++result;
    }
  li = -INFINITY;
  feclearexcept (FE_ALL_EXCEPT);
  lm = nextafterl (zero, inf);
  if (lm < 0.0 || lm >= LDBL_MIN)
    {
      puts ("nextafterl+ failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterl+ did not underflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafterl (lm, li) != 0.0)
    {
      puts ("nextafterl+ failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterl+ did not underflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  lm = nextafterl (copysign (zero, -1.0), -inf);
  if (lm > 0.0 || lm <= -LDBL_MIN)
    {
      puts ("nextafterl- failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterl- did not underflow");
      ++result;
    }
  feclearexcept (FE_ALL_EXCEPT);
  if (nextafterl (lm, -li) != 0.0)
    {
      puts ("nextafterl- failed");
      ++result;
    }
  if (fetestexcept (FE_UNDERFLOW) == 0)
    {
      puts ("nextafterl- did not underflow");
      ++result;
    }
#endif

  return result;
}
Beispiel #16
0
int CDataset::extractFeatures(const CConfig& conf){

    int imgRow = this->img.at(0)->rows, imgCol = this->img.at(0)->cols;
    cv::Mat *integralMat;

    if(conf.learningMode != 1){
        if(conf.rgbFeature == 1){ // if got rgb image only, calc hog feature
            feature.clear();
            feature.resize(32);
            for(int i = 0; i < 32; ++i)
                feature.at(i) = new cv::Mat(imgRow, imgCol, CV_8UC1);

            cv::cvtColor(*img.at(0), *(feature.at(0)), CV_RGB2GRAY);

            cv::Mat I_x(imgRow, imgCol, CV_16SC1);
            cv::Mat I_y(imgRow, imgCol, CV_16SC1);


            cv::Sobel(*(feature.at(0)), I_x, CV_16S, 1, 0);
            cv::Sobel(*(feature.at(0)), I_y, CV_16S, 0, 1);

            cv::convertScaleAbs(I_x, *(feature[3]), 0.25);
            cv::convertScaleAbs(I_y, *(feature[4]), 0.25);

            // Orientation of gradients
            for(int  y = 0; y < img.at(0)->rows; y++)
                for(int  x = 0; x < img.at(0)->cols; x++) {
                    // Avoid division by zero
                    float tx = (float)I_x.at<short>(y, x) + (float)copysign(0.000001f, I_x.at<short>(y, x));
                    // Scaling [-pi/2 pi/2] -> [0 80*pi]
                    feature.at(1)->at<uchar>(y, x) = (uchar)(( atan((float)I_y.at<short>(y, x) / tx) + 3.14159265f / 2.0f ) * 80);
                    //std::cout << "scaling" << std::endl;
                    feature.at(2)->at<uchar>(y, x) = (uchar)sqrt((float)I_x.at<short>(y, x)* (float)I_x.at<short>(y, x) + (float)I_y.at<short>(y, x) * (float)I_y.at<short>(y, x));
                }

            // Magunitude of gradients
            for(int y = 0; y < img.at(0)->rows; y++)
                for(int x = 0; x < img.at(0)->cols; x++ ) {
                    feature.at(2)->at<uchar>(y, x) = (uchar)sqrt(I_x.at<short>(y, x)*I_x.at<short>(y, x) + I_y.at<short>(y, x) * I_y.at<short>(y, x));
                }

            hog.extractOBin(feature[1], feature[2], feature, 7);

            // calc I_xx I_yy
            cv::Sobel(*(feature.at(0)), I_x, CV_16S, 2, 0);
            cv::Sobel(*(feature.at(0)), I_y, CV_16S, 0, 2);

            cv::convertScaleAbs(I_x, *(feature[5]), 0.25);
            cv::convertScaleAbs(I_y, *(feature[6]), 0.25);

            cv::Mat img_Lab;
            cv::cvtColor(*img.at(0), img_Lab, CV_RGB2Lab);
            cv::vector<cv::Mat> tempfeature(3);

            cv::split(img_Lab, tempfeature);

            for(int i = 0; i < 3; ++i)
                tempfeature.at(i).copyTo(*(feature.at(i)));

            // min max filter
            for(int c = 0; c < 16; ++c)
                minFilter(feature[c], feature[c + 16], 5);
            for(int c = 0; c < 16; ++c)
                maxFilter(feature[c], feature[c], 5);

        }else{
            feature.clear();

            // calc gray integral image
            cv::Mat grayImg(imgRow + 1, imgCol, CV_8U);
            cv::cvtColor(*img.at(0), grayImg, CV_RGB2GRAY);
            integralMat = new cv::Mat(imgRow + 1, imgCol + 1, CV_64F);
            cv::integral(grayImg, *integralMat, CV_64F);
            feature.push_back(integralMat);

            // calc r g b integral image
            std::vector<cv::Mat> splittedRgb;
            cv::split(*img.at(0), splittedRgb);
            for(int i = 0; i < splittedRgb.size(); ++i){
                integralMat = new cv::Mat(imgRow + 1, imgCol + 1, CV_64F);
                cv::integral(splittedRgb.at(i), *integralMat, CV_64F);
                feature.push_back(integralMat);
            }



            featureFlag = 1;
        }
    }

    if(img.size() > 1){
        cv::Mat tempDepth = cv::Mat(img.at(0)->rows, img.at(0)->cols, CV_8U);// = *img.at(1);

        if(img.at(1)->type() != CV_8U)
            img.at(1)->convertTo(tempDepth, CV_8U, 255.0 / (double)(conf.maxdist - conf.mindist));
        else
            tempDepth = *img.at(1);
        integralMat = new cv::Mat(imgRow + 1, imgCol + 1, CV_64F);
        cv::integral(tempDepth, *integralMat, CV_64F);
        feature.push_back(integralMat);

        featureFlag  = 1;
    }

    return 0;
}
Beispiel #17
0
int main(int ac, char** av) {
    //Variables to be assigned by program options
    double height_map_resolution;
    double north_bound;
    double south_bound;
    double east_bound;
    double west_bound;
    int sun_angles;
    int times_per_year;
    int start_day;
    double summer_angle_panel;
    double winter_angle_panel;
    bool use_terrain_normals = true;
    bool compute_best_fixed_angle = false;
    bool compute_best_summer_winter_angle = false;

    
    std::string input_file;
    std::string output_file;
    bool verbose = false;
    bool elevation_dependant_sun_intensity = false;
    
    // Declare the supported options.
    po::options_description op_desc("Allowed options");
    op_desc.add_options()
    ("help", "print options table")
    ("input-file,i", po::value<std::string>(&input_file), "File containing height map data in x<space>y<space>z<newline> swiss coordinates format")
    ("output-file-base,o", po::value<std::string>(&output_file)->default_value("data_out"), "Base filname for output files (default data_out)")
    ("resolution,R", po::value<double>(&height_map_resolution)->default_value(200.0), "resolution of data (default: 200.0)")
    ("nmax",po::value<double>(&north_bound)->default_value(1e100), "maximum north coordinate to be treated (default 1e100)")
    ("nmin",po::value<double>(&south_bound)->default_value(-1e100), "minimum north coordinate to be treated (default -1e100)")
    ("emax",po::value<double>(&east_bound)->default_value(1e100), "maximum east coordinate to be treated (default 1e100)")
    ("emin",po::value<double>(&west_bound)->default_value(-1e100), "minimum east coordinate to be treated (default -1e100)")
    ("elevation,E", "Include effect of terrain elevation in the computation of sun intensity")
    ("sunangles,s", po::value<int>(&sun_angles)->default_value(360), "number of angles to compute sunlight from (default 360)")
    ("times,t", po::value<int>(&times_per_year)->default_value(12), "number of times per year to calculate at (default 12)")
    ("start-day, D", po::value<int>(&start_day)->default_value(20), "Day of the year to output first data. (default 20)")
    ("fixed-angle, F", po::value<double>(&summer_angle_panel), "Use fixed angle for solar panel inclination to compute sun intensity.")
    ("summer-angle", po::value<double>(&summer_angle_panel), "Fixed angle for solar panel inclination during summer to compute sun intensity..")
    ("winter-angle", po::value<double>(&winter_angle_panel), "Fixed angle for solar panel inclination during winter to compute sun intensity..")
    ("best-fixed-angle, B", "Use best fixed angle for this latitude to compute sun intensity.")
    ("best-two-season-angles, BSW", "Use best summer and winter angle for this latitude to compute sun intensity.")
    ("terrain-normals, T", "Use terrain normal for each point to compute sun intensity, default option if none of the solar panel angles options are specified.")
    ("verbose, v", "Verbose: output lots of text")
    ;
    
    po::positional_options_description pd;
    pd.add("input-file", 1).add("output-file", 1);
    
    po::variables_map vm;
    po::store(po::parse_command_line(ac, av, op_desc), vm);
    po::notify(vm);
    
    if (vm.count("help")) {
        std::cout <<"CrunchGeoData [options] [input file] [output base]"<<std::endl<< op_desc << std::endl;
        return 1;
    }
    if(vm.count("fixed-angle")){
        winter_angle_panel = summer_angle_panel;
        use_terrain_normals = false;
    }
    if(vm.count("summer-angle") && !vm.count("winter-angle")){
        std::cout << "Please specify also a winter angle, or use option fixed-angle" << std::endl;
        exit(255);
    }
    if(vm.count("best-fixed-angle")){
        use_terrain_normals = false;
        compute_best_fixed_angle = true;
    }
    if(vm.count("best-two-season-angles")){
        use_terrain_normals = false;
        compute_best_summer_winter_angle = true;
    }
    
    if (vm.count("elevation")){
        elevation_dependant_sun_intensity = true;
    }
    if (vm.count("verbose")){
        verbose = true;
    }
    if(!vm.count("input-file")){
        std::cout << "Input file must be specified" << std::endl;
        exit(255);
    }
    
    
    std::unordered_map<vector3d, std::vector<double>, hash> grid_points;   //grid_points is unordered_map of all points in bounding box with
                                                                            //a vector to hold average sun power for the days sun is computed
    
    double north_x_max;
    double east_y_max;
    double south_x_min;
    double west_y_min;
    
    import_heightmap(input_file, south_bound, north_bound, east_bound, west_bound,
                     times_per_year, grid_points ,north_x_max, south_x_min, east_y_max, west_y_min);
    
    
    std::pair<double,double> NE = swiss_to_lat_lon(north_x_max+height_map_resolution/2.0, east_y_max+height_map_resolution/2.0);
    std::pair<double,double> SW = swiss_to_lat_lon(south_x_min-height_map_resolution/2.0, west_y_min-height_map_resolution/2.0);
    
    std::cout << "NE: " << north_x_max <<", " << east_y_max << " SW: "<< south_x_min << " , " << west_y_min <<  std::endl;
    std::cout << std::setprecision(9) << "NE: " << NE.first <<", " << NE.second << " SW: "<< SW.first << ", " << SW.second <<  std::endl;
    std::cout << "Number of points in dataset: " << grid_points.size() << std::endl;
    
    double average_latitude=((NE.first+SW.first)/2.0)*M_PI/180.0;
    vector3d summer_normal_panel;
    vector3d winter_normal_panel;

    if(!use_terrain_normals){
        if(compute_best_fixed_angle){
        }
        if(compute_best_summer_winter_angle){
        }
        if(average_latitude > 0){  //if latitude is greater than zero, point south
            summer_normal_panel.x = -1*sin(summer_angle_panel*M_PI/180);
            summer_normal_panel.y = 0;
            summer_normal_panel.z = 1*cos(summer_angle_panel*M_PI/180);

            winter_normal_panel.x = -1*sin(winter_angle_panel*M_PI/180);
            winter_normal_panel.y = 0;
            winter_normal_panel.z = 1*cos(winter_angle_panel*M_PI/180);
        }
        else{  //if latitude is less than zero, point north
            summer_normal_panel.x = 1*sin(summer_angle_panel*M_PI/180);
            summer_normal_panel.y = 0;
            summer_normal_panel.z = 1*cos(summer_angle_panel*M_PI/180);
            
            winter_normal_panel.x = 1*sin(winter_angle_panel*M_PI/180);
            winter_normal_panel.y = 0;
            winter_normal_panel.z = 1*cos(winter_angle_panel*M_PI/180);
        }

    }
    
    
    std::vector<std::vector<double> > sun_elevation_angle(times_per_year,std::vector<double>(sun_angles));
    //Elevation angle PHI of sun for a day and hour
    
    std::vector<std::vector<double> > sun_intensity_day_angle(times_per_year,std::vector<double>(sun_angles));
    
    std::vector<std::vector <vector3d> > sun_vec_day_angle(times_per_year,std::vector<vector3d>(sun_angles));
    //Unit vectors for sun's direction in day, hour.
    
    
    for(int day = 0; day < times_per_year; day++){
        double N=(365.0*day)/times_per_year+start_day;  //We are computing values for the Nth day of the year
        double phi_axis=-asin(0.39779*cos((0.98565*(N+10)+1.914*sin(0.98565*(N-2)*M_PI/180))*M_PI/180));  //Earth axis inclination for day N
        for(int thH=0;thH<sun_angles;thH++){//Angle theta for sun; 0 is North, +90 is West.
            int indTH= thH;
            double thHrad = (180-thH)*M_PI/180.0;  //turn around for sun formula angle
            double phiElv=asin(sin(average_latitude)*sin(phi_axis)+cos(average_latitude)*cos(thHrad)*cos(phi_axis));  //Sun elevation, imperical fomula, see wikipedia
            
            thHrad = M_PI-thHrad;          //In our coordinate system (North x+, West y+) we must inverse the sun theta. Theta corresponds to sun ray direction.
            
            vector3d sun_vec(cos(phiElv) * cos(thHrad) , cos(phiElv) * sin(thHrad) , sin(phiElv));  //Unit vector for direction sun ray come from
            sun_elevation_angle[(int)day][indTH]=phiElv;  //stick sun elevation in its datastructure
            
            sun_vec_day_angle[(int)day][indTH]=sun_vec;  //stick sun vector in its datastructure
            
            double air_mass_coefficient =sqrt(708.0*708.0*sin(phiElv)*sin(phiElv)+2.0*708.0+1.0)-708.0*sin(phiElv);  //wikipedia
            sun_intensity_day_angle[(int)day][indTH]=pow(0.7,pow(air_mass_coefficient,0.678))/0.7;  //stick sun intensity in its datastructure
        }
    }
    
    
    
    
    std::vector<double> max_sun_intensity;
    max_sun_intensity.assign(times_per_year, 0);
    std::vector<double> min_sun_intensity;
    min_sun_intensity.assign(times_per_year, 1e12);
    
    
    int N=0;
    
    std::cout <<"   Progress: "<< 100*(N*1.0)/(grid_points.size()*1.0) <<" %    \n";
    
    for(auto grid_point : grid_points){
        vector3d v = grid_point.first;
        N++;
        
        if (1) {
            std::cout <<"   Progress: "<< 100.0*(N*1.0)/(grid_points.size()*1.0) <<" %  ("<<N<<")    \r";
            std::cout.flush();
        }
        vector3d vNx(v.x+height_map_resolution*10,v.y,0);            //get points 10xresolution to the north, west, south and east
        vector3d vWx(v.x,v.y+height_map_resolution*10,0);
        vector3d vSx(v.x-height_map_resolution*10,v.y,0);
        vector3d vEx(v.x,v.y-height_map_resolution*10,0);
        auto itEx=grid_points.find(vEx);
        auto itNx=grid_points.find(vNx);
        auto itWx=grid_points.find(vWx);
        auto itSx=grid_points.find(vSx);
        if (itEx == grid_points.end()||itNx == grid_points.end()||itWx == grid_points.end()||itSx == grid_points.end()){
            std::vector<double> L;
            L.assign(times_per_year,-1.0);  //border points get value -1.0 to indicate we have coputed no value for them
            grid_points[v]=L;
            continue;
        }
        
        //v = *it_v;
        
        vector3d vN(v.x+height_map_resolution,v.y,0);            //get points to the north, west, south and east
        vector3d vW(v.x,v.y+height_map_resolution,0);
        vector3d vS(v.x-height_map_resolution,v.y,0);
        vector3d vE(v.x,v.y-height_map_resolution,0);
        
        auto itE=grid_points.find(vE);
        auto itN=grid_points.find(vN);
        auto itW=grid_points.find(vW);
        auto itS=grid_points.find(vS);
        
        if (itE == grid_points.end()||itN == grid_points.end()||itW == grid_points.end()||itS == grid_points.end()){
            exit(-1);
        }
        
        vE=itE->first;
        vN=itN->first;
        vW=itW->first;
        vS=itS->first;
        
        vector3d Normal = ((((vE-v)^(vN-v))+((vW-v)^(vS-v)))/2).norm();       //normal is the crossproduct of two prependicular differences. Avgd.
        
        
        std::vector<double> horizon_elevation_angles;   //vector to hold elevation angles at theta
        horizon_elevation_angles.assign(sun_angles, 0);
        //    std::vector<vector3d> horizon(sun_angles);
        std::vector<double> dists(sun_angles);
        
        size_t edge_points = 0;
        size_t interior_points = 0;
        
        for(int theta = 0; theta<sun_angles;theta++){        //compute for each angle theta
            double th = theta*M_PI/180;
            {
                double sin_theta = sin(th);   //calculate sin of the angle in radians
                double yend = EQ_DBL(copysign(1,sin_theta),1)?east_y_max:0;     //if the sinus is positive endvalue is east_y_max, if negative 0
                for(double y=v.y+copysign(height_map_resolution,sin_theta);(y >= 0 && y <= east_y_max);y+=copysign(height_map_resolution,sin_theta)){//increase/decrease if theta +/-
                    double x = v.x+(y-v.y)/tan(th);                             //find the x that goes with y for this theta
                    double x1 = floor(x-((int)x%(int)height_map_resolution));       //find the nearest lower gridpoint by subtracting remainder according to height_map_resolution
                    double x2 = x1 + height_map_resolution;                  //Add height_map_resolution to get nearest higher gridpoint
                    double phi = horizon_elevation_angles[theta];
                    double dist = (v-vector3d(x,y,v.z)).length();
                    double phiMaxTheta = atan(5000.0/dist);      //maximal phi at this theta (with height 5000 m)
                    
                    
                    if(x1>=north_x_max||x1<0||x2>=north_x_max||x2<0||phi>phiMaxTheta){
                        break;
                    }
                    if((int)x%(int)height_map_resolution){                       //if x is not a grid point
                        
                        auto it_vec1 = grid_points.find(vector3d(x1,y,0));      //get the two gridpoints from the set
                        auto it_vec2 = grid_points.find(vector3d(x2,y,0));
                        if (it_vec1 == grid_points.end()||it_vec2 == grid_points.end()){
                            edge_points++;
                            continue;
                        }
                        vector3d vec1 = it_vec1->first;
                        vector3d vec2 = it_vec2->first;
                        double height = vec1.z*(x2-x)/height_map_resolution + vec2.z*(x-x1)/height_map_resolution-v.z;       //compute height at x via linear interpolation
                        dist=(v-vector3d(x,y,v.z)).length();
                        phi = atan(height/dist);

                        if(phi>horizon_elevation_angles[theta]){//see if larger
                            horizon_elevation_angles[theta]=phi;
                            dists[theta] = dist/1000;
                        }
                        
                    }
                    else{//if x is a gridpoint
                        auto it_vec = grid_points.find(vector3d(x,y,0));  //get vector
                        if (it_vec == grid_points.end()){
                            //exit(-1);
                            edge_points++;
                            continue;
                        }
                        vector3d vec = it_vec->first;
                        double height = vec.z-v.z;                          //get height
                        double dist=(v-vector3d(x,y,v.z)).length();
                        double phi = atan(height/dist);
                        if(phi>horizon_elevation_angles[theta]){//see if larger
                            horizon_elevation_angles[theta]=phi;
                            dists[theta] = dist/1000;
                        }
                        
                    }
                    interior_points++;
                }
            }
            double cos_theta = cos(th);
            double xend = EQ_DBL(copysign(1,cos_theta),1)?north_x_max:0;     //if the cosinus is positive endvalue is north_x_max, if negative 0
            
            for(double x=v.x+copysign(height_map_resolution,cos_theta);(x >= 0 && x <= north_x_max);x+=copysign(height_map_resolution,cos_theta)){//increase/decrease if theta +/-
                double y = v.y+(x-v.x)*tan(th);                             //find the y that goes with x for this theta
                double y1 = floor(y-((int)y%(int)height_map_resolution));       //find the nearest lower gridpoint by subtracting remainder according to height_map_resolution
                double y2 = y1 + height_map_resolution;                  //Add height_map_resolution to get nearest higher gridpoint
                double phi = horizon_elevation_angles[theta];
                double dist = sqrt((v.x-x)*(v.x-x) + (v.y-y)*(v.y-y));
                double phiMaxTheta = atan(5000.0/dist);      //maximal phi at this theta (with height 5000 m)
                
                if(y1>=north_x_max||y1<0||y2>=north_x_max||y2<0||phi>phiMaxTheta){
                    break;
                }
                if((int)y%(int)height_map_resolution){                       //if y is not a grid point
                    auto it_vec1 = grid_points.find(vector3d(x,y1,0));      //get the two gridpoints from the set
                    auto it_vec2 = grid_points.find(vector3d(x,y2,0));
                    if (it_vec1 == grid_points.end()||it_vec2 == grid_points.end()){
                        //exit(-1);
                        edge_points++;
                        continue;
                    }
                    vector3d vec1 = it_vec1->first;
                    vector3d vec2 = it_vec2->first;
                    double height = vec1.z*(y2-y)/height_map_resolution + vec2.z*(y-y1)/height_map_resolution-v.z;       //compute height at y via linear interpolation
                    double dist=(v-vector3d(x,y,v.z)).length();
                    double phi = atan(height/dist);
                    if(phi>horizon_elevation_angles[theta]){//see if larger
                        horizon_elevation_angles[theta]=phi;
                        dists[theta] = dist/1000;
                    }
                    
                }
                else{//if y is a gridpoint
                    auto it_vec = grid_points.find(vector3d(x,y,0));  //get vector
                    if (it_vec == grid_points.end()){
                        //exit(-1);
                        edge_points++;
                        continue;
                    }
                    vector3d vec = it_vec->first;
                    double height = vec.z-v.z;                          //get height
                    double dist=(v-vector3d(x,y,v.z)).length();
                    double phi = atan(height/dist);
                    if(phi>horizon_elevation_angles[theta]){//see if larger
                        horizon_elevation_angles[theta]=phi;
                        dists[theta] = dist/1000;
                    }
                    
                }
                interior_points++;
            }
            
            
            
            
        }
        
        
        
        
        int k = 0;
        
        std::vector<double> average_sun_intensity;
        average_sun_intensity.assign(times_per_year, 0.0);
        double sun_intensity = 0;
        
        for(int j=0;j<times_per_year;j++){
            for (k=0;k<sun_angles;k++) {
                if(horizon_elevation_angles[k]>sun_elevation_angle[j][k]){
                    sun_intensity=0;
                }
                else{
                    double height = 0;
                    if(elevation_dependant_sun_intensity){
                        height = v.z;
                    }
                    //spherical shell approximation for air mass attenuation, see wikipedia
                    // http://en.wikipedia.org/wiki/Air_mass_%28solar_energy%29
                    double phi = sun_elevation_angle[j][k];
                    double R_E = 6371000.0;      //Earth radius, meters
                    double y_atm = 9000.0;       //Athmospheric thickness, meters
                    double r = R_E/y_atm;
                    double c = height/y_atm;
                    double air_mass_coefficient = sqrt((r+c)*(r+c)*cos(phi)*cos(phi) + (2*r+1+c)*(1-c))-(r+c)*cos(phi);
                    double I_0 = 1.353; // kW/m^2
                    double I = 1.1 * I_0 * pow(0.7, pow(air_mass_coefficient, 0.678));
                    sun_intensity = I * (sun_vec_day_angle[j][k] * Normal);
                    
                    average_sun_intensity[j]+=sun_intensity;
                    
                }
                average_sun_intensity[j] = average_sun_intensity[j] / sun_angles;
                grid_point.second[j]=average_sun_intensity[j];
                max_sun_intensity[j] = (average_sun_intensity[j] > max_sun_intensity[j])?average_sun_intensity[j]:max_sun_intensity[j];
                min_sun_intensity[j] = (average_sun_intensity[j] < min_sun_intensity[j])?average_sun_intensity[j]:min_sun_intensity[j];
            }
            assert(average_sun_intensity.size() == times_per_year);
            grid_points[v]=average_sun_intensity;
            
            assert(grid_point.second.size() == times_per_year);
            
            
        }
    }
        std::cout << grid_points.size() << std::endl;
    
        for(int k = 0; k < times_per_year; ++k){
            std::ostringstream oss("");
            oss << k;
            std::ofstream ofs(output_file + oss.str() + ".xyz");
            if (!ofs.is_open()){
                exit(-2);
            }
            for(auto grid_point : grid_points){
                vector3d v = grid_point.first;
                if(grid_point.second.size()>k){
                    ofs << v.x << " " << v.y << " " << v.z << " ";
                    ofs << grid_points[v][k] << std::endl;
                }
            }
        }
        return 0;
    }
Beispiel #18
0
double tgamma ( double x )
      {
      register int n, parity, i;
      register double y, y1, result, fact, IsItAnInt, z, numerator, 
	                denominator, ysquared, sum; 
      hexdouble OldEnvironment;
    
      FEGETENVD( OldEnvironment.d );               // save environment, set default
      FESETENVD( 0.0 );
	
/*******************************************************************************
*     The next switch will decipher what sort of argument we have. If argument *
*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
*******************************************************************************/

      switch ( __fpclassifyd ( x ) )
            {
            case FP_NAN:
                  x *= 2.0;                  /* quiets NaN */
                  FESETENVD( OldEnvironment.d ); //   restore caller's environment
                  return x;
                  
            case FP_ZERO:
                  OldEnvironment.i.lo |= FE_DIVBYZERO;
                  FESETENVD( OldEnvironment.d );
                  return copysign( Huge.d, x);

             case FP_INFINITE:
                  if ( x > 0.0 )
                        x = Huge.d;
                  else
                        {
                        x = nan ( GAMMA_NAN );
                        OldEnvironment.i.lo |= SET_INVALID;
                        }

                  FESETENVD( OldEnvironment.d );
                  return x;
                  
            default:                  /*      NORMALNUM and DENORMALNUM      */
                  break;
            }
      
      parity = 0;
      fact = 1.0;
      n = 0;
      y = x;

/*******************************************************************************
*     The argument is negative.                                                *
*******************************************************************************/

      if ( y <= 0.0 )
            {
            y = - x;
			if ( y < MinimumX )
			      {
                  OldEnvironment.i.lo |= FE_OVERFLOW;
                  FESETENVD( OldEnvironment.d );
                  return MinusHuge.d;
				  }
            y1 = trunc ( y );
            IsItAnInt = y - y1;
            if ( IsItAnInt != 0.0 )                   /* is it an integer?   */
                  {                                   /* is it odd or even?  */
                  if ( y1 != trunc ( y1 * 0.5 ) * 2.0 ) parity = 1;
                  fact = - pi / sin ( pi * IsItAnInt );
                  y += 1.0;
                  }
            else
                  {
                  OldEnvironment.i.lo |= SET_INVALID;
                  FESETENVD( OldEnvironment.d );
                  return nan ( GAMMA_NAN );
                  }
            }

/*******************************************************************************
*     The argument is positive.                                                *
*******************************************************************************/

     if ( y < eps )                         /* argument is less than epsilon. */
            {
            if ( y >= MinimumX )          /* x is in [MinimumX,eps].          */
                  result = 1.0 / y;
            else                          /* othewise, x is in [0,MinimumX).  */
                  {
                  OldEnvironment.i.lo |= FE_OVERFLOW;
                  FESETENVD( OldEnvironment.d );
                  return Huge.d;
                  }
            }
      else if ( y < 12.0 )                 /* argument x is eps < x < 12.0.  */
            {
            y1 = y;
            if ( y < 1.0 )                 /* x is in (eps, 1.0).            */
                  {
                  z = y;
                  y += 1.0;
                  }
            else                           /* x is in [1.0,12.0].            */
                  {
                  n = ( int ) y - 1;
                  y -= ( double ) n;
                  z = y - 1.0;
                  }
            numerator = 0.0;
            denominator = 1.0;
            for ( i = 0; i < 8; i++ )
                  {
                  numerator = ( numerator + p[i] ) * z;
                  denominator = denominator * z + q[i];
                  }
            result = numerator / denominator + 1.0;
            if ( y1 < y )
                  result /= y1;
            else if ( y1 > y )
                  {
                  for ( i = 0; i < n; i++ )
                        {
                        result *= y;
                        y += 1.0;
                        }
                  }
            }
      else
            {
            if ( x <= xbig )
                  {
                  ysquared = y * y;
                  sum = c[6];
                  for ( i = 0; i < 6; i++ )
                        sum = sum / ysquared + c[i];
                  sum = sum / y - y + LogSqrt2pi;
                  sum += ( y - 0.5 ) * log ( y );
                  result = exp ( sum );
                  }
            else
                  {
                  OldEnvironment.i.lo |= FE_OVERFLOW;
                  FESETENVD( OldEnvironment.d );         //   restore caller's environment
                  return Huge.d;
                  }
            }
      if ( parity ) result = - result;
      if ( fact != 1.0 ) result = fact / result;
      FESETENVD( OldEnvironment.d );         //   restore caller's environment
      return result;
      }
Beispiel #19
0
double complex csinh(double complex z)
{
	double x, y, h;
	int32_t hx, hy, ix, iy, lx, ly;

	x = creal(z);
	y = cimag(z);

	EXTRACT_WORDS(hx, lx, x);
	EXTRACT_WORDS(hy, ly, y);

	ix = 0x7fffffff & hx;
	iy = 0x7fffffff & hy;

	/* Handle the nearly-non-exceptional cases where x and y are finite. */
	if (ix < 0x7ff00000 && iy < 0x7ff00000) {
		if ((iy | ly) == 0)
			return CMPLX(sinh(x), y);
		if (ix < 0x40360000)    /* small x: normal case */
			return CMPLX(sinh(x) * cos(y), cosh(x) * sin(y));

		/* |x| >= 22, so cosh(x) ~= exp(|x|) */
		if (ix < 0x40862e42) {
			/* x < 710: exp(|x|) won't overflow */
			h = exp(fabs(x)) * 0.5;
			return CMPLX(copysign(h, x) * cos(y), h * sin(y));
		} else if (ix < 0x4096bbaa) {
			/* x < 1455: scale to avoid overflow */
			z = __ldexp_cexp(CMPLX(fabs(x), y), -1);
			return CMPLX(creal(z) * copysign(1, x), cimag(z));
		} else {
			/* x >= 1455: the result always overflows */
			h = huge * x;
			return CMPLX(h * cos(y), h * h * sin(y));
		}
	}

	/*
	 * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN.
	 * The sign of 0 in the result is unspecified.  Choice = normally
	 * the same as dNaN.  Raise the invalid floating-point exception.
	 *
	 * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN).
	 * The sign of 0 in the result is unspecified.  Choice = normally
	 * the same as d(NaN).
	 */
	if ((ix | lx) == 0 && iy >= 0x7ff00000)
		return CMPLX(copysign(0, x * (y - y)), y - y);

	/*
	 * sinh(+-Inf +- I 0) = +-Inf + I +-0.
	 *
	 * sinh(NaN +- I 0)   = d(NaN) + I +-0.
	 */
	if ((iy | ly) == 0 && ix >= 0x7ff00000) {
		if (((hx & 0xfffff) | lx) == 0)
			return CMPLX(x, y);
		return CMPLX(x, copysign(0, y));
	}

	/*
	 * sinh(x +- I Inf) = dNaN + I dNaN.
	 * Raise the invalid floating-point exception for finite nonzero x.
	 *
	 * sinh(x + I NaN) = d(NaN) + I d(NaN).
	 * Optionally raises the invalid floating-point exception for finite
	 * nonzero x.  Choice = don't raise (except for signaling NaNs).
	 */
	if (ix < 0x7ff00000 && iy >= 0x7ff00000)
		return CMPLX(y - y, x * (y - y));

	/*
	 * sinh(+-Inf + I NaN)  = +-Inf + I d(NaN).
	 * The sign of Inf in the result is unspecified.  Choice = normally
	 * the same as d(NaN).
	 *
	 * sinh(+-Inf +- I Inf) = +Inf + I dNaN.
	 * The sign of Inf in the result is unspecified.  Choice = always +.
	 * Raise the invalid floating-point exception.
	 *
	 * sinh(+-Inf + I y)   = +-Inf cos(y) + I Inf sin(y)
	 */
	if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) {
		if (iy >= 0x7ff00000)
			return CMPLX(x * x, x * (y - y));
		return CMPLX(x * cos(y), INFINITY * sin(y));
	}

	/*
	 * sinh(NaN + I NaN)  = d(NaN) + I d(NaN).
	 *
	 * sinh(NaN +- I Inf) = d(NaN) + I d(NaN).
	 * Optionally raises the invalid floating-point exception.
	 * Choice = raise.
	 *
	 * sinh(NaN + I y)    = d(NaN) + I d(NaN).
	 * Optionally raises the invalid floating-point exception for finite
	 * nonzero y.  Choice = don't raise (except for signaling NaNs).
	 */
	return CMPLX((x * x) * (y - y), (x + x) * (y - y));
}
Beispiel #20
0
void eqs_todo_bulk_ode<real_t>::condevap(
    const mtx::arr<real_t> &rhod,
    mtx::arr<real_t> &rhod_th,
    mtx::arr<real_t> &rhod_rv,
    mtx::arr<real_t> &rhod_rl,
    mtx::arr<real_t> &rhod_rr,
    const quantity<si::time, real_t> dt
)   
{
#  if !defined(USE_BOOST_ODEINT)
    error_macro("eqs_todo_bulk requires icicle to be compiled with Boost.odeint");
#  else
    // odeint::euler< // TODO: opcja?
    odeint::runge_kutta4<
      quantity<multiply_typeof_helper<si::mass_density, si::temperature>::type, real_t>, // state_type
      real_t, // value_type
      quantity<si::temperature, real_t>, // deriv_type
      quantity<si::mass_density, real_t>, // time_type
      odeint::vector_space_algebra, 
      odeint::default_operations, 
      odeint::never_resizer
    > S; // TODO: would be better to instantiate in the ctor (but what about thread safety! :()
    typename detail::rhs F;

    for (int k = rhod.lbound(mtx::k); k <= rhod.ubound(mtx::k); ++k)
      for (int j = rhod.lbound(mtx::j); j <= rhod.ubound(mtx::j); ++j)
        for (int i = rhod.lbound(mtx::i); i <= rhod.ubound(mtx::i); ++i)
        {

          F.init(
            rhod(i,j,k)    * si::kilograms / si::cubic_metres, 
            rhod_th(i,j,k) * si::kilograms / si::cubic_metres * si::kelvins, 
            rhod_rv(i,j,k) * si::kilograms / si::cubic_metres
          );
          real_t // TODO: quantity<si::mass_density
            rho_eps = .00002, // TODO: as an option?
            vapour_excess;
          real_t drho_rr_max = 0; // TODO: quantity<si::mass_density
          if (F.rs > F.r && rhod_rr(i,j,k) > 0 && opt_revp) 
            drho_rr_max = (dt / si::seconds) * (1 - F.r / F.rs) * (1.6 + 124.9 * pow(1e-3 * rhod_rr(i,j,k), .2046)) * pow(1e-3 * rhod_rr(i,j,k), .525) / 
              (5.4e2 + 2.55e5 * (1. / (F.p / si::pascals) / F.rs)); // TODO: move to phc!!!
          bool incloud;

          // TODO: rethink and document 2*rho_eps!!!
          while ( 
            // condensation of cloud water if supersaturated
            (vapour_excess = rhod_rv(i,j,k) - rhod(i,j,k) * F.rs) > rho_eps 
            || (opt_cevp && vapour_excess < -rho_eps && ( // or if subsaturated
              (incloud = (rhod_rl(i,j,k) > 0)) // cloud evaporation if in cloud
              || (opt_revp && rhod_rr(i,j,k) > 0) // or rain evaportation if in a rain shaft (and out-of-cloud)
            )) 
          ) 
          {
            real_t drho_rv = - copysign(.5 * rho_eps, vapour_excess);
            drho_rv = (vapour_excess > 0 || incloud)
              ?  std::min(rhod_rl(i,j,k), drho_rv)
              :  std::min(drho_rr_max, std::min(rhod_rr(i,j,k), drho_rv)); // preventing negative mixing ratios
            assert(drho_rv != 0); // otherwise it should not pass the while condition!

            // theta is modified by do_step, and hence we cannot pass an expression and we need a temp. var.
            quantity<multiply_typeof_helper<si::mass_density, si::temperature>::type, real_t> 
              tmp = rhod_th(i,j,k) * si::kilograms / si::cubic_metres * si::kelvins;

            // integrating the First Law for moist air
            S.do_step(
              boost::ref(F), 
              tmp,
              rhod_rv(i,j,k) * si::kilograms / si::cubic_metres, 
              drho_rv        * si::kilograms / si::cubic_metres
            );

            // latent heat source/sink due to evaporation/condensation
            rhod_th(i,j,k) = tmp / (si::kilograms / si::cubic_metres * si::kelvins); 

            // updating rhod_rv
            rhod_rv(i,j,k) += drho_rv;
            assert(rhod_rv(i,j,k) >= 0);
            assert(isfinite(rhod_rv(i,j,k)));
            
            if (vapour_excess > 0 || incloud) 
            {
              rhod_rl(i,j,k) -= drho_rv; // cloud water 
              assert(rhod_rl(i,j,k) >= 0);
              assert(isfinite(rhod_rl(i,j,k)));
            }
            else // or rain water
            {
              assert(opt_revp); // should be guaranteed by the while() condition above
              rhod_rr(i,j,k) -= drho_rv;
              assert(rhod_rr(i,j,k) >= 0);
              assert(isfinite(rhod_rr(i,j,k)));
              if ((drho_rr_max -= drho_rv) == 0) break; // but not more than Kessler allows
            }
          }
          // hopefully true for RK4
          assert(F.r == real_t(rhod_rv(i,j,k) / rhod(i,j,k)));
          // double-checking....
          assert(rhod_rl(i,j,k) >= 0);
          assert(rhod_rv(i,j,k) >= 0);
          assert(rhod_rr(i,j,k) >= 0);
        }
#  endif
}
Beispiel #21
0
double complex cproj(double complex z)
{
	if (isinf(creal(z)) || isinf(cimag(z)))
		return CMPLX(INFINITY, copysign(0.0, creal(z)));
	return z;
}
///
/// Un is a 1 x N matrix, with N the number of finite volumes
///
/// Reference: Randall LeVeque. Numerical Methods for Conservation Laws (1992)
///
void SlopeLimiterStepPeriodic(const double h, const double dt, const double A, const unsigned int N, const double *restrict_ Un, double *restrict_ Un1)
{
	unsigned int j;

	const double Aabs = fabs(A);

	// nu_p = dt*lambda_p/h
	const double nu = dt*A/h;

	// for slope-limiter term
	const double S    = 0.5 * A    * (copysign(1, nu) - nu);
	const double Sabs = 0.5 * Aabs * (copysign(1, nu) - nu);

	// Eq. (16.34)
	// alpha_j = R^{-1}*(U^n_{j+1} - U^n_j)
	double *alpha = malloc(N * sizeof(double));
	for (j = 0; j < N-1; j++) {
		alpha[j] = Un[j+1] - Un[j];
	}
	alpha[N-1] = Un[0] - Un[N-1];	// periodic

	// h * beta_j, Eq. (16.56)
	double *h_beta = malloc(N * sizeof(double));
	for (j = 1; j < N; j++) {
		h_beta[j] = minmod(alpha[j], alpha[j-1]);
	}
Beispiel #23
0
static int
format_complex_internal(PyObject *value,
                        const InternalFormatSpec *format,
                        _PyUnicodeWriter *writer)
{
    double re;
    double im;
    char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
    char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */

    InternalFormatSpec tmp_format = *format;
    Py_ssize_t n_re_digits;
    Py_ssize_t n_im_digits;
    Py_ssize_t n_re_remainder;
    Py_ssize_t n_im_remainder;
    Py_ssize_t n_re_total;
    Py_ssize_t n_im_total;
    int re_has_decimal;
    int im_has_decimal;
    int precision, default_precision = 6;
    Py_UCS4 type = format->type;
    Py_ssize_t i_re;
    Py_ssize_t i_im;
    NumberFieldWidths re_spec;
    NumberFieldWidths im_spec;
    int flags = 0;
    int result = -1;
    Py_UCS4 maxchar = 127;
    enum PyUnicode_Kind rkind;
    void *rdata;
    Py_UCS4 re_sign_char = '\0';
    Py_UCS4 im_sign_char = '\0';
    int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
    int im_float_type;
    int add_parens = 0;
    int skip_re = 0;
    Py_ssize_t lpad;
    Py_ssize_t rpad;
    Py_ssize_t total;
    PyObject *re_unicode_tmp = NULL;
    PyObject *im_unicode_tmp = NULL;

    /* Locale settings, either from the actual locale or
       from a hard-code pseudo-locale */
    LocaleInfo locale = STATIC_LOCALE_INFO_INIT;

    if (format->precision > INT_MAX) {
        PyErr_SetString(PyExc_ValueError, "precision too big");
        goto done;
    }
    precision = (int)format->precision;

    /* Zero padding is not allowed. */
    if (format->fill_char == '0') {
        PyErr_SetString(PyExc_ValueError,
                        "Zero padding is not allowed in complex format "
                        "specifier");
        goto done;
    }

    /* Neither is '=' alignment . */
    if (format->align == '=') {
        PyErr_SetString(PyExc_ValueError,
                        "'=' alignment flag is not allowed in complex format "
                        "specifier");
        goto done;
    }

    re = PyComplex_RealAsDouble(value);
    if (re == -1.0 && PyErr_Occurred())
        goto done;
    im = PyComplex_ImagAsDouble(value);
    if (im == -1.0 && PyErr_Occurred())
        goto done;

    if (format->alternate)
        flags |= Py_DTSF_ALT;

    if (type == '\0') {
        /* Omitted type specifier. Should be like str(self). */
        type = 'r';
        default_precision = 0;
        if (re == 0.0 && copysign(1.0, re) == 1.0)
            skip_re = 1;
        else
            add_parens = 1;
    }

    if (type == 'n')
        /* 'n' is the same as 'g', except for the locale used to
           format the result. We take care of that later. */
        type = 'g';

    if (precision < 0)
        precision = default_precision;
    else if (type == 'r')
        type = 'g';

    /* Cast "type", because if we're in unicode we need to pass a
       8-bit char. This is safe, because we've restricted what "type"
       can be. */
    re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
                                   &re_float_type);
    if (re_buf == NULL)
        goto done;
    im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
                                   &im_float_type);
    if (im_buf == NULL)
        goto done;

    n_re_digits = strlen(re_buf);
    n_im_digits = strlen(im_buf);

    /* Since there is no unicode version of PyOS_double_to_string,
       just use the 8 bit version and then convert to unicode. */
    re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
    if (re_unicode_tmp == NULL)
        goto done;
    i_re = 0;

    im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
    if (im_unicode_tmp == NULL)
        goto done;
    i_im = 0;

    /* Is a sign character present in the output?  If so, remember it
       and skip it */
    if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
        re_sign_char = '-';
        ++i_re;
        --n_re_digits;
    }
    if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
        im_sign_char = '-';
        ++i_im;
        --n_im_digits;
    }

    /* Determine if we have any "remainder" (after the digits, might include
       decimal or exponent or both (or neither)) */
    parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
                 &n_re_remainder, &re_has_decimal);
    parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
                 &n_im_remainder, &im_has_decimal);

    /* Determine the grouping, separator, and decimal point, if any. */
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
                        (format->thousands_separators ?
                         LT_DEFAULT_LOCALE :
                         LT_NO_LOCALE),
                        &locale) == -1)
        goto done;

    /* Turn off any padding. We'll do it later after we've composed
       the numbers without padding. */
    tmp_format.fill_char = '\0';
    tmp_format.align = '<';
    tmp_format.width = -1;

    /* Calculate how much memory we'll need. */
    n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
                                    i_re, i_re + n_re_digits, n_re_remainder,
                                    re_has_decimal, &locale, &tmp_format,
                                    &maxchar);

    /* Same formatting, but always include a sign, unless the real part is
     * going to be omitted, in which case we use whatever sign convention was
     * requested by the original format. */
    if (!skip_re)
        tmp_format.sign = '+';
    n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
                                    i_im, i_im + n_im_digits, n_im_remainder,
                                    im_has_decimal, &locale, &tmp_format,
                                    &maxchar);

    if (skip_re)
        n_re_total = 0;

    /* Add 1 for the 'j', and optionally 2 for parens. */
    calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
                 format->width, format->align, &lpad, &rpad, &total);

    if (lpad || rpad)
        maxchar = Py_MAX(maxchar, format->fill_char);

    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
        goto done;
    rkind = writer->kind;
    rdata = writer->data;

    /* Populate the memory. First, the padding. */
    result = fill_padding(writer,
                          n_re_total + n_im_total + 1 + add_parens * 2,
                          format->fill_char, lpad, rpad);
    if (result == -1)
        goto done;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
        writer->pos++;
    }

    if (!skip_re) {
        result = fill_number(writer, &re_spec,
                             re_unicode_tmp, i_re, i_re + n_re_digits,
                             NULL, 0,
                             0,
                             &locale, 0);
        if (result == -1)
            goto done;
    }
    result = fill_number(writer, &im_spec,
                         im_unicode_tmp, i_im, i_im + n_im_digits,
                         NULL, 0,
                         0,
                         &locale, 0);
    if (result == -1)
        goto done;
    PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
    writer->pos++;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
        writer->pos++;
    }

    writer->pos += rpad;

done:
    PyMem_Free(re_buf);
    PyMem_Free(im_buf);
    Py_XDECREF(re_unicode_tmp);
    Py_XDECREF(im_unicode_tmp);
    free_locale_info(&locale);
    return result;
}
//_______________________________________________________________________________________________________________________
///
/// \brief min-mod function, see Eq. (16.52) in Randall LeVeque. Numerical Methods for Conservation Laws (1992)
///
static inline double minmod(const double a, const double b)
{
	return 0.5 * (copysign(1, a) + copysign(1, b)) * minf(fabs(a), fabs(b));
}
Beispiel #25
0
float FPEnvironmentImpl::copySignImpl(float target, float source)
{
	return (float) copysign(target, source);
}
Beispiel #26
0
double d_sign(double *x, double *y)
{
    return(copysign(*x,*y));
}
Beispiel #27
0
double epsD(double x) {
   x = copysign(x, 1.0);
   double y = nextafter(x, INFINITY);
   return y-x;
}
Beispiel #28
0
float
copysignf (float x, float y)
{
  /* We use the double version.  */
  return copysign (x, y);
}
Beispiel #29
0
void wgsecef2llh(const Vector3d &ecef, Vector3d &llh) {
  /* Distance from polar axis. */
  const double p = sqrt(ecef[0]*ecef[0] + ecef[1]*ecef[1]);

  /* Compute longitude first, this can be done exactly. */
  if (!is_zero(p))
    llh[1] = atan2(ecef[1], ecef[0]);
  else
    llh[1] = 0;

  /* If we are close to the pole then convergence is very slow, treat this is a
   * special case. */
  if (p < WGS84_A*1e-16) {
    llh[0] = copysign(M_PI_2, ecef[2]);
    llh[2] = fabs(ecef[2]) - WGS84_B;
    return;
  }

  /* Calculate some other constants as defined in the Fukushima paper. */
  const double P = p / WGS84_A;
  const double e_c = sqrt(1. - WGS84_E*WGS84_E);
  const double Z = fabs(ecef[2]) * e_c / WGS84_A;

  /* Initial values for S and C correspond to a zero height solution. */
  double S = Z;
  double C = e_c * P;

  /* Neither S nor C can be negative on the first iteration so
   * starting prev = -1 will not cause and early exit. */
  double prev_C = -1;
  double prev_S = -1;

  double A_n, B_n, D_n, F_n;

  /* Iterate a maximum of 10 times. This should be way more than enough for all
   * sane inputs */
  for (int i=0; i<10; i++)
  {
    /* Calculate some intermmediate variables used in the update step based on
     * the current state. */
    A_n = sqrt(S*S + C*C);
    D_n = Z*A_n*A_n*A_n + WGS84_E*WGS84_E*S*S*S;
    F_n = P*A_n*A_n*A_n - WGS84_E*WGS84_E*C*C*C;
    B_n = 1.5*WGS84_E*S*C*C*(A_n*(P*S - Z*C) - WGS84_E*S*C);

    /* Update step. */
    S = D_n*F_n - B_n*S;
    C = F_n*F_n - B_n*C;

    /* The original algorithm as presented in the paper by Fukushima has a
     * problem with numerical stability. S and C can grow very large or small
     * and over or underflow a double. In the paper this is acknowledged and
     * the proposed resolution is to non-dimensionalise the equations for S and
     * C. However, this does not completely solve the problem. The author caps
     * the solution to only a couple of iterations and in this period over or
     * underflow is unlikely but as we require a bit more precision and hence
     * more iterations so this is still a concern for us.
     *
     * As the only thing that is important is the ratio T = S/C, my solution is
     * to divide both S and C by either S or C. The scaling is chosen such that
     * one of S or C is scaled to unity whilst the other is scaled to a value
     * less than one. By dividing by the larger of S or C we ensure that we do
     * not divide by zero as only one of S or C should ever be zero.
     *
     * This incurs an extra division each iteration which the author was
     * explicityl trying to avoid and it may be that this solution is just
     * reverting back to the method of iterating on T directly, perhaps this
     * bears more thought?
     */

    if (S > C) {
      C = C / S;
      S = 1;
    } else {
      S = S / C;
      C = 1;
    }

    /* Check for convergence and exit early if we have converged. */
    if (fabs(S - prev_S) < 1e-16 && fabs(C - prev_C) < 1e-16) {
      break;
    } else {
      prev_S = S;
      prev_C = C;
    }
  }

  A_n = sqrt(S*S + C*C);
  llh[0] = copysign(1.0, ecef[2]) * atan(S / (e_c*C));
  llh[2] = (p*e_c*C + fabs(ecef[2])*S - WGS84_A*e_c*A_n) / sqrt(e_c*e_c*C*C + S*S);
}
Beispiel #30
0
float copysignf (float x, float y)
{
	return (float) copysign( (double)x, (double)y );
}