/*
 * Build-time probe: the macro N selects exactly one libm routine to
 * reference, so that compiling/linking this file tells whether that routine
 * is available.  Odd N uses a long double operand, even N a double operand.
 *
 *   N == 1  ldexpl      N == 2  ldexp
 *   N == 3  frexpl      N == 4  frexp
 *   N == 5  isnan       N == 6  isnan
 *   N == 7  copysign    N == 8  signbit
 */
int main()
{
#if N & 1
    long double probe = 0;
#else
    double probe = 0;
#endif
#if N < 5
    /* Exponent operand, only needed by the ldexp/frexp family. */
    int exponent = 0;
#endif

#if N == 1
    return ldexpl(probe, exponent) != 0;
#elif N == 2
    return ldexp(probe, exponent) != 0;
#elif N == 3
    return frexpl(probe, &exponent) != 0;
#elif N == 4
    return frexp(probe, &exponent) != 0;
#elif N == 5 || N == 6
    return isnan(probe);
#elif N == 7
    return copysign(1.0, probe) < 0;
#elif N == 8
    return signbit(probe);
#endif
}
/*
 * Exercise the inverse trigonometric/hyperbolic functions at a handful of
 * points on the real axis, comparing each against the corresponding real
 * function with a tolerance argument of 1.
 *
 * Points with |x| <= 1 and points with |x| > 1 use different expected
 * values; casinh and catan are checked for every point.
 */
void
test_axes(void)
{
	static const long double nums[] = { -2, -1, -0.5, 0.5, 1, 2 };
	const int count = sizeof(nums) / sizeof(nums[0]);
	long double complex z;
	int i;

	for (i = 0; i < count; i++) {
		/* Real axis */
		z = CMPLXL(nums[i], 0.0);

		if (!(fabs(nums[i]) <= 1)) {
			/* Outside [-1, 1]. */
			testall_tol(cacosh, z,
			    CMPLXL(acosh(fabs(nums[i])),
				(nums[i] < 0) ? pi : 0), 1);
			testall_tol(cacos, z,
			    CMPLXL((nums[i] < 0) ? pi : 0,
				-acosh(fabs(nums[i]))), 1);
			testall_tol(casin, z,
			    CMPLXL(copysign(pi / 2, nums[i]),
				acosh(fabs(nums[i]))), 1);
			testall_tol(catanh, z,
			    CMPLXL(atanh(1 / nums[i]), pi / 2), 1);
		} else {
			/* Inside [-1, 1]. */
			testall_tol(cacosh, z,
			    CMPLXL(0.0, acos(nums[i])), 1);
			testall_tol(cacos, z,
			    CMPLXL(acosl(nums[i]), -0.0), 1);
			testall_tol(casin, z,
			    CMPLXL(asinl(nums[i]), 0.0), 1);
			testall_tol(catanh, z,
			    CMPLXL(atanh(nums[i]), 0.0), 1);
		}

		/* Checked for every sample point. */
		testall_tol(casinh, z, CMPLXL(asinh(nums[i]), 0.0), 1);
		testall_tol(catan, z, CMPLXL(atan(nums[i]), 0), 1);

		/* TODO: Test the imaginary axis. */
	}
}
double CGaussian::Deviance(const CDataset& kData, const Bag& kBag, const double* kFuncEstimate) { double loss = 0.0; double weight = 0.0; unsigned long num_rows_in_set = kData.get_size_of_set(); #pragma omp parallel for schedule(static, get_array_chunk_size()) \ reduction(+ : loss, weight) num_threads(get_num_threads()) for (unsigned long i = 0; i < num_rows_in_set; i++) { const double tmp = (kData.y_ptr()[i] - kData.offset_ptr()[i] - kFuncEstimate[i]); loss += kData.weight_ptr()[i] * tmp * tmp; weight += kData.weight_ptr()[i]; } // TODO: Check if weights are all zero for validation set if ((weight == 0.0) && (loss == 0.0)) { return nan(""); } else if (weight == 0.0) { return copysign(HUGE_VAL, loss); } return loss / weight; }
/* Computes x raised to the power y, where x and y are the two real
   arguments taken from args, and returns the boxed result.

   Special cases handled here rather than by the C library pow():
     - x is NaN:           result is 1.0 if y == 0.0, otherwise NaN.
     - y is NaN:           result is NaN.
     - x == 0.0 and y < 0: result is -infinity if x is -0.0 and y is an odd
                           integer, otherwise +infinity.
   Everything else is delegated to pow(). */
static Handle powerOf(TaskData *mdTaskData, Handle args)
{
    double x = real_arg1(args), y = real_arg2(args);
    /* Some of the special cases are defined and don't seem to match
       the C pow function (at least as implemented in MS C). */
    /* Maybe handle all this in ML? */
    if (isnan(x))
    {
        if (y == 0.0) return real_result(mdTaskData, 1.0);
        else return real_result(mdTaskData, notANumber);
    }
    else if (isnan(y)) return real_result(mdTaskData, y); /* i.e. nan. */
    else if (x == 0.0 && y < 0.0)
    {
        /* This case is not handled correctly in Solaris. It always
           returns -infinity. */
        /* y is negative here, so it is an odd integer exactly when its
           remainder modulo 2 is -1.  Using fmod avoids converting y to int,
           which is undefined when y is outside the range of int (very large
           magnitudes or -infinity).  For -infinity fmod yields NaN and the
           comparison is false, giving +infinity. */
        const bool yIsOddInteger = (fmod(y, 2.0) == -1.0);
        /* If x is -0.0 and y is an odd integer the result is -infinity. */
        if (copysign(1.0, x) < 0.0 && yIsOddInteger)
            return real_result(mdTaskData, negInf); /* -infinity. */
        else return real_result(mdTaskData, posInf); /* +infinity. */
    }
    return real_result(mdTaskData, pow(x, y));
}
/**
    Purpose
    -------
    SLAEX3 finds the roots of the secular equation, as defined by the
    values in D, W, and RHO, between 1 and K.  It makes the
    appropriate calls to SLAED4 and then updates the eigenvectors by
    multiplying the matrix of eigenvectors of the pair of eigensystems
    being combined by the matrix of eigenvectors of the K-by-K system
    which is solved here.

    It is used in the last step when only a part of the eigenvectors
    is required.
    It computes only the required part of the eigenvectors and the rest
    is not used.

    This is the multi-GPU entry point: when ngpu == 1 it forwards to
    magma_slaex3 on device 0; otherwise the final matrix products are done
    either on the CPU or on pairs of GPUs depending on n1.

    This code makes very mild assumptions about floating point
    arithmetic. It will work on machines with a guard digit in
    add/subtract, or on those binary machines without guard digits
    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
    It could conceivably fail on hexadecimal or decimal machines
    without guard digits, but we know of none.

    Arguments
    ---------
    @param[in]
    ngpu    INTEGER
            Number of GPUs to use. ngpu > 0.

    @param[in]
    k       INTEGER
            The number of terms in the rational function to be solved by
            SLAED4.  K >= 0.

    @param[in]
    n       INTEGER
            The number of rows and columns in the Q matrix.
            N >= K (deflation may result in N > K).

    @param[in]
    n1      INTEGER
            The location of the last eigenvalue in the leading submatrix.
            min(1,N) <= N1 <= N/2.

    @param[out]
    d       REAL array, dimension (N)
            D(I) contains the updated eigenvalues for
            1 <= I <= K.

    @param[out]
    Q       REAL array, dimension (LDQ,N)
            Initially the first K columns are used as workspace.
            On output the columns ??? to ??? contain
            the updated eigenvectors.
            (NOTE(review): the column range was left unspecified in the
            original documentation; the code below writes columns
            iil-1 .. iiu-1 -- confirm and fill in.)

    @param[in]
    ldq     INTEGER
            The leading dimension of the array Q.  LDQ >= max(1,N).

    @param[in]
    rho     REAL
            The value of the parameter in the rank one update equation.
            RHO >= 0 required.

    @param[in,out]
    dlamda  REAL array, dimension (K)
            The first K elements of this array contain the old roots
            of the deflated updating problem.  These are the poles
            of the secular equation. May be changed on output by
            having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
            Cray-2, or Cray C-90, as described above.

    @param[in]
    Q2      REAL array, dimension (LDQ2, N)
            The first K columns of this matrix contain the non-deflated
            eigenvectors for the split problem.

    @param[in]
    indx    INTEGER array, dimension (N)
            The permutation used to arrange the columns of the deflated
            Q matrix into three groups (see SLAED2).
            The rows of the eigenvectors found by SLAED4 must be likewise
            permuted before the matrix multiply can take place.

    @param[in]
    ctot    INTEGER array, dimension (4)
            A count of the total number of the various types of columns
            in Q, as described in INDX.  The fourth column type is any
            column which has been deflated.

    @param[in,out]
    w       REAL array, dimension (K)
            The first K elements of this array contain the components
            of the deflation-adjusted updating vector. Destroyed on
            output.

    @param
    s       (workspace) REAL array, dimension (N1 + 1)*K
            Will contain the eigenvectors of the repaired matrix which
            will be multiplied by the previously accumulated eigenvectors
            to update the system.

    @param[out]
    indxq   INTEGER array, dimension (N)
            On exit, the permutation which will reintegrate the
            subproblems back into sorted order,
            i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.

    @param
    dwork   (devices workspaces) REAL array of arrays,
            dimension NRGPU.
            if NRGPU = 1 the dimension of the first workspace
            should be (3*N*N/2+3*N)
            otherwise the NRGPU workspaces should have the size
            ceil((N-N1) * (N-N1) / floor(ngpu/2)) +
            NB * ((N-N1) + (N-N1) / floor(ngpu/2))

    @param
    queues  (device queues) magma_queue_t array,
            dimension (MagmaMaxGPUs,2)

    @param[in]
    range   magma_range_t
      -     = MagmaRangeAll: all eigenvalues will be found.
      -     = MagmaRangeV:   all eigenvalues in the half-open interval (VL,VU]
                             will be found.
      -     = MagmaRangeI:   the IL-th through IU-th eigenvalues will be found.
            TODO verify range, vl, vu, il, iu -- copied from slaex1.

    @param[in]
    vl      REAL
    @param[in]
    vu      REAL
            if RANGE=MagmaRangeV, the lower and upper bounds of the interval to
            be searched for eigenvalues. VL < VU.
            Not referenced if RANGE = MagmaRangeAll or MagmaRangeI.

    @param[in]
    il      INTEGER
    @param[in]
    iu      INTEGER
            if RANGE=MagmaRangeI, the indices (in ascending order) of the
            smallest and largest eigenvalues to be returned.
            1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
            Not referenced if RANGE = MagmaRangeAll or MagmaRangeV.

    @param[out]
    info    INTEGER
      -     = 0:  successful exit.
      -     < 0:  if INFO = -i, the i-th argument had an illegal value.
      -     > 0:  if INFO = 1, an eigenvalue did not converge

    Further Details
    ---------------
    Based on contributions by
    Jeff Rutter, Computer Science Division, University of California
    at Berkeley, USA
    Modified by Francoise Tisseur, University of Tennessee.

    @ingroup magma_ssyev_aux
    ********************************************************************/
extern "C" magma_int_t
magma_slaex3_m(
    magma_int_t ngpu,
    magma_int_t k, magma_int_t n, magma_int_t n1, float *d,
    float *Q, magma_int_t ldq, float rho,
    float *dlamda, float *Q2, magma_int_t *indx,
    magma_int_t *ctot, float *w, float *s, magma_int_t *indxq,
    magmaFloat_ptr dwork[],
    magma_queue_t queues[MagmaMaxGPUs][2],
    magma_range_t range, float vl, float vu, magma_int_t il, magma_int_t iu,
    magma_int_t *info )
{
// Address of element (i_, j_) of the column-major host matrix Q.
#define Q(i_,j_) (Q + (i_) + (j_)*ldq)

// Per-device workspace layout inside dwork[id]: first the local part of Q2,
// then two S buffers, then two result buffers (ii selects the buffer).
#define dQ2(id) (dwork[id])
#define dS(id, ii) (dwork[id] + n2*n2_loc + (ii)*(n2*nb))
#define dQ(id, ii) (dwork[id] + n2*n2_loc + 2*(n2*nb) + (ii)*(n2_loc*nb))

    // Single-GPU case: delegate to the single-GPU routine.
    // NOTE(review): this path selects device 0 and returns without
    // restoring the previously active device -- confirm this is intended.
    if (ngpu == 1) {
        magma_setdevice(0);
        magma_slaex3(k, n, n1, d, Q, ldq, rho,
                     dlamda, Q2, indx, ctot, w, s, indxq,
                     *dwork, range, vl, vu, il, iu, info );
        return *info;
    }
    float d_one = 1.;
    float d_zero = 0.;
    magma_int_t ione = 1;
    magma_int_t ineg_one = -1;

    magma_int_t iil, iiu, rk;
    magma_int_t n1_loc, n2_loc, ib, nb, ib2, igpu;
    magma_int_t ni_loc[MagmaMaxGPUs];

    magma_int_t i, ind, iq2, j, n12, n2, n23, tmp;
    float temp;
    magma_int_t alleig, valeig, indeig;

    alleig = (range == MagmaRangeAll);
    valeig = (range == MagmaRangeV);
    indeig = (range == MagmaRangeI);

    // Argument checking.
    // NOTE(review): the negative codes below do not count the leading ngpu
    // argument (k is the 2nd parameter but is reported as -1, ldq the 7th
    // but reported as -6, ...); presumably inherited from the routine
    // without ngpu -- confirm against callers before changing.
    *info = 0;

    if (k < 0)
        *info=-1;
    else if (n < k)
        *info=-2;
    else if (ldq < max(1,n))
        *info=-6;
    else if (! (alleig || valeig || indeig))
        *info = -15;
    else {
        if (valeig) {
            if (n > 0 && vu <= vl)
                *info = -17;
        }
        else if (indeig) {
            if (il < 1 || il > max(1,n))
                *info = -18;
            else if (iu < min(n,il) || iu > n)
                *info = -19;
        }
    }

    if (*info != 0) {
        magma_xerbla(__func__, -(*info));
        return *info;
    }

    // Quick return if possible
    if (k == 0)
        return *info;

    // Remember the active device and kernel stream; both are restored just
    // before the final return at the end of the function.
    magma_device_t orig_dev;
    magma_getdevice( &orig_dev );
    magma_queue_t orig_stream;
    magmablasGetKernelStream( &orig_stream );

    /*
     Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
     be computed with high relative accuracy (barring over/underflow).
     This is a problem on machines without a guard digit in
     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
     The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
     which on any of these machines zeros out the bottommost
     bit of DLAMDA(I) if it is 1; this makes the subsequent
     subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
     occurs. On binary machines with a guard digit (almost all
     machines) it does not change DLAMDA(I) at all. On hexadecimal
     and decimal machines with a guard digit, it slightly
     changes the bottommost bits of DLAMDA(I). It does not account
     for hexadecimal or decimal machines without guard digits
     (we know of none). We use a subroutine call to compute
     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
     this code.*/
    // (The DLAMDA modification described above is the lapackf77_slamc3 loop
    // further down, in both the OpenMP and the non-OpenMP variants.)

//#define CHECK_CPU
#ifdef CHECK_CPU
    // Debug-only pinned host mirrors of the device buffers.
    float *hwS[2][MagmaMaxGPUs], *hwQ[2][MagmaMaxGPUs], *hwQ2[MagmaMaxGPUs];
    #define hQ2(id) (hwQ2[id])
    #define hS(id, ii) (hwS[ii][id])
    #define hQ(id, ii) (hwQ[ii][id])
#endif
    n2 = n - n1;

    n12 = ctot[0] + ctot[1];
    n23 = ctot[1] + ctot[2];

    // Offset in Q2 of the second block (the one multiplied into rows n1..n-1).
    iq2 = n1 * n12;
    //lq2 = iq2 + n2 * n23;

    // Rows per device for the upper (n1) and lower (n2) halves; devices are
    // used in pairs, so each half is split over ngpu/2 devices.
    n1_loc = (n1-1) / (ngpu/2) + 1;
    n2_loc = (n2-1) / (ngpu/2) + 1;

    nb = magma_get_slaex3_m_nb();

    // Large enough problem: start uploading the Q2 row slices to the GPUs
    // (even devices get the upper block, odd devices the lower block).
    if (n1 >= magma_get_slaex3_m_k()) {
#ifdef CHECK_CPU
        for (igpu = 0; igpu < ngpu; ++igpu) {
            magma_smalloc_pinned( &(hwS[0][igpu]), n2*nb );
            magma_smalloc_pinned( &(hwS[1][igpu]), n2*nb );
            magma_smalloc_pinned( &(hwQ2[igpu]), n2*n2_loc );
            magma_smalloc_pinned( &(hwQ[0][igpu]), n2_loc*nb );
            magma_smalloc_pinned( &(hwQ[1][igpu]), n2_loc*nb );
        }
#endif
        for (igpu = 0; igpu < ngpu-1; igpu += 2) {
            ni_loc[igpu] = min(n1_loc, n1 - igpu/2 * n1_loc);
#ifdef CHECK_CPU
            lapackf77_slacpy("A", &ni_loc[igpu], &n12, Q2+n1_loc*(igpu/2), &n1, hQ2(igpu), &n1_loc);
#endif
            magma_setdevice(igpu);
            magma_ssetmatrix_async( ni_loc[igpu], n12,
                                    Q2+n1_loc*(igpu/2), n1,
                                    dQ2(igpu), n1_loc, queues[igpu][0] );
            ni_loc[igpu+1] = min(n2_loc, n2 - igpu/2 * n2_loc);
#ifdef CHECK_CPU
            lapackf77_slacpy("A", &ni_loc[igpu+1], &n23, Q2+iq2+n2_loc*(igpu/2), &n2, hQ2(igpu+1), &n2_loc);
#endif
            magma_setdevice(igpu+1);
            magma_ssetmatrix_async( ni_loc[igpu+1], n23,
                                    Q2+iq2+n2_loc*(igpu/2), n2,
                                    dQ2(igpu+1), n2_loc, queues[igpu+1][0] );
        }
    }

    //

#ifdef _OPENMP
    /////////////////////////////////////////////////////////////////////////////////
    //openmp implementation
    /////////////////////////////////////////////////////////////////////////////////
    magma_timer_t time=0;
    timer_start( time );

#pragma omp parallel private(i, j, tmp, temp)
    {
        magma_int_t id = omp_get_thread_num();
        magma_int_t tot = omp_get_num_threads();

        // These shadow the function-level ib; each thread owns [ib, ie).
        magma_int_t ib = ( id * k) / tot; //start index of local loop
        magma_int_t ie = ((id+1) * k) / tot; //end index of local loop
        magma_int_t ik = ie - ib; //number of local indices

        for (i = ib; i < ie; ++i)
            dlamda[i]=lapackf77_slamc3(&dlamda[i], &dlamda[i]) - dlamda[i];

        for (j = ib; j < ie; ++j) {
            magma_int_t tmpp=j+1;
            magma_int_t iinfo = 0;
            lapackf77_slaed4(&k, &tmpp, dlamda, w, Q(0,j), &rho, &d[j], &iinfo);
            // If the zero finder fails, the computation is terminated.
            if (iinfo != 0) {
                // The critical section covers only the assignment; the
                // break then leaves this thread's loop.
#pragma omp critical (info)
                *info = iinfo;
                break;
            }
        }

        // All threads must have finished (or aborted) the root finding
        // before *info is inspected.
#pragma omp barrier

        if (*info == 0) {
#pragma omp single
            {
                //Prepare the INDXQ sorting permutation.
                magma_int_t nk = n - k;
                lapackf77_slamrg( &k, &nk, d, &ione, &ineg_one, indxq);

                //compute the lower and upper bound of the non-deflated eigenvectors
                if (valeig)
                    magma_svrange(k, d, &iil, &iiu, vl, vu);
                else if (indeig)
                    magma_sirange(k, indxq, &iil, &iiu, il, iu);
                else {
                    iil = 1;
                    iiu = k;
                }
                rk = iiu - iil + 1;
            }

            if (k == 2) {
#pragma omp single
                {
                    // Only permute the rows of the two eigenvectors by indx.
                    for (j = 0; j < k; ++j) {
                        w[0] = *Q(0,j);
                        w[1] = *Q(1,j);

                        i = indx[0] - 1;
                        *Q(0,j) = w[i];
                        i = indx[1] - 1;
                        *Q(1,j) = w[i];
                    }
                }
            }
            else if (k != 1) {
                // Compute updated W.
                blasf77_scopy( &ik, &w[ib], &ione, &s[ib], &ione);

                // Initialize W(I) = Q(I,I)
                tmp = ldq + 1;
                blasf77_scopy( &ik, Q(ib,ib), &tmp, &w[ib], &ione);

                // Multiply in every off-diagonal term; the two inner loops
                // cover this thread's i < j and i > j respectively.
                for (j = 0; j < k; ++j) {
                    magma_int_t i_tmp = min(j, ie);
                    for (i = ib; i < i_tmp; ++i)
                        w[i] = w[i] * ( *Q(i, j) / ( dlamda[i] - dlamda[j] ) );
                    i_tmp = max(j+1, ib);
                    for (i = i_tmp; i < ie; ++i)
                        w[i] = w[i] * ( *Q(i, j) / ( dlamda[i] - dlamda[j] ) );
                }

                // Saved copy in s supplies the sign of each component.
                for (i = ib; i < ie; ++i)
                    w[i] = copysign( sqrt( -w[i] ), s[i]);

                // Every thread reads all of w below, so wait for all slices.
#pragma omp barrier

                //reduce the number of used threads to have enough S workspace
                tot = min(n1, omp_get_num_threads());

                if (id < tot) {
                    ib = ( id * rk) / tot + iil - 1;
                    ie = ((id+1) * rk) / tot + iil - 1;
                    ik = ie - ib;
                }
                else {
                    // Surplus threads get an empty range.
                    ib = -1;
                    ie = -1;
                    ik = -1;
                }

                // Compute eigenvectors of the modified rank-1 modification.
                // Each thread uses its own k-element slice of s as scratch.
                for (j = ib; j < ie; ++j) {
                    for (i = 0; i < k; ++i)
                        s[id*k + i] = w[i] / *Q(i,j);
                    temp = magma_cblas_snrm2( k, s+id*k, 1 );
                    for (i = 0; i < k; ++i) {
                        magma_int_t iii = indx[i] - 1;
                        *Q(i,j) = s[id*k + iii] / temp;
                    }
                }
            }
        }
    }
    // NOTE(review): this early return happens before the device/stream
    // restore at the end of the function, and after asynchronous uploads may
    // have been queued above -- confirm whether cleanup is needed here.
    if (*info != 0)
        return *info;

    timer_stop( time );
    timer_printf( "eigenvalues/vector D+zzT = %6.2f\n", time );

#else
    /////////////////////////////////////////////////////////////////////////////////
    // Non openmp implementation
    /////////////////////////////////////////////////////////////////////////////////
    magma_timer_t time=0;
    timer_start( time );

    for (i = 0; i < k; ++i)
        dlamda[i]=lapackf77_slamc3(&dlamda[i], &dlamda[i]) - dlamda[i];

    for (j = 0; j < k; ++j) {
        magma_int_t tmpp=j+1;
        magma_int_t iinfo = 0;
        lapackf77_slaed4(&k, &tmpp, dlamda, w, Q(0,j), &rho, &d[j], &iinfo);
        // If the zero finder fails, the computation is terminated.
        // (The loop itself keeps going; the failure is acted on after it.)
        if (iinfo != 0)
            *info=iinfo;
    }
    // NOTE(review): as in the OpenMP variant, this returns without restoring
    // the original device/stream -- confirm.
    if (*info != 0)
        return *info;

    //Prepare the INDXQ sorting permutation.
    magma_int_t nk = n - k;
    lapackf77_slamrg( &k, &nk, d, &ione, &ineg_one, indxq);

    //compute the lower and upper bound of the non-deflated eigenvectors
    if (valeig)
        magma_svrange(k, d, &iil, &iiu, vl, vu);
    else if (indeig)
        magma_sirange(k, indxq, &iil, &iiu, il, iu);
    else {
        iil = 1;
        iiu = k;
    }
    rk = iiu - iil + 1;

    if (k == 2) {
        // Only permute the rows of the two eigenvectors by indx.
        for (j = 0; j < k; ++j) {
            w[0] = *Q(0,j);
            w[1] = *Q(1,j);

            i = indx[0] - 1;
            *Q(0,j) = w[i];
            i = indx[1] - 1;
            *Q(1,j) = w[i];
        }
    }
    else if (k != 1) {
        // Compute updated W.
        blasf77_scopy( &k, w, &ione, s, &ione);

        // Initialize W(I) = Q(I,I)
        tmp = ldq + 1;
        blasf77_scopy( &k, Q, &tmp, w, &ione);

        for (j = 0; j < k; ++j) {
            for (i = 0; i < j; ++i)
                w[i] = w[i] * ( *Q(i, j) / ( dlamda[i] - dlamda[j] ) );
            for (i = j+1; i < k; ++i)
                w[i] = w[i] * ( *Q(i, j) / ( dlamda[i] - dlamda[j] ) );
        }

        // Saved copy in s supplies the sign of each component.
        for (i = 0; i < k; ++i)
            w[i] = copysign( sqrt( -w[i] ), s[i]);

        // Compute eigenvectors of the modified rank-1 modification.
        for (j = iil-1; j < iiu; ++j) {
            for (i = 0; i < k; ++i)
                s[i] = w[i] / *Q(i,j);
            temp = magma_cblas_snrm2( k, s, 1 );
            for (i = 0; i < k; ++i) {
                magma_int_t iii = indx[i] - 1;
                *Q(i,j) = s[iii] / temp;
            }
        }
    }

    timer_stop( time );
    timer_printf( "eigenvalues/vector D+zzT = %6.2f\n", time );

#endif //_OPENMP

    // Compute the updated eigenvectors.

    timer_start( time );

    if (rk > 0) {
        if (n1 < magma_get_slaex3_m_k()) {
            // stay on the CPU
            // Lower block first: Q(n1:, cols) = Q2[iq2..] * S, or zero when
            // there is nothing to multiply.
            if ( n23 != 0 ) {
                lapackf77_slacpy("A", &n23, &rk, Q(ctot[0],iil-1), &ldq, s, &n23);
                blasf77_sgemm("N", "N", &n2, &rk, &n23, &d_one, &Q2[iq2], &n2,
                              s, &n23, &d_zero, Q(n1,iil-1), &ldq );
            }
            else
                lapackf77_slaset("A", &n2, &rk, &d_zero, &d_zero, Q(n1,iil-1), &ldq);

            // Upper block: Q(0:n1, cols) = Q2 * S, or zero.
            if ( n12 != 0 ) {
                lapackf77_slacpy("A", &n12, &rk, Q(0,iil-1), &ldq, s, &n12);
                blasf77_sgemm("N", "N", &n1, &rk, &n12, &d_one, Q2, &n1,
                              s, &n12, &d_zero, Q(0,iil-1), &ldq);
            }
            else
                lapackf77_slaset("A", &n1, &rk, &d_zero, &d_zero, Q(0,iil-1), &ldq);
        }
        else {
            //use the gpus
            // Upload the first panel of nb columns into buffer 0.
            ib = min(nb, rk);
            for (igpu = 0; igpu < ngpu-1; igpu += 2) {
                if (n23 != 0) {
                    magma_setdevice(igpu+1);
                    magma_ssetmatrix_async( n23, ib, Q(ctot[0],iil-1), ldq,
                                            dS(igpu+1,0), n23, queues[igpu+1][0] );
                }
                if (n12 != 0) {
                    magma_setdevice(igpu);
                    magma_ssetmatrix_async( n12, ib, Q(0,iil-1), ldq,
                                            dS(igpu,0), n12, queues[igpu][0] );
                }
            }

            // Process the rk columns in panels of nb, alternating between
            // the two buffers/queues (ind) so the next panel can be uploaded
            // while the current one is multiplied.
            for (i = 0; i < rk; i += nb) {
                ib = min(nb, rk - i);
                ind = (i/nb)%2;
                if (i+nb < rk) {
                    // Prefetch the following panel into the other buffer.
                    ib2 = min(nb, rk - i - nb);
                    for (igpu = 0; igpu < ngpu-1; igpu += 2) {
                        if (n23 != 0) {
                            magma_setdevice(igpu+1);
                            magma_ssetmatrix_async( n23, ib2, Q(ctot[0],iil-1+i+nb), ldq,
                                                    dS(igpu+1,(ind+1)%2), n23, queues[igpu+1][(ind+1)%2] );
                        }
                        if (n12 != 0) {
                            magma_setdevice(igpu);
                            magma_ssetmatrix_async( n12, ib2, Q(0,iil-1+i+nb), ldq,
                                                    dS(igpu,(ind+1)%2), n12, queues[igpu][(ind+1)%2] );
                        }
                    }
                }

                // Ensure that the data is copied on gpu since we will overwrite it.
                for (igpu = 0; igpu < ngpu-1; igpu += 2) {
                    if (n23 != 0) {
#ifdef CHECK_CPU
                        lapackf77_slacpy("A", &n23, &ib, Q(ctot[0],iil-1+i), &ldq, hS(igpu+1,ind), &n23);
#endif
                        magma_setdevice(igpu+1);
                        magma_queue_sync( queues[igpu+1][ind] );
                    }
                    if (n12 != 0) {
#ifdef CHECK_CPU
                        lapackf77_slacpy("A", &n12, &ib, Q(0,iil-1+i), &ldq, hS(igpu,ind), &n12);
#endif
                        magma_setdevice(igpu);
                        magma_queue_sync( queues[igpu][ind] );
                    }
                }
                // Multiply the local slice of Q2 by the current panel.
                for (igpu = 0; igpu < ngpu-1; igpu += 2) {
                    if (n23 != 0) {
#ifdef CHECK_CPU
                        blasf77_sgemm("N", "N", &ni_loc[igpu+1], &ib, &n23, &d_one, hQ2(igpu+1), &n2_loc,
                                      hS(igpu+1,ind), &n23, &d_zero, hQ(igpu+1, ind), &n2_loc);
#endif
                        magma_setdevice(igpu+1);
                        magmablasSetKernelStream(queues[igpu+1][ind]);
                        magma_sgemm(MagmaNoTrans, MagmaNoTrans, ni_loc[igpu+1], ib, n23, d_one, dQ2(igpu+1), n2_loc,
                                    dS(igpu+1, ind), n23, d_zero, dQ(igpu+1, ind), n2_loc);
#ifdef CHECK_CPU
                        printf("norm Q %d: %f\n", igpu+1, cpu_gpu_sdiff(ni_loc[igpu+1], ib, hQ(igpu+1, ind), n2_loc, dQ(igpu+1, ind), n2_loc));
#endif
                    }
                    if (n12 != 0) {
#ifdef CHECK_CPU
                        blasf77_sgemm("N", "N", &ni_loc[igpu], &ib, &n12, &d_one, hQ2(igpu), &n1_loc,
                                      hS(igpu,ind%2), &n12, &d_zero, hQ(igpu, ind%2), &n1_loc);
#endif
                        magma_setdevice(igpu);
                        magmablasSetKernelStream(queues[igpu][ind]);
                        magma_sgemm(MagmaNoTrans, MagmaNoTrans, ni_loc[igpu], ib, n12, d_one, dQ2(igpu), n1_loc,
                                    dS(igpu, ind), n12, d_zero, dQ(igpu, ind), n1_loc);
#ifdef CHECK_CPU
                        printf("norm Q %d: %f\n", igpu, cpu_gpu_sdiff(ni_loc[igpu], ib, hQ(igpu, ind), n1_loc, dQ(igpu, ind), n1_loc));
#endif
                    }
                }
                // Download the result panel back into Q (non-async
                // magma_sgetmatrix; an async variant is left commented out).
                for (igpu = 0; igpu < ngpu-1; igpu += 2) {
                    if (n23 != 0) {
                        magma_setdevice(igpu+1);
                        magma_sgetmatrix( ni_loc[igpu+1], ib, dQ(igpu+1, ind), n2_loc,
                                          Q(n1+n2_loc*(igpu/2),iil-1+i), ldq );
                        // magma_sgetmatrix_async( ni_loc[igpu+1], ib, dQ(igpu+1, ind), n2_loc,
                        //                         Q(n1+n2_loc*(igpu/2),iil-1+i), ldq, queues[igpu+1][ind] );
                    }
                    if (n12 != 0) {
                        magma_setdevice(igpu);
                        magma_sgetmatrix( ni_loc[igpu], ib, dQ(igpu, ind), n1_loc,
                                          Q(n1_loc*(igpu/2),iil-1+i), ldq );
                        // magma_sgetmatrix_async( ni_loc[igpu], ib, dQ(igpu, ind), n1_loc,
                        //                         Q(n1_loc*(igpu/2),iil-1+i), ldq, queues[igpu][ind] );
                    }
                }
            }
            // Drain both queues on every device (and release the debug
            // host buffers when CHECK_CPU is enabled).
            for (igpu = 0; igpu < ngpu; ++igpu) {
#ifdef CHECK_CPU
                magma_free_pinned( hwS[1][igpu] );
                magma_free_pinned( hwS[0][igpu] );
                magma_free_pinned( hwQ2[igpu] );
                magma_free_pinned( hwQ[1][igpu] );
                magma_free_pinned( hwQ[0][igpu] );
#endif
                magma_setdevice(igpu);
                magma_queue_sync( queues[igpu][0] );
                magma_queue_sync( queues[igpu][1] );
            }
            // Blocks with nothing to multiply are simply zeroed.
            if ( n23 == 0 )
                lapackf77_slaset("A", &n2, &rk, &d_zero, &d_zero, Q(n1,iil-1), &ldq);

            if ( n12 == 0 )
                lapackf77_slaset("A", &n1, &rk, &d_zero, &d_zero, Q(0,iil-1), &ldq);
        }
    }
    timer_stop( time );
    timer_printf( "gemms = %6.2f\n", time );

    // Restore the device and kernel stream that were active on entry.
    magma_setdevice( orig_dev );
    magmablasSetKernelStream( orig_stream );

    return *info;
} /* magma_slaex3_m */
/// Returns a value with the magnitude of target and the sign of source.
///
/// The result is returned at full double precision. The previous
/// implementation cast the result to float, which discarded precision and
/// turned magnitudes outside the float range into infinity or zero.
double FPEnvironmentImpl::copySignImpl(double target, double source)
{
	return copysign(target, source);
}
/*
 * Fused multiply-add: compute x * y + z with a single rounding error.
 *
 * The operands are split with frexp(), the product is formed exactly as a
 * two-part sum using Dekker's algorithm, z is added in the same extended
 * form, and the result is rounded once in the caller's rounding mode.
 * Cases where the product and z are so far apart in magnitude that the
 * scaling would overflow are handled separately per rounding mode.
 *
 * Changes relative to the previous revision of this function:
 *   - The zero-product check now precedes the z == 0 check, so that the
 *     sign of a zero result follows x * y + z (e.g. fma(-0.0, 1.0, +0.0)
 *     yields +0.0 in round-to-nearest instead of -0.0).
 *   - The subnormal path uses DBL_MIN (2^-1022) on every platform; the
 *     Symbian branch previously used 0, which disabled the double-rounding
 *     protection that path exists for.
 */
EXPORT_C
double
fma(double x, double y, double z)
{
#ifndef __SYMBIAN32__
	static const double split = 0x1p27 + 1.0;
#else
	/* 2^27 + 1, spelled in decimal because hex floats are unavailable. */
	static const double split = 134217729;
#endif //__SYMBIAN32__
	double xs, ys, zs;
	double c, cc, hx, hy, p, q, tx, ty;
	double r, rr, s;
	int oround;
	int ex, ey, ez;
	int spread;

	/*
	 * Handle special cases.  The order matters: an exactly-zero product
	 * must still be added to z so that signed zeros combine correctly.
	 */
	if (x == 0.0 || y == 0.0)
		return (x * y + z);
	if (z == 0.0)
		return (x * y);

	/* Results of frexp() are undefined for these cases. */
	if (!isfinite(x) || !isfinite(y) || !isfinite(z))
		return (x * y + z);

	xs = frexp(x, &ex);
	ys = frexp(y, &ey);
	zs = frexp(z, &ez);
	oround = fegetround();
	spread = ex + ey - ez;

	/*
	 * If x * y and z are many orders of magnitude apart, the scaling
	 * will overflow, so we handle these cases specially.  Rounding
	 * modes other than FE_TONEAREST are painful.
	 */
	if (spread > DBL_MANT_DIG * 2) {
		fenv_t env;
		feraiseexcept(FE_INEXACT);
		switch(oround) {
		case FE_TONEAREST:
			return (x * y);
		case FE_TOWARDZERO:
			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
				return (x * y);
			feholdexcept(&env);
			r = x * y;
			if (!fetestexcept(FE_INEXACT))
				r = nextafter(r, 0);
			feupdateenv(&env);
			return (r);
		case FE_DOWNWARD:
			if (z > 0.0)
				return (x * y);
			feholdexcept(&env);
			r = x * y;
			if (!fetestexcept(FE_INEXACT))
				r = nextafter(r, -INFINITY);
			feupdateenv(&env);
			return (r);
		default:	/* FE_UPWARD */
			if (z < 0.0)
				return (x * y);
			feholdexcept(&env);
			r = x * y;
			if (!fetestexcept(FE_INEXACT))
				r = nextafter(r, INFINITY);
			feupdateenv(&env);
			return (r);
		}
	}
	if (spread < -DBL_MANT_DIG) {
		/* The product is negligible next to z. */
		feraiseexcept(FE_INEXACT);
		if (!isnormal(z))
			feraiseexcept(FE_UNDERFLOW);
		switch (oround) {
		case FE_TONEAREST:
			return (z);
		case FE_TOWARDZERO:
			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
				return (z);
			else
				return (nextafter(z, 0));
		case FE_DOWNWARD:
			if (x > 0.0 ^ y < 0.0)
				return (z);
			else
				return (nextafter(z, -INFINITY));
		default:	/* FE_UPWARD */
			if (x > 0.0 ^ y < 0.0)
				return (nextafter(z, INFINITY));
			else
				return (z);
		}
	}

	/*
	 * Use Dekker's algorithm to perform the multiplication and
	 * subsequent addition in twice the machine precision.
	 * Arrange so that x * y = c + cc, and x * y + z = r + rr.
	 */
	fesetround(FE_TONEAREST);

	/* Split xs and ys into high and low halves. */
	p = xs * split;
	hx = xs - p;
	hx += p;
	tx = xs - hx;

	p = ys * split;
	hy = ys - p;
	hy += p;
	ty = ys - hy;

	p = hx * hy;
	q = hx * ty + tx * hy;
	c = p + q;
	cc = p - c + q + tx * ty;

	/* Bring z to the scale of the product and add it with error term. */
	zs = ldexp(zs, -spread);
	r = c + zs;
	s = r - c;
	rr = (c - (r - s)) + (zs - s) + cc;

	spread = ex + ey;
	if (spread + ilogb(r) > -1023) {
		fesetround(oround);
		r = r + rr;
	} else {
		/*
		 * The result is subnormal, so we round before scaling to
		 * avoid double rounding.
		 */
		/* DBL_MIN is 2^-1022, the smallest normal double. */
		p = ldexp(copysign(DBL_MIN, r), -spread);
		c = r + p;
		s = c - r;
		cc = (r - (c - s)) + (p - s) + rr;
		fesetround(oround);
		r = (c + cc) - p;
	}
	return (ldexp(r, spread));
}
/* Build the key under which a code constant is looked up.
 *
 * The key is always a tuple starting with (type(op), op).  A third element
 * is appended where plain equality would merge constants that must stay
 * distinct:
 *   - float:     None for negative zero.
 *   - complex:   True / False / None depending on which parts are
 *                negative zero.
 *   - tuple:     a tuple of the keys of the items.
 *   - frozenset: a frozenset of the keys of the items.
 *   - anything else not listed in the first test: the object's address.
 *
 * Returns a new reference, or NULL if an allocation or a recursive call
 * failed.
 */
PyObject*
_PyCode_ConstantKey(PyObject *op)
{
    /* Py_None and Py_Ellipsis are singleton */
    if (op == Py_None || op == Py_Ellipsis
        || PyLong_CheckExact(op)
        || PyBool_Check(op)
        || PyBytes_CheckExact(op)
        || PyUnicode_CheckExact(op)
        /* code_richcompare() uses _PyCode_ConstantKey() internally */
        || PyCode_Check(op))
    {
        return PyTuple_Pack(2, Py_TYPE(op), op);
    }

    if (PyFloat_CheckExact(op)) {
        const double value = PyFloat_AS_DOUBLE(op);

        /* Only negative zero needs an extra tag; every other float is
         * already distinguished by its value. */
        if (value == 0.0 && copysign(1.0, value) < 0.0) {
            return PyTuple_Pack(3, Py_TYPE(op), op, Py_None);
        }
        return PyTuple_Pack(2, Py_TYPE(op), op);
    }

    if (PyComplex_CheckExact(op)) {
        /* complex(x, 0.) must differ from complex(x, -0.) and
         * complex(0., y) from complex(-0., y), for any x and y, so all four
         * complex zeros end up with different keys. */
        const Py_complex c = PyComplex_AsCComplex(op);
        const int re_negzero = c.real == 0.0 && copysign(1.0, c.real) < 0.0;
        const int im_negzero = c.imag == 0.0 && copysign(1.0, c.imag) < 0.0;
        PyObject *sign_tag;

        if (!re_negzero && !im_negzero) {
            return PyTuple_Pack(2, Py_TYPE(op), op);
        }

        /* True, False and None serve as tags for the sign combination. */
        if (re_negzero && im_negzero) {
            sign_tag = Py_True;
        }
        else if (im_negzero) {
            sign_tag = Py_False;
        }
        else {
            sign_tag = Py_None;
        }
        return PyTuple_Pack(3, Py_TYPE(op), op, sign_tag);
    }

    if (PyTuple_CheckExact(op)) {
        const Py_ssize_t count = PyTuple_GET_SIZE(op);
        PyObject *item_keys = PyTuple_New(count);
        PyObject *result;
        Py_ssize_t idx;

        if (item_keys == NULL) {
            return NULL;
        }

        for (idx = 0; idx < count; idx++) {
            PyObject *sub_key = _PyCode_ConstantKey(PyTuple_GET_ITEM(op, idx));
            if (sub_key == NULL) {
                Py_DECREF(item_keys);
                return NULL;
            }
            /* item_keys takes over the reference to sub_key. */
            PyTuple_SET_ITEM(item_keys, idx, sub_key);
        }

        result = PyTuple_Pack(3, Py_TYPE(op), op, item_keys);
        Py_DECREF(item_keys);
        return result;
    }

    if (PyFrozenSet_CheckExact(op)) {
        Py_ssize_t pos = 0;
        Py_ssize_t i = 0;
        const Py_ssize_t len = PySet_GET_SIZE(op);
        PyObject *entry;
        Py_hash_t entry_hash;
        PyObject *item_keys, *key_set, *result;

        item_keys = PyTuple_New(len);
        if (item_keys == NULL) {
            return NULL;
        }

        while (_PySet_NextEntry(op, &pos, &entry, &entry_hash)) {
            PyObject *sub_key = _PyCode_ConstantKey(entry);
            if (sub_key == NULL) {
                Py_DECREF(item_keys);
                return NULL;
            }

            assert(i < len);
            PyTuple_SET_ITEM(item_keys, i, sub_key);
            i++;
        }

        key_set = PyFrozenSet_New(item_keys);
        Py_DECREF(item_keys);
        if (key_set == NULL) {
            return NULL;
        }

        result = PyTuple_Pack(3, Py_TYPE(op), op, key_set);
        Py_DECREF(key_set);
        return result;
    }

    /* for other types, use the object identifier as a unique identifier
     * to ensure that they are seen as unequal. */
    {
        PyObject *result;
        PyObject *obj_id = PyLong_FromVoidPtr(op);

        if (obj_id == NULL) {
            return NULL;
        }

        result = PyTuple_Pack(3, Py_TYPE(op), op, obj_id);
        Py_DECREF(obj_id);
        return result;
    }
}
/*
 * Complex square root of z.
 *
 * Zeros, infinities and a NaN real part are dispatched first; everything
 * else goes through Algorithm 312 (CACM vol 10, Oct 1967), with the input
 * pre-scaled by 1/4 (and the result doubled) when either component is at
 * least THRESH in magnitude, to avoid overflow.
 */
double complex
csqrt(double complex z)
{
	double re = creal(z);
	double im = cimag(z);
	double root;
	double complex w;
	int scaled;

	/* Handle special cases. */
	if (z == 0)
		return (cpack(0, im));
	if (isinf(im))
		return (cpack(INFINITY, im));
	if (isnan(re)) {
		/* raise invalid if im is not a NaN */
		root = (im - im) / (im - im);
		/* return NaN + NaN i */
		return (cpack(re, root));
	}
	if (isinf(re)) {
		/*
		 * csqrt(inf + NaN i)  = inf +  NaN i
		 * csqrt(inf + y i)    = inf +  0 i
		 * csqrt(-inf + NaN i) = NaN +- inf i
		 * csqrt(-inf + y i)   = 0   +  inf i
		 */
		if (signbit(re))
			return (cpack(fabs(im - im), copysign(re, im)));
		return (cpack(re, copysign(im - im, im)));
	}
	/*
	 * The remaining special case (im is NaN) is handled just fine by
	 * the normal code path below.
	 */

	/* Scale to avoid overflow. */
	scaled = (fabs(re) >= THRESH || fabs(im) >= THRESH);
	if (scaled) {
		re *= 0.25;
		im *= 0.25;
	}

	/* Algorithm 312, CACM vol 10, Oct 1967. */
	if (re >= 0) {
		root = sqrt((re + hypot(re, im)) * 0.5);
		w = cpack(root, im / (2 * root));
	} else {
		root = sqrt((-re + hypot(re, im)) * 0.5);
		w = cpack(fabs(im) / (2 * root), copysign(root, im));
	}

	/* Rescale. */
	if (!scaled)
		return (w);
	return (w * 2);
}
#if __MINGW32__ #include <math.h> #include <time.h> #include <sys/time.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <ctype.h> #include <wchar.h> #include <float.h> #include <assert.h> static double zero = 0; double Port::nan = copysign(NAN, 1.0); double Port::infinity = 1 / zero; double Port::dbl_max = 1.7976931348623157e308; double Port::dbl_min = 5e-324; longdouble Port::ldbl_max = LDBL_MAX; struct PortInitializer { PortInitializer(); }; static PortInitializer portinitializer; PortInitializer::PortInitializer() { assert(!signbit(Port::nan));
/* Checks a list of identities involving sign manipulation inside math
   builtins.  Each `if` compares an expression with an equivalent form and
   calls link_error () when the two differ.
   NOTE(review): presumably link_error is left undefined so that any
   comparison the optimizer fails to fold away breaks the link -- confirm
   against the test harness.  The exact spelling of every expression is
   therefore significant and must not be "simplified".  */
void test2(double x, double y)
{
  /* Negating tan/sin of a difference equals tan/sin of the reversed
     difference.  */
  if (-tan(x-y) != tan(y-x)) link_error ();
  if (-sin(x-y) != sin(y-x)) link_error ();

  /* cos of an argument equals cos of the same argument with sign changes
     removed, including through products, quotients, fabs, conditionals and
     comma expressions.  */
  if (cos(-x*y) != cos(x*y)) link_error ();
  if (cos(x*-y) != cos(x*y)) link_error ();
  if (cos(-x/y) != cos(x/y)) link_error ();
  if (cos(x/-y) != cos(x/y)) link_error ();
  if (cos(-fabs(tan(x/-y))) != cos(tan(x/y))) link_error ();
  if (cos(y<10 ? -x : y) != cos(y<10 ? x : y)) link_error ();
  if (cos(y<10 ? x : -y) != cos(y<10 ? x : y)) link_error ();
  if (cos(y<10 ? -fabs(x) : tan(x<20 ? -x : -fabs(y))) != cos(y<10 ? x : tan(x<20 ? x : y))) link_error ();
  /* The comma-expression forms carry a side effect (y*=...) on both sides
     of the comparison.  */
  if (cos((y*=3, -x)) != cos((y*=3,x))) link_error ();
  if (cos((y*=2, -fabs(tan(x/-y)))) != cos((y*=2,tan(x/y)))) link_error ();
  if (cos(copysign(x,y)) != cos(x)) link_error ();
  if (cos(copysign(-fabs(x),y*=2)) != cos((y*=2,x))) link_error ();

  /* hypot with a zero or repeated operand, and with sign changes or fabs
     applied to its operands.  */
  if (hypot (x, 0) != fabs(x)) link_error ();
  if (hypot (0, x) != fabs(x)) link_error ();
  if (hypot (x, x) != fabs(x) * __builtin_sqrt(2)) link_error ();
  if (hypot (-x, y) != hypot (x, y)) link_error ();
  if (hypot (x, -y) != hypot (x, y)) link_error ();
  if (hypot (-x, -y) != hypot (x, y)) link_error ();
  if (hypot (fabs(x), y) != hypot (x, y)) link_error ();
  if (hypot (x, fabs(y)) != hypot (x, y)) link_error ();
  if (hypot (fabs(x), fabs(y)) != hypot (x, y)) link_error ();
  if (hypot (-fabs(-x), -fabs(fabs(fabs(-y)))) != hypot (x, y)) link_error ();
  if (hypot (-x, 0) != fabs(x)) link_error ();
  if (hypot (-x, x) != fabs(x) * __builtin_sqrt(2)) link_error ();
  if (hypot (pure(x), -pure(x)) != fabs(pure(x)) * __builtin_sqrt(2)) link_error ();
  if (hypot (tan(-x), tan(-fabs(y))) != hypot (tan(x), tan(y))) link_error ();

  /* Nested fmin/fmax where the outer operand repeats an inner one.  */
  if (fmin (fmax(x,y),y) != y) link_error ();
  if (fmin (fmax(y,x),y) != y) link_error ();
  if (fmin (x,fmax(x,y)) != x) link_error ();
  if (fmin (x,fmax(y,x)) != x) link_error ();
  if (fmax (fmin(x,y),y) != y) link_error ();
  if (fmax (fmin(y,x),y) != y) link_error ();
  if (fmax (x,fmin(x,y)) != x) link_error ();
  if (fmax (x,fmin(y,x)) != x) link_error ();

  /* Complex operands: double negation, and negation combined with the
     GNU `~` operator on complex values.  */
  if ((__complex__ double) x != -(__complex__ double) (-x)) link_error ();
  if (x*1i != -(-x*1i)) link_error ();
  if (x+(x-y)*1i != -(-x+(y-x)*1i)) link_error ();
  if (x+(x-y)*1i != -(-x-(x-y)*1i)) link_error ();
  if (ccos(tan(x)+sin(y)*1i) != ccos(-tan(-x)+-sin(-y)*1i)) link_error ();
  if (ccos(tan(x)+sin(x-y)*1i) != ccos(-tan(-x)-sin(y-x)*1i)) link_error ();
  if (-5+x*1i != -~(5+x*1i)) link_error ();
  if (tan(x)+tan(y)*1i != -~(tan(-x)+tan(y)*1i)) link_error ();
}
// Returns true when x is a NaN whose sign bit is set.
bool _Stl_is_neg_nan(double x) {
  if (!isnan(x)) {
    return false;
  }
  // copysign transfers the sign of x onto 1.0, which can then be compared.
  return copysign(1., x) < 0;
}
// bool _Stl_is_neg_inf(double x) { return _class(x) == FP_MINUS_INF; } bool _Stl_is_neg_inf(double x) { return _Stl_is_inf(x) && ( copysign(1., x) < 0 ); }
// Returns true when x is a NaN whose sign bit is set.
inline bool _Stl_is_neg_nan(double x) {
  // The sign is only examined once x is known to be a NaN.
  const bool sign_is_negative = isnan(x) ? (copysign(1., x) < 0) : false;
  return sign_is_negative;
}
/* Print MSG and return 1 when FAILED is nonzero; return 0 otherwise.
   Used to count failed checks in main() below.  */
static int
report_failure (int failed, const char *msg)
{
  if (!failed)
    return 0;
  puts (msg);
  return 1;
}

/*
 * Checks the result and the raised floating-point exception of
 * nextafterf / nextafter / nextafterl at the edges of each type's range:
 *   - stepping from +-MAX towards +-infinity must yield the infinity and
 *     raise FE_OVERFLOW;
 *   - stepping from +-MIN towards zero, from +-0 outwards, and from the
 *     smallest subnormal back to zero must stay inside (-MIN, MIN) and
 *     raise FE_UNDERFLOW.
 *
 * `zero` and `inf` are objects defined elsewhere in this file.
 * Returns the number of failed checks (0 on success).
 *
 * Every failure message carries a '+' or '-' suffix naming the direction
 * under test; the negative overflow checks for double and long double
 * previously printed an unsuffixed label.
 */
int
main (void)
{
  int result = 0;

  /* ---- float ---- */
  float i = INFINITY;
  float m = FLT_MAX;

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafterf (m, i) != i, "nextafterf+ failed");
  result += report_failure (fetestexcept (FE_OVERFLOW) == 0,
                            "nextafterf+ did not overflow");

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafterf (-m, -i) != -i, "nextafterf- failed");
  result += report_failure (fetestexcept (FE_OVERFLOW) == 0,
                            "nextafterf- did not overflow");

  i = 0;
  m = FLT_MIN;
  feclearexcept (FE_ALL_EXCEPT);
  i = nextafterf (m, i);
  result += report_failure (i < 0 || i >= FLT_MIN, "nextafterf+ failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterf+ did not underflow");

  i = 0;
  feclearexcept (FE_ALL_EXCEPT);
  i = nextafterf (-m, -i);
  result += report_failure (i > 0 || i <= -FLT_MIN, "nextafterf- failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterf- did not underflow");

  i = -INFINITY;
  feclearexcept (FE_ALL_EXCEPT);
  m = nextafterf (zero, inf);
  result += report_failure (m < 0.0 || m >= FLT_MIN, "nextafterf+ failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterf+ did not underflow");

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafterf (m, i) != 0.0, "nextafterf+ failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterf+ did not underflow");

  feclearexcept (FE_ALL_EXCEPT);
  m = nextafterf (copysignf (zero, -1.0), -inf);
  result += report_failure (m > 0.0 || m <= -FLT_MIN, "nextafterf- failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterf- did not underflow");

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafterf (m, -i) != 0.0, "nextafterf- failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterf- did not underflow");

  /* ---- double ---- */
  double di = INFINITY;
  double dm = DBL_MAX;

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafter (dm, di) != di, "nextafter+ failed");
  result += report_failure (fetestexcept (FE_OVERFLOW) == 0,
                            "nextafter+ did not overflow");

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafter (-dm, -di) != -di, "nextafter- failed");
  result += report_failure (fetestexcept (FE_OVERFLOW) == 0,
                            "nextafter- did not overflow");

  di = 0;
  dm = DBL_MIN;
  feclearexcept (FE_ALL_EXCEPT);
  di = nextafter (dm, di);
  result += report_failure (di < 0 || di >= DBL_MIN, "nextafter+ failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafter+ did not underflow");

  di = 0;
  feclearexcept (FE_ALL_EXCEPT);
  di = nextafter (-dm, -di);
  result += report_failure (di > 0 || di <= -DBL_MIN, "nextafter- failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafter- did not underflow");

  di = -INFINITY;
  feclearexcept (FE_ALL_EXCEPT);
  dm = nextafter (zero, inf);
  result += report_failure (dm < 0.0 || dm >= DBL_MIN, "nextafter+ failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafter+ did not underflow");

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafter (dm, di) != 0.0, "nextafter+ failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafter+ did not underflow");

  feclearexcept (FE_ALL_EXCEPT);
  dm = nextafter (copysign (zero, -1.0), -inf);
  result += report_failure (dm > 0.0 || dm <= -DBL_MIN, "nextafter- failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafter- did not underflow");

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafter (dm, -di) != 0.0, "nextafter- failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafter- did not underflow");

#ifndef NO_LONG_DOUBLE
  /* ---- long double ---- */
  long double li = INFINITY;
  long double lm = LDBL_MAX;

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafterl (lm, li) != li, "nextafterl+ failed");
  result += report_failure (fetestexcept (FE_OVERFLOW) == 0,
                            "nextafterl+ did not overflow");

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafterl (-lm, -li) != -li, "nextafterl- failed");
  result += report_failure (fetestexcept (FE_OVERFLOW) == 0,
                            "nextafterl- did not overflow");

  li = 0;
  lm = LDBL_MIN;
  feclearexcept (FE_ALL_EXCEPT);
  li = nextafterl (lm, li);
  result += report_failure (li < 0 || li >= LDBL_MIN, "nextafterl+ failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterl+ did not underflow");

  li = 0;
  feclearexcept (FE_ALL_EXCEPT);
  li = nextafterl (-lm, -li);
  result += report_failure (li > 0 || li <= -LDBL_MIN, "nextafterl- failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterl- did not underflow");

  li = -INFINITY;
  feclearexcept (FE_ALL_EXCEPT);
  lm = nextafterl (zero, inf);
  result += report_failure (lm < 0.0 || lm >= LDBL_MIN, "nextafterl+ failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterl+ did not underflow");

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafterl (lm, li) != 0.0, "nextafterl+ failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterl+ did not underflow");

  feclearexcept (FE_ALL_EXCEPT);
  lm = nextafterl (copysign (zero, -1.0), -inf);
  result += report_failure (lm > 0.0 || lm <= -LDBL_MIN, "nextafterl- failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterl- did not underflow");

  feclearexcept (FE_ALL_EXCEPT);
  result += report_failure (nextafterl (lm, -li) != 0.0, "nextafterl- failed");
  result += report_failure (fetestexcept (FE_UNDERFLOW) == 0,
                            "nextafterl- did not underflow");
#endif

  return result;
}
int CDataset::extractFeatures(const CConfig& conf){ int imgRow = this->img.at(0)->rows, imgCol = this->img.at(0)->cols; cv::Mat *integralMat; if(conf.learningMode != 1){ if(conf.rgbFeature == 1){ // if got rgb image only, calc hog feature feature.clear(); feature.resize(32); for(int i = 0; i < 32; ++i) feature.at(i) = new cv::Mat(imgRow, imgCol, CV_8UC1); cv::cvtColor(*img.at(0), *(feature.at(0)), CV_RGB2GRAY); cv::Mat I_x(imgRow, imgCol, CV_16SC1); cv::Mat I_y(imgRow, imgCol, CV_16SC1); cv::Sobel(*(feature.at(0)), I_x, CV_16S, 1, 0); cv::Sobel(*(feature.at(0)), I_y, CV_16S, 0, 1); cv::convertScaleAbs(I_x, *(feature[3]), 0.25); cv::convertScaleAbs(I_y, *(feature[4]), 0.25); // Orientation of gradients for(int y = 0; y < img.at(0)->rows; y++) for(int x = 0; x < img.at(0)->cols; x++) { // Avoid division by zero float tx = (float)I_x.at<short>(y, x) + (float)copysign(0.000001f, I_x.at<short>(y, x)); // Scaling [-pi/2 pi/2] -> [0 80*pi] feature.at(1)->at<uchar>(y, x) = (uchar)(( atan((float)I_y.at<short>(y, x) / tx) + 3.14159265f / 2.0f ) * 80); //std::cout << "scaling" << std::endl; feature.at(2)->at<uchar>(y, x) = (uchar)sqrt((float)I_x.at<short>(y, x)* (float)I_x.at<short>(y, x) + (float)I_y.at<short>(y, x) * (float)I_y.at<short>(y, x)); } // Magunitude of gradients for(int y = 0; y < img.at(0)->rows; y++) for(int x = 0; x < img.at(0)->cols; x++ ) { feature.at(2)->at<uchar>(y, x) = (uchar)sqrt(I_x.at<short>(y, x)*I_x.at<short>(y, x) + I_y.at<short>(y, x) * I_y.at<short>(y, x)); } hog.extractOBin(feature[1], feature[2], feature, 7); // calc I_xx I_yy cv::Sobel(*(feature.at(0)), I_x, CV_16S, 2, 0); cv::Sobel(*(feature.at(0)), I_y, CV_16S, 0, 2); cv::convertScaleAbs(I_x, *(feature[5]), 0.25); cv::convertScaleAbs(I_y, *(feature[6]), 0.25); cv::Mat img_Lab; cv::cvtColor(*img.at(0), img_Lab, CV_RGB2Lab); cv::vector<cv::Mat> tempfeature(3); cv::split(img_Lab, tempfeature); for(int i = 0; i < 3; ++i) tempfeature.at(i).copyTo(*(feature.at(i))); // min max filter for(int c = 
0; c < 16; ++c) minFilter(feature[c], feature[c + 16], 5); for(int c = 0; c < 16; ++c) maxFilter(feature[c], feature[c], 5); }else{ feature.clear(); // calc gray integral image cv::Mat grayImg(imgRow + 1, imgCol, CV_8U); cv::cvtColor(*img.at(0), grayImg, CV_RGB2GRAY); integralMat = new cv::Mat(imgRow + 1, imgCol + 1, CV_64F); cv::integral(grayImg, *integralMat, CV_64F); feature.push_back(integralMat); // calc r g b integral image std::vector<cv::Mat> splittedRgb; cv::split(*img.at(0), splittedRgb); for(int i = 0; i < splittedRgb.size(); ++i){ integralMat = new cv::Mat(imgRow + 1, imgCol + 1, CV_64F); cv::integral(splittedRgb.at(i), *integralMat, CV_64F); feature.push_back(integralMat); } featureFlag = 1; } } if(img.size() > 1){ cv::Mat tempDepth = cv::Mat(img.at(0)->rows, img.at(0)->cols, CV_8U);// = *img.at(1); if(img.at(1)->type() != CV_8U) img.at(1)->convertTo(tempDepth, CV_8U, 255.0 / (double)(conf.maxdist - conf.mindist)); else tempDepth = *img.at(1); integralMat = new cv::Mat(imgRow + 1, imgCol + 1, CV_64F); cv::integral(tempDepth, *integralMat, CV_64F); feature.push_back(integralMat); featureFlag = 1; } return 0; }
int main(int ac, char** av) { //Variables to be assigned by program options double height_map_resolution; double north_bound; double south_bound; double east_bound; double west_bound; int sun_angles; int times_per_year; int start_day; double summer_angle_panel; double winter_angle_panel; bool use_terrain_normals = true; bool compute_best_fixed_angle = false; bool compute_best_summer_winter_angle = false; std::string input_file; std::string output_file; bool verbose = false; bool elevation_dependant_sun_intensity = false; // Declare the supported options. po::options_description op_desc("Allowed options"); op_desc.add_options() ("help", "print options table") ("input-file,i", po::value<std::string>(&input_file), "File containing height map data in x<space>y<space>z<newline> swiss coordinates format") ("output-file-base,o", po::value<std::string>(&output_file)->default_value("data_out"), "Base filname for output files (default data_out)") ("resolution,R", po::value<double>(&height_map_resolution)->default_value(200.0), "resolution of data (default: 200.0)") ("nmax",po::value<double>(&north_bound)->default_value(1e100), "maximum north coordinate to be treated (default 1e100)") ("nmin",po::value<double>(&south_bound)->default_value(-1e100), "minimum north coordinate to be treated (default -1e100)") ("emax",po::value<double>(&east_bound)->default_value(1e100), "maximum east coordinate to be treated (default 1e100)") ("emin",po::value<double>(&west_bound)->default_value(-1e100), "minimum east coordinate to be treated (default -1e100)") ("elevation,E", "Include effect of terrain elevation in the computation of sun intensity") ("sunangles,s", po::value<int>(&sun_angles)->default_value(360), "number of angles to compute sunlight from (default 360)") ("times,t", po::value<int>(×_per_year)->default_value(12), "number of times per year to calculate at (default 12)") ("start-day, D", po::value<int>(&start_day)->default_value(20), "Day of the year to output first data. 
(default 20)") ("fixed-angle, F", po::value<double>(&summer_angle_panel), "Use fixed angle for solar panel inclination to compute sun intensity.") ("summer-angle", po::value<double>(&summer_angle_panel), "Fixed angle for solar panel inclination during summer to compute sun intensity..") ("winter-angle", po::value<double>(&winter_angle_panel), "Fixed angle for solar panel inclination during winter to compute sun intensity..") ("best-fixed-angle, B", "Use best fixed angle for this latitude to compute sun intensity.") ("best-two-season-angles, BSW", "Use best summer and winter angle for this latitude to compute sun intensity.") ("terrain-normals, T", "Use terrain normal for each point to compute sun intensity, default option if none of the solar panel angles options are specified.") ("verbose, v", "Verbose: output lots of text") ; po::positional_options_description pd; pd.add("input-file", 1).add("output-file", 1); po::variables_map vm; po::store(po::parse_command_line(ac, av, op_desc), vm); po::notify(vm); if (vm.count("help")) { std::cout <<"CrunchGeoData [options] [input file] [output base]"<<std::endl<< op_desc << std::endl; return 1; } if(vm.count("fixed-angle")){ winter_angle_panel = summer_angle_panel; use_terrain_normals = false; } if(vm.count("summer-angle") && !vm.count("winter-angle")){ std::cout << "Please specify also a winter angle, or use option fixed-angle" << std::endl; exit(255); } if(vm.count("best-fixed-angle")){ use_terrain_normals = false; compute_best_fixed_angle = true; } if(vm.count("best-two-season-angles")){ use_terrain_normals = false; compute_best_summer_winter_angle = true; } if (vm.count("elevation")){ elevation_dependant_sun_intensity = true; } if (vm.count("verbose")){ verbose = true; } if(!vm.count("input-file")){ std::cout << "Input file must be specified" << std::endl; exit(255); } std::unordered_map<vector3d, std::vector<double>, hash> grid_points; //grid_points is unordered_map of all points in bounding box with //a vector to hold 
average sun power for the days sun is computed double north_x_max; double east_y_max; double south_x_min; double west_y_min; import_heightmap(input_file, south_bound, north_bound, east_bound, west_bound, times_per_year, grid_points ,north_x_max, south_x_min, east_y_max, west_y_min); std::pair<double,double> NE = swiss_to_lat_lon(north_x_max+height_map_resolution/2.0, east_y_max+height_map_resolution/2.0); std::pair<double,double> SW = swiss_to_lat_lon(south_x_min-height_map_resolution/2.0, west_y_min-height_map_resolution/2.0); std::cout << "NE: " << north_x_max <<", " << east_y_max << " SW: "<< south_x_min << " , " << west_y_min << std::endl; std::cout << std::setprecision(9) << "NE: " << NE.first <<", " << NE.second << " SW: "<< SW.first << ", " << SW.second << std::endl; std::cout << "Number of points in dataset: " << grid_points.size() << std::endl; double average_latitude=((NE.first+SW.first)/2.0)*M_PI/180.0; vector3d summer_normal_panel; vector3d winter_normal_panel; if(!use_terrain_normals){ if(compute_best_fixed_angle){ } if(compute_best_summer_winter_angle){ } if(average_latitude > 0){ //if latitude is greater than zero, point south summer_normal_panel.x = -1*sin(summer_angle_panel*M_PI/180); summer_normal_panel.y = 0; summer_normal_panel.z = 1*cos(summer_angle_panel*M_PI/180); winter_normal_panel.x = -1*sin(winter_angle_panel*M_PI/180); winter_normal_panel.y = 0; winter_normal_panel.z = 1*cos(winter_angle_panel*M_PI/180); } else{ //if latitude is less than zero, point north summer_normal_panel.x = 1*sin(summer_angle_panel*M_PI/180); summer_normal_panel.y = 0; summer_normal_panel.z = 1*cos(summer_angle_panel*M_PI/180); winter_normal_panel.x = 1*sin(winter_angle_panel*M_PI/180); winter_normal_panel.y = 0; winter_normal_panel.z = 1*cos(winter_angle_panel*M_PI/180); } } std::vector<std::vector<double> > sun_elevation_angle(times_per_year,std::vector<double>(sun_angles)); //Elevation angle PHI of sun for a day and hour std::vector<std::vector<double> > 
sun_intensity_day_angle(times_per_year,std::vector<double>(sun_angles)); std::vector<std::vector <vector3d> > sun_vec_day_angle(times_per_year,std::vector<vector3d>(sun_angles)); //Unit vectors for sun's direction in day, hour. for(int day = 0; day < times_per_year; day++){ double N=(365.0*day)/times_per_year+start_day; //We are computing values for the Nth day of the year double phi_axis=-asin(0.39779*cos((0.98565*(N+10)+1.914*sin(0.98565*(N-2)*M_PI/180))*M_PI/180)); //Earth axis inclination for day N for(int thH=0;thH<sun_angles;thH++){//Angle theta for sun; 0 is North, +90 is West. int indTH= thH; double thHrad = (180-thH)*M_PI/180.0; //turn around for sun formula angle double phiElv=asin(sin(average_latitude)*sin(phi_axis)+cos(average_latitude)*cos(thHrad)*cos(phi_axis)); //Sun elevation, imperical fomula, see wikipedia thHrad = M_PI-thHrad; //In our coordinate system (North x+, West y+) we must inverse the sun theta. Theta corresponds to sun ray direction. vector3d sun_vec(cos(phiElv) * cos(thHrad) , cos(phiElv) * sin(thHrad) , sin(phiElv)); //Unit vector for direction sun ray come from sun_elevation_angle[(int)day][indTH]=phiElv; //stick sun elevation in its datastructure sun_vec_day_angle[(int)day][indTH]=sun_vec; //stick sun vector in its datastructure double air_mass_coefficient =sqrt(708.0*708.0*sin(phiElv)*sin(phiElv)+2.0*708.0+1.0)-708.0*sin(phiElv); //wikipedia sun_intensity_day_angle[(int)day][indTH]=pow(0.7,pow(air_mass_coefficient,0.678))/0.7; //stick sun intensity in its datastructure } } std::vector<double> max_sun_intensity; max_sun_intensity.assign(times_per_year, 0); std::vector<double> min_sun_intensity; min_sun_intensity.assign(times_per_year, 1e12); int N=0; std::cout <<" Progress: "<< 100*(N*1.0)/(grid_points.size()*1.0) <<" % \n"; for(auto grid_point : grid_points){ vector3d v = grid_point.first; N++; if (1) { std::cout <<" Progress: "<< 100.0*(N*1.0)/(grid_points.size()*1.0) <<" % ("<<N<<") \r"; std::cout.flush(); } vector3d 
vNx(v.x+height_map_resolution*10,v.y,0); //get points 10xresolution to the north, west, south and east vector3d vWx(v.x,v.y+height_map_resolution*10,0); vector3d vSx(v.x-height_map_resolution*10,v.y,0); vector3d vEx(v.x,v.y-height_map_resolution*10,0); auto itEx=grid_points.find(vEx); auto itNx=grid_points.find(vNx); auto itWx=grid_points.find(vWx); auto itSx=grid_points.find(vSx); if (itEx == grid_points.end()||itNx == grid_points.end()||itWx == grid_points.end()||itSx == grid_points.end()){ std::vector<double> L; L.assign(times_per_year,-1.0); //border points get value -1.0 to indicate we have coputed no value for them grid_points[v]=L; continue; } //v = *it_v; vector3d vN(v.x+height_map_resolution,v.y,0); //get points to the north, west, south and east vector3d vW(v.x,v.y+height_map_resolution,0); vector3d vS(v.x-height_map_resolution,v.y,0); vector3d vE(v.x,v.y-height_map_resolution,0); auto itE=grid_points.find(vE); auto itN=grid_points.find(vN); auto itW=grid_points.find(vW); auto itS=grid_points.find(vS); if (itE == grid_points.end()||itN == grid_points.end()||itW == grid_points.end()||itS == grid_points.end()){ exit(-1); } vE=itE->first; vN=itN->first; vW=itW->first; vS=itS->first; vector3d Normal = ((((vE-v)^(vN-v))+((vW-v)^(vS-v)))/2).norm(); //normal is the crossproduct of two prependicular differences. Avgd. 
std::vector<double> horizon_elevation_angles; //vector to hold elevation angles at theta horizon_elevation_angles.assign(sun_angles, 0); // std::vector<vector3d> horizon(sun_angles); std::vector<double> dists(sun_angles); size_t edge_points = 0; size_t interior_points = 0; for(int theta = 0; theta<sun_angles;theta++){ //compute for each angle theta double th = theta*M_PI/180; { double sin_theta = sin(th); //calculate sin of the angle in radians double yend = EQ_DBL(copysign(1,sin_theta),1)?east_y_max:0; //if the sinus is positive endvalue is east_y_max, if negative 0 for(double y=v.y+copysign(height_map_resolution,sin_theta);(y >= 0 && y <= east_y_max);y+=copysign(height_map_resolution,sin_theta)){//increase/decrease if theta +/- double x = v.x+(y-v.y)/tan(th); //find the x that goes with y for this theta double x1 = floor(x-((int)x%(int)height_map_resolution)); //find the nearest lower gridpoint by subtracting remainder according to height_map_resolution double x2 = x1 + height_map_resolution; //Add height_map_resolution to get nearest higher gridpoint double phi = horizon_elevation_angles[theta]; double dist = (v-vector3d(x,y,v.z)).length(); double phiMaxTheta = atan(5000.0/dist); //maximal phi at this theta (with height 5000 m) if(x1>=north_x_max||x1<0||x2>=north_x_max||x2<0||phi>phiMaxTheta){ break; } if((int)x%(int)height_map_resolution){ //if x is not a grid point auto it_vec1 = grid_points.find(vector3d(x1,y,0)); //get the two gridpoints from the set auto it_vec2 = grid_points.find(vector3d(x2,y,0)); if (it_vec1 == grid_points.end()||it_vec2 == grid_points.end()){ edge_points++; continue; } vector3d vec1 = it_vec1->first; vector3d vec2 = it_vec2->first; double height = vec1.z*(x2-x)/height_map_resolution + vec2.z*(x-x1)/height_map_resolution-v.z; //compute height at x via linear interpolation dist=(v-vector3d(x,y,v.z)).length(); phi = atan(height/dist); if(phi>horizon_elevation_angles[theta]){//see if larger horizon_elevation_angles[theta]=phi; dists[theta] 
= dist/1000; } } else{//if x is a gridpoint auto it_vec = grid_points.find(vector3d(x,y,0)); //get vector if (it_vec == grid_points.end()){ //exit(-1); edge_points++; continue; } vector3d vec = it_vec->first; double height = vec.z-v.z; //get height double dist=(v-vector3d(x,y,v.z)).length(); double phi = atan(height/dist); if(phi>horizon_elevation_angles[theta]){//see if larger horizon_elevation_angles[theta]=phi; dists[theta] = dist/1000; } } interior_points++; } } double cos_theta = cos(th); double xend = EQ_DBL(copysign(1,cos_theta),1)?north_x_max:0; //if the cosinus is positive endvalue is north_x_max, if negative 0 for(double x=v.x+copysign(height_map_resolution,cos_theta);(x >= 0 && x <= north_x_max);x+=copysign(height_map_resolution,cos_theta)){//increase/decrease if theta +/- double y = v.y+(x-v.x)*tan(th); //find the y that goes with x for this theta double y1 = floor(y-((int)y%(int)height_map_resolution)); //find the nearest lower gridpoint by subtracting remainder according to height_map_resolution double y2 = y1 + height_map_resolution; //Add height_map_resolution to get nearest higher gridpoint double phi = horizon_elevation_angles[theta]; double dist = sqrt((v.x-x)*(v.x-x) + (v.y-y)*(v.y-y)); double phiMaxTheta = atan(5000.0/dist); //maximal phi at this theta (with height 5000 m) if(y1>=north_x_max||y1<0||y2>=north_x_max||y2<0||phi>phiMaxTheta){ break; } if((int)y%(int)height_map_resolution){ //if y is not a grid point auto it_vec1 = grid_points.find(vector3d(x,y1,0)); //get the two gridpoints from the set auto it_vec2 = grid_points.find(vector3d(x,y2,0)); if (it_vec1 == grid_points.end()||it_vec2 == grid_points.end()){ //exit(-1); edge_points++; continue; } vector3d vec1 = it_vec1->first; vector3d vec2 = it_vec2->first; double height = vec1.z*(y2-y)/height_map_resolution + vec2.z*(y-y1)/height_map_resolution-v.z; //compute height at y via linear interpolation double dist=(v-vector3d(x,y,v.z)).length(); double phi = atan(height/dist); 
if(phi>horizon_elevation_angles[theta]){//see if larger horizon_elevation_angles[theta]=phi; dists[theta] = dist/1000; } } else{//if y is a gridpoint auto it_vec = grid_points.find(vector3d(x,y,0)); //get vector if (it_vec == grid_points.end()){ //exit(-1); edge_points++; continue; } vector3d vec = it_vec->first; double height = vec.z-v.z; //get height double dist=(v-vector3d(x,y,v.z)).length(); double phi = atan(height/dist); if(phi>horizon_elevation_angles[theta]){//see if larger horizon_elevation_angles[theta]=phi; dists[theta] = dist/1000; } } interior_points++; } } int k = 0; std::vector<double> average_sun_intensity; average_sun_intensity.assign(times_per_year, 0.0); double sun_intensity = 0; for(int j=0;j<times_per_year;j++){ for (k=0;k<sun_angles;k++) { if(horizon_elevation_angles[k]>sun_elevation_angle[j][k]){ sun_intensity=0; } else{ double height = 0; if(elevation_dependant_sun_intensity){ height = v.z; } //spherical shell approximation for air mass attenuation, see wikipedia // http://en.wikipedia.org/wiki/Air_mass_%28solar_energy%29 double phi = sun_elevation_angle[j][k]; double R_E = 6371000.0; //Earth radius, meters double y_atm = 9000.0; //Athmospheric thickness, meters double r = R_E/y_atm; double c = height/y_atm; double air_mass_coefficient = sqrt((r+c)*(r+c)*cos(phi)*cos(phi) + (2*r+1+c)*(1-c))-(r+c)*cos(phi); double I_0 = 1.353; // kW/m^2 double I = 1.1 * I_0 * pow(0.7, pow(air_mass_coefficient, 0.678)); sun_intensity = I * (sun_vec_day_angle[j][k] * Normal); average_sun_intensity[j]+=sun_intensity; } average_sun_intensity[j] = average_sun_intensity[j] / sun_angles; grid_point.second[j]=average_sun_intensity[j]; max_sun_intensity[j] = (average_sun_intensity[j] > max_sun_intensity[j])?average_sun_intensity[j]:max_sun_intensity[j]; min_sun_intensity[j] = (average_sun_intensity[j] < min_sun_intensity[j])?average_sun_intensity[j]:min_sun_intensity[j]; } assert(average_sun_intensity.size() == times_per_year); grid_points[v]=average_sun_intensity; 
assert(grid_point.second.size() == times_per_year); } } std::cout << grid_points.size() << std::endl; for(int k = 0; k < times_per_year; ++k){ std::ostringstream oss(""); oss << k; std::ofstream ofs(output_file + oss.str() + ".xyz"); if (!ofs.is_open()){ exit(-2); } for(auto grid_point : grid_points){ vector3d v = grid_point.first; if(grid_point.second.size()>k){ ofs << v.x << " " << v.y << " " << v.z << " "; ofs << grid_points[v][k] << std::endl; } } } return 0; }
/*******************************************************************************
*     tgamma ( x ) - gamma function of a double argument.                      *
*                                                                              *
*     The caller's floating-point environment is saved on entry, replaced by   *
*     the environment value 0.0 while computing, and restored on every exit    *
*     path; exception bits to be reported are OR-ed into the saved copy        *
*     before it is restored.                                                   *
*                                                                              *
*     Special arguments:                                                       *
*        NaN        returns x * 2.0.                                           *
*        +-0        sets FE_DIVBYZERO, returns copysign ( Huge.d, x ).         *
*        +Inf       returns Huge.d.                                            *
*        -Inf       sets SET_INVALID, returns nan ( GAMMA_NAN ).               *
*        negative integer                                                      *
*                   sets SET_INVALID, returns nan ( GAMMA_NAN ).               *
*                                                                              *
*     Huge, MinusHuge, MinimumX, eps, xbig, pi, LogSqrt2pi and the             *
*     coefficient tables p[], q[], c[] are defined outside this block.         *
*     NOTE(review): Huge.d / MinusHuge.d are presumably +-infinity - confirm.  *
*******************************************************************************/
double tgamma ( double x )
      {
      register int n, parity, i;
      register double y, y1, result, fact, IsItAnInt, z, numerator,
                      denominator, ysquared, sum;
      hexdouble OldEnvironment;

      FEGETENVD( OldEnvironment.d );               // save environment, set default
      FESETENVD( 0.0 );

/*******************************************************************************
*     The next switch will decipher what sort of argument we have. If argument *
*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    *
*******************************************************************************/

      switch ( __fpclassifyd ( x ) )
            {
            case FP_NAN:
                  x *= 2.0;                  /* quiets NaN */
                  FESETENVD( OldEnvironment.d ); // restore caller's environment
                  return x;

            case FP_ZERO:
                  OldEnvironment.i.lo |= FE_DIVBYZERO;
                  FESETENVD( OldEnvironment.d );
                  return copysign( Huge.d, x);

            case FP_INFINITE:
                  if ( x > 0.0 )
                        x = Huge.d;
                  else
                        {
                        x = nan ( GAMMA_NAN );
                        OldEnvironment.i.lo |= SET_INVALID;
                        }
                  FESETENVD( OldEnvironment.d );
                  return x;

            default:                  /* NORMALNUM and DENORMALNUM */
                  break;
            }

      parity = 0;
      fact = 1.0;
      n = 0;
      y = x;

/*******************************************************************************
*     The argument is negative.                                                *
*     A non-integer negative x is mapped to the positive argument 1 - x; the   *
*     final result is then fact / gamma ( 1 - x ) with                         *
*     fact = -pi / sin ( pi * frac ), sign-adjusted through parity.            *
*******************************************************************************/

      if ( y <= 0.0 )
            {
            y = - x;
            if ( y < MinimumX )
                  {
                  OldEnvironment.i.lo |= FE_OVERFLOW;
                  FESETENVD( OldEnvironment.d );
                  return MinusHuge.d;
                  }
            y1 = trunc ( y );
            IsItAnInt = y - y1;              /* fractional part of |x| */
            if ( IsItAnInt != 0.0 )          /* is it an integer? */
                  {                          /* is it odd or even? */
                  if ( y1 != trunc ( y1 * 0.5 ) * 2.0 )
                        parity = 1;
                  fact = - pi / sin ( pi * IsItAnInt );
                  y += 1.0;
                  }
            else
                  {
                  OldEnvironment.i.lo |= SET_INVALID;
                  FESETENVD( OldEnvironment.d );
                  return nan ( GAMMA_NAN );
                  }
            }

/*******************************************************************************
*     The argument is positive.                                                *
*******************************************************************************/

      if ( y < eps )                    /* argument is less than epsilon. */
            {
            if ( y >= MinimumX )        /* x is in [MinimumX,eps]. */
                  result = 1.0 / y;
            else                        /* otherwise, x is in [0,MinimumX). */
                  {
                  OldEnvironment.i.lo |= FE_OVERFLOW;
                  FESETENVD( OldEnvironment.d );
                  return Huge.d;
                  }
            }
      else if ( y < 12.0 )              /* argument x is eps < x < 12.0. */
            {
            y1 = y;                     /* remember the unreduced argument */
            if ( y < 1.0 )              /* x is in (eps, 1.0). */
                  {
                  z = y;
                  y += 1.0;
                  }
            else                        /* x is in [1.0,12.0]. */
                  {
                  n = ( int ) y - 1;    /* number of unit steps removed from y */
                  y -= ( double ) n;
                  z = y - 1.0;
                  }
            /* Rational approximation in z built from the p[] and q[] tables. */
            numerator = 0.0;
            denominator = 1.0;
            for ( i = 0; i < 8; i++ )
                  {
                  numerator = ( numerator + p[i] ) * z;
                  denominator = denominator * z + q[i];
                  }
            result = numerator / denominator + 1.0;
            if ( y1 < y )               /* argument was raised by one: divide it back out */
                  result /= y1;
            else if ( y1 > y )          /* argument was lowered by n: multiply back up */
                  {
                  for ( i = 0; i < n; i++ )
                        {
                        result *= y;
                        y += 1.0;
                        }
                  }
            }
      else
            {
            if ( x <= xbig )
                  {
                  /* Large argument: exp of a log-domain sum using the c[] series. */
                  ysquared = y * y;
                  sum = c[6];
                  for ( i = 0; i < 6; i++ )
                        sum = sum / ysquared + c[i];
                  sum = sum / y - y + LogSqrt2pi;
                  sum += ( y - 0.5 ) * log ( y );
                  result = exp ( sum );
                  }
            else
                  {
                  OldEnvironment.i.lo |= FE_OVERFLOW;
                  FESETENVD( OldEnvironment.d ); // restore caller's environment
                  return Huge.d;
                  }
            }

      /* Undo the mapping applied to negative arguments. */
      if ( parity )
            result = - result;
      if ( fact != 1.0 )
            result = fact / result;

      FESETENVD( OldEnvironment.d );         // restore caller's environment
      return result;
      }
/*
 * csinh(z) - complex hyperbolic sine, double precision.
 *
 * For finite z = x + I y the result is sinh(x) cos(y) + I cosh(x) sin(y);
 * large |x| is handled through exp() (directly or via __ldexp_cexp) so that
 * intermediate values do not overflow early.  Arguments containing an
 * infinity or a NaN are dispatched by inspecting the high (hx, hy) and low
 * (lx, ly) 32-bit words of x and y; ix and iy are the high words with the
 * sign bit cleared.  The per-case comments below state the chosen results.
 *
 * `huge` and __ldexp_cexp() are defined outside this block.
 */
double complex
csinh(double complex z)
{
	double x, y, h;
	int32_t hx, hy, ix, iy, lx, ly;

	x = creal(z);
	y = cimag(z);

	EXTRACT_WORDS(hx, lx, x);
	EXTRACT_WORDS(hy, ly, y);

	ix = 0x7fffffff & hx;
	iy = 0x7fffffff & hy;

	/* Handle the nearly-non-exceptional cases where x and y are finite. */
	if (ix < 0x7ff00000 && iy < 0x7ff00000) {
		/* y is +-0: purely real argument, the zero is passed through. */
		if ((iy | ly) == 0)
			return CMPLX(sinh(x), y);
		if (ix < 0x40360000)	/* small x: normal case */
			return CMPLX(sinh(x) * cos(y), cosh(x) * sin(y));

		/* |x| >= 22, so cosh(x) ~= exp(|x|) */
		if (ix < 0x40862e42) {
			/* x < 710: exp(|x|) won't overflow */
			h = exp(fabs(x)) * 0.5;
			return CMPLX(copysign(h, x) * cos(y), h * sin(y));
		} else if (ix < 0x4096bbaa) {
			/* x < 1455: scale to avoid overflow */
			z = __ldexp_cexp(CMPLX(fabs(x), y), -1);
			return CMPLX(creal(z) * copysign(1, x), cimag(z));
		} else {
			/* x >= 1455: the result always overflows */
			h = huge * x;
			return CMPLX(h * cos(y), h * h * sin(y));
		}
	}

	/*
	 * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN.
	 * The sign of 0 in the result is unspecified.  Choice = normally
	 * the same as dNaN.  Raise the invalid floating-point exception.
	 *
	 * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN).
	 * The sign of 0 in the result is unspecified.  Choice = normally
	 * the same as d(NaN).
	 */
	if ((ix | lx) == 0 && iy >= 0x7ff00000)
		return CMPLX(copysign(0, x * (y - y)), y - y);

	/*
	 * sinh(+-Inf +- I 0) = +-Inf + I +-0.
	 *
	 * sinh(NaN +- I 0)   = d(NaN) + I +-0.
	 */
	if ((iy | ly) == 0 && ix >= 0x7ff00000) {
		/* Zero mantissa bits: x is an infinity rather than a NaN. */
		if (((hx & 0xfffff) | lx) == 0)
			return CMPLX(x, y);
		return CMPLX(x, copysign(0, y));
	}

	/*
	 * sinh(x +- I Inf) = dNaN + I dNaN.
	 * Raise the invalid floating-point exception for finite nonzero x.
	 *
	 * sinh(x + I NaN) = d(NaN) + I d(NaN).
	 * Optionally raises the invalid floating-point exception for finite
	 * nonzero x.  Choice = don't raise (except for signaling NaNs).
	 */
	if (ix < 0x7ff00000 && iy >= 0x7ff00000)
		return CMPLX(y - y, x * (y - y));

	/*
	 * sinh(+-Inf + I NaN)  = +-Inf + I d(NaN).
	 * The sign of Inf in the result is unspecified.  Choice = normally
	 * the same as d(NaN).
	 *
	 * sinh(+-Inf +- I Inf) = +Inf + I dNaN.
	 * The sign of Inf in the result is unspecified.  Choice = always +.
	 * Raise the invalid floating-point exception.
	 *
	 * sinh(+-Inf + I y)   = +-Inf cos(y) + I Inf sin(y)
	 */
	if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) {
		if (iy >= 0x7ff00000)
			return CMPLX(x * x, x * (y - y));
		return CMPLX(x * cos(y), INFINITY * sin(y));
	}

	/*
	 * sinh(NaN + I NaN)  = d(NaN) + I d(NaN).
	 *
	 * sinh(NaN +- I Inf) = d(NaN) + I d(NaN).
	 * Optionally raises the invalid floating-point exception.
	 * Choice = raise.
	 *
	 * sinh(NaN + I y)    = d(NaN) + I d(NaN).
	 * Optionally raises the invalid floating-point exception for finite
	 * nonzero y.  Choice = don't raise (except for signaling NaNs).
	 */
	return CMPLX((x * x) * (y - y), (x + x) * (y - y));
}
/// Condensation/evaporation adjustment, applied cell by cell.
///
/// For every (i,j,k) the vapour density rhod_rv is moved towards
/// saturation (rhod * F.rs) in steps of at most 0.5 * rho_eps. Each step
/// integrates rhod_th with one Runge-Kutta-4 step (using rhod_rv as the
/// independent variable) and transfers the same amount between vapour and
/// cloud water rhod_rl or, outside of cloud, rain water rhod_rr.
/// Rain evaporation is additionally capped by drho_rr_max.
///
/// @param rhod     dry air density (read only)
/// @param rhod_th  updated in place by the ODE step
/// @param rhod_rv  water vapour density, updated in place
/// @param rhod_rl  cloud water density, updated in place
/// @param rhod_rr  rain water density, updated in place
/// @param dt       time step, used only for the rain evaporation cap
///
/// Requires USE_BOOST_ODEINT; otherwise only error_macro() is invoked.
/// opt_cevp and opt_revp are defined outside this block.
/// NOTE(review): arrays are presumably stored as plain numbers in SI
/// units, since units are attached and stripped around each call -- confirm.
void eqs_todo_bulk_ode<real_t>::condevap(
  const mtx::arr<real_t> &rhod,
  mtx::arr<real_t> &rhod_th,
  mtx::arr<real_t> &rhod_rv,
  mtx::arr<real_t> &rhod_rl,
  mtx::arr<real_t> &rhod_rr,
  const quantity<si::time, real_t> dt
)
{
#  if !defined(USE_BOOST_ODEINT)
  error_macro("eqs_todo_bulk requires icicle to be compiled with Boost.odeint");
#  else
  // odeint::euler< // TODO: make this an option?
  odeint::runge_kutta4<
    quantity<multiply_typeof_helper<si::mass_density, si::temperature>::type, real_t>, // state_type
    real_t, // value_type
    quantity<si::temperature, real_t>, // deriv_type
    quantity<si::mass_density, real_t>, // time_type
    odeint::vector_space_algebra,
    odeint::default_operations,
    odeint::never_resizer
  > S; // TODO: would be better to instantiate in the ctor (but what about thread safety! :()
  typename detail::rhs F;

  for (int k = rhod.lbound(mtx::k); k <= rhod.ubound(mtx::k); ++k)
    for (int j = rhod.lbound(mtx::j); j <= rhod.ubound(mtx::j); ++j)
      for (int i = rhod.lbound(mtx::i); i <= rhod.ubound(mtx::i); ++i)
      {
        // (re)initialise the right-hand-side functor with this cell's state
        F.init(
          rhod(i,j,k)    * si::kilograms / si::cubic_metres,
          rhod_th(i,j,k) * si::kilograms / si::cubic_metres * si::kelvins,
          rhod_rv(i,j,k) * si::kilograms / si::cubic_metres
        );

        real_t // TODO: quantity<si::mass_density
          rho_eps = .00002, // TODO: as an option?
          vapour_excess;

        // upper bound on the rain water that may evaporate during dt
        // (stays 0 when saturated, when there is no rain, or when opt_revp is off)
        real_t drho_rr_max = 0; // TODO: quantity<si::mass_density
        if (F.rs > F.r && rhod_rr(i,j,k) > 0 && opt_revp)
          drho_rr_max = (dt / si::seconds) * (1 - F.r / F.rs) * (1.6 + 124.9 * pow(1e-3 * rhod_rr(i,j,k), .2046))
            * pow(1e-3 * rhod_rr(i,j,k), .525) / (5.4e2 + 2.55e5 * (1. / (F.p / si::pascals) / F.rs)); // TODO: move to phc!!!

        // assigned inside the while condition, and only on the subsaturated
        // path; it is read only after "vapour_excess > 0 ||", so the
        // supersaturated path never reads it unassigned
        bool incloud;

        // TODO: rethink and document 2*rho_eps!!!
        while (
          // condensation of cloud water if supersaturated
          (vapour_excess = rhod_rv(i,j,k) - rhod(i,j,k) * F.rs) > rho_eps
          || (opt_cevp && vapour_excess < -rho_eps && ( // or if subsaturated
            (incloud = (rhod_rl(i,j,k) > 0)) // cloud evaporation if in cloud
            || (opt_revp && rhod_rr(i,j,k) > 0) // or rain evaporation if in a rain shaft (and out-of-cloud)
          ))
        )
        {
          // step against the sign of the excess: negative when condensing,
          // positive when evaporating
          real_t drho_rv = - copysign(.5 * rho_eps, vapour_excess);
          drho_rv = (vapour_excess > 0 || incloud)
            ? std::min(rhod_rl(i,j,k), drho_rv)
            : std::min(drho_rr_max, std::min(rhod_rr(i,j,k), drho_rv)); // preventing negative mixing ratios
          assert(drho_rv != 0); // otherwise it should not pass the while condition!

          // theta is modified by do_step, and hence we cannot pass an expression and we need a temp. var.
          quantity<multiply_typeof_helper<si::mass_density, si::temperature>::type, real_t>
            tmp = rhod_th(i,j,k) * si::kilograms / si::cubic_metres * si::kelvins;

          // integrating the First Law for moist air
          S.do_step(
            boost::ref(F),
            tmp,
            rhod_rv(i,j,k) * si::kilograms / si::cubic_metres,
            drho_rv        * si::kilograms / si::cubic_metres
          );

          // latent heat source/sink due to evaporation/condensation
          rhod_th(i,j,k) = tmp / (si::kilograms / si::cubic_metres * si::kelvins);

          // updating rhod_rv
          rhod_rv(i,j,k) += drho_rv;
          assert(rhod_rv(i,j,k) >= 0);
          assert(isfinite(rhod_rv(i,j,k)));

          if (vapour_excess > 0 || incloud)
          {
            rhod_rl(i,j,k) -= drho_rv; // cloud water
            assert(rhod_rl(i,j,k) >= 0);
            assert(isfinite(rhod_rl(i,j,k)));
          }
          else // or rain water
          {
            assert(opt_revp); // should be guaranteed by the while() condition above
            rhod_rr(i,j,k) -= drho_rv;
            assert(rhod_rr(i,j,k) >= 0);
            assert(isfinite(rhod_rr(i,j,k)));
            if ((drho_rr_max -= drho_rv) == 0) break; // but not more than Kessler allows
          }
        }

        // hopefully true for RK4
        assert(F.r == real_t(rhod_rv(i,j,k) / rhod(i,j,k)));
        // double-checking....
        assert(rhod_rl(i,j,k) >= 0);
        assert(rhod_rv(i,j,k) >= 0);
        assert(rhod_rr(i,j,k) >= 0);
      }
#  endif
}
/*
 * cproj -- project a complex value onto the Riemann sphere.
 *
 * If either the real or the imaginary part of z is infinite (even when the
 * other part is NaN), every such value is mapped to the single point
 * "positive real infinity".  The imaginary part of that result is a zero
 * that carries the sign of the imaginary part of z, as the C standard
 * specifies: cproj(z) == INFINITY + I * copysign(0.0, cimag(z)).
 *
 * Any z with no infinite component is returned unchanged.
 */
double complex
cproj(double complex z)
{
	if (isinf(creal(z)) || isinf(cimag(z))) {
		/* The sign of the zero comes from the IMAGINARY part of z. */
		return CMPLX(INFINITY, copysign(0.0, cimag(z)));
	}

	return z;
}
/// /// Un is a 1 x N matrix, with N the number of finite volumes /// /// Reference: Randall LeVeque. Numerical Methods for Conservation Laws (1992) /// void SlopeLimiterStepPeriodic(const double h, const double dt, const double A, const unsigned int N, const double *restrict_ Un, double *restrict_ Un1) { unsigned int j; const double Aabs = fabs(A); // nu_p = dt*lambda_p/h const double nu = dt*A/h; // for slope-limiter term const double S = 0.5 * A * (copysign(1, nu) - nu); const double Sabs = 0.5 * Aabs * (copysign(1, nu) - nu); // Eq. (16.34) // alpha_j = R^{-1}*(U^n_{j+1} - U^n_j) double *alpha = malloc(N * sizeof(double)); for (j = 0; j < N-1; j++) { alpha[j] = Un[j+1] - Un[j]; } alpha[N-1] = Un[0] - Un[N-1]; // periodic // h * beta_j, Eq. (16.56) double *h_beta = malloc(N * sizeof(double)); for (j = 1; j < N; j++) { h_beta[j] = minmod(alpha[j], alpha[j-1]); }
/* Format the complex number `value` according to `format` and append the
   result to `writer`.

   The real and imaginary parts are converted separately with
   PyOS_double_to_string(), measured with calc_number_widths(), and then
   written as:  [padding] ['('] [real] imag 'j' [')'] [padding].

   Zero padding (fill character '0') and '=' alignment are rejected with
   ValueError, as is a precision larger than INT_MAX.

   Returns -1 with an exception set on failure; on success returns the
   result of the last fill_number() call (presumably 0 -- confirm against
   fill_number()).  All temporary buffers and objects are released at the
   `done` label on every path. */
static int format_complex_internal(PyObject *value,
                                   const InternalFormatSpec *format,
                                   _PyUnicodeWriter *writer)
{
    double re;
    double im;
    char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
    char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */

    /* Private copy of the spec: padding/sign fields are overridden below
       while the two parts are measured individually. */
    InternalFormatSpec tmp_format = *format;
    Py_ssize_t n_re_digits;
    Py_ssize_t n_im_digits;
    Py_ssize_t n_re_remainder;
    Py_ssize_t n_im_remainder;
    Py_ssize_t n_re_total;
    Py_ssize_t n_im_total;
    int re_has_decimal;
    int im_has_decimal;
    int precision, default_precision = 6;
    Py_UCS4 type = format->type;
    Py_ssize_t i_re;
    Py_ssize_t i_im;
    NumberFieldWidths re_spec;
    NumberFieldWidths im_spec;
    int flags = 0;
    int result = -1;           /* pessimistic default: every early exit fails */
    Py_UCS4 maxchar = 127;
    enum PyUnicode_Kind rkind;
    void *rdata;
    Py_UCS4 re_sign_char = '\0';
    Py_UCS4 im_sign_char = '\0';
    int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
    int im_float_type;
    int add_parens = 0;        /* wrap the output in '(' ... ')' */
    int skip_re = 0;           /* omit the real part entirely */
    Py_ssize_t lpad;
    Py_ssize_t rpad;
    Py_ssize_t total;
    PyObject *re_unicode_tmp = NULL;
    PyObject *im_unicode_tmp = NULL;

    /* Locale settings, either from the actual locale or
       from a hard-coded pseudo-locale */
    LocaleInfo locale = STATIC_LOCALE_INFO_INIT;

    /* The precision is narrowed to int below, so reject anything that
       would not survive the cast. */
    if (format->precision > INT_MAX) {
        PyErr_SetString(PyExc_ValueError, "precision too big");
        goto done;
    }
    precision = (int)format->precision;

    /* Zero padding is not allowed. */
    if (format->fill_char == '0') {
        PyErr_SetString(PyExc_ValueError,
                        "Zero padding is not allowed in complex format "
                        "specifier");
        goto done;
    }

    /* Neither is '=' alignment . */
    if (format->align == '=') {
        PyErr_SetString(PyExc_ValueError,
                        "'=' alignment flag is not allowed in complex format "
                        "specifier");
        goto done;
    }

    /* -1.0 is both a legitimate value and the error sentinel, so an error
       is only recognised when an exception is actually pending. */
    re = PyComplex_RealAsDouble(value);
    if (re == -1.0 && PyErr_Occurred())
        goto done;
    im = PyComplex_ImagAsDouble(value);
    if (im == -1.0 && PyErr_Occurred())
        goto done;

    if (format->alternate)
        flags |= Py_DTSF_ALT;

    if (type == '\0') {
        /* Omitted type specifier. Should be like str(self). */
        type = 'r';
        default_precision = 0;
        /* The real part is dropped only for positive zero; copysign()
           distinguishes +0.0 from -0.0, which compare equal. */
        if (re == 0.0 && copysign(1.0, re) == 1.0)
            skip_re = 1;
        else
            add_parens = 1;
    }

    if (type == 'n')
        /* 'n' is the same as 'g', except for the locale used to
           format the result. We take care of that later. */
        type = 'g';

    /* An explicit precision combined with the implicit 'r' type falls back
       to 'g' formatting. */
    if (precision < 0)
        precision = default_precision;
    else if (type == 'r')
        type = 'g';

    /* Cast "type", because if we're in unicode we need to pass a
       8-bit char. This is safe, because we've restricted what "type"
       can be. */
    re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
                                   &re_float_type);
    if (re_buf == NULL)
        goto done;
    im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
                                   &im_float_type);
    if (im_buf == NULL)
        goto done;

    n_re_digits = strlen(re_buf);
    n_im_digits = strlen(im_buf);

    /* Since there is no unicode version of PyOS_double_to_string,
       just use the 8 bit version and then convert to unicode. */
    re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
    if (re_unicode_tmp == NULL)
        goto done;
    i_re = 0;

    im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
    if (im_unicode_tmp == NULL)
        goto done;
    i_im = 0;

    /* Is a sign character present in the output?  If so, remember it
       and skip it */
    if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
        re_sign_char = '-';
        ++i_re;
        --n_re_digits;
    }
    if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
        im_sign_char = '-';
        ++i_im;
        --n_im_digits;
    }

    /* Determine if we have any "remainder" (after the digits, might include
       decimal or exponent or both (or neither)) */
    parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
                 &n_re_remainder, &re_has_decimal);
    parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
                 &n_im_remainder, &im_has_decimal);

    /* Determine the grouping, separator, and decimal point, if any.
       Note this tests the caller's original format->type, because the
       local `type` has already been rewritten from 'n' to 'g'. */
    if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
                        (format->thousands_separators ?
                         LT_DEFAULT_LOCALE :
                         LT_NO_LOCALE),
                        &locale) == -1)
        goto done;

    /* Turn off any padding. We'll do it later after we've composed
       the numbers without padding. */
    tmp_format.fill_char = '\0';
    tmp_format.align = '<';
    tmp_format.width = -1;

    /* Calculate how much memory we'll need. */
    n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
                                    i_re, i_re + n_re_digits, n_re_remainder,
                                    re_has_decimal, &locale, &tmp_format,
                                    &maxchar);

    /* Same formatting, but always include a sign, unless the real part is
     * going to be omitted, in which case we use whatever sign convention was
     * requested by the original format. */
    if (!skip_re)
        tmp_format.sign = '+';
    n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
                                    i_im, i_im + n_im_digits, n_im_remainder,
                                    im_has_decimal, &locale, &tmp_format,
                                    &maxchar);

    /* The real part was still measured above, but contributes no width
       when it is not going to be written. */
    if (skip_re)
        n_re_total = 0;

    /* Add 1 for the 'j', and optionally 2 for parens. */
    calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
                 format->width, format->align, &lpad, &rpad, &total);

    /* The fill character only affects the required character width if
       padding is actually emitted. */
    if (lpad || rpad)
        maxchar = Py_MAX(maxchar, format->fill_char);

    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
        goto done;
    /* Read kind/data only after _PyUnicodeWriter_Prepare() has run. */
    rkind = writer->kind;
    rdata = writer->data;

    /* Populate the memory. First, the padding. */
    result = fill_padding(writer,
                          n_re_total + n_im_total + 1 + add_parens * 2,
                          format->fill_char, lpad, rpad);
    if (result == -1)
        goto done;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
        writer->pos++;
    }

    if (!skip_re) {
        result = fill_number(writer, &re_spec,
                             re_unicode_tmp, i_re, i_re + n_re_digits,
                             NULL, 0,
                             0, &locale, 0);
        if (result == -1)
            goto done;
    }
    result = fill_number(writer, &im_spec,
                         im_unicode_tmp, i_im, i_im + n_im_digits,
                         NULL, 0,
                         0, &locale, 0);
    if (result == -1)
        goto done;
    PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
    writer->pos++;

    if (add_parens) {
        PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
        writer->pos++;
    }

    /* Step over the right-hand padding. */
    writer->pos += rpad;

done:
    /* Common exit: every pointer starts as NULL, so this cleanup is reached
       safely from any of the early `goto done` exits. */
    PyMem_Free(re_buf);
    PyMem_Free(im_buf);
    Py_XDECREF(re_unicode_tmp);
    Py_XDECREF(im_unicode_tmp);
    free_locale_info(&locale);
    return result;
}
//_______________________________________________________________________________________________________________________
///
/// \brief Min-mod limiter, see Eq. (16.52) in Randall LeVeque. Numerical Methods for Conservation Laws (1992)
///
/// The two unit-magnitude signs are summed and halved, which yields +1 or -1
/// when 'a' and 'b' share a sign and 0 when their signs differ; that factor
/// then scales the smaller of the two magnitudes.
///
static inline double minmod(const double a, const double b)
{
	const double sign_sum = copysign(1, a) + copysign(1, b);
	const double smaller_magnitude = minf(fabs(a), fabs(b));

	return 0.5 * sign_sum * smaller_magnitude;
}
/// Returns a float that has the magnitude of `target` and the sign of
/// `source`, as computed by copysign().
float FPEnvironmentImpl::copySignImpl(float target, float source)
{
	const float signedTarget = (float) copysign(target, source);
	return signedTarget;
}
/*
 * d_sign -- sign transfer for doubles passed by reference.
 *
 * Returns the magnitude of *x combined with the sign of *y.  Neither
 * pointed-to value is modified.
 */
double d_sign(double *x, double *y)
{
	const double magnitude_source = *x;
	const double sign_source = *y;

	return copysign(magnitude_source, sign_source);
}
double epsD(double x) { x = copysign(x, 1.0); double y = nextafter(x, INFINITY); return y-x; }
/*
 * copysignf -- return x with its sign replaced by the sign of y.
 *
 * Implemented on top of the double version: both arguments are widened,
 * copysign() does the work, and the result is narrowed back to float.
 */
float
copysignf (float x, float y)
{
  const double widened_result = copysign ((double) x, (double) y);

  return (float) widened_result;
}
/* Convert the Cartesian coordinates in `ecef` to the triple written into
 * `llh`, iterating as described in the Fukushima paper referenced below.
 *
 * Outputs written:
 *   llh[0] - angle from atan()/copysign(), signed like ecef[2]
 *            (presumably geodetic latitude in radians - confirm)
 *   llh[1] - longitude from atan2(ecef[1], ecef[0]), or 0 on the polar axis
 *   llh[2] - presumably height above the WGS84 ellipsoid, in the units of
 *            ecef and WGS84_A - confirm
 *
 * Only the magnitude of ecef[2] is used in the iteration; its sign is
 * re-applied to llh[0] at the end.
 */
void wgsecef2llh(const Vector3d &ecef, Vector3d &llh)
{
  /* Distance from polar axis. */
  const double p = sqrt(ecef[0]*ecef[0] + ecef[1]*ecef[1]);

  /* Compute longitude first, this can be done exactly. */
  if (!is_zero(p))
    llh[1] = atan2(ecef[1], ecef[0]);
  else
    llh[1] = 0;

  /* If we are close to the pole then convergence is very slow, treat this as
   * a special case. */
  if (p < WGS84_A*1e-16) {
    llh[0] = copysign(M_PI_2, ecef[2]);
    llh[2] = fabs(ecef[2]) - WGS84_B;
    return;
  }

  /* Calculate some other constants as defined in the Fukushima paper. */
  const double P = p / WGS84_A;
  const double e_c = sqrt(1. - WGS84_E*WGS84_E);
  const double Z = fabs(ecef[2]) * e_c / WGS84_A;

  /* Initial values for S and C correspond to a zero height solution. */
  double S = Z;
  double C = e_c * P;

  /* Neither S nor C can be negative on the first iteration so
   * starting prev = -1 will not cause an early exit. */
  double prev_C = -1;
  double prev_S = -1;

  double A_n, B_n, D_n, F_n;

  /* Iterate a maximum of 10 times. This should be way more than enough for all
   * sane inputs */
  for (int i=0; i<10; i++)
  {
    /* Calculate some intermediate variables used in the update step based on
     * the current state. */
    A_n = sqrt(S*S + C*C);
    D_n = Z*A_n*A_n*A_n + WGS84_E*WGS84_E*S*S*S;
    F_n = P*A_n*A_n*A_n - WGS84_E*WGS84_E*C*C*C;
    B_n = 1.5*WGS84_E*S*C*C*(A_n*(P*S - Z*C) - WGS84_E*S*C);

    /* Update step. */
    S = D_n*F_n - B_n*S;
    C = F_n*F_n - B_n*C;

    /* The original algorithm as presented in the paper by Fukushima has a
     * problem with numerical stability. S and C can grow very large or small
     * and over or underflow a double. In the paper this is acknowledged and
     * the proposed resolution is to non-dimensionalise the equations for S and
     * C. However, this does not completely solve the problem. The author caps
     * the solution to only a couple of iterations and in this period over or
     * underflow is unlikely but as we require a bit more precision and hence
     * more iterations so this is still a concern for us.
     *
     * As the only thing that is important is the ratio T = S/C, my solution is
     * to divide both S and C by either S or C. The scaling is chosen such that
     * one of S or C is scaled to unity whilst the other is scaled to a value
     * less than one. By dividing by the larger of S or C we ensure that we do
     * not divide by zero as only one of S or C should ever be zero.
     *
     * This incurs an extra division each iteration which the author was
     * explicitly trying to avoid and it may be that this solution is just
     * reverting back to the method of iterating on T directly, perhaps this
     * bears more thought? */

    if (S > C) {
      C = C / S;
      S = 1;
    } else {
      S = S / C;
      C = 1;
    }

    /* Check for convergence and exit early if we have converged. */
    if (fabs(S - prev_S) < 1e-16 && fabs(C - prev_C) < 1e-16) {
      break;
    } else {
      prev_S = S;
      prev_C = C;
    }
  }

  /* Recompute A_n from the final (rescaled) S and C before forming the
   * outputs; the sign of ecef[2] is restored on llh[0] here. */
  A_n = sqrt(S*S + C*C);
  llh[0] = copysign(1.0, ecef[2]) * atan(S / (e_c*C));
  llh[2] = (p*e_c*C + fabs(ecef[2])*S - WGS84_A*e_c*A_n) / sqrt(e_c*e_c*C*C + S*S);
}
/*
 * copysignf -- single-precision sign transfer.
 *
 * Returns a float with the magnitude of x and the sign of y.  The work is
 * delegated to the double-precision copysign() on widened copies of the
 * arguments; the result is narrowed back to float.
 */
float copysignf (float x, float y)
{
    const double magnitude_source = (double) x;
    const double sign_source = (double) y;

    return (float) copysign(magnitude_source, sign_source);
}