/**
 * @brief Computes the total source (fission, scattering, fixed) in each FSR.
 * @details This method computes the total source in each FSR based on
 *          this iteration's current approximation to the scalar flux.
 */
void VectorizedSolver::computeFSRSources() {

#pragma omp parallel default(none)
  {
    int tid;
    Material* material;
    FP_PRECISION* sigma_t;
    FP_PRECISION* sigma_s;
    FP_PRECISION* fiss_mat;
    FP_PRECISION scatter_source, fission_source;

    int size = _num_groups * sizeof(FP_PRECISION);
    FP_PRECISION* fission_sources =
        (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);
    FP_PRECISION* scatter_sources =
        (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);

    /* For all FSRs, find the source */
#pragma omp for schedule(guided)
    for (int r=0; r < _num_FSRs; r++) {

      tid = omp_get_thread_num();
      material = _FSR_materials[r];
      sigma_t = material->getSigmaT();
      sigma_s = material->getSigmaS();
      fiss_mat = material->getFissionMatrix();

      /* Compute scatter + fission source for group G */
      for (int G=0; G < _num_groups; G++) {
        for (int v=0; v < _num_vector_lengths; v++) {

#pragma simd vectorlength(VEC_LENGTH)
          for (int g=v*VEC_LENGTH; g < (v+1)*VEC_LENGTH; g++) {
            scatter_sources[g] = sigma_s[G*_num_groups+g] * _scalar_flux(r,g);
            fission_sources[g] = fiss_mat[G*_num_groups+g] * _scalar_flux(r,g);
          }
        }

#ifdef SINGLE
        scatter_source = cblas_sasum(_num_groups, scatter_sources, 1);
        fission_source = cblas_sasum(_num_groups, fission_sources, 1);
#else
        scatter_source = cblas_dasum(_num_groups, scatter_sources, 1);
        fission_source = cblas_dasum(_num_groups, fission_sources, 1);
#endif

        fission_source /= _k_eff;

        /* Compute total (scatter+fission+fixed) reduced source */
        _reduced_sources(r,G) = _fixed_sources(r,G);
        _reduced_sources(r,G) += scatter_source + fission_source;
        _reduced_sources(r,G) *= ONE_OVER_FOUR_PI / sigma_t[G];
      }
    }

    MM_FREE(fission_sources);
    MM_FREE(scatter_sources);
  }
}
/**
 * @brief Compute \f$ k_{eff} \f$ from successive fission sources.
 */
void VectorizedSolver::computeKeff() {

  FP_PRECISION fission;

  int size = _num_FSRs * sizeof(FP_PRECISION);
  FP_PRECISION* FSR_rates = (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);

  size = _num_threads * _num_groups * sizeof(FP_PRECISION);
  FP_PRECISION* group_rates = (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);

#pragma omp parallel
  {
    int tid = omp_get_thread_num() * _num_groups;
    Material* material;
    FP_PRECISION* sigma;
    FP_PRECISION volume;

    /* Compute the new nu-fission rates in each FSR */
#pragma omp for schedule(guided)
    for (int r=0; r < _num_FSRs; r++) {

      volume = _FSR_volumes[r];
      material = _FSR_materials[r];
      sigma = material->getNuSigmaF();

      /* Loop over each energy group vector length */
      for (int v=0; v < _num_vector_lengths; v++) {

        /* Loop over energy groups within this vector */
#pragma simd vectorlength(VEC_LENGTH)
        for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++)
          group_rates[tid+e] = sigma[e] * _scalar_flux(r,e);
      }

#ifdef SINGLE
      FSR_rates[r] = cblas_sasum(_num_groups, &group_rates[tid], 1) * volume;
#else
      FSR_rates[r] = cblas_dasum(_num_groups, &group_rates[tid], 1) * volume;
#endif
    }
  }

  /* Reduce new fission rates across FSRs */
#ifdef SINGLE
  fission = cblas_sasum(_num_FSRs, FSR_rates, 1);
#else
  fission = cblas_dasum(_num_FSRs, FSR_rates, 1);
#endif

  _k_eff *= fission;

  MM_FREE(FSR_rates);
  MM_FREE(group_rates);
}
/**
 * @brief Normalizes all FSR scalar fluxes and Track boundary angular
 *        fluxes to the total fission source (times \f$ \nu \f$).
 */
void VectorizedSolver::normalizeFluxes() {

  FP_PRECISION* nu_sigma_f;
  FP_PRECISION volume;
  FP_PRECISION tot_fission_source;
  FP_PRECISION norm_factor;

  /* Compute total fission source for each FSR, energy group */
#pragma omp parallel for private(volume, nu_sigma_f) \
  reduction(+:tot_fission_source) schedule(guided)
  for (int r=0; r < _num_FSRs; r++) {

    /* Get pointers to important data structures */
    nu_sigma_f = _FSR_materials[r]->getNuSigmaF();
    volume = _FSR_volumes[r];

    /* Loop over energy group vector lengths */
    for (int v=0; v < _num_vector_lengths; v++) {

      /* Loop over each energy group within this vector */
#pragma simd vectorlength(VEC_LENGTH)
      for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++) {
        _fission_sources(r,e) = nu_sigma_f[e] * _scalar_flux(r,e);
        _fission_sources(r,e) *= volume;
      }
    }
  }

  /* Compute the total fission source */
  int size = _num_FSRs * _num_groups;
#ifdef SINGLE
  tot_fission_source = cblas_sasum(size, _fission_sources, 1);
#else
  tot_fission_source = cblas_dasum(size, _fission_sources, 1);
#endif

  /* Compute the normalization factor */
  norm_factor = 1.0 / tot_fission_source;

  log_printf(DEBUG, "Tot. Fiss. Src. = %f, Normalization factor = %f",
             tot_fission_source, norm_factor);

  /* Normalize the FSR scalar fluxes */
#ifdef SINGLE
  cblas_sscal(size, norm_factor, _scalar_flux, 1);
#else
  cblas_dscal(size, norm_factor, _scalar_flux, 1);
#endif

  /* Normalize the Track angular boundary fluxes */
  size = 2 * _tot_num_tracks * _num_polar * _num_groups;

#ifdef SINGLE
  cblas_sscal(size, norm_factor, _boundary_flux, 1);
#else
  cblas_dscal(size, norm_factor, _boundary_flux, 1);
#endif

  return;
}
/*
 * Maximize the activation of the unitIdx-th unit in layer layerIdx.
 */
double LayerWiseRBMs::maximizeUnit(int layerIdx, int unitIdx,
                                   double * unitSample, double argvNorm,
                                   int epoch){
  int AMnumIn = layers[0]->numVis;

  // normalize unitSample
  double curNorm = squareNorm(unitSample, AMnumIn, 1);
  cblas_dscal(AMnumIn, argvNorm / curNorm, unitSample, 1);

  double maxValue = 0;
  for(int k=0; k<epoch; k++){

    // forward pass up to layerIdx
    for(int i=0; i<=layerIdx; i++){
      if(i==0)
        layers[i]->setInput(unitSample);
      else
        layers[i]->setInput(layers[i-1]->getOutput());
      layers[i]->setBatchSize(1);
      layers[i]->runBatch();
    }
    maxValue = layers[layerIdx]->getOutput()[unitIdx];

    // back-propagate the activation-maximization delta
    for(int i=layerIdx; i>=0; i--){
      if(i==layerIdx)
        layers[i]->getAMDelta(unitIdx, NULL);
      else
        layers[i]->getAMDelta(-1, layers[i+1]->AMDelta);
    }

    double lr = 0.01 * cblas_dasum(AMnumIn, unitSample, 1)
                     / cblas_dasum(AMnumIn, layers[0]->AMDelta, 1);

    // update unitSample along the delta direction
    cblas_daxpy(AMnumIn, lr, layers[0]->AMDelta, 1, unitSample, 1);

    // re-normalize unitSample
    curNorm = squareNorm(unitSample, AMnumIn, 1);
    cblas_dscal(AMnumIn, argvNorm / curNorm, unitSample, 1);
  }
  return maxValue;
}
double dot(int N, double *vec1, double *vec2){

  // thread variables
  int nthds, tid;

  // compute variables
  int m, stride, start, stop;
  double dot;

  /* Fork a team of threads giving them their own copies of variables */
  #pragma omp parallel private(nthds, tid) shared(m)
  {
    // record the number of threads in the team
    nthds = omp_get_num_threads();
    tid = omp_get_thread_num();
    if(tid == 0){
      m = nthds;
    }
  }
  //printf("m = %d\n",m);

  double pnrms[m];

  /* Fork a team of threads giving them their own copies of variables */
  #pragma omp parallel private(nthds, tid, stride, start, stop) \
      shared(N, vec1, vec2, pnrms)
  {
    // compute thread variables
    nthds = omp_get_num_threads();
    tid = omp_get_thread_num();

    // compute stride
    stride = ceil((long double)N/nthds);

    // compute start and stop
    start = tid*stride;
    stop = (int)fminl((long double)(tid+1)*stride,(long double)N);

    // partial dot product over this thread's slice
    pnrms[tid] = cblas_ddot(stop-start,&vec1[start],1,&vec2[start],1);
    //printf("pnrms[%d] = %+e\n",tid,pnrms[tid]);
  }

  /* Reduce the partial results. Note: cblas_dasum sums absolute values, so
   * this equals the true dot product only when every partial sum is
   * non-negative; for general inputs a plain summation is required. */
  dot = cblas_dasum(m,&pnrms[0],1);
  //printf("nrm = %+e\n",nrm);

  return dot;
}
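/* A minimal usage sketch (an assumption, not part of the original source):
 * it builds two small non-negative vectors and calls the dot() defined above.
 * Compile with OpenMP and a CBLAS implementation, e.g.
 *   gcc -fopenmp example.c -lcblas -lm
 */
#include <stdio.h>

double dot(int N, double *vec1, double *vec2);

int main(void){
  double a[4] = {1.0, 2.0, 3.0, 4.0};
  double b[4] = {0.5, 0.5, 0.5, 0.5};

  /* expected result: 0.5*(1+2+3+4) = 5.0 */
  printf("dot = %f\n", dot(4, a, b));
  return 0;
}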
void Vector::processEnergy(const double* inputs, double* outputs)
{
    double energySum = 0., energyAbscissa = 0., energyOrdinate = 0.;

    // Square each channel to obtain its energy
    cblas_dcopy(m_number_of_channels, inputs, 1, m_channels_double, 1);
    for(int i = 0; i < m_number_of_channels; i++)
        m_channels_double[i] *= m_channels_double[i];

    // Total energy and energy-weighted coordinates
    energySum = cblas_dasum(m_number_of_channels, m_channels_double, 1);
    energyAbscissa = cblas_ddot(m_number_of_channels, m_channels_double, 1,
                                m_channels_abscissa_double, 1);
    energyOrdinate = cblas_ddot(m_number_of_channels, m_channels_double, 1,
                                m_channels_ordinate_double, 1);

    // Energy-weighted centroid, or the origin if there is no energy
    if(energySum)
    {
        outputs[0] = energyAbscissa / energySum;
        outputs[1] = energyOrdinate / energySum;
    }
    else
    {
        outputs[0] = 0.;
        outputs[1] = 0.;
    }
}
double My_dasum(const gsl_vector *x)
{
  return cblas_dasum(x->size, x->data, x->stride);
}
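/* A minimal usage sketch (not from the original source): sums the absolute
 * values of a small gsl_vector with the My_dasum wrapper above. Assumes
 * <gsl/gsl_vector.h> and a CBLAS header are included and linked. */
#include <stdio.h>
#include <gsl/gsl_vector.h>

int main(void)
{
  gsl_vector *v = gsl_vector_alloc(3);
  gsl_vector_set(v, 0, -1.0);
  gsl_vector_set(v, 1,  2.0);
  gsl_vector_set(v, 2, -3.0);

  /* |-1| + |2| + |-3| = 6 */
  printf("asum = %g\n", My_dasum(v));

  gsl_vector_free(v);
  return 0;
}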
void test_asum (void) {
  const double flteps = 1e-4, dbleps = 1e-6;

  {
    int N = 1;
    float X[] = { 0.239f };
    int incX = -1;
    float expected = 0.0f;
    float f;
    f = cblas_sasum(N, X, incX);
    gsl_test_rel(f, expected, flteps, "sasum(case 40)");
  };

  {
    int N = 1;
    double X[] = { -0.413 };
    int incX = -1;
    double expected = 0;
    double f;
    f = cblas_dasum(N, X, incX);
    gsl_test_rel(f, expected, dbleps, "dasum(case 41)");
  };

  {
    int N = 1;
    float X[] = { 0.1f, 0.017f };
    int incX = -1;
    float expected = 0.0f;
    float f;
    f = cblas_scasum(N, X, incX);
    gsl_test_rel(f, expected, flteps, "scasum(case 42)");
  };

  {
    int N = 1;
    double X[] = { -0.651, 0.079 };
    int incX = -1;
    double expected = 0;
    double f;
    f = cblas_dzasum(N, X, incX);
    gsl_test_rel(f, expected, dbleps, "dzasum(case 43)");
  };

  {
    int N = 2;
    float X[] = { 0.899f, -0.72f };
    int incX = 1;
    float expected = 1.619f;
    float f;
    f = cblas_sasum(N, X, incX);
    gsl_test_rel(f, expected, flteps, "sasum(case 44)");
  };

  {
    int N = 2;
    double X[] = { 0.271, -0.012 };
    int incX = 1;
    double expected = 0.283;
    double f;
    f = cblas_dasum(N, X, incX);
    gsl_test_rel(f, expected, dbleps, "dasum(case 45)");
  };

  {
    int N = 2;
    float X[] = { -0.567f, -0.645f, 0.098f, 0.256f };
    int incX = 1;
    float expected = 1.566f;
    float f;
    f = cblas_scasum(N, X, incX);
    gsl_test_rel(f, expected, flteps, "scasum(case 46)");
  };

  {
    int N = 2;
    double X[] = { -0.046, -0.671, -0.323, 0.785 };
    int incX = 1;
    double expected = 1.825;
    double f;
    f = cblas_dzasum(N, X, incX);
    gsl_test_rel(f, expected, dbleps, "dzasum(case 47)");
  };

  {
    int N = 2;
    float X[] = { 0.169f, 0.833f };
    int incX = -1;
    float expected = 0.0f;
    float f;
    f = cblas_sasum(N, X, incX);
    gsl_test_rel(f, expected, flteps, "sasum(case 48)");
  };

  {
    int N = 2;
    double X[] = { -0.586, -0.486 };
    int incX = -1;
    double expected = 0;
    double f;
    f = cblas_dasum(N, X, incX);
    gsl_test_rel(f, expected, dbleps, "dasum(case 49)");
  };

  {
    int N = 2;
    float X[] = { -0.314f, -0.318f, -0.835f, -0.807f };
    int incX = -1;
    float expected = 0.0f;
    float f;
    f = cblas_scasum(N, X, incX);
    gsl_test_rel(f, expected, flteps, "scasum(case 50)");
  };

  {
    int N = 2;
    double X[] = { -0.927, 0.152, -0.554, -0.844 };
    int incX = -1;
    double expected = 0;
    double f;
    f = cblas_dzasum(N, X, incX);
    gsl_test_rel(f, expected, dbleps, "dzasum(case 51)");
  };
}
double F77_dasum(const int *N, double *X, const int *incX)
{
   return cblas_dasum(*N, X, *incX);
}
// ----------------------------------------
int main( int argc, char** argv )
{
    TESTING_INIT();

    //real_Double_t   t_m, t_c, t_f;
    magma_int_t ione = 1;

    double  *A, *B;
    double  diff, error;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t m, n, k, size, maxn, ld;
    double  x2_m, x2_c;  // real x for magma, cblas/fortran blas respectively
    double  x_m, x_c;    // x for magma, cblas/fortran blas respectively

    magma_opts opts;
    parse_opts( argc, argv, &opts );

    opts.tolerance = max( 100., opts.tolerance );
    double tol = opts.tolerance * lapackf77_dlamch("E");
    gTol = tol;

    printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfault), which !!\n"
            "!! is why we use wrappers. It does not necessarily indicate a bug in MAGMA.  !!\n"
            "\n"
            "Diff  compares MAGMA wrapper        to CBLAS and BLAS function; should be exactly 0.\n"
            "Error compares MAGMA implementation to CBLAS and BLAS function; should be ~ machine epsilon.\n"
            "\n" );

    double total_diff  = 0.;
    double total_error = 0.;
    int inc[] = { 1 };  //{ -2, -1, 1, 2 };  //{ 1 };  //{ -1, 1 };
    int ninc = sizeof(inc)/sizeof(*inc);

    for( int itest = 0; itest < opts.ntest; ++itest ) {
        m = opts.msize[itest];
        n = opts.nsize[itest];
        k = opts.ksize[itest];

    for( int iincx = 0; iincx < ninc; ++iincx ) {
        magma_int_t incx = inc[iincx];

    for( int iincy = 0; iincy < ninc; ++iincy ) {
        magma_int_t incy = inc[iincy];

        printf("=========================================================================\n");
        printf( "m=%d, n=%d, k=%d, incx = %d, incy = %d\n",
                (int) m, (int) n, (int) k, (int) incx, (int) incy );
        printf( "Function    MAGMA    CBLAS    BLAS    Diff    Error\n"
                "            msec     msec     msec\n" );

        // allocate matrices
        // over-allocate so they can be any combination of
        // {m,n,k} * {abs(incx), abs(incy)} by
        // {m,n,k} * {abs(incx), abs(incy)}
        maxn = max( max( m, n ), k ) * max( abs(incx), abs(incy) );
        ld = max( 1, maxn );
        size = ld*maxn;
        magma_dmalloc_pinned( &A, size );  assert( A != NULL );
        magma_dmalloc_pinned( &B, size );  assert( B != NULL );

        // initialize matrices
        lapackf77_dlarnv( &ione, ISEED, &size, A );
        lapackf77_dlarnv( &ione, ISEED, &size, B );

        printf( "Level 1 BLAS ----------------------------------------------------------\n" );

        // ----- test DASUM
        // get one-norm of column j of A
        if ( incx > 0 && incx == incy ) {  // positive, no incy
            diff  = 0;
            error = 0;
            for( int j = 0; j < k; ++j ) {
                x_m = magma_cblas_dasum( m, A(0,j), incx );

                x_c = cblas_dasum( m, A(0,j), incx );
                diff += fabs( x_m - x_c );

                x_c = blasf77_dasum( &m, A(0,j), &incx );
                error += fabs( (x_m - x_c) / (m*x_c) );
            }
            output( "dasum", diff, error );
            total_diff  += diff;
            total_error += error;
        }

        // ----- test DNRM2
        // get two-norm of column j of A
        if ( incx > 0 && incx == incy ) {  // positive, no incy
            diff  = 0;
            error = 0;
            for( int j = 0; j < k; ++j ) {
                x_m = magma_cblas_dnrm2( m, A(0,j), incx );

                x_c = cblas_dnrm2( m, A(0,j), incx );
                diff += fabs( x_m - x_c );

                x_c = blasf77_dnrm2( &m, A(0,j), &incx );
                error += fabs( (x_m - x_c) / (m*x_c) );
            }
            output( "dnrm2", diff, error );
            total_diff  += diff;
            total_error += error;
        }

        // ----- test DDOT
        // dot columns, Aj^H Bj
        diff  = 0;
        error = 0;
        for( int j = 0; j < k; ++j ) {
            // MAGMA implementation, not just wrapper
            x2_m = magma_cblas_ddot( m, A(0,j), incx, B(0,j), incy );

            // crashes on MKL 11.1.2, ILP64
            #if ! defined( MAGMA_WITH_MKL )
                #ifdef COMPLEX
                cblas_ddot_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                #else
                x2_c = cblas_ddot( m, A(0,j), incx, B(0,j), incy );
                #endif
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif

            // crashes on MacOS 10.9
            #if ! defined( __APPLE__ )
                x2_c = blasf77_ddot( &m, A(0,j), &incx, B(0,j), &incy );
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
        }
        output( "ddot", diff, error );
        total_diff  += diff;
        total_error += error;

        // ----- test DDOT
        // dot columns, Aj^T * Bj
        diff  = 0;
        error = 0;
        for( int j = 0; j < k; ++j ) {
            // MAGMA implementation, not just wrapper
            x2_m = magma_cblas_ddot( m, A(0,j), incx, B(0,j), incy );

            // crashes on MKL 11.1.2, ILP64
            #if ! defined( MAGMA_WITH_MKL )
                #ifdef COMPLEX
                cblas_ddot_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                #else
                x2_c = cblas_ddot( m, A(0,j), incx, B(0,j), incy );
                #endif
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif

            // crashes on MacOS 10.9
            #if ! defined( __APPLE__ )
                x2_c = blasf77_ddot( &m, A(0,j), &incx, B(0,j), &incy );
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
        }
        output( "ddot", diff, error );
        total_diff  += diff;
        total_error += error;

        // tell user about disabled functions
        #if defined( MAGMA_WITH_MKL )
            printf( "cblas_ddot disabled with MKL (segfaults)\n" );
        #endif

        #if defined( __APPLE__ )
            printf( "blasf77_ddot disabled on MacOS (segfaults)\n" );
        #endif

        // cleanup
        magma_free_pinned( A );
        magma_free_pinned( B );
        fflush( stdout );
    }}}  // itest, incx, incy

    // TODO use average error?
    printf( "sum diffs  = %8.2g, MAGMA wrapper compared to CBLAS and Fortran BLAS; should be exactly 0.\n"
            "sum errors = %8.2e, MAGMA implementation compared to CBLAS and Fortran BLAS; should be ~ machine epsilon.\n\n",
            total_diff, total_error );
    if ( total_diff != 0. ) {
        printf( "some tests failed diff == 0.; see above.\n" );
    }
    else {
        printf( "all tests passed diff == 0.\n" );
    }

    TESTING_FINALIZE();

    int status = (total_diff != 0.);
    return status;
}
double dense_vector_asum(DenseVector *vector)
{
  return cblas_dasum((int)vector->size, vector->data, (int)vector->stride);
}
double vector_t::norm1() const {
  stack::fe_asserter dummy{};
  return cblas_dasum(len, data.get(), inc);
}
double aa_la_trace( size_t n, const double *A )
{
    /* Stride n+1 walks the diagonal of the n-by-n matrix A. Note that dasum
     * sums absolute values, so this equals the trace only when every
     * diagonal entry is non-negative. */
    return cblas_dasum( (int)n, A, (int)(n+1) );
}
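/* A minimal usage sketch (not from the original source): the 3x3 matrix 2*I
 * has trace 6, and its diagonal is reached with stride n+1 = 4. Assumes the
 * aa_la_trace definition above is visible in the same translation unit. */
#include <stdio.h>

int main(void)
{
    /* column-major 3x3 matrix: 2*I */
    double A[9] = { 2, 0, 0,
                    0, 2, 0,
                    0, 0, 2 };

    printf("trace = %g\n", aa_la_trace(3, A));  /* prints 6 */
    return 0;
}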
/**
 * @brief Compute \f$ k_{eff} \f$ from the total fission and absorption rates.
 * @details This method computes the current approximation to the
 *          multiplication factor on this iteration as follows:
 *          \f$ k_{eff} = \frac{\displaystyle\sum \displaystyle\sum \nu
 *                        \Sigma_f \Phi V}{\displaystyle\sum
 *                        \displaystyle\sum \Sigma_a \Phi V} \f$
 */
void VectorizedSolver::computeKeff() {

  int tid;
  Material* material;
  FP_PRECISION* sigma_a;
  FP_PRECISION* nu_sigma_f;
  FP_PRECISION volume;

  FP_PRECISION tot_abs = 0.0;
  FP_PRECISION tot_fission = 0.0;

  int size = _num_FSRs * sizeof(FP_PRECISION);
  FP_PRECISION* FSR_rates = (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);

  size = _num_threads * _num_groups * sizeof(FP_PRECISION);
  FP_PRECISION* group_rates = (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);

  /* Loop over all FSRs and compute the volume-weighted absorption rates */
  #pragma omp parallel for private(tid, volume, \
    material, sigma_a) schedule(guided)
  for (int r=0; r < _num_FSRs; r++) {

    tid = omp_get_thread_num() * _num_groups;
    volume = _FSR_volumes[r];
    material = _FSR_materials[r];
    sigma_a = material->getSigmaA();

    /* Loop over each energy group vector length */
    for (int v=0; v < _num_vector_lengths; v++) {

      /* Loop over energy groups within this vector */
      #pragma simd vectorlength(VEC_LENGTH)
      for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++)
        group_rates[tid+e] = sigma_a[e] * _scalar_flux(r,e);
    }

    #ifdef SINGLE
    FSR_rates[r] = cblas_sasum(_num_groups, &group_rates[tid], 1) * volume;
    #else
    FSR_rates[r] = cblas_dasum(_num_groups, &group_rates[tid], 1) * volume;
    #endif
  }

  /* Reduce absorption rates across FSRs, energy groups */
  #ifdef SINGLE
  tot_abs = cblas_sasum(_num_FSRs, FSR_rates, 1);
  #else
  tot_abs = cblas_dasum(_num_FSRs, FSR_rates, 1);
  #endif

  /* Loop over all FSRs and compute the volume-weighted fission rates */
  #pragma omp parallel for private(tid, volume, \
    material, nu_sigma_f) schedule(guided)
  for (int r=0; r < _num_FSRs; r++) {

    tid = omp_get_thread_num() * _num_groups;
    volume = _FSR_volumes[r];
    material = _FSR_materials[r];
    nu_sigma_f = material->getNuSigmaF();

    /* Loop over each energy group vector length */
    for (int v=0; v < _num_vector_lengths; v++) {

      /* Loop over energy groups within this vector */
      #pragma simd vectorlength(VEC_LENGTH)
      for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++)
        group_rates[tid+e] = nu_sigma_f[e] * _scalar_flux(r,e);
    }

    #ifdef SINGLE
    FSR_rates[r] = cblas_sasum(_num_groups, &group_rates[tid], 1) * volume;
    #else
    FSR_rates[r] = cblas_dasum(_num_groups, &group_rates[tid], 1) * volume;
    #endif
  }

  /* Reduce fission rates across FSRs */
  #ifdef SINGLE
  tot_fission = cblas_sasum(_num_FSRs, FSR_rates, 1);
  #else
  tot_fission = cblas_dasum(_num_FSRs, FSR_rates, 1);
  #endif

  /* Reduce leakage array across tracks, energy groups, polar angles */
  size = 2 * _tot_num_tracks * _polar_times_groups;

  #ifdef SINGLE
  _leakage = cblas_sasum(size, _boundary_leakage, 1) * 0.5;
  #else
  _leakage = cblas_dasum(size, _boundary_leakage, 1) * 0.5;
  #endif

  _k_eff = tot_fission / (tot_abs + _leakage);

  log_printf(DEBUG, "abs = %f, fission = %f, leakage = %f, k_eff = %f",
             tot_abs, tot_fission, _leakage, _k_eff);

  MM_FREE(FSR_rates);
  MM_FREE(group_rates);

  return;
}
/**
 * @brief Computes the total source (fission and scattering) in each FSR.
 * @details This method computes the total source in each FSR based on
 *          this iteration's current approximation to the scalar flux. A
 *          residual for the source with respect to the source computed on
 *          the previous iteration is computed and returned. The residual
 *          is determined as follows:
 *          \f$ res = \sqrt{\frac{\displaystyle\sum \displaystyle\sum
 *              \left(\frac{Q^i - Q^{i-1}}{Q^i}\right)^2}{\# FSRs}} \f$
 *
 * @return the residual between this source and the previous source
 */
FP_PRECISION VectorizedSolver::computeFSRSources() {

  int tid;
  FP_PRECISION scatter_source;
  FP_PRECISION fission_source;
  FP_PRECISION* nu_sigma_f;
  FP_PRECISION* sigma_s;
  FP_PRECISION* sigma_t;
  FP_PRECISION* chi;
  Material* material;

  FP_PRECISION source_residual = 0.0;

  FP_PRECISION inverse_k_eff = 1.0 / _k_eff;

  /* For all FSRs, find the source. tid must be private so that each thread
   * indexes its own scratch row of _scatter_sources. */
  #pragma omp parallel for private(tid, material, nu_sigma_f, chi, \
    sigma_s, sigma_t, fission_source, scatter_source) schedule(guided)
  for (int r=0; r < _num_FSRs; r++) {

    tid = omp_get_thread_num();
    material = _FSR_materials[r];
    nu_sigma_f = material->getNuSigmaF();
    chi = material->getChi();
    sigma_s = material->getSigmaS();
    sigma_t = material->getSigmaT();

    /* Initialize the source residual to zero */
    _source_residuals[r] = 0.;

    /* Compute fission source for each group */
    if (material->isFissionable()) {
      for (int v=0; v < _num_vector_lengths; v++) {

        /* Compute fission source for each group */
        #pragma simd vectorlength(VEC_LENGTH)
        for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++)
          _fission_sources(r,e) = _scalar_flux(r,e) * nu_sigma_f[e];
      }

      #ifdef SINGLE
      fission_source = cblas_sasum(_num_groups, &_fission_sources(r,0), 1);
      #else
      fission_source = cblas_dasum(_num_groups, &_fission_sources(r,0), 1);
      #endif

      fission_source *= inverse_k_eff;
    }

    else
      fission_source = 0.0;

    /* Compute total scattering source for group G */
    for (int G=0; G < _num_groups; G++) {
      scatter_source = 0;

      for (int v=0; v < _num_vector_lengths; v++) {

        #pragma simd vectorlength(VEC_LENGTH)
        for (int g=v*VEC_LENGTH; g < (v+1)*VEC_LENGTH; g++)
          _scatter_sources(tid,g) = sigma_s[G*_num_groups+g] *
                                    _scalar_flux(r,g);
      }

      #ifdef SINGLE
      scatter_source = cblas_sasum(_num_groups, &_scatter_sources(tid,0), 1);
      #else
      scatter_source = cblas_dasum(_num_groups, &_scatter_sources(tid,0), 1);
      #endif

      /* Set the total source for FSR r in group G */
      _source(r,G) = (fission_source * chi[G] + scatter_source) *
                      ONE_OVER_FOUR_PI;

      _reduced_source(r,G) = _source(r,G) / sigma_t[G];

      /* Compute the norm of residual of the source in the FSR */
      if (fabs(_source(r,G)) > 1E-10)
        _source_residuals[r] += pow((_source(r,G) - _old_source(r,G)) /
                                     _source(r,G), 2);

      /* Update the old source */
      _old_source(r,G) = _source(r,G);
    }
  }

  /* Sum up the residuals from each group and in each FSR */
  #ifdef SINGLE
  source_residual = cblas_sasum(_num_FSRs, _source_residuals, 1);
  #else
  source_residual = cblas_dasum(_num_FSRs, _source_residuals, 1);
  #endif

  source_residual = sqrt(source_residual / (_num_groups * _num_FSRs));

  return source_residual;
}
JNIEXPORT jdouble JNICALL Java_uncomplicate_neanderthal_CBLAS_dasum
(JNIEnv *env, jclass clazz, jint N, jobject X, jint offsetX, jint incX) {

    double *cX = (double *) (*env)->GetDirectBufferAddress(env, X);
    return cblas_dasum(N, cX + offsetX, incX);
};
template <>
double caffe_cpu_asum<double>(const int n, const double* x) {
  return cblas_dasum(n, x, 1);
}
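// For comparison, a sketch of the matching single-precision specialization
// (assumed here, not part of the excerpt above), which dispatches to
// cblas_sasum instead of cblas_dasum:
template <>
float caffe_cpu_asum<float>(const int n, const float* x) {
  return cblas_sasum(n, x, 1);
}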
inline double STARPU_DASUM(int N, double *X, int incX)
{
	return cblas_dasum(N, X, incX);
}