/**
 * @brief Computes the total source (fission, scattering, fixed) in each FSR.
 * @details This method computes the total source in each FSR based on
 *          this iteration's current approximation to the scalar flux.
 */
void VectorizedSolver::computeFSRSources() {

#pragma omp parallel default(none)
  {
    int tid;
    Material* material;
    FP_PRECISION* sigma_t;
    FP_PRECISION* sigma_s;
    FP_PRECISION* fiss_mat;
    FP_PRECISION scatter_source, fission_source;

    int size = _num_groups * sizeof(FP_PRECISION);
    FP_PRECISION* fission_sources =
      (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);
    FP_PRECISION* scatter_sources =
      (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);

    /* For all FSRs, find the source */
#pragma omp for schedule(guided)
    for (int r=0; r < _num_FSRs; r++) {

      tid = omp_get_thread_num();
      material = _FSR_materials[r];
      sigma_t = material->getSigmaT();
      sigma_s = material->getSigmaS();
      fiss_mat = material->getFissionMatrix();

      /* Compute scatter + fission source for group G */
      for (int G=0; G < _num_groups; G++) {
        for (int v=0; v < _num_vector_lengths; v++) {

#pragma simd vectorlength(VEC_LENGTH)
          for (int g=v*VEC_LENGTH; g < (v+1)*VEC_LENGTH; g++) {
            scatter_sources[g] = sigma_s[G*_num_groups+g] * _scalar_flux(r,g);
            fission_sources[g] = fiss_mat[G*_num_groups+g] * _scalar_flux(r,g);
          }
        }

#ifdef SINGLE
        scatter_source=cblas_sasum(_num_groups, scatter_sources, 1);
        fission_source=cblas_sasum(_num_groups, fission_sources, 1);
#else
        scatter_source=cblas_dasum(_num_groups, scatter_sources, 1);
        fission_source=cblas_dasum(_num_groups, fission_sources, 1);
#endif

        fission_source /= _k_eff;

        /* Compute total (scatter+fission+fixed) reduced source */
        _reduced_sources(r,G) = _fixed_sources(r,G);
        _reduced_sources(r,G) += scatter_source + fission_source;
        _reduced_sources(r,G) *= ONE_OVER_FOUR_PI / sigma_t[G];
      }
    }

    MM_FREE(fission_sources);
    MM_FREE(scatter_sources);
  }
}
/**
 * @brief Compute \f$ k_{eff} \f$ from successive fission sources.
 */
void VectorizedSolver::computeKeff() {

  FP_PRECISION fission;

  int size = _num_FSRs * sizeof(FP_PRECISION);
  FP_PRECISION* FSR_rates = (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);

  size = _num_threads * _num_groups * sizeof(FP_PRECISION);
  FP_PRECISION* group_rates = (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);

#pragma omp parallel
  {

    int tid = omp_get_thread_num() * _num_groups;
    Material* material;
    FP_PRECISION* sigma;
    FP_PRECISION volume;

    /* Compute the new nu-fission rates in each FSR */
#pragma omp for schedule(guided)
    for (int r=0; r < _num_FSRs; r++) {

      volume = _FSR_volumes[r];
      material = _FSR_materials[r];
      sigma = material->getNuSigmaF();

      /* Loop over each energy group vector length */
      for (int v=0; v < _num_vector_lengths; v++) {

        /* Loop over energy groups within this vector */
#pragma simd vectorlength(VEC_LENGTH)
        for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++)
          group_rates[tid+e] = sigma[e] * _scalar_flux(r,e);
      }

#ifdef SINGLE
      FSR_rates[r] = cblas_sasum(_num_groups, &group_rates[tid], 1) * volume;
#else
      FSR_rates[r] = cblas_dasum(_num_groups, &group_rates[tid], 1) * volume;
#endif
    }
  }

  /* Reduce new fission rates across FSRs */
#ifdef SINGLE
  fission = cblas_sasum(_num_FSRs, FSR_rates, 1);
#else
  fission = cblas_dasum(_num_FSRs, FSR_rates, 1);
#endif

  _k_eff *= fission;

  MM_FREE(FSR_rates);
  MM_FREE(group_rates);
}
/**
 * @brief Normalizes all FSR scalar fluxes and Track boundary angular
 *        fluxes to the total fission source (times \f$ \nu \f$).
 */
void VectorizedSolver::normalizeFluxes() {

  FP_PRECISION* nu_sigma_f;
  FP_PRECISION volume;
  FP_PRECISION tot_fission_source;
  FP_PRECISION norm_factor;

  /* Compute total fission source for each FSR, energy group */
  #pragma omp parallel for private(volume, nu_sigma_f)  \
    reduction(+:tot_fission_source) schedule(guided)
  for (int r=0; r < _num_FSRs; r++) {

    /* Get pointers to important data structures */
    nu_sigma_f = _FSR_materials[r]->getNuSigmaF();
    volume = _FSR_volumes[r];

    /* Loop over energy group vector lengths */
    for (int v=0; v < _num_vector_lengths; v++) {

      /* Loop over each energy group within this vector */
      #pragma simd vectorlength(VEC_LENGTH)
      for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++) {
        _fission_sources(r,e) = nu_sigma_f[e] * _scalar_flux(r,e);
        _fission_sources(r,e) *= volume;
      }
    }
  }

  /* Compute the total fission source */
  int size = _num_FSRs * _num_groups;
  #ifdef SINGLE
  tot_fission_source = cblas_sasum(size, _fission_sources, 1);
  #else
  tot_fission_source = cblas_dasum(size, _fission_sources, 1);
  #endif

  /* Compute the normalization factor */
  norm_factor = 1.0 / tot_fission_source;

  log_printf(DEBUG, "Tot. Fiss. Src. = %f, Normalization factor = %f",
             tot_fission_source, norm_factor);

  /* Normalize the FSR scalar fluxes */
  #ifdef SINGLE
  cblas_sscal(size, norm_factor, _scalar_flux, 1);
  #else
  cblas_dscal(size, norm_factor, _scalar_flux, 1);
  #endif

  /* Normalize the Track angular boundary fluxes */
  size = 2 * _tot_num_tracks * _num_polar * _num_groups;

  #ifdef SINGLE
  cblas_sscal(size, norm_factor, _boundary_flux, 1);
  #else
  cblas_dscal(size, norm_factor, _boundary_flux, 1);
  #endif

  return;
}
/*
 * 最大最大激励化 第 unitdx 个单元
 *
 */
double LayerWiseRBMs::maximizeUnit(int layerIdx, int unitIdx, double * unitSample, double argvNorm, int epoch){

    int AMnumIn = layers[0]->numVis;                                            

    // unitsample 归一化
    double curNorm = squareNorm(unitSample, AMnumIn, 1);
    cblas_dscal(AMnumIn, argvNorm / curNorm, unitSample, 1);
	
	double maxValue =0;

	for(int k=0; k<epoch; k++){
	// forward
		for(int i=0; i<=layerIdx; i++){
			if(i==0)
				layers[i]->setInput(unitSample);
			else
				layers[i]->setInput(layers[i-1]->getOutput());
			layers[i]->setBatchSize(1);
			layers[i]->runBatch();
		}
		maxValue = layers[layerIdx]->getOutput()[unitIdx];
	//back propagate
		for(int i=layerIdx; i>=0; i--){
			if(i==layerIdx)
				layers[i]->getAMDelta(unitIdx, NULL)	;
			else
				layers[i]->getAMDelta(-1, layers[i+1]->AMDelta);
		}
        double lr = 0.01 * cblas_dasum(AMnumIn, unitSample, 1) /                
                    cblas_dasum(AMnumIn, layers[0]->AMDelta, 1);
		
	// update unitSample
		cblas_daxpy(AMnumIn, lr, layers[0]->AMDelta, 1, unitSample, 1);
	//归一化 unitSample
		curNorm = squareNorm(unitSample, AMnumIn, 1);
	    cblas_dscal(AMnumIn, argvNorm / curNorm, unitSample, 1);
	
	}
	return maxValue;
}
Exemple #5
0
double dot(int N, double *vec1, double *vec2){

	// thread variables
	int nthds, tid;

	// compute variables
	int m, stride, start, stop;
	double dot;

	/* Fork a team of threads giving them their own copies of variables */
	#pragma omp parallel private(nthds, tid) shared(m)
	{
		// compute thread variables
		nthds = omp_get_num_threads();
		tid = omp_get_thread_num();
		
		if(tid == 0){
			m = nthds;
		}

	}

	//printf("m = %d\n",m);

	double pnrms[m]; 

	/* Fork a team of threads giving them their own copies of variables */
	#pragma omp parallel private(nthds, tid, stride, start, stop) shared(N, vec1, vec2, pnrms)
	{
		// compute thread variables
		nthds = omp_get_num_threads();
		tid = omp_get_thread_num();

		// compute stride
		stride = ceil((long double)N/nthds);

		// compute start and stop
		start = tid*stride;
		stop = (int)fminl((long double)(tid+1)*stride,(long double)N);

		pnrms[tid] = cblas_ddot(stop-start,&vec1[start],1,&vec2[start],1);
		//printf("pnrms[%d] = %+e\n",tid,pnrms[tid]);
	} 

	dot = cblas_dasum(m,&pnrms[0],1);
	//printf("nrm = %+e\n",nrm);

	return dot;
}
Exemple #6
0
 void Vector::processEnergy(const double* inputs, double* outputs)
 {
     double energySum = 0., energyAbscissa = 0., energyOrdinate = 0.;
     
     cblas_dcopy(m_number_of_channels, inputs, 1, m_channels_double, 1);
     for(int i = 0; i < m_number_of_channels; i++)
         m_channels_double[i] *= m_channels_double[i];
     
     energySum = cblas_dasum(m_number_of_channels, m_channels_double, 1);
     energyAbscissa = cblas_ddot(m_number_of_channels, m_channels_double, 1, m_channels_abscissa_double, 1);
     energyOrdinate = cblas_ddot(m_number_of_channels, m_channels_double, 1, m_channels_ordinate_double, 1);
     
     if(energySum)
     {
         outputs[0] = energyAbscissa / energySum;
         outputs[1] = energyOrdinate / energySum;
     }
     else
     {
         outputs[0] = 0.;
         outputs[1] = 0.;
     }
 }
double My_dasum(const gsl_vector *x)
{
	return cblas_dasum(x->size, x->data, x->stride);
}
void
test_asum (void) {
const double flteps = 1e-4, dbleps = 1e-6;
  {
   int N = 1;
   float X[] = { 0.239f };
   int incX = -1;
   float expected = 0.0f;
   float f;
   f = cblas_sasum(N, X, incX);
   gsl_test_rel(f, expected, flteps, "sasum(case 40)");
  };


  {
   int N = 1;
   double X[] = { -0.413 };
   int incX = -1;
   double expected = 0;
   double f;
   f = cblas_dasum(N, X, incX);
   gsl_test_rel(f, expected, dbleps, "dasum(case 41)");
  };


  {
   int N = 1;
   float X[] = { 0.1f, 0.017f };
   int incX = -1;
   float expected = 0.0f;
   float f;
   f = cblas_scasum(N, X, incX);
   gsl_test_rel(f, expected, flteps, "scasum(case 42)");
  };


  {
   int N = 1;
   double X[] = { -0.651, 0.079 };
   int incX = -1;
   double expected = 0;
   double f;
   f = cblas_dzasum(N, X, incX);
   gsl_test_rel(f, expected, dbleps, "dzasum(case 43)");
  };


  {
   int N = 2;
   float X[] = { 0.899f, -0.72f };
   int incX = 1;
   float expected = 1.619f;
   float f;
   f = cblas_sasum(N, X, incX);
   gsl_test_rel(f, expected, flteps, "sasum(case 44)");
  };


  {
   int N = 2;
   double X[] = { 0.271, -0.012 };
   int incX = 1;
   double expected = 0.283;
   double f;
   f = cblas_dasum(N, X, incX);
   gsl_test_rel(f, expected, dbleps, "dasum(case 45)");
  };


  {
   int N = 2;
   float X[] = { -0.567f, -0.645f, 0.098f, 0.256f };
   int incX = 1;
   float expected = 1.566f;
   float f;
   f = cblas_scasum(N, X, incX);
   gsl_test_rel(f, expected, flteps, "scasum(case 46)");
  };


  {
   int N = 2;
   double X[] = { -0.046, -0.671, -0.323, 0.785 };
   int incX = 1;
   double expected = 1.825;
   double f;
   f = cblas_dzasum(N, X, incX);
   gsl_test_rel(f, expected, dbleps, "dzasum(case 47)");
  };


  {
   int N = 2;
   float X[] = { 0.169f, 0.833f };
   int incX = -1;
   float expected = 0.0f;
   float f;
   f = cblas_sasum(N, X, incX);
   gsl_test_rel(f, expected, flteps, "sasum(case 48)");
  };


  {
   int N = 2;
   double X[] = { -0.586, -0.486 };
   int incX = -1;
   double expected = 0;
   double f;
   f = cblas_dasum(N, X, incX);
   gsl_test_rel(f, expected, dbleps, "dasum(case 49)");
  };


  {
   int N = 2;
   float X[] = { -0.314f, -0.318f, -0.835f, -0.807f };
   int incX = -1;
   float expected = 0.0f;
   float f;
   f = cblas_scasum(N, X, incX);
   gsl_test_rel(f, expected, flteps, "scasum(case 50)");
  };


  {
   int N = 2;
   double X[] = { -0.927, 0.152, -0.554, -0.844 };
   int incX = -1;
   double expected = 0;
   double f;
   f = cblas_dzasum(N, X, incX);
   gsl_test_rel(f, expected, dbleps, "dzasum(case 51)");
  };


}
Exemple #9
0
double F77_dasum(const int *N, double *X, const int *incX)
{
   return cblas_dasum(*N, X, *incX);
}
Exemple #10
0
// ----------------------------------------
int main( int argc, char** argv )
{
    TESTING_INIT();
    
    //real_Double_t   t_m, t_c, t_f;
    magma_int_t ione = 1;
    
    double  *A, *B;
    double diff, error;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t m, n, k, size, maxn, ld;
    double x2_m, x2_c;  // real x for magma, cblas/fortran blas respectively
    double x_m, x_c;  // x for magma, cblas/fortran blas respectively
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    opts.tolerance = max( 100., opts.tolerance );
    double tol = opts.tolerance * lapackf77_dlamch("E");
    gTol = tol;
    
    printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfault), which !!\n"
            "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA.  !!\n"
            "\n"
            "Diff  compares MAGMA wrapper        to CBLAS and BLAS function; should be exactly 0.\n"
            "Error compares MAGMA implementation to CBLAS and BLAS function; should be ~ machine epsilon.\n"
            "\n" );
    
    double total_diff  = 0.;
    double total_error = 0.;
    int inc[] = { 1 };  //{ -2, -1, 1, 2 };  //{ 1 };  //{ -1, 1 };
    int ninc = sizeof(inc)/sizeof(*inc);
    
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        m = opts.msize[itest];
        n = opts.nsize[itest];
        k = opts.ksize[itest];
        
    for( int iincx = 0; iincx < ninc; ++iincx ) {
        magma_int_t incx = inc[iincx];
        
    for( int iincy = 0; iincy < ninc; ++iincy ) {
        magma_int_t incy = inc[iincy];
        
        printf("=========================================================================\n");
        printf( "m=%d, n=%d, k=%d, incx = %d, incy = %d\n",
                (int) m, (int) n, (int) k, (int) incx, (int) incy );
        printf( "Function              MAGMA     CBLAS     BLAS        Diff      Error\n"
                "                      msec      msec      msec\n" );
        
        // allocate matrices
        // over-allocate so they can be any combination of
        // {m,n,k} * {abs(incx), abs(incy)} by
        // {m,n,k} * {abs(incx), abs(incy)}
        maxn = max( max( m, n ), k ) * max( abs(incx), abs(incy) );
        ld = max( 1, maxn );
        size = ld*maxn;
        magma_dmalloc_pinned( &A,  size );  assert( A   != NULL );
        magma_dmalloc_pinned( &B,  size );  assert( B   != NULL );
        
        // initialize matrices
        lapackf77_dlarnv( &ione, ISEED, &size, A );
        lapackf77_dlarnv( &ione, ISEED, &size, B );
        
        printf( "Level 1 BLAS ----------------------------------------------------------\n" );
        
        
        // ----- test DASUM
        // get one-norm of column j of A
        if ( incx > 0 && incx == incy ) {  // positive, no incy
            diff  = 0;
            error = 0;
            for( int j = 0; j < k; ++j ) {
                x_m = magma_cblas_dasum( m, A(0,j), incx );
                
                x_c = cblas_dasum( m, A(0,j), incx );
                diff += fabs( x_m - x_c );
                
                x_c = blasf77_dasum( &m, A(0,j), &incx );
                error += fabs( (x_m - x_c) / (m*x_c) );
            }
            output( "dasum", diff, error );
            total_diff  += diff;
            total_error += error;
        }
        
        // ----- test DNRM2
        // get two-norm of column j of A
        if ( incx > 0 && incx == incy ) {  // positive, no incy
            diff  = 0;
            error = 0;
            for( int j = 0; j < k; ++j ) {
                x_m = magma_cblas_dnrm2( m, A(0,j), incx );
                
                x_c = cblas_dnrm2( m, A(0,j), incx );
                diff += fabs( x_m - x_c );
                
                x_c = blasf77_dnrm2( &m, A(0,j), &incx );
                error += fabs( (x_m - x_c) / (m*x_c) );
            }
            output( "dnrm2", diff, error );
            total_diff  += diff;
            total_error += error;
        }
        
        // ----- test DDOT
        // dot columns, Aj^H Bj
        diff  = 0;
        error = 0;
        for( int j = 0; j < k; ++j ) {
            // MAGMA implementation, not just wrapper
            x2_m = magma_cblas_ddot( m, A(0,j), incx, B(0,j), incy );
            
            // crashes on MKL 11.1.2, ILP64
            #if ! defined( MAGMA_WITH_MKL )
                #ifdef COMPLEX
                cblas_ddot_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                #else
                x2_c = cblas_ddot( m, A(0,j), incx, B(0,j), incy );
                #endif
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
            
            // crashes on MacOS 10.9
            #if ! defined( __APPLE__ )
                x2_c = blasf77_ddot( &m, A(0,j), &incx, B(0,j), &incy );
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
        }
        output( "ddot", diff, error );
        total_diff  += diff;
        total_error += error;
        total_error += error;
        
        // ----- test DDOT
        // dot columns, Aj^T * Bj
        diff  = 0;
        error = 0;
        for( int j = 0; j < k; ++j ) {
            // MAGMA implementation, not just wrapper
            x2_m = magma_cblas_ddot( m, A(0,j), incx, B(0,j), incy );
            
            // crashes on MKL 11.1.2, ILP64
            #if ! defined( MAGMA_WITH_MKL )
                #ifdef COMPLEX
                cblas_ddot_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                #else
                x2_c = cblas_ddot( m, A(0,j), incx, B(0,j), incy );
                #endif
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
            
            // crashes on MacOS 10.9
            #if ! defined( __APPLE__ )
                x2_c = blasf77_ddot( &m, A(0,j), &incx, B(0,j), &incy );
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
        }
        output( "ddot", diff, error );
        total_diff  += diff;
        total_error += error;
        
        // tell user about disabled functions
        #if defined( MAGMA_WITH_MKL )
            printf( "cblas_ddot and cblas_ddot disabled with MKL (segfaults)\n" );
        #endif
        
        #if defined( __APPLE__ )
            printf( "blasf77_ddot and blasf77_ddot disabled on MacOS (segfaults)\n" );
        #endif
            
        // cleanup
        magma_free_pinned( A );
        magma_free_pinned( B );
        fflush( stdout );
    }}}  // itest, incx, incy
    
    // TODO use average error?
    printf( "sum diffs  = %8.2g, MAGMA wrapper        compared to CBLAS and Fortran BLAS; should be exactly 0.\n"
            "sum errors = %8.2e, MAGMA implementation compared to CBLAS and Fortran BLAS; should be ~ machine epsilon.\n\n",
            total_diff, total_error );
    if ( total_diff != 0. ) {
        printf( "some tests failed diff == 0.; see above.\n" );
    }
    else {
        printf( "all tests passed diff == 0.\n" );
    }
    
    TESTING_FINALIZE();
    
    int status = (total_diff != 0.);
    return status;
}
Exemple #11
0
double dense_vector_asum(DenseVector *vector) {
  return cblas_dasum((int)vector->size,vector->data,(int)vector->stride);
}
Exemple #12
0
double vector_t::norm1() const
{
	stack::fe_asserter dummy{};
	return cblas_dasum(len, data.get(), inc);
}
Exemple #13
0
double aa_la_trace( size_t n, const double *A ) {
    return cblas_dasum( (int)n, A, (int)(n+1) );
}
/**
 * @brief Compute \f$ k_{eff} \f$ from the total fission and absorption rates.
 * @details This method computes the current approximation to the
 *          multiplication factor on this iteration as follows:
 *          \f$ k_{eff} = \frac{\displaystyle\sum \displaystyle\sum \nu
 *                        \Sigma_f \Phi V}{\displaystyle\sum
 *                        \displaystyle\sum \Sigma_a \Phi V} \f$
 *
 */
void VectorizedSolver::computeKeff() {

  int tid;
  Material* material;
  FP_PRECISION* sigma_a;
  FP_PRECISION* nu_sigma_f;
  FP_PRECISION volume;

  FP_PRECISION tot_abs = 0.0;
  FP_PRECISION tot_fission = 0.0;

  int size = _num_FSRs * sizeof(FP_PRECISION);
  FP_PRECISION* FSR_rates = (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);

  size = _num_threads * _num_groups * sizeof(FP_PRECISION);
  FP_PRECISION* group_rates = (FP_PRECISION*)MM_MALLOC(size, VEC_ALIGNMENT);

  /* Loop over all FSRs and compute the volume-weighted absorption rates */
  #pragma omp parallel for private(tid, volume, \
    material, sigma_a) schedule(guided)
  for (int r=0; r < _num_FSRs; r++) {

    tid = omp_get_thread_num() * _num_groups;
    volume = _FSR_volumes[r];
    material = _FSR_materials[r];
    sigma_a = material->getSigmaA();

    /* Loop over each energy group vector length */
    for (int v=0; v < _num_vector_lengths; v++) {

      /* Loop over energy groups within this vector */
      #pragma simd vectorlength(VEC_LENGTH)
      for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++)
        group_rates[tid+e] = sigma_a[e] * _scalar_flux(r,e);
    }

    #ifdef SINGLE
    FSR_rates[r] = cblas_sasum(_num_groups, &group_rates[tid], 1) * volume;
    #else
    FSR_rates[r] = cblas_dasum(_num_groups, &group_rates[tid], 1) * volume;
    #endif
  }

  /* Reduce absorption and fission rates across FSRs, energy groups */
  #ifdef SINGLE
  tot_abs = cblas_sasum(_num_FSRs, FSR_rates, 1);
  #else
  tot_abs = cblas_dasum(_num_FSRs, FSR_rates, 1);
  #endif

  /* Loop over all FSRs and compute the volume-weighted fission rates */
  #pragma omp parallel for private(tid, volume, \
    material, nu_sigma_f) schedule(guided)
  for (int r=0; r < _num_FSRs; r++) {

    tid = omp_get_thread_num() * _num_groups;
    volume = _FSR_volumes[r];
    material = _FSR_materials[r];
    nu_sigma_f = material->getNuSigmaF();

    /* Loop over each energy group vector length */
    for (int v=0; v < _num_vector_lengths; v++) {

      /* Loop over energy groups within this vector */
      #pragma simd vectorlength(VEC_LENGTH)
      for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++)
        group_rates[tid+e] = nu_sigma_f[e] * _scalar_flux(r,e);
    }

    #ifdef SINGLE
    FSR_rates[r] = cblas_sasum(_num_groups, &group_rates[tid], 1) * volume;
    #else
    FSR_rates[r] = cblas_dasum(_num_groups, &group_rates[tid], 1) * volume;
    #endif
  }

  /* Reduce fission rates across FSRs */
  #ifdef SINGLE
  tot_fission = cblas_sasum(_num_FSRs, FSR_rates, 1);
  #else
  tot_fission = cblas_dasum(_num_FSRs, FSR_rates, 1);
  #endif

  /** Reduce leakage array across tracks, energy groups, polar angles */
  size = 2 * _tot_num_tracks * _polar_times_groups;

  #ifdef SINGLE
  _leakage = cblas_sasum(size, _boundary_leakage, 1) * 0.5;
  #else
  _leakage = cblas_dasum(size, _boundary_leakage, 1) * 0.5;
  #endif

  _k_eff = tot_fission / (tot_abs + _leakage);

  log_printf(DEBUG, "abs = %f, fission = %f, leakage = %f, k_eff = %f",
             tot_abs, tot_fission, _leakage, _k_eff);

  MM_FREE(FSR_rates);
  MM_FREE(group_rates);

return;
}
/**
 * @brief Computes the total source (fission and scattering) in each FSR.
 * @details This method computes the total source in each FSR based on
 *          this iteration's current approximation to the scalar flux. A
 *          residual for the source with respect to the source compute on
 *          the previous iteration is computed and returned. The residual
 *          is determined as follows:
 *          /f$ res = \sqrt{\frac{\displaystyle\sum \displaystyle\sum
 *                    \left(\frac{Q^i - Q^{i-1}{Q^i}\right)^2}{# FSRs}}} \f$
 *
 * @return the residual between this source and the previous source
 */
FP_PRECISION VectorizedSolver::computeFSRSources() {

  int tid;
  FP_PRECISION scatter_source;
  FP_PRECISION fission_source;
  FP_PRECISION* nu_sigma_f;
  FP_PRECISION* sigma_s;
  FP_PRECISION* sigma_t;
  FP_PRECISION* chi;
  Material* material;

  FP_PRECISION source_residual = 0.0;

  FP_PRECISION inverse_k_eff = 1.0 / _k_eff;

  /* For all FSRs, find the source */
  #pragma omp parallel for private(material, nu_sigma_f, chi, \
    sigma_s, sigma_t, fission_source, scatter_source) schedule(guided)
  for (int r=0; r < _num_FSRs; r++) {

    tid = omp_get_thread_num();
    material = _FSR_materials[r];
    nu_sigma_f = material->getNuSigmaF();
    chi = material->getChi();
    sigma_s = material->getSigmaS();
    sigma_t = material->getSigmaT();

    /* Initialize the source residual to zero */
    _source_residuals[r] = 0.;

    /* Compute fission source for each group */
    if (material->isFissionable()) {
      for (int v=0; v < _num_vector_lengths; v++) {

        /* Compute fission source for each group */
        #pragma simd vectorlength(VEC_LENGTH)
        for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++)
          _fission_sources(r,e) = _scalar_flux(r,e) * nu_sigma_f[e];
      }

      #ifdef SINGLE
      fission_source = cblas_sasum(_num_groups, &_fission_sources(r,0),1);
      #else
      fission_source = cblas_dasum(_num_groups, &_fission_sources(r,0),1);
      #endif

      fission_source *= inverse_k_eff;
    }

    else
      fission_source = 0.0;

    /* Compute total scattering source for group G */
    for (int G=0; G < _num_groups; G++) {
      scatter_source = 0;

      for (int v=0; v < _num_vector_lengths; v++) {

        #pragma simd vectorlength(VEC_LENGTH)
        for (int g=v*VEC_LENGTH; g < (v+1)*VEC_LENGTH; g++)
          _scatter_sources(tid,g) = sigma_s[G*_num_groups+g] *
                                    _scalar_flux(r,g);
      }

      #ifdef SINGLE
      scatter_source=cblas_sasum(_num_groups,&_scatter_sources(tid,0),1);
      #else
      scatter_source=cblas_dasum(_num_groups,&_scatter_sources(tid,0),1);
      #endif

      /* Set the total source for FSR r in group G */
      _source(r,G) = (fission_source * chi[G] + scatter_source)
                        * ONE_OVER_FOUR_PI;

      _reduced_source(r,G) = _source(r,G) / sigma_t[G];

      /* Compute the norm of residual of the source in the FSR */
      if (fabs(_source(r,G)) > 1E-10)
        _source_residuals[r] += pow((_source(r,G) - _old_source(r,G))
                                / _source(r,G), 2);

      /* Update the old source */
      _old_source(r,G) = _source(r,G);
    }
  }

  /* Sum up the residuals from each group and in each FSR */
  #ifdef SINGLE
  source_residual = cblas_sasum(_num_FSRs,_source_residuals,1);
  #else
  source_residual = cblas_dasum(_num_FSRs,_source_residuals,1);
  #endif

  source_residual = sqrt(source_residual / (_num_groups * _num_FSRs));

  return source_residual;
}
JNIEXPORT jdouble JNICALL Java_uncomplicate_neanderthal_CBLAS_dasum
(JNIEnv *env, jclass clazz, jint N, jobject X, jint offsetX, jint incX) {
    double *cX = (double *) (*env)->GetDirectBufferAddress(env, X);
    return cblas_dasum(N, cX + offsetX, incX);
};
double caffe_cpu_asum<double>(const int n, const double* x) {
    return cblas_dasum(n, x, 1);
}
inline double STARPU_DASUM(int N, double *X, int incX)
{
	return cblas_dasum(N, X, incX);
}