Example #1
double tls_rand()
{
	/* Setup PRNG state for current thread. */
	UNUSED(pthread_once(&tls_prng_once, tls_prng_init));

	/* Create PRNG state if not exists. */
	dsfmt_t* s = pthread_getspecific(tls_prng_key);
	if (!s) {
		/* Initialize seed from system PRNG generator. */
		uint32_t seed = 0;
		FILE *fp = fopen("/dev/urandom", "r");
		if (fp == NULL) {
			fp = fopen("/dev/random", "r");
		}
		if (fp != NULL) {
			if (fread(&seed, sizeof(uint32_t), 1, fp) != 1) {
				fclose(fp);
				fp = NULL;
			}
		}
		if (fp == NULL) {
			fprintf(stderr, "error: PRNG: cannot seed from "
				"/dev/urandom, seeding from local time\n");
			struct timeval tv;
			if (gettimeofday(&tv, NULL) == 0) {
				seed = (uint32_t)(tv.tv_sec ^ tv.tv_usec);
			} else {
				/* Last resort. */
				seed = (uint32_t)time(NULL);
			}
		} else {
			fclose(fp);
		}

		/* Initialize PRNG state. */
#ifdef HAVE_POSIX_MEMALIGN
		if (posix_memalign((void **)&s, 16, sizeof(dsfmt_t)) != 0) {
			fprintf(stderr, "error: PRNG: not enough memory\n");
			return 0.0;
		}
#else
		if ((s = malloc(sizeof(dsfmt_t))) == NULL) {
			fprintf(stderr, "error: PRNG: not enough memory\n");
			return 0.0;
		}
#endif
		dsfmt_init_gen_rand(s, seed);
		UNUSED(pthread_setspecific(tls_prng_key, s));
	}

	return dsfmt_genrand_close_open(s);
}
Example #2
void    rand_seed(ru32 s, void *d)
{
    // get time seed
    if( s == 0 ) {
        s = rand_time_seed(d);
    }

    // set seed
    if( d == NULL ) {
        dsfmt_gv_init_gen_rand(s);
    } else {
        dsfmt_t *p = (dsfmt_t*) d;
        dsfmt_init_gen_rand(p, s);
    }
}
Example #3
int main(int argc, char* argv[]) {
    int i, inside, seed;
    double x, y, pi;
    const long n_steps = 1000000000;
    dsfmt_t dsfmt;

    seed = 142857;
    inside = 0;
    dsfmt_init_gen_rand(&dsfmt, seed);
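    /* Sample points uniformly in the unit square; the fraction that falls inside the
       quarter circle of radius 1 estimates pi/4. */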
    for (i = 0; i < n_steps; i++) {
        x = dsfmt_genrand_close_open(&dsfmt);
        y = dsfmt_genrand_close_open(&dsfmt);
        if (x * x + y * y < 1.0) {
            inside++;
        }
    }
    pi = (double)inside / n_steps * 4;
    printf("%.10g\n", pi);
    return 0;
}
Example #4
microbialGA::microbialGA(unsigned int _populationSize, unsigned int _demeSize, unsigned int _geneSize, float _recombinationRate, float _mutationRate, objectiveFunctionEvaluator *_eval, fitnessComparisonTypes comparisonType, int reportEvery) :
populationSize(_populationSize), demeSize(_demeSize), geneSize(_geneSize), 
recombinationRate(_recombinationRate), mutationRate(_mutationRate), evaluator(_eval), fitnessComparisonType(comparisonType),
reportPeriod(reportEvery)
{
    srand((int)time(NULL));
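    // Both the C library PRNG and the dSFMT state below are seeded from the wall clock,
    // so two runs started within the same second produce identical initial populations.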
    
    mtRand = new dsfmt_t();
    dsfmt_init_gen_rand(mtRand, (int)time(NULL));
    
    //initialise the population randomly
    population.resize(populationSize);
    for(int i=0; i < populationSize; i++) {
        population[i].resize(geneSize);
        for(int j=0; j < geneSize; j++) {
            population[i][j] = randUF() * numeric_limits<unsigned int>::max();
        }
    }
    
    geneCharSize = geneSize * sizeof(unsigned int);
}
Example #5
int
main(int argc, char *argv[])
{
  // Initialize the random number generators
  dsfmt_init_gen_rand(&dsfmt, 0);
  dsfmt_gv_init_gen_rand(0);

  // Clip against the surrounding environment
  Polygon_2 world;
  world.push_back(Polygon_2::Point_2(1, 1));
  world.push_back(Polygon_2::Point_2(2400, 1));
  world.push_back(Polygon_2::Point_2(2400, 2400));
  world.push_back(Polygon_2::Point_2(1, 2400));

#include "treedata.inc"

  /*
  world.push_back(Polygon_2::Point_2(500.0, 0.0));
  world.push_back(Polygon_2::Point_2(1000.0, 1000.0));
  world.push_back(Polygon_2::Point_2(0.0, 1000.0));
  TreeNode tn1("tn1", 500);
  TreeNode tn2("tn2", 100);
  TreeNode tn3("tn3", 300);
  TreeNode tn4("tn4", 400);
  TreeNode tn5("tn5", 800);
  TreeNode tn6("tn6", 400);
  TreeNode tnroot("root", 0);
  tnroot.add_child(tn1);
  tnroot.add_child(tn2);
  tnroot.add_child(tn3);
  tnroot.add_child(tn4);
  tnroot.add_child(tn5);
  tnroot.add_child(tn6);
#define ROOT_TREE_NODE tnroot
*/
  ROOT_TREE_NODE.region = world;
  voronoi_treemap(ROOT_TREE_NODE);
  ROOT_TREE_NODE.draw();
}
Example #6
/** 
 * 
 * 
 * @param X 
 * 
 * @author Takahiro Misawa (The University of Tokyo)
 * @author Kazuyoshi Yoshimi (The University of Tokyo)
 * @return 
 */
int Lanczos_EigenValue(struct BindStruct *X)
{

  fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueStart);
  FILE *fp;
  char sdt[D_FileNameMax],sdt_2[D_FileNameMax];
  int stp, iproc;
  long int i,iv,i_max;      
  unsigned long int i_max_tmp, sum_i_max;
  int k_exct,Target;
  int iconv=-1;
  double beta1, alpha1; // beta1, alpha1 should be real
  double  complex temp1,temp2;
  double complex cbeta1;
  double E[5],ebefor;
  int mythread;

// for GC
  double dnorm;
  double complex cdnorm;
  long unsigned int u_long_i;
  dsfmt_t dsfmt;

#ifdef lapack
  double **tmp_mat;
  double *tmp_E;
  int    int_i,int_j,mfint[7];
#endif
      
  sprintf(sdt_2, cFileNameLanczosStep, X->Def.CDataFileHead);

  i_max=X->Check.idim_max;      
  k_exct = X->Def.k_exct;

  if(initial_mode == 0){

    sum_i_max = SumMPI_li(X->Check.idim_max);
    X->Large.iv = (sum_i_max / 2 + X->Def.initial_iv) % sum_i_max + 1;
    iv=X->Large.iv;
    fprintf(stdoutMPI, "  initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct);       
#pragma omp parallel for default(none) private(i) shared(v0, v1) firstprivate(i_max)
    for(i = 1; i <= i_max; i++){
      v0[i]=0.0;
      v1[i]=0.0;
    }

    sum_i_max = 0;
    for (iproc = 0; iproc < nproc; iproc++) {

      i_max_tmp = BcastMPI_li(iproc, i_max);
      if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) {

        if (myrank == iproc) {
          v1[iv - sum_i_max+1] = 1.0;
          if (X->Def.iInitialVecType == 0) {
            v1[iv - sum_i_max+1] += 1.0*I;
            v1[iv - sum_i_max+1] /= sqrt(2.0);
          }
        }/*if (myrank == iproc)*/
      }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/

      sum_i_max += i_max_tmp;

    }/*for (iproc = 0; iproc < nproc; iproc++)*/
  }/*if(initial_mode == 0)*/
  else if(initial_mode==1){
    iv = X->Def.initial_iv;
    fprintf(stdoutMPI, "  initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct);       
    #pragma omp parallel default(none) private(i, u_long_i, mythread, dsfmt) \
            shared(v0, v1, iv, X, nthreads, myrank) firstprivate(i_max)
    {

#pragma omp for
      for (i = 1; i <= i_max; i++) {
        v0[i] = 0.0;
      }
      /*
       Initialise MT
      */
#ifdef _OPENMP
      mythread = omp_get_thread_num();
#else
      mythread = 0;
#endif
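      /* Combine a fixed offset, the initial-vector index, the OpenMP thread id and the
         MPI rank so every thread on every process seeds an independent dSFMT stream. */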
      u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank;
      dsfmt_init_gen_rand(&dsfmt, u_long_i);

      if (X->Def.iInitialVecType == 0) {
#pragma omp for
        for (i = 1; i <= i_max; i++)
          v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I;
      }
      else {
#pragma omp for
        for (i = 1; i <= i_max; i++)
          v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5);
      }

    }/*#pragma omp parallel*/

    cdnorm=0.0;
#pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: cdnorm) 
    for(i=1;i<=i_max;i++){
     cdnorm += conj(v1[i])*v1[i];
    }
    cdnorm = SumMPI_dc(cdnorm);
    dnorm=creal(cdnorm);
    dnorm=sqrt(dnorm);
    #pragma omp parallel for default(none) private(i) shared(v1) firstprivate(i_max, dnorm)
    for(i=1;i<=i_max;i++){
      v1[i] = v1[i]/dnorm;
    }
  }/*else if(initial_mode==1)*/
  
  //Eigenvalues by Lanczos method
  TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a");
  mltply(X, v0, v1);
  stp=1;
  TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp);

  alpha1 = creal(X->Large.prdct); // alpha = v^{\dag}*H*v

  alpha[1]=alpha1;
  cbeta1=0.0;
  
#pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1)
  for(i = 1; i <= i_max; i++){
    cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]);
  }
  cbeta1 = SumMPI_dc(cbeta1);
  beta1=creal(cbeta1);
  beta1=sqrt(beta1);
  beta[1]=beta1;
  ebefor=0;

/*
      Set the maximum number of loops to the dimension of the wavefunction
    */
  i_max_tmp = SumMPI_li(i_max);
  if(i_max_tmp < X->Def.Lanczos_max){
    X->Def.Lanczos_max = i_max_tmp;
  }
  if(i_max_tmp < X->Def.LanczosTarget){
    X->Def.LanczosTarget = i_max_tmp;
  }
  if(i_max_tmp == 1){
    E[1]=alpha[1];
    vec12(alpha,beta,stp,E,X);		
    X->Large.itr=stp;
    X->Phys.Target_energy=E[k_exct];
    iconv=0;
    fprintf(stdoutMPI,"  LanczosStep  E[1] \n");
    fprintf(stdoutMPI,"  stp=%d %.10lf \n",stp,E[1]);
  }
  else{
#ifdef lapack
    fprintf(stdoutMPI, "  LanczosStep  E[1] E[2] E[3] E[4] E_Max/Nsite\n");
#else
    fprintf(stdoutMPI, "  LanczosStep  E[1] E[2] E[3] E[4] \n");
#endif
  for(stp = 2; stp <= X->Def.Lanczos_max; stp++){
#pragma omp parallel for default(none) private(i,temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1)
    for(i=1;i<=i_max;i++){
      temp1 = v1[i];
      temp2 = (v0[i]-alpha1*v1[i])/beta1;
      v0[i] = -beta1*temp1;
      v1[i] =  temp2;
    }

      mltply(X, v0, v1);
      TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp);
    alpha1=creal(X->Large.prdct);
    alpha[stp]=alpha1;
    cbeta1=0.0;

#pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1)
    for(i=1;i<=i_max;i++){
      cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]);
    }
    cbeta1 = SumMPI_dc(cbeta1);
    beta1=creal(cbeta1);
    beta1=sqrt(beta1);
    beta[stp]=beta1;

    Target  = X->Def.LanczosTarget;
        
    if(stp==2){      
     #ifdef lapack
      d_malloc2(tmp_mat,stp,stp);
      d_malloc1(tmp_E,stp+1);

       for(int_i=0;int_i<stp;int_i++){
         for(int_j=0;int_j<stp;int_j++){
           tmp_mat[int_i][int_j] = 0.0;
         }
       } 
       tmp_mat[0][0]   = alpha[1]; 
       tmp_mat[0][1]   = beta[1]; 
       tmp_mat[1][0]   = beta[1]; 
       tmp_mat[1][1]   = alpha[2]; 
       DSEVvalue(stp,tmp_mat,tmp_E);
       E[1] = tmp_E[0];
       E[2] = tmp_E[1];
       E[3] = tmp_E[2];
       E[4] = tmp_E[3];
       d_free1(tmp_E,stp+1);
       d_free2(tmp_mat,stp,stp);
     #else
       bisec(alpha,beta,stp,E,4,eps_Bisec);
     #endif
       ebefor=E[Target];
       
       childfopenMPI(sdt_2,"w", &fp);
#ifdef lapack
       fprintf(stdoutMPI, "  stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]);

       fprintf(fp, "LanczosStep  E[1] E[2] E[3] E[4] E_Max/Nsite\n");
       fprintf(fp, "stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]);
#else
       fprintf(stdoutMPI, "  stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]);
       fprintf(fp, "LanczosStep  E[1] E[2] E[3] E[4] \n");
       fprintf(fp,"stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]);
#endif
       fclose(fp);
    }
            
    if(stp>2 && stp%2==0){
      
      childfopenMPI(sdt_2,"a", &fp);
      
#ifdef lapack
      d_malloc2(tmp_mat,stp,stp);
      d_malloc1(tmp_E,stp+1);

       for(int_i=0;int_i<stp;int_i++){
         for(int_j=0;int_j<stp;int_j++){
           tmp_mat[int_i][int_j] = 0.0;
         }
       } 
       tmp_mat[0][0]   = alpha[1]; 
       tmp_mat[0][1]   = beta[1]; 
       for(int_i=1;int_i<stp-1;int_i++){
         tmp_mat[int_i][int_i]     = alpha[int_i+1]; 
         tmp_mat[int_i][int_i+1]   = beta[int_i+1]; 
         tmp_mat[int_i][int_i-1]   = beta[int_i]; 
       }
       tmp_mat[int_i][int_i]       = alpha[int_i+1]; 
       tmp_mat[int_i][int_i-1]     = beta[int_i]; 
       DSEVvalue(stp,tmp_mat,tmp_E);
       E[1] = tmp_E[0];
       E[2] = tmp_E[1];
       E[3] = tmp_E[2];
       E[4] = tmp_E[3];
       E[0] = tmp_E[stp-1];
       d_free1(tmp_E,stp+1);
       d_free2(tmp_mat,stp,stp);       
       fprintf(stdoutMPI, "  stp = %d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI);
       fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI);
#else
       bisec(alpha,beta,stp,E,4,eps_Bisec);
       fprintf(stdoutMPI, "  stp = %d %.10lf %.10lf %.10lf %.10lf \n",stp,E[1],E[2],E[3],E[4]);
       fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4]);
#endif 
       fclose(fp);

      if(fabs((E[Target]-ebefor)/E[Target])<eps_Lanczos || fabs(beta[stp])<pow(10.0, -14)){
        vec12(alpha,beta,stp,E,X);		
        X->Large.itr=stp;       
        X->Phys.Target_energy=E[k_exct];
	iconv=0;
	break;
      }

      ebefor=E[Target];            
    }
  }        
  }

  sprintf(sdt,cFileNameTimeKeep,X->Def.CDataFileHead);
  if(iconv!=0){
    sprintf(sdt,  cLogLanczos_EigenValueNotConverged);
    return -1;
  }

  TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueFinish, "a");
  fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueEnd);

  return 0;
}
Example #7
static void ran_set1 (void *vstate, unsigned long int s){
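    /* Seeding hook with a GSL-style signature: the opaque state pointer is assumed to point at a dsfmt_t. */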
    dsfmt_init_gen_rand((dsfmt_t *)vstate, (uint32_t)s);
}
Example #8
int main(int argc, char *argv[])
{
  int ret = EXIT_FAILURE;

  // Set up the PRNG
  dsfmt_t *dsfmt = malloc(sizeof(dsfmt_t));
  if(dsfmt == NULL) {
    fprintf(stdout, "unable to allocate PRNG\n");
    goto skip_deallocate_prng;
  }
  dsfmt_init_gen_rand(dsfmt, SEED);

  // Set up the source values
  double *src = fftw_malloc(N*VL*sizeof(double));
  if(src == NULL) {
    fprintf(stdout, "unable to allocate source vector\n");
    goto skip_deallocate_src;
  }
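  // dsfmt_genrand_open_close() returns doubles in (0, 1], so every source value is strictly positive.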
  for(unsigned int i = 0; i < N*VL; ++i) {
    src[i] = dsfmt_genrand_open_close(dsfmt);
  }

  // Allocate the FFT destination array
  double complex *fft = fftw_malloc(N*VL*sizeof(double complex));
  if(fft == NULL) {
    fprintf(stdout, "unable to allocate fft vector\n");
    goto skip_deallocate_fft;
  }

  // Execute the forward FFT
  fftw_plan fwd_plan = fftw_plan_many_dft_r2c(1, &N, VL,
      src, NULL, VL, 1, fft, NULL, VL, 1, FFTW_ESTIMATE);
  if(fwd_plan == NULL) {
    fprintf(stdout, "unable to allocate fft forward plan\n");
    goto skip_deallocate_fwd_plan;
  }
  fftw_execute(fwd_plan);

  // Fill in the rest of the destination values using the Hermitian property.
  fft_r2c_1d_vec_finish(fft, N, VL);

  // Allocate the reverse FFT destination array
  double complex *dst = fftw_malloc(N*VL*sizeof(double complex));
  if(dst == NULL) {
    fprintf(stdout, "unable to allocate dst vector\n");
    goto skip_deallocate_dst;
  }

  // Perform the reverse FFT
  fftw_plan rev_plan = fftw_plan_many_dft(1, &N, VL, fft, NULL, VL, 1,
      dst, NULL, VL, 1, FFTW_BACKWARD, FFTW_ESTIMATE);
  if(rev_plan == NULL) {
    fprintf(stdout, "unable to allocate fft reverse plan\n");
    goto skip_deallocate_rev_plan;
  }
  fftw_execute(rev_plan);

  // Compare the two vectors by sup norm
  double norm = 0.0;
  for(unsigned int i = 0; i < N*VL; ++i) {
    // Divide the resulting by N, because FFTW computes the un-normalized DFT:
    // the forward followed by reverse transform scales the data by N.
    norm = fmax(norm, cabs(dst[i]/N - src[i]));
  }
  if(norm <= 1e-6) {
    ret = EXIT_SUCCESS;
  }

  fftw_destroy_plan(rev_plan);
skip_deallocate_rev_plan:
  fftw_free(dst);
skip_deallocate_dst:
  fftw_destroy_plan(fwd_plan);
skip_deallocate_fwd_plan:
  fftw_free(fft);
skip_deallocate_fft:
  fftw_free(src);
skip_deallocate_src:
  free(dsfmt);
skip_deallocate_prng:
  // Keep valgrind happy by having fftw clean up its internal structures. This
  // helps ensure we aren't leaking memory.
  fftw_cleanup();
  return ret;
}
Example #9
void rngInit(RngEngine* rng, RngSeedType* seedValue, RngErrorType* info) {
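	/* Seed the embedded dSFMT state from the caller-supplied value and signal success through info. */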
	dsfmt_init_gen_rand(&(rng->m_dsfmt), *seedValue);
	*info = 0;
}
Example #10
int main(int argc, char **argv)
{
    static const char **fields;
    static uint64_t *lengths;
    dsfmt_t state;
    Pvoid_t uuids = NULL;
    tdb_cons* c = tdb_cons_init();
    test_cons_settings(c);
    uint64_t i, j;
    __uint128_t prev_uuid = 0;
    Word_t key;
    int tst;

    assert(tdb_cons_open(c, argv[1], fields, 0) == 0);
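    /* Fixed seed keeps the generated UUIDs, and therefore the test, deterministic. */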
    dsfmt_init_gen_rand(&state, 2489);

    for (i = 0; i < NUM_TRAILS; i++){
        uint8_t uuid[16];
        gen_random_uuid(uuid, &state);
        memcpy(&key, uuid, 8);

        J1S(tst, uuids, key);
        if (!tst){
            printf("half-word collision! change random seed!\n");
            return -1;
        }

        for (j = 0; j < NUM_EVENTS; j++)
            tdb_cons_add(c, uuid, i * 100 + j, fields, lengths);
    }
    J1C(key, uuids, 0, -1);
    assert(key == NUM_TRAILS);
    assert(tdb_cons_finalize(c) == 0);
    tdb_cons_close(c);

    tdb* t = tdb_init();
    assert(tdb_open(t, argv[1]) == 0);

    assert(tdb_num_trails(t) == NUM_TRAILS);
    assert(tdb_num_events(t) == NUM_TRAILS * NUM_EVENTS);

    for (i = 0; i < NUM_TRAILS; i++){
        __uint128_t this_uuid;

        /* uuids must be monotonically increasing */
        memcpy(&this_uuid, tdb_get_uuid(t, i), 16);
        assert(this_uuid > prev_uuid);
        prev_uuid = this_uuid;

        /* remove this uuid from the uuid set and make sure it exists */
        memcpy(&key, &this_uuid, 8);
        J1U(tst, uuids, key);
        assert(tst == 1);
    }

    /* make sure we retrieved all uuids */
    J1C(key, uuids, 0, -1);
    assert(key == 0);

    return 0;
}
Example #11
struct double_pair randmatstat(int t) {
    dsfmt_t dsfmt;
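    /* Fixed seed so repeated runs produce the same sequence of random matrices. */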
    dsfmt_init_gen_rand(&dsfmt, 1234);

    int n = 5;
    struct double_pair r;
    double *v = (double*)calloc(t,sizeof(double));
    double *w = (double*)calloc(t,sizeof(double));
    double *a = (double*)malloc((n)*(n)*sizeof(double));
    double *b = (double*)malloc((n)*(n)*sizeof(double));
    double *c = (double*)malloc((n)*(n)*sizeof(double));
    double *d = (double*)malloc((n)*(n)*sizeof(double));
    double *P = (double*)malloc((n)*(4*n)*sizeof(double));
    double *Q = (double*)malloc((2*n)*(2*n)*sizeof(double));
    double *PtP1 = (double*)malloc((4*n)*(4*n)*sizeof(double));
    double *PtP2 = (double*)malloc((4*n)*(4*n)*sizeof(double));
    double *QtQ1 = (double*)malloc((2*n)*(2*n)*sizeof(double));
    double *QtQ2 = (double*)malloc((2*n)*(2*n)*sizeof(double));
    for (int i=0; i < t; i++) {
        randmtzig_fill_randn(&dsfmt, a, n*n);
        randmtzig_fill_randn(&dsfmt, b, n*n);
        randmtzig_fill_randn(&dsfmt, c, n*n);
        randmtzig_fill_randn(&dsfmt, d, n*n);
        memcpy(P+0*n*n, a, n*n*sizeof(double));
        memcpy(P+1*n*n, b, n*n*sizeof(double));
        memcpy(P+2*n*n, c, n*n*sizeof(double));
        memcpy(P+3*n*n, d, n*n*sizeof(double));
        for (int j=0; j < n; j++) {
            for (int k=0; k < n; k++) {
                Q[2*n*j+k]       = a[k];
                Q[2*n*j+n+k]     = b[k];
                Q[2*n*(n+j)+k]   = c[k];
                Q[2*n*(n+j)+n+k] = d[k];
            }
        }
        cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans,
                    n, n, 4*n, 1.0, P, 4*n, P, 4*n, 0.0, PtP1, 4*n);
        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
                    4*n, 4*n, 4*n, 1.0, PtP1, 4*n, PtP1, 4*n, 0.0, PtP2, 4*n);
        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
                    4*n, 4*n, 4*n, 1.0, PtP2, 4*n, PtP2, 4*n, 0.0, PtP1, 4*n);
        for (int j=0; j < n; j++) {
            v[i] += PtP1[(n+1)*j];
        }
        cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans,
                    2*n, 2*n, 2*n, 1.0, Q, 2*n, Q, 2*n, 0.0, QtQ1, 2*n);
        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
                    2*n, 2*n, 2*n, 1.0, QtQ1, 2*n, QtQ1, 2*n, 0.0, QtQ2, 2*n);
        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
                    2*n, 2*n, 2*n, 1.0, QtQ2, 2*n, QtQ2, 2*n, 0.0, QtQ1, 2*n);
        for (int j=0; j < 2*n; j++) {
            w[i] += QtQ1[(2*n+1)*j];
        }
    }
    free(PtP1);
    free(PtP2);
    free(QtQ1);
    free(QtQ2);
    free(P);
    free(Q);
    free(a);
    free(b);
    free(c);
    free(d);
    double v1=0.0, v2=0.0, w1=0.0, w2=0.0;
    for (int i=0; i < t; i++) {
        v1 += v[i]; v2 += v[i]*v[i];
        w1 += w[i]; w2 += w[i]*w[i];
    }
    free(v);
    free(w);
    r.s1 = sqrt((t*(t*v2-v1*v1))/((t-1)*v1*v1));
    r.s2 = sqrt((t*(t*w2-w1*w1))/((t-1)*w1*w1));
    return r;
}
Example #12
/*
 * Main Function
 */
int run(int rank) {


  long long NP;		                                        // Number of particles
  pfloat sysLeng;	                                        // Length of system
  int numCells;	                                              	// Number of cells in system
  pfloat tol_std;                                      		// Tolerance for standard deviation calculations
  
  //Random number generator 
  dsfmt_t * dsfmt = (dsfmt_t*)malloc(sizeof(dsfmt_t));

  //stores user input
  NP = opt.NP;
  sysLeng = opt.sysLen;
  numCells = opt.numCells;
  tol_std = opt.tol_std;

  int nprocs;
#ifdef DO_MPI
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
#else
  nprocs = 1;
#endif

  //Initialization of vital simulation parameters
  pfloat dx = sysLeng / numCells;                             	//cell width
  pfloat eps = 0.1;                                          	//the threshold angle for tallying
  pfloat sig_t = opt.sig_t;                                   	//total collision cross-section
  pfloat sig_s = opt.sig_s;                                     //scattering cross-section

  //The source term uniform for now	
  pfloat q0_avg = 1.0;

  //Form data structure for simulation parameters
  struct data data;
  data.NP = NP;							//Number of Particles
  data.lx = sysLeng;						//Length of System
  data.nx = numCells;						//Number of Cells
  data.dx = dx;						        //Cell Width
  data.dx2 = dx*dx;						// Cell Width SQUARED
  data.dx_recip = 1.0/dx;					// Pre-computed reciprocal
  data.sig_t = sig_t;						//Total Collision Cross-Section
  data.sig_t_recip = 1.0/sig_t;					// Pre-computed reciprocal
  data.sig_s = sig_s;						//Scattering Collision Cross-Section
  data.q0_avg = q0_avg;                                         //Weight of each particle
  data.eps = eps;						//Threshold Angle for Tallying
  
  long long NPc = (long long )((pfloat)NP / numCells);          // number of particles in cell
  long long NP_tot = NP; 					//total number of particles for the CMC
  data.NP_tot = NP_tot;						// Total Number of Particles
  data.NPc = NPc;						// Number of Particles per Cell

  //Initialize the average values
  int iter = 0;     
  int iter_avg = 0 ;                                            //tally for iteration for convergence
  int i;

  // MT RNG
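  // Seeded from the wall clock, so every run of the simulation uses a different random stream.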
  dsfmt_init_gen_rand(dsfmt, (int)time(NULL));
  
  //time keeping structures
  struct timeval start_time, end_time, startPerIter, endPerIter;

  afloat * phi_n_avg = NULL;                                     //Vector to hold avg value of phi_n across iterations
  afloat * phi_n_tot = NULL;                                     //Vector to hold sum of phi_n across iterations
  afloat * phiS2_n_tot = NULL;
#if defined(L2_1) || defined(L2_2) || defined(L2_3)
  afloat * anal_soln = NULL;                                     //Holds the analytical solution if comparing with it
  pfloat l2;                                                    //Stores the l2 norm while comparing with analytical solution
#else
  meanDev * phi_nStats = NULL;                                   //Mean and standard deviation
#endif

  colData all_tallies;                                           //Stores the tallies for the current iteration
  
  if( rank == 0 )
    {
      phi_n_avg = (afloat *) calloc(numCells, sizeof(afloat));
      phi_n_tot  = (afloat*) calloc(numCells,sizeof(afloat));
      phiS2_n_tot = (afloat*) calloc(numCells,sizeof(afloat));
  
#if defined(L2_1) || defined(L2_2) || defined(L2_3)
      anal_soln = (afloat *) calloc(numCells, sizeof (afloat)); 			// Reference solution
#if defined(L2_1)
      init_L2_norm_1(anal_soln);
#elif defined(L2_2)
      init_L2_norm_2(anal_soln);
#elif defined(L2_3)
      init_L2_norm_3(anal_soln);
#endif
#else
      //Initialize vector to hold stats of the monte carlo
      phi_nStats = (meanDev*) malloc(sizeof(meanDev)*opt.numCells);
#endif
    }

  //Plot and print initial condition as well as simulation parameters
  if (rank == 0 && !opt.silent) sim_param_print(data);
  
  int flag_sim = 0;                                                                     //Flag to signal convergence
  long long samp_cnt = 0;               						//total history tally

  
  //Allocate tallies on each process. Aligned to cache line ( not really necessary for MPI )
  colData tallies = allocate_tallies(numCells);

#ifdef DO_MPI
  //In MPI mode, allocate on process 0, all_tallies to hold sum of tallies across processes
  if( rank == 0 )
    all_tallies = allocate_tallies(numCells);
  else
    {
      all_tallies.phi_n = NULL;
      all_tallies.phi_n2 = NULL;
      all_tallies.tot_col = NULL;
      all_tallies.full_buffer = NULL;
      all_tallies.buffer_size = tallies.buffer_size;
    }
#else
  //For sequential mode, alias all_tallies to tallies
  all_tallies.phi_n = tallies.phi_n;
  all_tallies.phi_n2 = tallies.phi_n2;
  all_tallies.tot_col = tallies.tot_col;
  all_tallies.buffer_size = tallies.buffer_size;
#endif

  double avg_col = 0;                                                                //Average collisions across all the particles streamed across all processes
  /* double tot_col = 0; */
  /* double all_col =  0; */
  double running_col = 0.0;
  
  //Calculation starts here
  gettimeofday(&start_time, NULL);
	
#ifdef DO_PAPI    
    PAPI_library_init(PAPI_VER_CURRENT);
    int EventSet = PAPI_NULL;
    long long start[4],stop[4];
    PAPI_create_eventset(&EventSet);
    PAPI_add_event(EventSet,PAPI_TOT_CYC);
    PAPI_add_event(EventSet,PAPI_TOT_INS);
    PAPI_add_event(EventSet,PAPI_FP_OPS); 
    PAPI_add_event(EventSet,PAPI_FP_INS); 
    PAPI_start(EventSet);
    PAPI_read(EventSet,start);
#endif
  // while not converged and total number of iterations < opt.numiters
  while (flag_sim == 0 && iter < opt.numIters) {

    /* //start time for iteration */
    /* gettimeofday(&startPerIter, NULL); */

    iter += 1;
    if(!opt.silent && rank==0) printf("This is the %dth iteration\n", iter);
    
    //calls collision and tally to stream particles and collect statistics
    *(tallies.tot_col) = collision_and_tally(data, &tallies,rank,nprocs,dsfmt);

#ifdef DO_MPI
    //In MPI mode, need to reduce phi_n and phi_n2 across all processes
    MPI_Reduce(tallies.full_buffer,all_tallies.full_buffer,tallies.buffer_size,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
#endif

    //Do the averaging and convergence calculations on process 0
    if( rank == 0 )
      {
	running_col += *(all_tallies.tot_col);
	avg_col = running_col / iter / NP;

	//Normalizing the values of phi_n
	for( i = 0 ; i < numCells ; i++ )
	  {
	    all_tallies.phi_n[i] /= (afloat)data.dx*NP;
	    all_tallies.phi_n[i] *= data.lx;
	  }
	
	/***************************************************
	  Calculates the averages
	 ***************************************************/
	iter_avg += 1;
	samp_cnt = NP_tot * ((long long) iter_avg);

	//accumulate phase -- add new data to the running averages
	for (i = 0; i < numCells; i++) {
	  phi_n_tot[i] += all_tallies.phi_n[i];
	  phiS2_n_tot[i] += all_tallies.phi_n2[i];
	}

	// for each cell, calculate the average for phi_n, phi_lo and E_n
	for (i = 0; i < numCells; i++) {
	  phi_n_avg[i] = phi_n_tot[i] / iter_avg;
	}

	//prints out the necessary data
	if(!opt.silent && rank == 0){	    
	  printf("Running Average number of collisions = %lf\n",avg_col);
	}

	//check for convergence
#if !defined(L2_1) && !defined(L2_2) && !defined(L2_3)
	//Use mean and standard deviation for convergence 
	mean_std_calc(phi_n_tot, phiS2_n_tot, samp_cnt, opt.NP, opt.numCells, opt.dx, phi_nStats);
	if (maxFAS(&phi_nStats[0].stdDev, numCells, 2) <= tol_std) {
	  flag_sim = 1;
	}
	  
	if( !opt.silent ){
	  printf("The maximum standard deviation of flux at node is, max (sig_phi) = %f\n", maxFAS(&phi_nStats[0].stdDev, numCells, 2));
	}
#else
	//Use L2 norm of the analytical solution
	l2 = l2_norm_cmp(phi_n_avg, anal_soln, numCells, dx);
	flag_sim = l2 <= tol_std;
	if(rank == 0){
	  gettimeofday(&end_time, NULL);
	  printf("L2: %f, Sofar: %ldu_sec\n", l2, (end_time.tv_sec - start_time.tv_sec)*1000000 + (end_time.tv_usec - start_time.tv_usec));
	}
#endif	  
       
      }	
#ifdef DO_MPI
    //Broadcast to other processes if convergence is reached
    MPI_Bcast(&flag_sim,1,MPI_INT,0,MPI_COMM_WORLD);
#endif
    
    /* //end time per iteration */
    /* gettimeofday(&endPerIter, NULL); */
    /* printf("ID = %d, Time per Iteration: %ldu_sec, flags sim = %d\n\n", rank,(endPerIter.tv_sec - startPerIter.tv_sec)*1000000 + (endPerIter.tv_usec - startPerIter.tv_usec),flag_sim); */
  }
#ifdef DO_PAPI
    PAPI_read(EventSet,stop);
    printf("%lld %lld %lld %lld\n",stop[0] - start[0],stop[1] - start[1],stop[2] - start[2],stop[3] - start[3]);
    PAPI_cleanup_eventset(EventSet);
    PAPI_destroy_eventset(EventSet);
#endif

  
  gettimeofday(&end_time, NULL);
  if( rank == 0 )
    printf("Elapsed Time: %ldu_sec\n", (end_time.tv_sec - start_time.tv_sec)*1000000 + (end_time.tv_usec - start_time.tv_usec));

  if(rank == 0 && !opt.silent){
    printf("NODEAVG\n");
    for (i = 0; i < numCells; i++) {
      printf("%d %lf\n", i, phi_n_avg[i]);
    }
  }

  /************************************************
   * Free memory
   *************************************************/
  deallocate_tallies(tallies);
  free(dsfmt);
  if( rank == 0 )
    {
#ifdef DO_MPI
      deallocate_tallies(all_tallies);
#endif
      free(phi_n_tot);
      free(phiS2_n_tot);
      free(phi_n_avg);
#if defined(L2_1) || defined(L2_2) || defined(L2_3)
      free(anal_soln);
#else
      free(phi_nStats);
#endif
    }

  return (EXIT_SUCCESS);
}
Example #13
void initRandomNumbers(int seed)
{
	dsfmt_init_gen_rand(&dsfmt_global_data, seed);
	initialised = true;
}
Example #14
int main(int argc, char **argv)
{
  int err;
  // Error handling scheme: this function has failed until proven otherwise.
  int ret = EXIT_FAILURE;

  err = MPI_Init(&argc, &argv);
  if(err != MPI_SUCCESS) {
    // Theoretically, an error at this point will abort the program, and this
    // code path is never followed. This is here for completeness.
    fprintf(stderr, "unable to initialize MPI\n");
    goto die_immed;
  }

  // Install the MPI error handler that returns error codes, so we can perform
  // the usual process suicide ritual.
  err = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
  if(err != MPI_SUCCESS) {
    // Again, theoretically, the previous error handler (MPI_Abort) gets called
    // instead of reaching this fail point.
    fprintf(stderr, "unable to reset MPI error handler\n");
    goto die_finalize_mpi;
  }

  int size, rank;
  err = MPI_Comm_size(MPI_COMM_WORLD, &size);
  if(err == MPI_SUCCESS) err = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  if(err != MPI_SUCCESS) {
    fprintf(stderr, "unable to determine rank or size\n");
    goto die_finalize_mpi;
  }

  /* Create cartesian communicator */
  int dims[2] = {0, 0};
  int periods[2] = {1, 1};
  err = MPI_Dims_create(size, 2, dims);
  if(err != MPI_SUCCESS) {
    fprintf(stderr, "unable to create a cartestian topology\n");
    goto die_finalize_mpi;
  }
  MPI_Comm cart;
  err = MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &cart);
  if(err != MPI_SUCCESS) {
    fprintf(stderr, "unable to create cartestian communicator\n");
    goto die_finalize_mpi;
  }

  dsfmt_t *prng = malloc(sizeof(dsfmt_t));
  if(prng == NULL) {
    fprintf(stderr, "unable to allocate PRNG\n");
    goto die_free_cart_comm;
  }
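  // Offset the seed by the MPI rank so each process draws an independent random stream.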
  dsfmt_init_gen_rand(prng, SEED + rank);

  int const net_elems = proc_elems[0]*proc_elems[1];
  // Allocate master source array for FFT.
  double *const master = fftw_malloc(net_elems*sizeof(double));
  if(master == NULL) {
    fprintf(stderr, "unable to allocate master array\n");
    goto die_free_prng;
  }
  for(unsigned int i = 0; i < net_elems; ++i) {
    master[i] = dsfmt_genrand_open_close(prng) * 10;
  }

  /* Allocate source array for serial array. We copy the master array to this
   * array, then transform it in place, then reverse transform it. The idea is
   * that we should get the original data back, and we use this as a consistency
   * check. We need the original data to compare to.
   */
  double *const source = fftw_malloc(net_elems*sizeof(double));
  if(source == NULL) {
    fprintf(stderr, "unable to allocate source array\n");
    goto die_free_master;
  }
  for(int i = 0; i < net_elems; ++i) source[i] = master[i];

  /* Allocate the destination array */
  double complex *const dest = fftw_malloc(net_elems*sizeof(double complex));
  if(dest == NULL) {
    fprintf(stderr, "unable to allocate destination array\n");
    goto die_free_source;
  }

  /* Allocate a plan to compute the FFT */
  fft_par_plan plan = fft_par_plan_r2c(cart, proc_elems, 1, source,
      dest, &err);
  if(plan == NULL) {
    fprintf(stderr, "unable to initialize parallel FFT plan\n");
    goto die_free_dest;
  }

  /* Execute the forward plan */
  err = fft_par_execute_fwd(plan);
  if(err != MPI_SUCCESS) {
    fprintf(stderr, "error computing forward plan\n");
    goto die_free_plan;
  }

  /* Execute the reverse plan */
  err = fft_par_execute_rev(plan);
  if(err != MPI_SUCCESS) {
    fprintf(stderr, "error computing reverse plan\n");
    goto die_free_plan;
  }

  /* Compare source to master, use supremum norm */
  double norm = 0.0;
  for(int i = 0; i < net_elems; ++i) {
    /* Each FFT effectively multiplies by sqrt(net_elems*num_procs) */
    norm = fmax(norm, fabs(master[i] - source[i]/net_elems/size));
  }
  if(norm < 1.0e-6) {
    ret = EXIT_SUCCESS;
  }

die_free_plan:
  fft_par_plan_destroy(plan);
die_free_dest:
  fftw_free(dest);
die_free_source:
  fftw_free(source);
die_free_master:
  fftw_free(master);
die_free_prng:
  free(prng);
die_free_cart_comm:
  if(err == MPI_SUCCESS) err = MPI_Comm_free(&cart);
  if(err != MPI_SUCCESS) {
    fprintf(stderr, "unable to free cartestian communicator\n");
    ret = EXIT_FAILURE;
  }
die_finalize_mpi:
  if(err == MPI_SUCCESS) err = MPI_Finalize();
  if(err != MPI_SUCCESS) {
    fprintf(stderr, "unable to finalize MPI\n");
    ret = EXIT_FAILURE;
  }
die_immed:
  fftw_cleanup();
  return ret;
}
Example #15
/** 
 * 
 * 
 * @param X 
 * @author Takahiro Misawa (The University of Tokyo)
 * @author Kazuyoshi Yoshimi (The University of Tokyo) 
 */
void Lanczos_EigenVector(struct BindStruct *X){

  printf("%s", cLogLanczos_EigenVectorStart);
  
  int i,j,i_max,iv;  	 
  int k_exct;
  double beta1,alpha1,dnorm, dnorm_inv;
  double complex temp1,temp2;

// for GC
  long unsigned int u_long_i;
  dsfmt_t dsfmt;

  k_exct = X->Def.k_exct;
	
  iv=X->Large.iv;
  i_max=X->Check.idim_max;
 
  //Eigenvectors by Lanczos method
  //initialization: initialization should be identical to that of Lanczos_EigenValue.c
#pragma omp parallel for default(none) private(i) shared(v0, v1, vg) firstprivate(i_max)
  for(i=1;i<=i_max;i++){
    v0[i]=0.0+0.0*I;
    v1[i]=0.0+0.0*I;
    vg[i]=0.0+0.0*I;
  }
    
  if(initial_mode == 0){
    v1[iv]=1.0;
    vg[iv]=vec[k_exct][1];
  }else if(initial_mode==1){      
    iv = X->Def.initial_iv;
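    /* Seed deterministically from the initial-vector index so the random start vector is reproducible. */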
    u_long_i = 123432 + abs(iv);
    dsfmt_init_gen_rand(&dsfmt, u_long_i);    
    for(i = 1; i <= i_max; i++){
      v1[i]=2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)+2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)*I;
    }
    dnorm=0;
    #pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: dnorm) 
    for(i=1;i<=i_max;i++){
      dnorm += conj(v1[i])*v1[i];
    }    
    dnorm=sqrt(dnorm);
    dnorm_inv=1.0/dnorm;
#pragma omp parallel for default(none) private(i) shared(v1,vg,vec,k_exct) firstprivate(i_max, dnorm_inv)
    for(i=1;i<=i_max;i++){
      v1[i]        = v1[i]*dnorm_inv;
      vg[i]        = v1[i]*vec[k_exct][1];
    }
  }
  
  mltply(X, v0, v1);
  
  alpha1=alpha[1];
  beta1=beta[1];

#pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct)
  for(j=1;j<=i_max;j++){
    vg[j]+=vec[k_exct][2]*(v0[j]-alpha1*v1[j])/beta1;
  }
    
  //iteration
  for(i=2;i<=X->Large.itr-1;i++){
#pragma omp parallel for default(none) private(j, temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1)
    for(j=1;j<=i_max;j++){
      temp1=v1[j];
      temp2=(v0[j]-alpha1*v1[j])/beta1;
      v0[j]=-beta1*temp1;
      v1[j]=temp2;        
    }
    mltply(X, v0, v1);   
	
    alpha1 = alpha[i];
    beta1  = beta[i];

#pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct, i)
    for(j=1;j<=i_max;j++){
      vg[j] += vec[k_exct][i+1]*(v0[j]-alpha1*v1[j])/beta1;
    }	
  }

#pragma omp parallel for default(none) private(j) shared(v0, vg) firstprivate(i_max)
    for(j=1;j<=i_max;j++){
      v0[j] = vg[j];
    } 
      
  //normalization
  dnorm=0.0;
#pragma omp parallel for default(none) reduction(+:dnorm) private(j) shared(v0) firstprivate(i_max)
  for(j=1;j<=i_max;j++){
    dnorm += conj(v0[j])*v0[j];
  }
  dnorm=sqrt(dnorm);
  dnorm_inv=1.0/dnorm;
#pragma omp parallel for default(none) private(j) shared(v0) firstprivate(i_max, dnorm_inv)
  for(j=1;j<=i_max;j++){
    v0[j] = v0[j]*dnorm_inv;
  }
  
  TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenVectorFinish, "a");
  printf("%s", cLogLanczos_EigenVectorEnd);
}
Example #16
int main(int argc, char **argv)
{
  // Error handling scheme: this function has failed until proven otherwise.
  int ret = EXIT_FAILURE;

  if(MPI_Init(&argc, &argv) != MPI_SUCCESS) {
    // Theoretically, an error at this point will abort the program, and this
    // code path is never followed. This is here for completeness.
    fprintf(stderr, "unable to initialize MPI\n");
    goto die_immed;
  }

  // Install the MPI error handler that returns error codes, so we can perform
  // the usual process suicide ritual.
  if(MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)
      != MPI_SUCCESS) {
    // Again, theoretically, the previous error handler (MPI_Abort) gets called
    // instead of reaching this fail point.
    fprintf(stderr, "unable to reset MPI error handler\n");
    goto die_finalize_mpi;
  }

  int size, rank;
  if(MPI_Comm_size(MPI_COMM_WORLD, &size) != MPI_SUCCESS ||
      MPI_Comm_rank(MPI_COMM_WORLD, &rank) != MPI_SUCCESS) {
    fprintf(stderr, "unable to determine rank and size\n");
    goto die_finalize_mpi;
  }

  dsfmt_t *prng = malloc(sizeof(dsfmt_t));
  if(prng == NULL) {
    fprintf(stderr, "unable to allocate PRNG\n");
    goto die_finalize_mpi;
  }
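  // Every rank seeds with the same value, so all processes build an identical master array below.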
  dsfmt_init_gen_rand(prng, SEED);

  const int master_elems = proc_elems * size;

  double *const master = fftw_malloc(VL*master_elems*sizeof(double));
  if(master == NULL) {
    fprintf(stderr, "unable to allocate master array\n");
    goto die_free_prng;
  }
  for(int i = 0; i < master_elems*VL; ++i) {
    master[i] = 2*dsfmt_genrand_open_close(prng) - 1;
  }

  /* Allocate the array holding the serial result */
  double complex *const serial = fftw_malloc(VL*master_elems*sizeof(*serial));
  if(serial == NULL) {
    fprintf(stderr, "unable to allocate serial array\n");
    goto die_free_master;
  }

  /* Perform serial transform */
  fftw_plan serial_plan = fftw_plan_many_dft_r2c(1, &master_elems, VL,
      master, NULL, VL, 1, serial, NULL, VL, 1, FFTW_ESTIMATE);
  if(serial_plan == NULL) {
    fprintf(stderr, "unable to construct forward transform plan\n");
    goto die_free_serial;
  }

  /* Perform the serial transform, and complete it */
  fftw_execute(serial_plan);
  fft_r2c_1d_vec_finish(serial, master_elems, VL);

  /* Allocate space to hold the parallel transform result */
  double complex *const parallel = fftw_malloc(
      proc_elems*VL*sizeof(double complex));
  if(parallel == NULL) {
    fprintf(stderr, "unable to allocate space for parallel array\n");
    goto die_destroy_serial_plan;
  }

  /* Create the parallel plan */
  fft_par_plan par_plan = fft_par_plan_r2c_1d(MPI_COMM_WORLD, proc_elems, VL,
      master + rank*proc_elems*VL, parallel, NULL);
  if(par_plan == NULL) {
    fprintf(stderr, "unable to create parallel transform plan\n");
    goto die_free_parallel;
  }

  /* Execute the parallel transform */
  if(fft_par_execute_fwd(par_plan) != MPI_SUCCESS) {
    fprintf(stderr, "unable to execute parallel transform\n");
    goto die_destroy_par_plan;
  }

  /* Compare values */
  double sup = 0.0;
  for(int i = 0; i < proc_elems*VL; ++i) {
    sup = fmax(sup, cabs(serial[rank*proc_elems*VL + i] - parallel[i]));
  }
  if(sup < 1.0e-6) {
    ret = EXIT_SUCCESS;
  }

die_destroy_par_plan:
  fft_par_plan_destroy(par_plan);
die_free_parallel:
  fftw_free(parallel);
die_destroy_serial_plan:
  fftw_destroy_plan(serial_plan);
die_free_serial:
  fftw_free(serial);
die_free_master:
  fftw_free(master);
die_free_prng:
  free(prng);
die_finalize_mpi:
  if(MPI_Finalize() != MPI_SUCCESS) {
    fprintf(stderr, "unable to finalize MPI\n");
    ret = EXIT_FAILURE;
  }
die_immed:
  fftw_cleanup();
  return ret;
}