Пример #1
0
/// Switches on multithreading for both the double-precision (fftw_) and the
/// single-precision (fftwf_) FFTW libraries and reports the thread count.
///
/// Each library is initialised for threading first; the outcome of each
/// initialisation is handed to require() together with this function's
/// signature.  Afterwards every plan created by either library is asked to
/// use getNumProcessors() threads.
void cSystem::startNthreadsFFTW(void)
{
    // thread support has to be initialised before any threaded plan is made
    require( fftw_init_threads() != 0, "void cSystem::startNthreadsFFTW(void)");
    require(fftwf_init_threads() != 0, "void cSystem::startNthreadsFFTW(void)");

    // request the thread count for all plans created from here on
    fftw_plan_with_nthreads(getNumProcessors());
    fftwf_plan_with_nthreads(getNumProcessors());

    // announce the configuration on standard output
    std::cout << "FFTW multithreading is turned on: ";
    std::cout << getNumProcessors();
    std::cout << " threads\n\n";
}
Пример #2
0
Файл: demo.c Проект: psava/cwp12
/*
 * Micro-benchmark for a threaded, single-precision 3-D real-to-complex FFT.
 *
 * Steps: configure OpenMP, initialise FFTW threading, time the creation of a
 * FFTW_MEASURE plan for a 128^3 transform, fill the input with pseudo-random
 * values in [0,1], then time repeated executions of the plan.
 *
 * Returns 0 on success and 1 if FFTW threading or the buffers could not be
 * set up.
 */
int main(){
    const int nthreads = 4;
    const int niter = 1001;     /* number of timed plan executions */

    omp_set_num_threads(nthreads);
    #pragma omp parallel 
        fprintf(stderr,"nthreads %d \n", omp_get_num_threads());
 
    const int n3 = 128;
    const int n2 = 128;
    const int n1 = 128;
    const int ntotal = n3*n2*n1;

    /* Threaded planning is meaningless if the thread layer failed to start,
       so stop here instead of carrying on with an unusable library state. */
    if (fftwf_init_threads() == 0) {
        fprintf(stderr,"something went wrong with fftw\n");
        return 1;
    }

    /* The r2c output only needs n1*n2*(n3/2+1) complex values; allocating
       ntotal keeps the original (generous) buffer size. */
    float *array = fftwf_alloc_real(ntotal);
    fftwf_complex* cout = fftwf_alloc_complex(ntotal);
    if (array == NULL || cout == NULL) {
        fprintf(stderr,"could not allocate fftw buffers\n");
        fftwf_free(cout);
        fftwf_free(array);
        fftwf_cleanup_threads();
        return 1;
    }

    fprintf(stderr,"Got here\n");

    /* Time the planning stage.  omp_get_wtime() already returns seconds, so
       it must not be scaled by the timer resolution. */
    double start,end;
    start = omp_get_wtime();
    fftwf_plan_with_nthreads(nthreads);
    fftwf_plan plan =  fftwf_plan_dft_r2c_3d(
                                    n1,n2,n3,
                                    array,cout,
                                    FFTW_MEASURE);
    end = omp_get_wtime();
    fprintf(stderr,"elapsed time: %f %f %f\n",end,start,end-start);

    if (plan == NULL) {
        fprintf(stderr,"something went wrong with fftw\n");
        fftwf_free(cout);
        fftwf_free(array);
        fftwf_cleanup_threads();
        return 1;
    }

    /* FFTW_MEASURE overwrites the arrays while planning, so the input is
       filled afterwards.  Divide in floating point: the integer quotient
       rand()/RAND_MAX is 0 for every value except RAND_MAX itself. */
    for(int i = 0; i < ntotal; ++i)
        array[i] = (float)rand()/(float)RAND_MAX;
 
    start = omp_get_wtime();

    for(int i=0; i < niter; ++i)
        fftwf_execute(plan);
   
    end = omp_get_wtime();
    /* average over the number of executions that were actually performed */
    fprintf(stderr,"elapsed time: %f time/calc %f\n",
        end-start,(end-start)/niter);

    /* Plans must be destroyed before the library-wide cleanup: after
       fftwf_cleanup()/fftwf_cleanup_threads() existing plans are undefined. */
    fftwf_destroy_plan(plan);
    fftwf_free(cout);
    fftwf_free(array);

    fftwf_cleanup_threads();
    fftwf_cleanup();
    return 0;

}
Пример #3
0
int mcfft3_init(int pad1           /* padding on the first axis */,
	       int nx,   int ny,  int nz   /* input data size */, 
	       int *nx2, int *ny2, int *nz2 /* padded data size */,
               int *n_local, int *o_local /* local size & start */)
/*< initialize >*/
{
  /*
   * Sets up the distributed (MPI) in-place 3-D complex FFT.
   *
   * The padded sizes are written to nx2/ny2/nz2, the slab owned by this
   * process (extent and first index along the slowest axis, as reported by
   * fftwf_mpi_local_size_3d) to n_local/o_local, and the file-level state
   * (n1, n2, n3, nk, cc, cfg, icfg, wt) is filled in.
   *
   * Returns nk*n2*n3, the number of samples of the padded cube.
   * NOTE(review): the return value is an int product and cannot represent
   * cubes with more than INT_MAX samples; the interface is left unchanged.
   */

  /* The FFTW manual requires thread initialisation to happen before
     fftwf_mpi_init(); threads_ok is cleared if that initialisation fails. */
  if (threads_ok) threads_ok = fftwf_init_threads();

  fftwf_mpi_init();

  if (threads_ok)
    fftwf_plan_with_nthreads(omp_get_max_threads());

  /* axis 1 */
  nk = n1 = kiss_fft_next_fast_size(nx*pad1);
  /* axis 2 */
  n2 = kiss_fft_next_fast_size(ny);
  /* axis 3 */
  n3 = kiss_fft_next_fast_size(nz);

  /* number of complex elements this process has to allocate */
  alloc_local = fftwf_mpi_local_size_3d(n3, n2, n1, MPI_COMM_WORLD, &local_n0, &local_0_start);

  cc = sf_complexalloc(alloc_local);

  /* forward and backward plans both work in place on cc */
  cfg  = fftwf_mpi_plan_dft_3d(n3,n2,n1,
                               (fftwf_complex *) cc,
                               (fftwf_complex *) cc,
                               MPI_COMM_WORLD,
                               FFTW_FORWARD, FFTW_MEASURE);

  icfg = fftwf_mpi_plan_dft_3d(n3,n2,n1,
                               (fftwf_complex *) cc, 
                               (fftwf_complex *) cc,
                               MPI_COMM_WORLD,
                               FFTW_BACKWARD, FFTW_MEASURE);

  if (NULL == cfg || NULL == icfg) sf_error("FFTW failure.");

  *nx2 = n1;
  *ny2 = n2;
  *nz2 = n3;
  *n_local = (int) local_n0;
  *o_local = (int) local_0_start;
	
  /* normalisation weight; the product is formed in double so that large
     cubes do not overflow int before the division */
  wt =  1.0/((double) n3*n2*n1);

  return (nk*n2*n3);
}
Пример #4
0
int cfft2_init(int pad1           /* padding on the first axis */,
	       int nx,   int ny   /* input data size */, 
	       int *nx2, int *ny2 /* padded data size */)
/*< initialize >*/
{

#ifdef SF_HAS_FFTW
#ifdef _OPENMP
    fftwf_init_threads();
    sf_warning("Using threaded FFTW3! \n");
    fftwf_plan_with_nthreads(omp_get_max_threads());
#endif
#endif

#ifndef SF_HAS_FFTW
    int i2;
#endif

    nk = n1 = kiss_fft_next_fast_size(nx*pad1);
    
#ifndef SF_HAS_FFTW
    cfg1  = kiss_fft_alloc(n1,0,NULL,NULL);
    icfg1 = kiss_fft_alloc(n1,1,NULL,NULL);
#endif
  
    n2 = kiss_fft_next_fast_size(ny);

    cc = sf_complexalloc2(n1,n2);
    dd = sf_complexalloc2(nk,n2);
    
#ifndef SF_HAS_FFTW
    cfg2  = kiss_fft_alloc(n2,0,NULL,NULL);
    icfg2 = kiss_fft_alloc(n2,1,NULL,NULL);
 	
    tmp =    (kiss_fft_cpx **) sf_alloc(n2,sizeof(*tmp));
    tmp[0] = (kiss_fft_cpx *)  sf_alloc(nk*n2,sizeof(kiss_fft_cpx));
    for (i2=0; i2 < n2; i2++) {
	tmp[i2] = tmp[0]+i2*nk;
    }
	
    trace2 = sf_complexalloc(n2);
    ctrace2 = (kiss_fft_cpx *) trace2;
#endif

    *nx2 = n1;
    *ny2 = n2;
	
    wt =  1.0/(n1*n2);
	
    return (nk*n2);
}
Пример #5
0
int cfft2_init(int pad1           /* padding on the first axis */,
	       int nx,   int ny   /* input data size */, 
	       int *nx2, int *ny2 /* padded data size */,
               int *n_local, int *o_local /* local size & start */,
               MPI_Comm comm)
/*< initialize >*/
{
  /*
   * Sets up the distributed (MPI) 2-D complex FFT on communicator comm.
   *
   * The padded sizes are written to nx2/ny2, the slab owned by this process
   * (extent and first index along the slow axis, as reported by
   * fftwf_mpi_local_size_2d) to n_local/o_local, and the file-level state
   * (n1, n2, nk, cc, dd, cfg, icfg, wt) is filled in.
   *
   * Returns nk*n2, the number of samples of the padded plane.
   */

  /* The FFTW manual requires thread initialisation to happen before
     fftwf_mpi_init(); threads_ok is cleared if that initialisation fails. */
  if (threads_ok) threads_ok = fftwf_init_threads();

  fftwf_mpi_init();

  if (threads_ok)
    fftwf_plan_with_nthreads(omp_get_max_threads());

  nk = n1 = kiss_fft_next_fast_size(nx*pad1);
  n2 = kiss_fft_next_fast_size(ny);

  /* number of complex elements this process has to allocate */
  alloc_local = fftwf_mpi_local_size_2d(n2, n1, comm, &local_n0, &local_0_start);

  cc = sf_complexalloc(alloc_local);
  dd = sf_complexalloc(alloc_local);

  /* forward plan transforms cc into dd, the backward plan dd into cc */
  cfg = fftwf_mpi_plan_dft_2d(n2,n1,
                              (fftwf_complex *) cc,
                              (fftwf_complex *) dd,
                              comm,
                              FFTW_FORWARD, FFTW_MEASURE);

  icfg = fftwf_mpi_plan_dft_2d(n2,n1,
                               (fftwf_complex *) dd, 
                               (fftwf_complex *) cc,
                               comm,
                               FFTW_BACKWARD, FFTW_MEASURE);

  if (NULL == cfg || NULL == icfg) sf_error("FFTW failure.");

  *nx2 = n1;
  *ny2 = n2;
  *n_local = (int) local_n0;
  *o_local = (int) local_0_start;
	
  /* normalisation weight; the product is formed in double so that it cannot
     overflow int before the division */
  wt =  1.0/((double) n1*n2);
	
  return (nk*n2);
}
Пример #6
0
/*
 * Request n threads for FFTW plans created from now on.
 *
 * When FFTWTHREADS is defined, the FFTW thread layer is initialised on the
 * first call (tracked by fft_threads_init) and the thread count is set on
 * every call; both steps run inside OpenMP critical sections.  Without
 * FFTWTHREADS the function does nothing.
 */
void fft_set_num_threads(unsigned int n)
{
#ifndef FFTWTHREADS
	UNUSED(n);
#else
	/* one-time initialisation of the FFTW thread layer */
	#pragma omp critical
	{
		if (!fft_threads_init) {

			fft_threads_init = true;
			fftwf_init_threads();
		}
	}

	/* applies to every plan created after this call */
	#pragma omp critical
	{
		fftwf_plan_with_nthreads(n);
	}
#endif
}
Пример #7
0
/// Prepares single-precision FFTW for multithreaded planning with
/// FFT_THREADS threads.
///
/// If the FFTW thread layer cannot be initialised the function returns
/// without requesting threads, leaving FFTW in its default single-threaded
/// mode.
void Fft::prepareFft(){
	// fftwf_init_threads() returns zero on error; asking for threads on an
	// uninitialised thread layer is not a supported use of the library
	if (fftwf_init_threads() == 0) {
		return;
	}
	fftwf_plan_with_nthreads(FFT_THREADS);
}
Пример #8
0
int fft2_init(bool cmplx1        /* if complex transform */,
	      int pad1           /* padding on the first axis */,
	      int nx,   int ny   /* input data size */, 
	      int *nx2, int *ny2 /* padded data size */)
/*< initialize >*/
{
#ifdef SF_HAS_FFTW
#ifdef _OPENMP
    fftwf_init_threads();
    sf_warning("Using threaded FFTW3!\n");
    fftwf_plan_with_nthreads(omp_get_max_threads());
#endif
#else
   int i2;
#endif
	
    cmplx = cmplx1;
	
    if (cmplx) {
	nk = n1 = kiss_fft_next_fast_size(nx*pad1);
		
#ifndef SF_HAS_FFTW
	cfg1  = kiss_fft_alloc(n1,0,NULL,NULL);
	icfg1 = kiss_fft_alloc(n1,1,NULL,NULL);
#endif
    } else {
	nk = kiss_fft_next_fast_size(pad1*(nx+1)/2)+1;
	n1 = 2*(nk-1);
		
#ifndef SF_HAS_FFTW
	cfg  = kiss_fftr_alloc(n1,0,NULL,NULL);
	icfg = kiss_fftr_alloc(n1,1,NULL,NULL);
#endif
    }
		
    n2 = kiss_fft_next_fast_size(ny);

    if (cmplx) {
	cc = sf_complexalloc2(n1,n2);
    } else {
	ff = sf_floatalloc2(n1,n2);
    }
    dd = sf_complexalloc(nk*n2);
	
#ifndef SF_HAS_FFTW
    cfg2  = kiss_fft_alloc(n2,0,NULL,NULL);
    icfg2 = kiss_fft_alloc(n2,1,NULL,NULL);
 	
    tmp =    (kiss_fft_cpx **) sf_alloc(n2,sizeof(*tmp));
    tmp[0] = (kiss_fft_cpx *)  sf_alloc(nk*n2,sizeof(kiss_fft_cpx));
    for (i2=0; i2 < n2; i2++) {
	tmp[i2] = tmp[0]+i2*nk;
    }
	
    trace2 = sf_complexalloc(n2);
    ctrace2 = (kiss_fft_cpx *) trace2;
#endif

    *nx2 = n1;
    *ny2 = n2;
	
    wt =  1.0/(n1*n2);
	
    return (nk*n2);
}
Пример #9
0
/*
 * 2-D time stepping driver built on FFTW real-to-complex transforms.
 *
 * Reads a three-panel model (vp, eps, del) from "model", optional source and
 * receiver coordinates from "sou"/"rec", and steps a wavefield through nt
 * time samples with fft_stepforward().  Depending on which coordinate files
 * are given, input and output are either point traces (injected/extracted
 * with sinc interpolation) or full wavefield snapshots.  With adj=y the time
 * axis is traversed backwards and the roles of sources and receivers swap.
 */
int main (int argc, char *argv[])
{
  bool verb, snap;
  bool adj;
  int nz, nx, nt, ns = 0, nr = 0;  /* ns/nr are only set when sou/rec are given */
  float dz, dx, dt, oz, ox;
  int nz0, nx0, nb;
  float oz0, ox0;
  int nkz, nkx;
  int nzpad, nxpad;
  
  float **u1, **u0;
  float *ws = NULL, *wr = NULL;
  
  sf_file file_src = NULL, file_rec = NULL;
  sf_file file_inp = NULL, file_out = NULL;
  sf_file file_mdl = NULL;
  sf_axis az = NULL, ax = NULL, at = NULL, as = NULL, ar = NULL;
  pt2d *src2d = NULL;
  pt2d *rec2d = NULL;
  scoef2d cssinc = NULL;
  scoef2d crsinc = NULL;
  float *wi = NULL, *wo = NULL;
  sf_axis ai = NULL, ao = NULL;
  scoef2d cisinc = NULL, cosinc = NULL;
  bool spt = false, rpt = false;   /* source / receiver points supplied */
  bool ipt = false, opt = false;   /* input / output are point traces   */
  
  sf_init(argc, argv);
  
  if (!sf_getbool("verb", &verb)) verb = false;
  if (!sf_getbool("snap", &snap)) snap = false;
  if (!sf_getbool("adj", &adj)) adj = false;
  if (!sf_getint("nb", &nb)) nb = 4;
  /* forward: sources feed the input, receivers the output; adjoint: swapped */
  if (sf_getstring("sou") != NULL) { 
    spt = true;
    if (adj) opt = true;
    else     ipt = true;
  }
  if (sf_getstring("rec") != NULL) {
    rpt = true;
    if (adj) ipt = true;
    else     opt = true;
  }
  
  file_inp = sf_input("in");
  file_mdl = sf_input("model");
  if (spt) file_src = sf_input("sou");
  if (rpt) file_rec = sf_input("rec");
  file_out = sf_output("out");

  /* the time axis is axis 2 for trace input and axis 3 for snapshot input */
  if (ipt) at = sf_iaxa(file_inp, 2);
  else     at = sf_iaxa(file_inp, 3);
  if (spt) as = sf_iaxa(file_src, 2);
  if (rpt) ar = sf_iaxa(file_rec, 2);
  az = sf_iaxa(file_mdl, 1);
  ax = sf_iaxa(file_mdl, 2);
  nt = sf_n(at);  dt = sf_d(at);  //ot = sf_o(at);
  nz0 = sf_n(az);  dz = sf_d(az);  oz0 = sf_o(az);
  nx0 = sf_n(ax);  dx = sf_d(ax);  ox0 = sf_o(ax);

  if (spt) ns = sf_n(as);
  if (rpt) nr = sf_n(ar);
  /* computational grid = model grid plus nb boundary cells on every side */
  nz = nz0 + 2 * nb;
  nx = nx0 + 2 * nb;
  oz = oz0 - nb * dz;
  ox = ox0 - nb * dx;
  // sf_error("ox=%f ox0=%f oz=%f oz0=%f",ox,ox0,oz,oz0);
  
  /* FFT sizes: nz rounded up to an even number first, then to a fast size */
  nzpad = kiss_fft_next_fast_size( ((nz+1)>>1)<<1 );
  nkx = nxpad = kiss_fft_next_fast_size(nx);
  nkz = nzpad / 2 + 1;
  /* float okx = - 0.5f / dx; */
  float okx = 0.f;
  float okz = 0.f;
  float dkx = 1.f / (nxpad * dx);
  float dkz = 1.f / (nzpad * dz);

  /* read the three model panels and extend each onto the padded grid */
  float **vp, **eps, **del;
  vp  = sf_floatalloc2(nz, nx);
  eps = sf_floatalloc2(nz, nx);
  del = sf_floatalloc2(nz, nx);
  float **tmparray = sf_floatalloc2(nz0, nx0);
  sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(vp[0], tmparray[0], nz, nx, nz0, nx0);
  sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(eps[0], tmparray[0], nz, nx, nz0, nx0);
  sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(del[0], tmparray[0], nz, nx, nz0, nx0);

  float **vn, **vh;  
  float **eta, **lin_eta;
 
  vn = sf_floatalloc2(nz, nx);
  vh = sf_floatalloc2(nz, nx);
  eta = sf_floatalloc2(nz, nx);
  lin_eta = sf_floatalloc2(nz, nx);

  /* derived parameter fields; note that vp is squared in place first */
  for (int ix=0; ix<nx; ix++) {
    for (int iz=0; iz<nz; iz++){
      vp[ix][iz] *= vp[ix][iz];
      vn[ix][iz] = vp[ix][iz] * (1.f + 2.f * del[ix][iz]);
      vh[ix][iz] = vp[ix][iz] * (1.f + 2.f * eps[ix][iz]);
      eta[ix][iz] = (eps[ix][iz] - del[ix][iz]) / (1.f + 2.f * del[ix][iz]);
      lin_eta[ix][iz] = eta[ix][iz] * (1.f + 2.f * del[ix][iz]);
    }
  }


  /* squared angular wavenumbers; the upper half of kx holds the negative
     frequencies, kz only the non-negative half kept by the r2c transform */
  float *kx = sf_floatalloc(nkx);
  float *kz = sf_floatalloc(nkz);
  for (int ikx=0; ikx<nkx; ++ikx) {
    kx[ikx] = okx + ikx * dkx;
    /* if (ikx >= nkx/2) kx[ikx] = (nkx - ikx) * dkx; */
    if (ikx >= nkx/2) kx[ikx] = (ikx - nkx) * dkx;
    kx[ikx] *= 2 * SF_PI;
    kx[ikx] *= kx[ikx];
  }
  for (int ikz=0; ikz<nkz; ++ikz) {
    kz[ikz] = okz + ikz * dkz;
    kz[ikz] *= 2 * SF_PI;
    kz[ikz] *= kz[ikz];
  }

  if (adj) {
    ai = ar; ao = as;
  } else {
    ai = as; ao = ar;
  }

  /* output layout: (points, time) for traces, (z, x, time) for snapshots */
  if (opt) {
    sf_oaxa(file_out, ao, 1);
    sf_oaxa(file_out, at, 2);
  } else {
    sf_oaxa(file_out, az, 1);
    sf_oaxa(file_out, ax, 2);
    sf_oaxa(file_out, at, 3);
  }
  sf_fileflush(file_out, NULL);

  /* sinc interpolation coefficients and trace buffers for the point sets */
  if (spt) {
    src2d = pt2dalloc1(ns);
    pt2dread1(file_src, src2d, ns, 2);
    cssinc = sinc2d_make(ns, src2d, nz, nx, dz, dx, oz, ox);
    ws = sf_floatalloc(ns);
    if (adj) { cosinc = cssinc;  wo = ws; }
    else     { cisinc = cssinc;  wi = ws; }
  }
  if (rpt) {
    rec2d = pt2dalloc1(nr);
    pt2dread1(file_rec, rec2d, nr, 2);
    crsinc = sinc2d_make(nr, rec2d, nz, nx, dz, dx, oz, ox);
    wr = sf_floatalloc(nr);
    if (adj) { cisinc = crsinc;  wi = wr; }
    else     { cosinc = crsinc;  wo = wr; }
  }

  u0 = sf_floatalloc2(nz, nx);
  u1 = sf_floatalloc2(nz, nx);
  float *rwave = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float));
  float *rwavem = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float));
  fftwf_complex *cwave = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex));
  fftwf_complex *cwavem = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex));

  /* boundary conditions */
  float **ucut = NULL;
  float *damp = NULL;
  if (!(ipt &&opt)) ucut = sf_floatalloc2(nz0, nx0);
  damp = damp_make(nb);
    
  /* FFT normalisation combined with the dt^2 factor */
  float wt = 1./(nxpad * nzpad);
  wt *= dt * dt;
  fftwf_plan forward_plan;
  fftwf_plan inverse_plan;
#if defined(_OPENMP) && defined(SF_HAS_FFTW_OMP)
  /* only request threads when the thread layer initialised; the setting
     applies to every plan created afterwards, so one call is enough */
  if (fftwf_init_threads())
    fftwf_plan_with_nthreads(omp_get_max_threads());
#endif
  forward_plan = fftwf_plan_dft_r2c_2d(nxpad, nzpad,
              rwave, cwave, FFTW_MEASURE); 
  inverse_plan = fftwf_plan_dft_c2r_2d(nxpad, nzpad,
              cwavem, rwavem, FFTW_MEASURE); 
  if (NULL == forward_plan || NULL == inverse_plan)
    sf_error("FFTW failure.");

  /* time loop bounds: begin, end (exclusive) and increment */
  int itb, ite, itc;
  if (adj) {
    itb = nt -1; ite = -1; itc = -1;
  } else {
    itb = 0; ite = nt; itc = 1;
  }

  /* adjoint mode writes time slices in reverse order, so the output is
     first filled to its full length and then rewound; every slice is
     overwritten inside the time loop */
  if (adj) {
    for (int it=0; it<nt; it++) {
      if (opt) sf_floatwrite(wo, sf_n(ao), file_out);
      else     sf_floatwrite(ucut[0], nz0*nx0, file_out);
    }
    sf_seek(file_out, 0, SEEK_SET);
  }

  /* FFTW_MEASURE planning overwrites the buffers, so clear them afterwards */
  float **ptrtmp = NULL;
  memset(u0[0], 0, sizeof(float)*nz*nx);
  memset(u1[0], 0, sizeof(float)*nz*nx);
  memset(rwave, 0, sizeof(float)*nzpad*nxpad);
  memset(rwavem, 0, sizeof(float)*nzpad*nxpad);
  memset(cwave, 0, sizeof(fftwf_complex)*nkz*nkx);
  memset(cwavem, 0, sizeof(fftwf_complex)*nkz*nkx);

  for (int it=itb; it!=ite; it+=itc) { if (verb) sf_warning("it = %d;",it);
#ifdef _OPENMP
    double tic = omp_get_wtime();
#endif
    /* read the input for this time sample and scale it by dt^2 */
    if (ipt) {
      if (adj) sf_seek(file_inp, (off_t)(it)*sizeof(float)*sf_n(ai), SEEK_SET);
      sf_floatread(wi, sf_n(ai), file_inp);
      for (int i=0; i<sf_n(ai); i++)
        wi[i] *= dt* dt;
    } else {
      if (adj) sf_seek(file_inp, (off_t)(it)*sizeof(float)*nz0*nx0, SEEK_SET);
      sf_floatread(ucut[0], nz0*nx0, file_inp);
      for (int j=0; j<nx0; j++)
      for (int i=0; i<nz0; i++)
        ucut[j][i] *= dt * dt;
    }

    /* apply absorbing boundary condition: E \times u@n-1 */
    damp2d_apply(u0, damp, nz, nx, nb);
    fft_stepforward(u0, u1, rwave, rwavem, cwave, cwavem,
        vp, vn, eta, vh, eps, lin_eta, kz, kx,
        forward_plan, inverse_plan,
        nz, nx, nzpad, nxpad, nkz, nkx, wt, adj);

    // sinc2d_inject1(u0, ws[it][s_idx], cssinc[s_idx]);
    if (ipt) sinc2d_inject(u0, wi, cisinc);
    else     wfld2d_inject(u0, ucut, nz0, nx0, nb);

    /* apply absorbing boundary condition: E \times u@n+1 */
    damp2d_apply(u0, damp, nz, nx, nb);

    /* loop over pointers */
    ptrtmp = u0;  u0 = u1;  u1 = ptrtmp;
    
    /* write the output for this time sample */
    if (opt) {
      if (adj) sf_seek(file_out, (off_t)(it)*sizeof(float)*sf_n(ao),SEEK_SET);
      sinc2d_extract(u0, wo, cosinc);
      sf_floatwrite(wo, sf_n(ao), file_out);
    } else {
      if (adj) sf_seek(file_out, (off_t)(it)*sizeof(float)*nz0*nx0,SEEK_SET);
      wwin2d(ucut, u0, nz0, nx0, nb);
      sf_floatwrite(ucut[0], nz0*nx0, file_out);
    }

#ifdef _OPENMP
    double toc = omp_get_wtime();
    if (verb) fprintf(stderr," clock = %lf;", toc-tic);
#endif
  } /* END OF TIME LOOP */

  /* release the FFTW plans and the buffers obtained from fftwf_malloc */
  fftwf_destroy_plan(forward_plan);
  fftwf_destroy_plan(inverse_plan);
  fftwf_free(rwave);
  fftwf_free(rwavem);
  fftwf_free(cwave);
  fftwf_free(cwavem);

  return 0;
}
Пример #10
0
int cfft3_init(int pad1           /* padding on the first axis */,
	      int nx,   int ny,   int nz   /* input data size */, 
	      int *nx2, int *ny2, int *nz2 /* padded data size */)
/*< initialize >*/
{
    /*
     * Prepares the file-level state for the 3-D complex FFT: padded sizes
     * (n1, n2, n3, nk), the work cube cc, the weight wt and, when FFTW is not
     * available, the KISS FFT configurations and scratch buffers.  The padded
     * sizes are reported through nx2/ny2/nz2; the return value is nk*n2*n3.
     * NOTE(review): the return value is an int product and cannot represent
     * cubes with more than INT_MAX samples; the interface is left unchanged.
     */

#ifdef SF_HAS_FFTW
#ifdef _OPENMP
    fftwf_init_threads();
    sf_warning("Using threaded FFTW3! %d\n",omp_get_max_threads());
    fftwf_plan_with_nthreads(omp_get_max_threads());
#endif
#else
    int i2, i3;
#endif

    /* axis 1 */

    nk = n1 = kiss_fft_next_fast_size(nx*pad1);

#ifndef SF_HAS_FFTW
    cfg1  = kiss_fft_alloc(n1,0,NULL,NULL);
    icfg1 = kiss_fft_alloc(n1,1,NULL,NULL);
#endif

    /* axis 2 */

    n2 = kiss_fft_next_fast_size(ny);

#ifndef SF_HAS_FFTW
    cfg2  = kiss_fft_alloc(n2,0,NULL,NULL);
    icfg2 = kiss_fft_alloc(n2,1,NULL,NULL);

    trace2 = sf_complexalloc(n2);
    ctrace2 = (kiss_fft_cpx *) trace2;
#endif

    /* axis 3 */

    n3 = kiss_fft_next_fast_size(nz);

#ifndef SF_HAS_FFTW
    cfg3  = kiss_fft_alloc(n3,0,NULL,NULL);
    icfg3 = kiss_fft_alloc(n3,1,NULL,NULL);

    trace3 = sf_complexalloc(n3);
    ctrace3 = (kiss_fft_cpx *) trace3;

    /* --- */

    /* tmp is a three-level pointer structure over one contiguous
       nk*n2*n3 slab; element counts and offsets are formed in size_t so
       that large cubes do not overflow int */
    tmp = (kiss_fft_cpx***) sf_alloc (n3,sizeof(kiss_fft_cpx**));
    tmp[0] = (kiss_fft_cpx**) sf_alloc ((size_t) n2*n3,sizeof(kiss_fft_cpx*));
    tmp[0][0] = (kiss_fft_cpx*) sf_alloc ((size_t) nk*n2*n3,sizeof(kiss_fft_cpx));

    for (i2=1; i2 < n2*n3; i2++) {
	tmp[0][i2] = tmp[0][0]+(size_t) i2*nk;
    }

    for (i3=1; i3 < n3; i3++) {
	tmp[i3] = tmp[0]+(size_t) i3*n2;
    }
#endif

    cc = sf_complexalloc3(n1,n2,n3);

    *nx2 = n1;
    *ny2 = n2;
    *nz2 = n3;

    /* normalisation weight; the product is formed in double so that large
       cubes do not overflow int before the division */
    wt =  1.0/((double) n3*n2*n1);

    return (nk*n2*n3);
}
Пример #11
0
int main(int argc, char** argv){
	float tr[6];

	const float ZAP=32;
	const uint64_t TSIZE=18;
	const uint64_t zapE=64;
	fftwf_init_threads();
	fftwf_plan_with_nthreads(omp_get_max_threads());

	logmsg("Open file '%s'",argv[1]);
	FILE* f = fopen(argv[1],"r");

	int hdr_bytes = read_header(f);
	const uint64_t nskip = hdr_bytes;
	const uint64_t nchan = nchans;
	logmsg("Nchan=%"PRIu64", tsamp=%f",nchan,tsamp);
	mjk_rand_t *random = mjk_rand_init(12345);

	rewind(f);
	FILE* of = fopen("clean.fil","w");
	uint8_t hdr[nskip];
	fread(hdr,1,nskip,f);
	fwrite(hdr,1,nskip,of);
	const uint64_t nsamp_per_block=round(pow(2,TSIZE));

	logmsg("Tblock = %f",nsamp_per_block*tsamp);

	mjk_clock_t *t_all = init_clock();
	start_clock(t_all);

	mjk_clock_t *t_read = init_clock();
	mjk_clock_t *t_trns= init_clock();
	mjk_clock_t *t_rms = init_clock();
	mjk_clock_t *t_fft = init_clock();
	mjk_clock_t *t_spec = init_clock();

	const uint64_t bytes_per_block = nchan*nsamp_per_block;

	uint8_t *buffer = calloc(bytes_per_block,1);
	float **data = malloc_2df(nchan,nsamp_per_block);
	float **clean = malloc_2df(nchan,nsamp_per_block);
	float *bpass = calloc(nchan,sizeof(float));
	float  *ch_var=NULL;
	float  *ch_mean=NULL;
	float  *ch_fft_n=NULL;
	float  *ch_fft_p=NULL;


	logmsg("Planning FFT - this will take a long time the first time it is run!");
	start_clock(t_fft);
	FILE * wisfile;
	if(wisfile=fopen("wisdom.txt","r")){
		fftwf_import_wisdom_from_file(wisfile);
		fclose(wisfile);
	}
	const int fftX=nsamp_per_block;
	const int fftY=nchan;
	const int fftXo=nsamp_per_block/2+1;

	float *X = fftwf_malloc(sizeof(float)*fftX);

	for (uint64_t i = 0; i < nsamp_per_block ; i++){
		X[i]=i;
	}
	float *tseries = fftwf_malloc(sizeof(float)*fftX);
	float complex *fseries = fftwf_malloc(sizeof(float complex)*fftXo);
	float *pseries = fftwf_malloc(sizeof(float)*fftXo);
	uint8_t *mask = malloc(sizeof(uint8_t)*fftXo);
	fftwf_plan fft_1d = fftwf_plan_dft_r2c_1d(fftX,tseries,fseries,FFTW_MEASURE|FFTW_DESTROY_INPUT);

	complex float * fftd = fftwf_malloc(sizeof(complex float)*(fftXo*fftY));
	fftwf_plan fft_plan = fftwf_plan_many_dft_r2c(
			1,&fftX,fftY,
			data[0] ,&fftX,1,fftX,
			fftd    ,&fftXo,1,fftXo,
			FFTW_MEASURE|FFTW_PRESERVE_INPUT);
	logmsg("Planning iFFT - this will take a long time the first time it is run!");
	fftwf_plan ifft_plan = fftwf_plan_many_dft_c2r(
			1,&fftX,fftY,
			fftd ,&fftXo,1,fftXo,
			clean[0] ,&fftX,1,fftX,
			FFTW_MEASURE|FFTW_PRESERVE_INPUT);

	if(!fft_plan){
		logmsg("Error - could not do FFT plan");
		exit(2);
	}

	wisfile=fopen("wisdom.txt","w");
	fftwf_export_wisdom_to_file(wisfile);
	fclose(wisfile);
	stop_clock(t_fft);
	logmsg("T(planFFT)= %.2lfs",read_clock(t_fft));
	reset_clock(t_fft);



	float min_var=1e9;
	float max_var=0;

	float min_fft_n=1e9;
	float max_fft_n=0;


	float min_fft_p=1e9;
	float max_fft_p=0;

	float min_mean=1e9;
	float max_mean=0;
	uint64_t nblocks=0;
	uint64_t totread=0;
	while(!feof(f)){
		nblocks++;
		ch_var = realloc(ch_var,nchan*nblocks*sizeof(float));
		ch_mean = realloc(ch_mean,nchan*nblocks*sizeof(float));
		ch_fft_n = realloc(ch_fft_n,nchan*nblocks*sizeof(float));
		ch_fft_p = realloc(ch_fft_p,nchan*nblocks*sizeof(float));
		start_clock(t_read);
		uint64_t read = fread(buffer,1,bytes_per_block,f);
		stop_clock(t_read);
		if (read!=bytes_per_block){
			nblocks--;
			break;
		}
		totread+=read;
		logmsg("read=%"PRIu64" bytes. T=%fs",read,totread*tsamp/(float)nchan);
		uint64_t offset = (nblocks-1)*nchan;
		start_clock(t_trns);
		// transpose with small blocks in order to increase cache efficiency.
#define BLK 8
#pragma omp parallel for schedule(static,2) shared(buffer,data)
		for (uint64_t j = 0; j < nchan ; j+=BLK){
			for (uint64_t i = 0; i < nsamp_per_block ; i++){
				for (uint64_t k = 0; k < BLK ; k++){
					data[j+k][i] = buffer[i*nchan+j+k];
				}
			}
		}

#pragma omp parallel for shared(data)
		for (uint64_t j = 0; j < nchan ; j++){
			if(j<zapE || (nchan-j) < zapE){ 
				for (uint64_t i = 0; i < nsamp_per_block ; i++){
					data[j][i]=ZAP;
				}
			}
		}

		if(nblocks==1){
#pragma omp parallel for shared(data,bpass)
			for (uint64_t j = 0; j < nchan ; j++){
				for (uint64_t i = 0; i < nsamp_per_block ; i++){
					bpass[j]+=data[j][i];
				}
				bpass[j]/=(float)nsamp_per_block;
				bpass[j]-=ZAP;
			}
		}
#pragma omp parallel for shared(data,bpass)
		for (uint64_t j = 0; j < nchan ; j++){
			for (uint64_t i = 0; i < nsamp_per_block ; i++){
				data[j][i]-=bpass[j];
			}
		}




		stop_clock(t_trns);

		start_clock(t_rms);
#pragma omp parallel for shared(data,ch_mean,ch_var)
		for (uint64_t j = 0; j < nchan ; j++){
			float mean=0;
			for (uint64_t i = 0; i < nsamp_per_block ; i++){
				mean+=data[j][i];
			}
			mean/=(float)nsamp_per_block;
			if(mean > ZAP+5 || mean < ZAP-5){
				logmsg("ZAP ch=%"PRIu64,j);
				for (uint64_t i = 0; i < nsamp_per_block ; i++){
					data[j][i]=ZAP;
				}
			}

			float ss=0;
			float x=0;
			for (uint64_t i = 0; i < nsamp_per_block ; i++){
				x = data[j][i]-mean;
				ss+=x*x;
			}
			float var=ss/(float)nsamp_per_block;
			if (var > 0){
				for (uint64_t i = 0; i < nsamp_per_block ; i++){
					float v = (data[j][i]-mean)/sqrt(var);
					if(v > 3 || v < -3){
						data[j][i]=mjk_rand_gauss(random)*sqrt(var)+mean;
					}
				}
			}

			ch_var[offset+j] = var;
			ch_mean[offset+j] = mean;

		}
		stop_clock(t_rms);

		for (uint64_t i = 0; i < nsamp_per_block ; i++){
			tseries[i]=0;
		}

		float tmean=0;
		float tvar=0;
		float max=0;
		float min=1e99;
		//#pragma omp parallel for shared(data,tseries)
		// NOT THREAD SAFE
		for (uint64_t j = 0; j < nchan ; j++){
			tmean+=ch_mean[offset+j];
			tvar+=ch_var[offset+j];
			for (uint64_t i = 0; i < nsamp_per_block ; i++){
				tseries[i]+=data[j][i];
				if(data[j][i]>max)max=data[j][i];
				if(data[j][i]<min)min=data[j][i];
			}
		}
		float ss=0;
		float mm=0;
		for (uint64_t i = 0; i < nsamp_per_block ; i++){
			float x=tseries[i]-tmean;
			mm+=tseries[i];
			ss+=x*x;
		}
		float rvar=ss/(float)nsamp_per_block;
		logmsg("var=%g tvar=%g",ss/(float)nsamp_per_block,tvar);
		logmsg("mean=%g tmean=%g",mm/(float)nsamp_per_block,tmean);
		cpgopen("3/xs");
		cpgsvp(0.1,0.9,0.1,0.9);
		cpgswin(0,fftX,tmean-sqrt(tvar)*30,tmean+sqrt(tvar)*30);
		cpgbox("ABN",0,0,"ABN",0,0);
		cpgline(fftX,X,tseries);
		cpgsci(2);
		cpgclos();
		tr[0] = 0.0 ;
		tr[1] = 1;
		tr[2] = 0;
		tr[3] = 0.5;
		tr[4] = 0;
		tr[5] = 1;

		logmsg("max=%g min=%g",max,min);

		cpgopen("4/xs");
		cpgsvp(0.1,0.9,0.1,0.9);
		cpgswin(0,nsamp_per_block,0,nchan);
		cpgbox("ABN",0,0,"ABN",0,0);
		cpggray(*data,nsamp_per_block,nchan,1,nsamp_per_block,1,nchan,tmean/(float)nchan+sqrt(rvar/(float)nchan),tmean/(float)nchan-sqrt(rvar/(float)nchan),tr);
		cpgclos();




		start_clock(t_fft);
		fftwf_execute(fft_1d);
		fftwf_execute(fft_plan);
		stop_clock(t_fft);

		{
			float T = sqrt(fftXo*tvar)*12;
			logmsg("Zap T=%.2e",T);

			float fx[fftXo];
			float fT[fftXo];
#pragma omp parallel for shared(fseries,pseries,mask)
			for (uint64_t i = 0; i < fftXo ; i++){
				mask[i]=1;
			}
#pragma omp parallel for shared(fseries,pseries,mask)
			for (uint64_t i = 0; i < fftXo ; i++){
				pseries[i]=camp(fseries[i]);
				fx[i]=i;
				float TT = T;
				if (i>512)TT=T/2.0;
				if(i>32){
					fT[i]=TT;
					if (pseries[i] > TT) {
						mask[i]=0;
					}
				} else fT[i]=0;
			}

			uint64_t nmask=0;
			for (uint64_t i = 0; i < fftXo ; i++){
				if (mask[i]==0){
					nmask++;
				}
			}
			logmsg("masked=%d (%.2f%%)",nmask,100*nmask/(float)fftXo);
			cpgopen("1/xs");
			cpgsvp(0.1,0.9,0.1,0.9);
			cpgswin(0,fftXo,0,T*10);
			cpgbox("ABN",0,0,"ABN",0,0);
			cpgline(fftXo,fx,pseries);
			cpgsci(2);
			cpgline(fftXo,fx,fT);
			cpgclos();

		}


		//		exit(1);

		start_clock(t_spec);

		//FILE* ff=fopen("plot","w");
#pragma omp parallel for shared(fftd,ch_mean,ch_fft_n,ch_fft_p)
		for (uint64_t j = 0; j < nchan ; j++){
			float var = ch_var[offset+j];
			float m=sqrt(var*fftXo/2.0);
			float T = sqrt(var*fftXo)*3;
			uint64_t n=0;
			float p=0;
			float complex *fftch = fftd + fftXo*j;
			for(uint64_t i = 1; i < fftXo; i++){
				if (camp(fftch[i]) > T) {
					n++;
					p+=camp(fftch[i]);
				}
				//	 if(j==512)fprintf(ff,"%f ",camp(fftch[i]));
				if(mask[i]==0){
					fftch[i]=m*(mjk_rand_gauss(random) + I*mjk_rand_gauss(random)); 
				}
				//	 if(j==512)fprintf(ff,"%f\n",camp(fftch[i]));
			}
			ch_fft_n[offset+j]=n;
			ch_fft_p[offset+j]=p;
		}
		// fclose(ff);

		logmsg("iFFT");
		fftwf_execute(ifft_plan);

#pragma omp parallel for schedule(static,2) shared(buffer,clean)
		for (uint64_t j = 0; j < nchan ; j+=BLK){
			for (uint64_t i = 0; i < nsamp_per_block ; i++){
				for (uint64_t k = 0; k < BLK ; k++){
					clean[j+k][i]/=(float)fftX;
					buffer[i*nchan+j+k] = round(clean[j+k][i]);
				}
			}

			if(j==512){
				cpgopen("2/xs");
				cpgsvp(0.1,0.9,0.1,0.9);
				cpgswin(0,fftX,ch_mean[j]-sqrt(ch_var[j])*10,ch_mean[j]+sqrt(ch_var[j])*10);
				cpgbox("ABN",0,0,"ABN",0,0);
				cpgline(fftX,X,data[j]);
				cpgsci(2);
				cpgline(fftX,X,clean[j]);
				cpgclos();

			}

		}
		fwrite(buffer,1,bytes_per_block,of);


		for (uint64_t i = 0; i < nsamp_per_block ; i++){
			tseries[i]=0;
		}

		tmean=0;
		tvar=0;
		max=0;
		min=1e99;
		//#pragma omp parallel for shared(clean,tseries)
		// NOT THREAD SAFE
		for (uint64_t j = 0; j < nchan ; j++){
			tmean+=ch_mean[offset+j];
			tvar+=ch_var[offset+j];
			for (uint64_t i = 0; i < nsamp_per_block ; i++){
				tseries[i]+=clean[j][i];
				if(clean[j][i]>max)max=clean[j][i];
				if(clean[j][i]<min)min=clean[j][i];
			}
		}
		ss=0;
		mm=0;
		for (uint64_t i = 0; i < nsamp_per_block ; i++){
			float x=tseries[i]-tmean;
			mm+=tseries[i];
			ss+=x*x;
		}
		rvar=ss/(float)nsamp_per_block;
		logmsg("var=%g tvar=%g",ss/(float)nsamp_per_block,tvar);
		logmsg("mean=%g tmean=%g",mm/(float)nsamp_per_block,tmean);
		cpgopen("5/xs");
		cpgsvp(0.1,0.9,0.1,0.9);
		cpgswin(0,fftX,tmean-sqrt(tvar)*30,tmean+sqrt(tvar)*30);
		cpgbox("ABN",0,0,"ABN",0,0);
		cpgline(fftX,X,tseries);
		cpgsci(2);
		cpgclos();
		tr[0] = 0.0 ;
		tr[1] = 1;
		tr[2] = 0;
		tr[3] = 0.5;
		tr[4] = 0;
		tr[5] = 1;

		logmsg("max=%g min=%g",max,min);

		cpgopen("6/xs");
		cpgsvp(0.1,0.9,0.1,0.9);
		cpgswin(0,nsamp_per_block,0,nchan);
		cpgbox("ABN",0,0,"ABN",0,0);
		cpggray(*clean,nsamp_per_block,nchan,1,nsamp_per_block,1,nchan,tmean/(float)nchan+sqrt(rvar/(float)nchan),tmean/(float)nchan-sqrt(rvar/(float)nchan),tr);
		cpgclos();




		stop_clock(t_spec);
		for (uint64_t j = 0; j < nchan ; j++){
			float mean=ch_mean[offset+j];
			if (mean > max_mean)max_mean=mean;
			if (mean < min_mean)min_mean=mean;

			float var=ch_var[offset+j];
			if (var > max_var)max_var=var;
			if (var < min_var)min_var=var;
			float fft_n=ch_fft_n[offset+j];
			if (fft_n > max_fft_n)max_fft_n=fft_n;
			if (fft_n < min_fft_n)min_fft_n=fft_n;
			float fft_p=ch_fft_p[offset+j];
			if (fft_p > max_fft_p)max_fft_p=fft_p;
			if (fft_p < min_fft_p)min_fft_p=fft_p;

		}
	}
	stop_clock(t_all);

	fclose(of);

	logmsg("T(all)  = %.2lfs",read_clock(t_all));
	logmsg("T(read) = %.2lfs",read_clock(t_read));
	logmsg("T(trans)= %.2lfs",read_clock(t_trns));
	logmsg("T(fft)  = %.2lfs",read_clock(t_fft));
	logmsg("T(fan)  = %.2lfs",read_clock(t_spec));
	logmsg("T(rms)  = %.2lfs",read_clock(t_rms));
	logmsg("T(rest) = %.2lfs",read_clock(t_all)-read_clock(t_read)-read_clock(t_trns)-read_clock(t_rms)-read_clock(t_fft)-read_clock(t_spec));


	tr[0] = -tsamp*nsamp_per_block*0.5;
	tr[2] = tsamp*nsamp_per_block;
	tr[1] = 0;
	tr[3] = 0.5;
	tr[5] = 0;
	tr[4] = 1;




	cpgopen("1/xs");
	cpgsvp(0.1,0.9,0.1,0.9);
	cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan);
	cpgbox("ABN",600,10,"ABN",100,1);
	cpggray(ch_mean,nchan,nblocks,1,nchan,1,nblocks,max_mean,min_mean,tr);
	cpgclos();

	cpgopen("2/xs");
	cpgsvp(0.1,0.9,0.1,0.9);
	cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan);
	cpgbox("ABN",600,10,"ABN",100,1);
	cpggray(ch_var,nchan,nblocks,1,nchan,1,nblocks,max_var,min_var,tr);
	cpgclos();

	cpgopen("3/xs");
	cpgsvp(0.1,0.9,0.1,0.9);
	cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan);
	cpgbox("ABN",600,10,"ABN",100,1);
	cpggray(ch_fft_n,nchan,nblocks,1,nchan,1,nblocks,max_fft_n,min_fft_n,tr);
	cpgclos();

	cpgopen("4/xs");
	cpgsvp(0.1,0.9,0.1,0.9);
	cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan);
	cpgbox("ABN",600,10,"ABN",100,1);
	cpggray(ch_fft_p,nchan,nblocks,1,nchan,1,nblocks,max_fft_p,min_fft_p,tr);
	cpgclos();



	cpgopen("mean.ps/vcps");
	cpgsvp(0.1,0.9,0.1,0.9);
	cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan);
	cpgbox("ABN",600,10,"ABN",100,1);
	cpggray(ch_mean,nchan,nblocks,1,nchan,1,nblocks,max_mean,min_mean,tr);
	cpgclos();

	cpgopen("var.ps/vcps");
	cpgsvp(0.1,0.9,0.1,0.9);
	cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan);
	cpgbox("ABN",600,10,"ABN",100,1);
	cpggray(ch_var,nchan,nblocks,1,nchan,1,nblocks,max_var,min_var,tr);
	cpgclos();


	cpgopen("fft_n.ps/vcps");
	cpgsvp(0.1,0.9,0.1,0.9);
	cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan);
	cpgbox("ABN",600,10,"ABN",100,1);
	cpggray(ch_fft_n,nchan,nblocks,1,nchan,1,nblocks,max_fft_n,min_fft_n,tr);
	cpgclos();

	cpgopen("fft_p.ps/vcps");
	cpgsvp(0.1,0.9,0.1,0.9);
	cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan);
	cpgbox("ABN",600,10,"ABN",100,1);
	cpggray(ch_fft_p,nchan,nblocks,1,nchan,1,nblocks,max_fft_p,min_fft_p,tr);
	cpgclos();



	fclose(f);
	free(buffer);
	free_2df(data);

	return 0;

}
Пример #12
0
/*
 * Fit a harmonic-domain profile and a time-domain filter to the cyclic
 * spectrum of one subintegration using an NLopt COBYLA minimizer.
 *
 * Options:
 *   -v        increase verbosity
 *   -I isub   subintegration passed to cyclic_load_ps (default 1)
 *   -H n      max harmonics (parsed, currently only used in disabled code)
 *   -L n      max lags      (parsed, currently only used in disabled code)
 *   -h        print usage and exit
 * The first non-option argument is the FITS file to read.
 */
int main(int argc, char *argv[]) {

    int opt=0, verb=0;
    int max_harm = 64, max_lag=0;
    int isub = 1;
    while ((opt=getopt(argc,argv,"hvI:H:L:"))!=-1) {
        switch (opt) {
            case 'v':
                verb++;
                break;
            case 'I':
                isub = atoi(optarg);
                break;
            case 'H':
                max_harm = atoi(optarg);
                break;
            case 'L':
                max_lag = atoi(optarg);
                break;
            case 'h':
                usage();
                exit(0);
                break;
        }
    }

    if (optind==argc) {
        usage();
        exit(1);
    }

    int i, rv;

    /* Open file */
    fitsfile *f;
    /* CFITSIO routines return immediately when *status is non-zero on
     * entry, so it must start at 0 rather than being left uninitialised. */
    int status = 0;
    fits_open_file(&f, argv[optind], READONLY, &status);
    fits_error_check_fatal();

    /* Get basic dims */
    struct cyclic_work w;
    cyclic_load_params(f, &w, &status);
    fits_error_check_fatal();
    if (verb) { 
        printf("Read nphase=%d npol=%d nchan=%d\n", 
                w.nphase, w.npol, w.nchan);
        fflush(stdout);
    }
    /* Remember the on-disk poln count for the raw buffer; all further
     * work is done on a single (summed) polarisation. */
    int orig_npol = w.npol;
    w.npol = 1;

    /* Init FFTs */
    fftwf_init_threads();
    fftwf_plan_with_nthreads(4);
    if (verb) { printf("Planning FFTs\n"); fflush(stdout); }
#define WF "/home/pdemores/share/cyclic_wisdom.dat"
    /* Wisdom import/export is best-effort: a missing or unwritable file
     * is silently ignored. */
    FILE *wf = fopen(WF,"r");
    if (wf!=NULL) { fftwf_import_wisdom_from_file(wf); fclose(wf); }
    rv = cyclic_init_ffts(&w);
    if (rv) {
        fprintf(stderr, "Error planning ffts (rv=%d)\n", rv);
        exit(1);
    }
    wf = fopen(WF,"w");
    if (wf!=NULL) { fftwf_export_wisdom_to_file(wf); fclose(wf); }

    /* Alloc some stuff */
    struct periodic_spectrum raw;
    struct cyclic_spectrum cs, model_cs;
    struct filter_time ht;
    struct filter_freq hf;
    struct profile_phase pp;
    struct profile_harm ph;

    raw.nphase = pp.nphase = w.nphase;
    raw.nchan = cs.nchan = hf.nchan = w.nchan;
    cs.nharm = ph.nharm =  w.nharm;
    ht.nlag = w.nlag;
    raw.npol = orig_npol;
    cs.npol = 1;

    model_cs.nchan = cs.nchan;
    model_cs.nharm = cs.nharm;
    model_cs.npol = cs.npol;

    cyclic_alloc_ps(&raw);
    cyclic_alloc_cs(&cs);
    cyclic_alloc_cs(&model_cs);
    filter_alloc_time(&ht);
    filter_alloc_freq(&hf);
    profile_alloc_phase(&pp);
    profile_alloc_harm(&ph);

#if 0  // XXX not implemented yet
    /* Check bounds */
    if (max_harm > w.nharm) { max_harm = w.nharm; }
    if (max_lag > w.nlag/2) { max_lag = w.nlag/2; }
    if (verb) {
        printf("Using max of %d harmonics and %d lags\n", max_harm, max_lag);
    }
#endif

    /* Load data */
    cyclic_load_ps(f, &raw, isub, &status);
    fits_error_check_fatal();

    /* Add polns w/o calibration */
    cyclic_pscrunch_ps(&raw, 1.0, 1.0);

    /* Initialize H, profile guesses: filter starts as a unit impulse at
     * lag 0, profile comes from the frequency-scrunched data. */
    cyclic_fscrunch_ps(&pp, &raw);
    profile_phase2harm(&pp, &ph, &w);
    ht.data[0] = 1.0;
    for (i=1; i<ht.nlag; i++) { ht.data[i] = 0.0; }
    filter_profile_norm(&ht, &ph, w.nharm);
    profile_harm2phase(&ph, &pp, &w);

    /* convert input data to cyclic spectrum */
    cyclic_ps2cs(&raw, &cs, &w);

    /* could output initial profile */

    /* Fill in data struct for nlopt */
    struct cyclic_data cdata;
    cdata.cs = &cs;
    cdata.s0 = &ph;
    cdata.ht = &ht;
    cdata.model_cs = &model_cs;
    cdata.w = &w;

    /* Set up minimizer */
    const int dim = 2*(w.nharm-1) + 2*w.nlag; /* number of free params */
    printf("number of fit params = %d\n", dim);
    nlopt_opt op;
    op = nlopt_create(NLOPT_LN_COBYLA, dim);
    if (op==NULL) {
        fprintf(stderr, "nlopt_create failed\n");
        exit(1);
    }
    nlopt_set_min_objective(op, cyclic_ms_difference_nlopt, &cdata);
    nlopt_set_xtol_rel(op, 1e-4);

    /* Set up initial params: (re,im) pairs for harmonics 1..nharm-1
     * followed by (re,im) pairs for every filter lag. */
    double *x = (double *)malloc(sizeof(double) * dim);
    if (x==NULL) {
        fprintf(stderr, "Error allocating fit parameters\n");
        exit(1);
    }
    double *xtmp = x;
    for (i=1; i<ph.nharm; i++) { 
        xtmp[0] = creal(ph.data[i]);
        xtmp[1] = cimag(ph.data[i]);
        xtmp += 2;
    }
    for (i=0; i<ht.nlag; i++) { 
        xtmp[0] = creal(ht.data[i]);
        xtmp[1] = cimag(ht.data[i]);
        xtmp += 2;
    }

    /* Run optimization.  NLopt reports success with positive return
     * codes and failure with negative ones, so only a negative value is
     * treated as an error. */
    double min;
    rv = nlopt_optimize(op, x, &min);
    if (rv < 0) {
        fprintf(stderr, "nlopt_optimize failed\n");
        exit(1);
    }

    /* TODO: some kind of output */

    /* All done :) */
    free(x);
    nlopt_destroy(op);
    exit(0);
}
Пример #13
0
/*
 * Compute the power spectrum (in dB) of the first `fftlength` complex
 * samples of an input file and write it to an output file.
 *
 * Usage: prog filein fileout fftlength [nthreads]
 *
 * The input is read as interleaved 16-bit values, two per complex sample
 * (each value is converted to float as-is).  The output file is sized to
 * hold `fftlength` floats, one 10*log10(re^2 + im^2) value per FFT bin.
 * nthreads defaults to 4 when not given.
 */
int
main(int argc, char *argv[])
{
    uint16_t *addr_in;
    float *addr_out;
    int fd_in;
    int fd_out;
    struct stat sb;
    size_t length;
    uint64_t fftlen;
    size_t index;

    if (argc < 4|| argc > 5) {
        fprintf(stderr, "%s filein fileout fftlength [nthreads]\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    /* Validate the numeric arguments before touching any file so that a
     * bad command line does not truncate the output file. */
    long long requested_len = atoll(argv[3]);
    if (requested_len <= 0) {
        fprintf(stderr, "fftlength must be a positive integer\n");
        exit(EXIT_FAILURE);
    }
    fftlen = (uint64_t)requested_len;

    /* argv[4] only exists when argc == 5; otherwise use the default. */
    int nthreads = (argc == 5) ? atoi(argv[4]) : 4;
    if (nthreads < 1) {
        fprintf(stderr, "nthreads must be a positive integer\n");
        exit(EXIT_FAILURE);
    }

    fd_in = open(argv[1], O_RDONLY|O_LARGEFILE);
    if (fd_in == -1)
        handle_error("in open");
    fd_out = open(argv[2], O_LARGEFILE|O_CREAT|O_TRUNC|O_RDWR, 0666);
    if (fd_out == -1)
        handle_error("out open");

    /* The output mapping below needs the file to already be this large.
     * posix_fallocate returns the error number directly (not via errno). */
    int fa_err = posix_fallocate(fd_out, 0, fftlen* sizeof(float));
    if (fa_err != 0) {
        fprintf(stderr, "posix_fallocate failed (error %d)\n", fa_err);
        exit(EXIT_FAILURE);
    }

    if (fstat(fd_in, &sb) == -1)           /* To obtain file size */
        handle_error("fstat");

    length = sb.st_size; //two bytes per short, two shorts per complex value

    /* The conversion loop reads 2*fftlen shorts; refuse shorter inputs
     * instead of reading past the end of the mapping. */
    if (length / (2 * sizeof(uint16_t)) < fftlen) {
        fprintf(stderr, "input file too short for fftlength %llu\n",
                (unsigned long long)fftlen);
        exit(EXIT_FAILURE);
    }

    //map the input into memory.
    addr_in = mmap(NULL, length, PROT_READ,
                   MAP_PRIVATE, fd_in, 0);
    if (addr_in == MAP_FAILED)
        handle_error("input mmap");

    //map the output into memory.
    addr_out = mmap(NULL, fftlen*sizeof(float), PROT_WRITE,
                    MAP_SHARED, fd_out, 0);
    if (addr_out == MAP_FAILED)
        handle_error("output mmap");


    //fftwf stuff
    fftwf_init_threads();
    fftwf_plan_with_nthreads(nthreads);
    fftwf_complex *in, *out;
    fftwf_plan my_plan;
    in = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*fftlen);
    out = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*fftlen);
    if (in == NULL || out == NULL) {
        fprintf(stderr, "could not allocate FFT buffers\n");
        exit(EXIT_FAILURE);
    }
    my_plan = fftwf_plan_dft_1d(fftlen, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
    if (my_plan == NULL) {
        fprintf(stderr, "could not create FFT plan\n");
        exit(EXIT_FAILURE);
    }

    /* View the complex buffers as flat float arrays (re, im, re, im...). */
    float *fin = (float*)in;
    float *fout = (float*)out;

    for(index = 0; index < fftlen*2; index++) {
        float tmp =  (float) addr_in[index];
        fin[index] =tmp;
    }

    //run that FFT
    fftwf_execute(my_plan);

    //calculate Power
    size_t currindex=0;
    float real, imag;
    for(index = 0; index < fftlen; index++)	{
        real = fout[currindex++];
        imag = fout[currindex++];
        addr_out[index] = 10.0f*log10f( real * real + imag * imag );
    }

    fftwf_destroy_plan(my_plan);
    fftwf_free(in);
    fftwf_free(out);
    /* Flush the shared mapping to the output file before exiting. */
    msync(addr_out, fftlen * sizeof(float), MS_SYNC);

    exit(EXIT_SUCCESS);
}
Пример #14
0
/*
 * Iteratively estimate a pulse profile and a filter from the cyclic
 * spectrum of one subintegration.
 *
 * Options:
 *   -v      increase verbosity (also enables writing iter.dat rows)
 *   -H n    max number of harmonics (default 64, clamped to w.nharm)
 *   -L n    max lag; lags in [n, nlag-n) are zeroed each iteration
 *   -C      causal filter: zero the second half of the lag array
 *   -h      print usage and exit
 * The first non-option argument is the FITS file to read.
 *
 * The loop runs until the global `run` flag becomes false; the SIGINT
 * handler `cc` is installed just before the loop starts.
 */
int main(int argc, char *argv[]) {

    int opt=0, verb=0;
    int max_harm = 64, max_lag=0;
    int causal_filter = 0;
    while ((opt=getopt(argc,argv,"hvH:L:C"))!=-1) {
        switch (opt) {
            case 'v':
                verb++;
                break;
            case 'H':
                max_harm = atoi(optarg);
                break;
            case 'L':
                max_lag = atoi(optarg);
                break;
            case 'C':
                causal_filter = 1;
                break;
            case 'h':
                usage();
                exit(0);
                break;
        }
    }

    if (optind==argc) {
        usage();
        exit(1);
    }

    int i, rv;

    /* Open file */
    fitsfile *f;
    /* CFITSIO routines return immediately when *status is non-zero on
     * entry, so it must start at 0 rather than being left uninitialised. */
    int status = 0;
    fits_open_file(&f, argv[optind], READONLY, &status);
    fits_error_check_fatal();

    /* Get basic dims */
    struct cyclic_work w;
    cyclic_load_params(f, &w, &status);
    fits_error_check_fatal();
    if (verb) { 
        printf("Read nphase=%d npol=%d nchan=%d\n", 
                w.nphase, w.npol, w.nchan);
        fflush(stdout);
    }
    /* Remember the on-disk poln count for the raw buffer; all further
     * work is done on a single (summed) polarisation. */
    int orig_npol = w.npol;
    w.npol = 1;

    /* Init FFTs */
    fftwf_init_threads();
    fftwf_plan_with_nthreads(4);
    if (verb) { printf("Planning FFTs\n"); fflush(stdout); }
#define WF "/home/pdemores/share/cyclic_wisdom.dat"
    /* Wisdom import/export is best-effort: a missing or unwritable file
     * is silently ignored. */
    FILE *wf = fopen(WF,"r");
    if (wf!=NULL) { fftwf_import_wisdom_from_file(wf); fclose(wf); }
    rv = cyclic_init_ffts(&w);
    if (rv) {
        fprintf(stderr, "Error planning ffts (rv=%d)\n", rv);
        exit(1);
    }
    wf = fopen(WF,"w");
    if (wf!=NULL) { fftwf_export_wisdom_to_file(wf); fclose(wf); }

    /* Alloc some stuff */
    struct periodic_spectrum raw;
    struct cyclic_spectrum cs, cs_neg;
    struct filter_time ht, ht_new;
    struct filter_freq hf, hf_new;
    struct filter_freq *hf_shift_pos, *hf_shift_neg;
    hf_shift_pos = (struct filter_freq *)malloc(
            sizeof(struct filter_freq)*w.nharm);
    hf_shift_neg = (struct filter_freq *)malloc(
            sizeof(struct filter_freq)*w.nharm);
    if (hf_shift_pos==NULL || hf_shift_neg==NULL) {
        fprintf(stderr, "Error allocating shifted filter arrays\n");
        exit(1);
    }
    struct profile_phase pp, pp_new;
    struct profile_harm ph, ph_new;

    raw.nphase = pp.nphase = pp_new.nphase = w.nphase;
    raw.nchan = cs.nchan = hf.nchan = hf_new.nchan = w.nchan;
    cs.nharm = ph.nharm = ph_new.nharm = w.nharm;
    ht.nlag = ht_new.nlag = w.nlag;
    for (i=0; i<w.nharm; i++) { hf_shift_pos[i].nchan = w.nchan; }
    for (i=0; i<w.nharm; i++) { hf_shift_neg[i].nchan = w.nchan; }
    raw.npol = orig_npol;
    cs.npol = 1;

    cs_neg.nchan = cs.nchan;
    cs_neg.nharm = cs.nharm;
    cs_neg.npol = cs.npol;

    cyclic_alloc_ps(&raw);
    cyclic_alloc_cs(&cs);
    cyclic_alloc_cs(&cs_neg);
    filter_alloc_time(&ht);
    filter_alloc_time(&ht_new);
    filter_alloc_freq(&hf);
    filter_alloc_freq(&hf_new);
    for (i=0; i<w.nharm; i++) {
        filter_alloc_freq(&hf_shift_pos[i]);
        filter_alloc_freq(&hf_shift_neg[i]);
    }
    profile_alloc_phase(&pp);
    profile_alloc_phase(&pp_new);
    profile_alloc_harm(&ph);
    profile_alloc_harm(&ph_new);

    /* Check bounds */
    if (max_harm > w.nharm) { max_harm = w.nharm; }
    if (max_lag > w.nlag/2) { max_lag = w.nlag/2; }
    if (verb) {
        printf("Using max of %d harmonics and %d lags\n", max_harm, max_lag);
    }

    /* Run procedure on the subint with index 1 (the value handed to
     * cyclic_load_ps below). */
    int isub = 1;

    /* Load data */
    cyclic_load_ps(f, &raw, isub, &status);
    fits_error_check_fatal();

    /* Add polns w/o calibration */
    cyclic_pscrunch_ps(&raw, 1.0, 1.0);

    /* Initialize H, profile guesses: filter starts as a unit impulse at
     * lag 0, profile comes from the frequency-scrunched data. */
    cyclic_fscrunch_ps(&pp, &raw);
    profile_phase2harm(&pp, &ph, &w);
    ht.data[0] = 1.0;
    for (i=1; i<ht.nlag; i++) { ht.data[i] = 0.0; }
    filter_profile_norm(&ht, &ph, max_harm);
    profile_harm2phase(&ph, &pp, &w);

    /* convert to CS, produce shifted version */
    cyclic_ps2cs(&raw, &cs, &w);
    cyclic_ps2cs(&raw, &cs_neg, &w);
    cyclic_shift_cs(&cs, +1, &w);
    cyclic_shift_cs(&cs_neg, -1, &w);

    /* TODO output initial profile */

    /* Remove old files */
#define FILT "filters.dat"
#define TFILT "tfilters.dat"
#define PROF "profs.dat"
#define FPROF "fprofs.dat"
    unlink(FILT);
    unlink(TFILT);
    unlink(PROF);
    unlink(FPROF);

    /* The convergence log is written with fprintf and closed with fclose
     * below, both of which require a valid stream. */
    FILE *it = fopen("iter.dat", "w");
    if (it==NULL) {
        fprintf(stderr, "Error opening iter.dat for writing\n");
        exit(1);
    }

    /* iterate */
    int nit=0;
    double mse=0.0, last_mse=0.0;
    signal(SIGINT, cc);
    do { 

        if (verb) {
            printf("iter %d\n", nit); 
            fflush(stdout);
        }

        /* Make freq domain filter */
        filter_time2freq(&ht, &hf, &w);
        write_filter(TFILT, &ht);
        write_filter_freq(FILT, &hf);

        /* Make shifted filter array */
        filter_shift(hf_shift_pos, &ht, w.nharm, 
                raw.ref_freq/(raw.bw*1e6), &w);
        filter_shift(hf_shift_neg, &ht, w.nharm, 
                -1.0*raw.ref_freq/(raw.bw*1e6), &w);

        mse = cyclic_mse(&cs, &cs_neg, &ph, hf_shift_pos, hf_shift_neg, 
                max_harm);

        /* Update filter, prof */
        cyclic_update_filter(&hf_new, &cs, &cs_neg, &ph, 
                hf_shift_pos, hf_shift_neg, max_harm);
        cyclic_update_profile(&ph_new, &cs, &cs_neg, 
                hf_shift_pos, hf_shift_neg);


        /* Back to time domain filter */
        filter_freq2time(&hf_new, &ht_new, &w);

        /* Fix filter normalization */
        for (i=0; i<ht_new.nlag; i++) 
            ht_new.data[i] /= (float)ht_new.nlag;

        /* Zero out negative lags (stored in the upper half of the array) */
        if (causal_filter) {
            for (i=ht_new.nlag/2; i<ht_new.nlag; i++) 
                ht_new.data[i] = 0.0;
        }
        
        /* Zero out large lags */
        if (max_lag>0) { 
            for (i=max_lag; i<ht_new.nlag-max_lag; i++) 
                ht_new.data[i] = 0.0;
        }

        /* Kill nyquist point?? */
        ht_new.data[ht_new.nlag/2] = 0.0;

        /* Normalize prof and filter */
        filter_profile_norm(&ht_new, &ph_new, max_harm);

        /* TODO some kind of convergence test */
        double prof_diff = profile_ms_difference(&ph, &ph_new, max_harm);
        double filt_diff = filter_ms_difference(&ht, &ht_new);

        /* TODO zero out high harmonics ?? */

        /* Take the new versions outright on the first pass; afterwards
         * move a fixed fraction `fac` of the way towards them. */
        if (nit==0) {
            for (i=0; i<w.nharm; i++) 
                ph.data[i] = ph_new.data[i];
            for (i=0; i<w.nlag; i++) 
                ht.data[i] = ht_new.data[i]; 
        } else {
            //double fac = (mse<last_mse) ? 1.0 : 0.5*sqrt(mse/last_mse);
            double fac=0.25;
            for (i=0; i<w.nharm; i++) 
                ph.data[i] = (1.0-fac)*ph.data[i] + fac*ph_new.data[i];
            for (i=0; i<w.nlag; i++) 
                ht.data[i] = (1.0-fac)*ht.data[i] + fac*ht_new.data[i]; 
        }

        /* Back to phase domain profile */
        ph.data[0] = 0.0;
        profile_harm2phase(&ph, &pp_new, &w);

        /* Write out current profiles */
        write_profile(PROF, &pp_new);
        write_fprofile(FPROF, &ph);

        /* Print convergence params */
        if (verb) {
            fprintf(it,"%.3e %.3e %.8e %.8e\n", prof_diff, filt_diff, mse,
                    mse - last_mse);
        }
        last_mse = mse;

        /* Update iter count */
        nit++;

    } while (run);

    fclose(it);

    exit(0);

}
Пример #15
0
int cfft2_init(int pad1           /* padding on the first axis */,
	       int nx,   int ny   /* input data size */, 
	       int *nx2, int *ny2 /* padded data size */)
/*< initialize >*/
/*
 * Set up the file-level state for a 2-D complex FFT.
 *
 * The padded sizes are n1 = next fast size of nx*pad1 and n2 = next fast
 * size of ny; they are stored in the file-level n1/n2/nk and returned
 * through nx2/ny2.  With SF_HAS_FFTW the forward and backward FFTW plans
 * (cfg/icfg) are created over the cc and dd buffers; otherwise KissFFT
 * configurations and scratch arrays are allocated.  The scaling weight wt
 * is set to 1/(n1*n2).
 *
 * Returns nk*n2, the number of complex samples in the padded grid.
 */
{

#ifdef SF_HAS_FFTW
#ifdef _OPENMP
    fftwf_init_threads();
    if (false)
      sf_warning("Using threaded FFTW3! \n");
    fftwf_plan_with_nthreads(omp_get_max_threads());
#else
    if (false)
      sf_warning("Using FFTW3! \n");
#endif
#else
    if (false)
      sf_warning("Using KissFFT! \n");
#endif

#ifndef SF_HAS_FFTW
    int i2;
#endif

    nk = n1 = kiss_fft_next_fast_size(nx*pad1);
    n2 = kiss_fft_next_fast_size(ny);

    cc = sf_complexalloc2(n1,n2);
    
#ifdef SF_HAS_FFTW
    dd = sf_complexalloc2(nk,n2);

    cfg = fftwf_plan_dft_2d(n2,n1,
			    (fftwf_complex *) cc[0], 
			    (fftwf_complex *) dd[0],
			    FFTW_FORWARD, FFTW_MEASURE);

    icfg = fftwf_plan_dft_2d(n2,n1,
			     (fftwf_complex *) dd[0], 
			     (fftwf_complex *) cc[0],
			     FFTW_BACKWARD, FFTW_MEASURE);

    if (NULL == cfg || NULL == icfg) sf_error("FFTW failure.");
#else
    cfg1  = kiss_fft_alloc(n1,0,NULL,NULL);
    icfg1 = kiss_fft_alloc(n1,1,NULL,NULL);

    cfg2  = kiss_fft_alloc(n2,0,NULL,NULL);
    icfg2 = kiss_fft_alloc(n2,1,NULL,NULL);
 	
    /* tmp is an array of n2 row pointers into one contiguous nk*n2 slab
       whose base address is kept in tmp[0]. */
    tmp =    (kiss_fft_cpx **) sf_alloc(n2,sizeof(*tmp));
    tmp[0] = (kiss_fft_cpx *)  sf_alloc(nk*n2,sizeof(kiss_fft_cpx));
    /* Start at row 1: tmp[0] already holds the base pointer, and writing
       it again inside the parallel loop would race with the other
       iterations that read it. */
#ifdef _OPENMP
#pragma omp parallel for private(i2) default(shared)
#endif
    for (i2=1; i2 < n2; i2++) {
	tmp[i2] = tmp[0]+i2*nk;
    }
	
    trace2 = sf_complexalloc(n2);
    ctrace2 = (kiss_fft_cpx *) trace2;
#endif

    *nx2 = n1;
    *ny2 = n2;
	
    wt =  1.0/(n1*n2);
	
    return (nk*n2);
}
Пример #16
0
 /* Thin wrapper: forwards the result of fftwf_init_threads() unchanged. */
 static int init_threads() {
   return fftwf_init_threads();
 }
Пример #17
0
/*
 * For every raster band of dstDS: FFT the corresponding band of srcDS1 and
 * srcDS2, multiply the first spectrum by the complex conjugate of the
 * second, inverse-FFT the product, scale the magnitudes to 0-255 and write
 * the result (after fft2shift) into the band of dstDS.
 *
 * All images are processed at the raster size of dstDS.
 *
 * Returns NULL: the result is delivered through dstDS and every working
 * buffer is released before returning.  (The declared return type is kept
 * for existing callers.)
 */
float* CalcFFT(GDALDataset *srcDS1, GDALDataset *srcDS2, GDALDataset *dstDS) 
{
	fftwf_plan plan1, plan2, planI;
	fftwf_complex *img1, *img2;
	unsigned char *out;
	int band;

	/* Widen before multiplying so large rasters do not overflow int. */
	const size_t px_count = (size_t)dstDS->GetRasterXSize() * (size_t)dstDS->GetRasterYSize();
	const size_t buffer_len = sizeof(fftwf_complex) * px_count;
	img1  = (fftwf_complex*) fftwf_malloc(buffer_len);
	img2  = (fftwf_complex*) fftwf_malloc(buffer_len);
	out   = (unsigned char*) fftwf_malloc(sizeof(unsigned char) * px_count); 
		/* ^ not used in fft, but aligned is good anyway */
	if(img1 == NULL || img2 == NULL || out == NULL)
		error("Could not allocate memory\n");

	/* Threading is optional: fall back to single-threaded plans when the
	 * thread support cannot be initialised. */
	if(fftwf_init_threads())
		fftwf_plan_with_nthreads(CORES);

	plan1 = fftwf_plan_dft_2d(dstDS->GetRasterYSize(), dstDS->GetRasterXSize(), 
	                        img1, img1, FFTW_FORWARD, FFTW_ESTIMATE);

	plan2 = fftwf_plan_dft_2d(dstDS->GetRasterYSize(), dstDS->GetRasterXSize(), 
	                        img2, img2, FFTW_FORWARD, FFTW_ESTIMATE);

	planI = fftwf_plan_dft_2d(dstDS->GetRasterYSize(), dstDS->GetRasterXSize(), 
	                        img2, img2, FFTW_BACKWARD, FFTW_ESTIMATE);

	if(plan1 == NULL || plan2 == NULL || planI == NULL)
		error("Could not plan FFT\n");

	for(band = 1; band <= dstDS->GetRasterCount(); band++) {
		printf("FFT 1 band %d\n", band);
		runFFT( plan1, srcDS1, img1, band, dstDS );
		printf("FFT 2 band %d\n", band);
		runFFT( plan2, srcDS2, img2, band, dstDS );

		
		printf("Complex Conj band %d\n", band);
		/* mult img1 and conj of img2 */
		for(size_t px = 0; px < px_count; px++) {
			img2[px] = img1[px] * conj(img2[px]);
		}
	
		/* IFFT of result */	
		printf("IFFT band %d\n", band);
		fftwf_execute(planI);	

		printf("normalize band %d\n", band);
		complex float norm = csqrt(px_count + 0I);
		float max = cabs(img2[0] / norm);
		float min = cabs(img2[0] / norm);
		for(size_t i = 0; i < px_count; i++) {
			img2[i] = img2[i] / norm;
			
			if(cabs(img2[i]) < min)
				min = cabs(img2[i]);
			if(cabs(img2[i]) > max)
				max = cabs(img2[i]);
		}
		/* img2 should now be real - normalize 0-255 and -- write output */
		printf("Save band %d; min = %f max = %f\n", band, min, max);
		const float range = max - min;
		for(size_t i = 0; i < px_count; i++) {
			/* A constant image has zero range; emit 0 instead of
			 * dividing by zero. */
			if(range > 0)
				out[i] = floor( ((cabs(img2[i]) - min) / range ) * 255.0 );
			else
				out[i] = 0;
		}

		fft2shift(out, dstDS->GetRasterYSize(), dstDS->GetRasterXSize());

 		dstDS->GetRasterBand(band)->RasterIO( GF_Write, 0, 0, 
					   dstDS->GetRasterXSize(), dstDS->GetRasterYSize(),
					   out, dstDS->GetRasterXSize(), dstDS->GetRasterYSize(),
					   GDT_Byte, 0, 0);
	}



	fftwf_destroy_plan(plan1);
	fftwf_destroy_plan(plan2);
	fftwf_destroy_plan(planI);
	fftwf_free(img1);
	fftwf_free(img2);
	fftwf_free(out);

	/* Flowing off the end of a non-void function is undefined behaviour;
	 * nothing is handed back to the caller, so return NULL explicitly. */
	return NULL;
}
Пример #18
0
/* Make sure directory `path` exists, creating it when it cannot be opened.
 * `label` is the name shown in the progress message.  Exits on failure. */
static void hiibubbles_ensure_dir(const char *path, const char *label) {
  DIR *existing=opendir(path);
  if(existing!=NULL) {
    closedir(existing); /* only probing for existence; do not leak the handle */
    return;
  }
  printf("Creating %s directory\n",label);
  if(mkdir(path,(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH))!=0) {
    printf("Error creating directory!\n");
    exit(1);
  }
}

/*
 * Generate ionization-fraction boxes for a range of redshifts.
 *
 * Usage: get_HIIbubbles base_dir
 *
 * For every redshift from global_Zminsim upward (step global_Dzsim) the
 * density and collapsed-mass boxes are read from base_dir, smoothed in
 * Fourier space with W_filter over a sequence of radii R (from
 * global_bubble_Rmax down, divided by bfactor each step), and cells whose
 * collapsed/total mass ratio reaches 1/global_eff seed ionized bubbles.
 * The resulting box is written to base_dir/Ionization and the mean neutral
 * fraction is appended to base_dir/Output_text_files.  Once the neutral
 * fraction reaches xHlim the remaining redshifts are written as all-zero
 * boxes.
 */
int main(int argc, char *argv[]) {
  
  char fname[300];
  FILE *fid;
  size_t elem;
  long int ii,ij,ik, ii_c, ij_c, ik_c, a, b, c;   
  long int ncells_1D;
  long int i,j,p,indi,indj,ind;
  int flag_bub,iz;
  double redshift,tmp;
  double kk;
  double bfactor; /* value by which to divide bubble size R */
  double neutral,*xHI;
  float *halo_map, *top_hat_r, *density_map,*bubblef, *bubble;  
  fftwf_complex *halo_map_c, *top_hat_c, *collapsed_mass_c, *density_map_c, *total_mass_c, *bubble_c;
  fftwf_plan pr2c1,pr2c2,pr2c3,pr2c4,pc2r1,pc2r2,pc2r3;
  double zmin,zmax,dz;
  double R;
  
  
  if(argc != 2) {
    printf("Generates boxes with ionization fraction for a range of redshifts\n");
    printf("usage: get_HIIbubbles base_dir\n");
    printf("base_dir contains simfast21.ini and directory structure\n");
    exit(1);
  }  
  get_Simfast21_params(argv[1]);
  zmin=global_Zminsim;
  zmax=global_Zmaxsim;
  dz=global_Dzsim;
  bfactor=pow(10.0,log10(global_bubble_Rmax/global_dx_smooth)/global_bubble_Nbins);
  printf("Bubble radius ratio (bfactor): %f\n", bfactor); fflush(0);
 
#ifdef _OMPTHREAD_
  omp_set_num_threads(global_nthreads);
  fftwf_init_threads();
  fftwf_plan_with_nthreads(global_nthreads);
  printf("Using %d threads\n",global_nthreads);fflush(0);
#endif
  /* Create output directories if needed.  snprintf bounds every path to
     the size of fname, since base_dir comes from the command line. */
  snprintf(fname,sizeof(fname),"%s/Ionization",argv[1]);
  hiibubbles_ensure_dir(fname,"Ionization");
  snprintf(fname,sizeof(fname),"%s/Output_text_files",argv[1]);
  hiibubbles_ensure_dir(fname,"Output_text_files");
  
  
  
 /* Memory allocation - we could do some of the FFTs inline... */
 /*************************************************************/
  
  /* density_map mass */ 
  if(!(density_map=(float *) fftwf_malloc(global_N3_smooth*sizeof(float)))) {
    printf("Problem1...\n");
    exit(1);
  }
  if(!(density_map_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) {
    printf("Problem2...\n");
    exit(1);
  }
  if(!(pr2c1=fftwf_plan_dft_r2c_3d(global_N_smooth, global_N_smooth, global_N_smooth, density_map, density_map_c, FFTWflag))) { 
    printf("Problem3...\n");
    exit(1);
  }  
  /* halo_map mass */ 
  if(!(halo_map=(float *) fftwf_malloc(global_N3_smooth*sizeof(float)))) {
    printf("Problem4...\n");
    exit(1);
  }
  if(!(halo_map_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) {
    printf("Problem5...\n");
    exit(1);
  }
  if(!(pr2c2=fftwf_plan_dft_r2c_3d(global_N_smooth, global_N_smooth, global_N_smooth, halo_map, halo_map_c, FFTWflag))) { 
    printf("Problem6...\n");
    exit(1);
  }  
  /* total mass (smoothed density; inverse transform lands in density_map) */
  if(!(total_mass_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) {
    printf("Problem7...\n");
    exit(1);
  }
  if(!(pc2r1=fftwf_plan_dft_c2r_3d(global_N_smooth, global_N_smooth, global_N_smooth, total_mass_c, density_map, FFTWflag))) { 
    printf("Problem8...\n");
    exit(1);
  }    
  /* collapsed mass (smoothed halos; inverse transform lands in halo_map) */
  if(!(collapsed_mass_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) {
    printf("Problem9...\n");
    exit(1);
  }
  if(!(pc2r2=fftwf_plan_dft_c2r_3d(global_N_smooth, global_N_smooth, global_N_smooth, collapsed_mass_c, halo_map, FFTWflag))) { 
    printf("Problem10...\n");
    exit(1);
  }  
  /* top hat window */
  if(!(top_hat_r=(float *) fftwf_malloc(global_N3_smooth*sizeof(float)))) {
    printf("Problem11...\n");
    exit(1);
  }
  if(!(top_hat_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) {
    printf("Problem12...\n");
    exit(1);
  }
  if(!(pr2c3=fftwf_plan_dft_r2c_3d(global_N_smooth, global_N_smooth, global_N_smooth, top_hat_r, top_hat_c, FFTWflag))) { 
    printf("Problem13...\n");
    exit(1);
  } 
  /* bubble boxes */
  if(!(bubble=(float *) fftwf_malloc(global_N3_smooth*sizeof(float)))) {
    printf("Problem14...\n");
    exit(1);
  }
  if(!(bubble_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) {
    printf("Problem15...\n");
    exit(1);
  }
  if(!(bubblef=(float *) malloc(global_N3_smooth*sizeof(float)))) {
    printf("Problem16...\n");
    exit(1);
  }
  if(!(pr2c4=fftwf_plan_dft_r2c_3d(global_N_smooth, global_N_smooth, global_N_smooth, bubble, bubble_c, FFTWflag))) { 
    printf("Problem17...\n");
    exit(1);
  } 
  if(!(pc2r3=fftwf_plan_dft_c2r_3d(global_N_smooth, global_N_smooth, global_N_smooth, bubble_c, bubble, FFTWflag))) { 
    printf("Problem18...\n");
    exit(1);
  }  
  if(!(xHI=(double *) malloc((int)((zmax-zmin)/dz+2)*sizeof(double)))) {
    printf("Problem19...\n");
    exit(1);
  }


  
  /****************************************************/
  /***************** Redshift cycle *******************/
  printf("Number of bubble sizes: %d\n",(int)((log(global_bubble_Rmax)-log(2.*global_dx_smooth))/log(bfactor)));
  printf("Redshift cycle...\n");fflush(0);
  iz=0;
  neutral=0.;
  for(redshift=zmin;redshift<(zmax+dz/10) && (neutral < xHlim);redshift+=dz){    
    printf("z = %f\n",redshift);fflush(0);

    snprintf(fname,sizeof(fname), "%s/delta/deltanl_z%.3f_N%ld_L%.1f.dat",argv[1],redshift,global_N_smooth,global_L/global_hubble); 
    fid=fopen(fname,"rb");
    if (fid==NULL) {printf("\nError reading deltanl file... Check path or if the file exists..."); exit (1);}
    elem=fread(density_map,sizeof(float),global_N3_smooth,fid);
    fclose(fid);
    /* A short read would leave part of the box holding stale data. */
    if(elem!=(size_t)global_N3_smooth) {
      printf("\nError reading %s: file is shorter than expected\n",fname);
      exit(1);
    }
    
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(global_N3_smooth, density_map,global_rho_m, global_dx_smooth,bubblef) private(i)
#endif
    for(i=0;i<(global_N3_smooth);i++){
      density_map[i]=(1.0+density_map[i])*global_rho_m*global_dx_smooth*global_dx_smooth*global_dx_smooth; /* total mass in 1 cell */
      bubblef[i]=0.0;
    }
    snprintf(fname,sizeof(fname), "%s/Halos/masscoll_z%.3f_N%ld_L%.1f.dat",argv[1],redshift,global_N_smooth,global_L/global_hubble); 
    fid=fopen(fname,"rb");
    if (fid==NULL) {printf("\nError reading %s file... Check path or if the file exists...",fname); exit (1);}
    elem=fread(halo_map,sizeof(float),global_N3_smooth,fid);
    fclose(fid);
    if(elem!=(size_t)global_N3_smooth) {
      printf("\nError reading %s: file is shorter than expected\n",fname);
      exit(1);
    }

    /* Quick fill of single cells before going to bubble cycle */
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(global_N3_smooth,halo_map,density_map,global_eff,bubblef) private(i,tmp)
#endif
    for(i=0;i<global_N3_smooth;i++) {
      if(halo_map[i]>0.) {
        if(density_map[i]>0.) 
          tmp=(double)halo_map[i]*global_eff/density_map[i];
        else tmp=1.0;
      }else tmp=0.;
      if(tmp>=1.0) bubblef[i]=1.0; else bubblef[i]=tmp;
    }
    /* FFT density and halos */
    fftwf_execute(pr2c1);    
    fftwf_execute(pr2c2);

    
    /************** going over the bubble sizes ****************/
    R=global_bubble_Rmax;    /* Maximum bubble size...*/
    while(R>=2*global_dx_smooth){ 
    
      printf("bubble radius R= %lf\n", R);fflush(0);    
      //      printf("Filtering halo and density boxes...\n");fflush(0);
    
      /* Multiply both spectra by the window W_filter(k*R).  indi/indj map
         the upper half of each axis to negative wavenumber indices. */
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(collapsed_mass_c,halo_map_c,total_mass_c,density_map_c,global_N_smooth,global_dk,R) private(i,j,p,indi,indj,kk)
#endif
      for(i=0;i<global_N_smooth;i++) {
        if(i>global_N_smooth/2) {
          indi=-(global_N_smooth-i);
        }else indi=i;      
        for(j=0;j<global_N_smooth;j++) {
          if(j>global_N_smooth/2) {
            indj=-(global_N_smooth-j);
          }else indj=j;
          for(p=0;p<=global_N_smooth/2;p++) {
            kk=global_dk*sqrt(indi*indi+indj*indj+p*p);
            total_mass_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]=density_map_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]*W_filter(kk*R);
            collapsed_mass_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]=halo_map_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]*W_filter(kk*R);
          }
        }
      }
      
      /* Inverse transforms overwrite density_map and halo_map with the
         smoothed fields. */
      fftwf_execute(pc2r1);     
      fftwf_execute(pc2r2); 
      
      flag_bub=0;
      
      //      printf("Starting to find and fill bubbles...\n");fflush(0);

      /* signal center of bubbles */      
      /* flag_bub is combined with an OR reduction so that threads do not
         write the shared flag concurrently. */
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(halo_map,density_map,bubble,global_N_smooth,global_eff) private(ii,ij,ik,ind) reduction(|:flag_bub)
#endif  
      for(ii=0;ii<global_N_smooth;ii++){
        for(ij=0;ij<global_N_smooth;ij++){
          for(ik=0;ik<global_N_smooth;ik++){
            ind=ii*global_N_smooth*global_N_smooth+ij*global_N_smooth+ik;
            if(halo_map[ind]>0.) {
              if(density_map[ind]>0.) { 
                if((double)halo_map[ind]/density_map[ind]>=1.0/global_eff) {
                  flag_bub=1;
                  bubble[ind]=1.0;
                }else bubble[ind]=0;
              }else {
                flag_bub=1;
                bubble[ind]=1.0;
              }
            }else bubble[ind]=0;
          }
        }
      }
    
      /* generate spherical window in real space for a given R */
      if(flag_bub>0){
        printf("Found bubble...\n");fflush(0);
        /* A cell is inside the window when it lies within R of the origin
           or of any of its periodic images at the box corners. */
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(top_hat_r,R,global_dx_smooth,global_N_smooth) private(i,j,p)
#endif  
        for(i=0;i<global_N_smooth;i++){
          for(j=0;j<global_N_smooth;j++){
            for(p=0;p<global_N_smooth;p++){ 
              if(sqrt(i*i+j*j+p*p)*global_dx_smooth<=R
                 || sqrt(i*i+(j-global_N_smooth)*(j-global_N_smooth)+p*p)*global_dx_smooth<=R
                 || sqrt(i*i+(j-global_N_smooth)*(j-global_N_smooth)+(p-global_N_smooth)*(p-global_N_smooth))*global_dx_smooth <=R
                 || sqrt(i*i+(p-global_N_smooth)*(p-global_N_smooth)+j*j)*global_dx_smooth<=R
                 || sqrt((i-global_N_smooth)*(i-global_N_smooth)+j*j+p*p)*global_dx_smooth<=R
                 || sqrt((i-global_N_smooth)*(i-global_N_smooth)+(j-global_N_smooth)*(j-global_N_smooth)+p*p)*global_dx_smooth<=R
                 || sqrt((i-global_N_smooth)*(i-global_N_smooth)+(j-global_N_smooth)*(j-global_N_smooth)+(p-global_N_smooth)*(p-global_N_smooth))*global_dx_smooth<=R
                 || sqrt((i-global_N_smooth)*(i-global_N_smooth)+j*j+(p-global_N_smooth)*(p-global_N_smooth))*global_dx_smooth<=R ) {
                top_hat_r[i*global_N_smooth*global_N_smooth+j*global_N_smooth+p]=1.0;
              }else top_hat_r[i*global_N_smooth*global_N_smooth+j*global_N_smooth+p]=0.0;
            }
          }
        }
        /* FFT bubble centers and window */ 
        fftwf_execute(pr2c3);
        fftwf_execute(pr2c4);
      
        /* Make convolution */
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(bubble_c,top_hat_c,global_N_smooth) private(i)
#endif
        for(i=0;i<global_N_smooth*global_N_smooth*(global_N_smooth/2+1);i++) {
          bubble_c[i]*=top_hat_c[i];
        }
        fftwf_execute(pc2r3);     
        
        /* after dividing by global_N3_smooth, values in bubble are between 0 (neutral)and global_N3_smooth */     
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(bubble,bubblef,global_N3_smooth) private(i)
#endif
        for (i=0; i<global_N3_smooth; i++){
          bubble[i]/=global_N3_smooth;
          if (bubble[i]>0.2) bubblef[i]=1.0; /* neutral should be zero */
        } 
        
      } /* ends filling out bubbles in box for R */
    
      R/=bfactor;  
    } /* ends R cycle */
 
    /* just to check smallest bubbles through older method */
    printf("Going to smaller R cycle...\n"); fflush(0);
    while(R>=global_dx_smooth){
  
      printf("bubble radius R= %lf\n", R);fflush(0); 
      flag_bub=0;
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(collapsed_mass_c,halo_map_c,total_mass_c,density_map_c,global_N_smooth,global_dx_smooth,global_dk,R) private(i,j,p,indi,indj,kk)
#endif
      for(i=0;i<global_N_smooth;i++) {
        if(i>global_N_smooth/2) {
          indi=-(global_N_smooth-i);
        }else indi=i;      
        for(j=0;j<global_N_smooth;j++) {
          if(j>global_N_smooth/2) {
            indj=-(global_N_smooth-j);
          }else indj=j;
          for(p=0;p<=global_N_smooth/2;p++) {
            kk=global_dk*sqrt(indi*indi+indj*indj+p*p);
            total_mass_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]=density_map_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]*W_filter(kk*R);
            collapsed_mass_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]=halo_map_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]*W_filter(kk*R);
          }
        }
      }
      fftwf_execute(pc2r1); 
      fftwf_execute(pc2r2); 
   
      /* fill smaller bubbles in box */
      ncells_1D=(long int)(R/global_dx_smooth);    
      //      printf("Starting to find and fill bubbles...\n");fflush(0);
      /* NOTE(review): different threads may store 1.0 into the same
         bubblef cell here; every writer stores the same value. */
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(halo_map,density_map,global_eff,global_N_smooth,global_dx_smooth,R,ncells_1D,bubblef) private(ii_c,ij_c,ik_c,ii,ij,ik,a,b,c,ind) reduction(|:flag_bub)
#endif
      for(ii_c=0;ii_c<global_N_smooth;ii_c++){
        for(ij_c=0;ij_c<global_N_smooth;ij_c++){
          for(ik_c=0;ik_c<global_N_smooth;ik_c++){
            ind=ii_c*global_N_smooth*global_N_smooth+ij_c*global_N_smooth+ik_c;
            if(halo_map[ind]>0.) {
              if(!(density_map[ind]>0.) || ((double)halo_map[ind]/density_map[ind]>=1.0/global_eff)) {
                flag_bub=1;
                for(ii=-(ncells_1D+1);ii<=ncells_1D+1;ii++){
                  a=check_borders(ii_c+ii,global_N_smooth);
                  for(ij=-(ncells_1D+1);ij<=ncells_1D+1;ij++){
                    if(sqrt(ii*ii+ij*ij)*global_dx_smooth <= R){
                      b=check_borders(ij_c+ij,global_N_smooth);
                      for(ik=-(ncells_1D+1);ik<=ncells_1D+1;ik++){
                        c=check_borders(ik_c+ik,global_N_smooth);
                        if(sqrt(ii*ii+ij*ij+ik*ik)*global_dx_smooth <= R){
                          bubblef[a*global_N_smooth*global_N_smooth+b*global_N_smooth+c]=1.0;
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      if(flag_bub>0){printf("Found bubble...\n");fflush(0);}

      R/=bfactor;
    } /* ends small bubbles R cycle */

    neutral=0.;
    for (i=0; i<global_N3_smooth; i++){
      neutral+=1.0-bubblef[i];
    }
    neutral/=global_N3_smooth;
    printf("neutral fraction=%lf\n",neutral);fflush(0);
    xHI[iz]=neutral;
    snprintf(fname,sizeof(fname), "%s/Ionization/xHII_z%.3f_eff%.2lf_N%ld_L%.1f.dat",argv[1],redshift,global_eff,global_N_smooth,global_L/global_hubble); 
    if((fid = fopen(fname,"wb"))==NULL) {
      printf("Error opening file:%s\n",fname);
      exit(1);
    }
    elem=fwrite(bubblef,sizeof(float),global_N3_smooth,fid);  
    fclose(fid);
    if(elem!=(size_t)global_N3_smooth) {
      printf("Error writing file:%s\n",fname);
      exit(1);
    }
    iz++;
  } /* ends redshift cycle */


  /* z cycle for neutral>=xHlim */
  while(redshift<(zmax+dz/10)) {
    printf("z(>%f) = %f\n",xHlim,redshift);fflush(0);
    xHI[iz]=1.0;
    snprintf(fname,sizeof(fname), "%s/Ionization/xHII_z%.3f_eff%.2lf_N%ld_L%.1f.dat",argv[1],redshift,global_eff,global_N_smooth,global_L/global_hubble); 
    if((fid = fopen(fname,"wb"))==NULL) {
      printf("Error opening file:%s\n",fname);
      exit(1);
    }
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(bubblef,global_N3_smooth) private(i)
#endif
    for(i=0;i<global_N3_smooth;i++) bubblef[i]=0.0;
    elem=fwrite(bubblef,sizeof(float),global_N3_smooth,fid);  
    fclose(fid);
    if(elem!=(size_t)global_N3_smooth) {
      printf("Error writing file:%s\n",fname);
      exit(1);
    }
    iz++;
    redshift+=dz;
  }    

  snprintf(fname,sizeof(fname), "%s/Output_text_files/zsim.txt",argv[1]);
  if((fid = fopen(fname,"a"))==NULL) {
    printf("Error opening file:%s\n",fname);
    exit(1);
  }
  for(redshift=zmax;redshift>(zmin-dz/10);redshift-=dz) fprintf(fid,"%f\n",redshift); /* first line should be highest redshift */
  fclose(fid);
  snprintf(fname,sizeof(fname), "%s/Output_text_files/x_HI_eff%.2lf_N%ld_L%.1f.dat",argv[1],global_eff,global_N_smooth,global_L/global_hubble);
  if((fid = fopen(fname,"a"))==NULL) {
    printf("Error opening file:%s\n",fname);
    exit(1);
  }
  for(i=iz-1;i>=0;i--) fprintf(fid,"%lf\n",xHI[i]); /* first line should be highest redshift */
  fclose(fid);
  
  /* Release the plans before the buffers they were created on. */
  fftwf_destroy_plan(pr2c1);
  fftwf_destroy_plan(pr2c2);
  fftwf_destroy_plan(pr2c3);
  fftwf_destroy_plan(pr2c4);
  fftwf_destroy_plan(pc2r1);
  fftwf_destroy_plan(pc2r2);
  fftwf_destroy_plan(pc2r3);

  free(xHI);
  free(bubblef);
  fftwf_free(top_hat_r);
  fftwf_free(top_hat_c);
  fftwf_free(collapsed_mass_c);
  fftwf_free(density_map);
  fftwf_free(density_map_c);
  fftwf_free(halo_map);
  fftwf_free(halo_map_c);
  fftwf_free(total_mass_c);
  fftwf_free(bubble);
  fftwf_free(bubble_c);


  exit(0);
}
Пример #19
0
/**
 * Constructs the FFT water state for a params.N x params.N grid.
 *
 * Allocates the height/displacement buffers, fills the initial spectrum
 * m_htilde0 from a fixed-seed random generator, precomputes the normalised
 * wave-vector component tables m_kx / m_kz, creates the in-place
 * complex-to-real FFTW plan on m_h and creates the floating-point GL texture.
 *
 * @param params simulation parameters; N (grid size) and L are read here and
 *               the whole structure is copied into m_params.
 */
GLFFTWater::GLFFTWater(GLFFTWaterParams &params) {
    // posix_memalign requires a power-of-two alignment that is a multiple of
    // sizeof(void*); the previous value of 4 is rejected with EINVAL on
    // 64-bit targets, leaving the pointers indeterminate. 16 is valid on both
    // 32- and 64-bit targets.
    const size_t kAlignment = 16;
    // (N+2)*N floats hold the N x (N/2+1) complex input of the in-place
    // c2r transform planned on m_h below.
    const size_t paddedBytes = sizeof(float)*(params.N+2)*(params.N);
    const size_t squareBytes = sizeof(float)*(params.N)*(params.N);

#ifdef _WIN32
    m_h = (float *)__mingw_aligned_malloc(paddedBytes, kAlignment);
    m_dx = (float *)__mingw_aligned_malloc(paddedBytes, kAlignment);
    m_dz = (float *)__mingw_aligned_malloc(paddedBytes, kAlignment);
    m_w = (float *)__mingw_aligned_malloc(squareBytes, kAlignment);
#else
    // On failure the pointer value is unspecified, so force it to null to
    // make the failure detectable.
    if(posix_memalign((void **)&m_h, kAlignment, paddedBytes) != 0) m_h = 0;
    if(posix_memalign((void **)&m_dx, kAlignment, paddedBytes) != 0) m_dx = 0;
    if(posix_memalign((void **)&m_dz, kAlignment, paddedBytes) != 0) m_dz = 0;
    if(posix_memalign((void **)&m_w, kAlignment, squareBytes) != 0) m_w = 0;
#endif
    if(!m_h || !m_dx || !m_dz || !m_w) {
        // Reported the same way as the FFTW threading failure below.
        cerr << "Error allocating aligned buffers." << endl;
    }

    m_htilde0 = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex)*(params.N)*(params.N));
    m_heightmap = new float3[(params.N)*(params.N)];
    m_params = params;

    // Fixed seed: the generated spectrum is the same on every run.
    // NOTE(review): both variate_generators take the engine by value, so
    // randn and randu each advance their own identically seeded copy of prng.
    std::tr1::mt19937 prng(1337);
    std::tr1::normal_distribution<float> normal;
    std::tr1::uniform_real<float> uniform;
    std::tr1::variate_generator<std::tr1::mt19937, std::tr1::normal_distribution<float> > randn(prng,normal);
    std::tr1::variate_generator<std::tr1::mt19937, std::tr1::uniform_real<float> > randu(prng,uniform);
    const float twoPiOverL = 2.f*3.141592654f / params.L;
    for(int i=0, k=0; i<params.N; i++) {
        float k_x = (-(params.N-1)*0.5f+i)*twoPiOverL;
        for(int j=0; j<params.N; j++, k++) {
            float k_y = (-(params.N-1)*0.5f+j)*twoPiOverL;
            float A = randn();
            float theta = randu()*2.f*3.141592654f;
            // NOTE(review): m_w[k] is handed to phillips() before anything in
            // this constructor writes it; presumably phillips() fills it
            // through a reference parameter -- confirm.
            float P = (k_x==0.f && k_y==0.0f) ? 0.f : sqrtf(phillips(k_x,k_y,m_w[k]));
            // NOTE(review): real and imaginary parts receive the same value.
            m_htilde0[k][0] = m_htilde0[k][1] = P*A*sinf(theta);
        }
    }

    m_kz = new float[params.N*(params.N / 2 + 1)];
    m_kx = new float[params.N*(params.N / 2 + 1)];

    // Unit-length (kx, kz) direction for every stored frequency bin.
    const int hN = m_params.N / 2;
    for(int y=0; y<m_params.N; y++) {
        float kz = (float) (y - hN);
        for(int x=0; x<=hN; x++) {
            float kx = (float) (x - hN);
            // At x == hN, y == hN the vector is zero; dividing would give
            // inf and 0*inf = NaN in both tables, so store 0 there instead.
            const float len = sqrtf(kx*kx+kz*kz);
            const float k = (len > 0.f) ? 1.f/len : 0.f;
            m_kz[y*(hN+1)+x] = kz*k;
            m_kx[y*(hN+1)+x] = kx*k;
        }
    }

    if(!fftwf_init_threads()) {
        cerr << "Error initializing multithreaded fft."  << endl;
    } else {
        fftwf_plan_with_nthreads(2);
    }

    // In-place transform: m_h is both the complex input and the real output.
    m_fftplan = fftwf_plan_dft_c2r_2d(m_params.N, m_params.N, (fftwf_complex *)m_h, m_h,
                                      FFTW_ESTIMATE);

    // N x N half-float RGB texture, allocated without initial data.
    glGenTextures(1, &m_texId);
    glBindTexture(GL_TEXTURE_2D, m_texId);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, params.N, params.N, 0, GL_RGB, GL_FLOAT, 0);
    glBindTexture(GL_TEXTURE_2D, 0);
}
Пример #20
0
/* Exits with status 1 when an snprintf result `len` shows that the path was
 * not fully written into a buffer of `cap` bytes. */
static void velfield_check_path(int len, size_t cap){
  if(len<0 || (size_t)len>=cap) {
    printf("Error: path too long\n");
    exit(1);
  }
}

/* Writes n floats from data to the binary file `path`.
 * Exits with status 1 if the file cannot be opened or the write is incomplete. */
static void velfield_write_box(const char *path, const float *data, size_t n){
  FILE *fid;
  size_t elem;

  if((fid=fopen(path,"wb"))==NULL){
    printf("\nThe file cannot be open\n");
    exit(1);
  }
  elem=fwrite(data,sizeof(float),n,fid);
  /* fclose is evaluated first so the file is always closed */
  if(fclose(fid)!=0 || elem!=n){
    printf("\nError writing file: %s\n",path);
    exit(1);
  }
}

/* Fills vel_c with (integer wavenumber along `axis`) * phi_c for every stored
 * k-space cell, and with 0 where |k| is 0.
 * axis: 0 -> first index, 1 -> second index, 2 -> third (half-stored) index.
 * Both arrays hold global_N_halo*global_N_halo*(global_N_halo/2+1) elements. */
static void velfield_fill_component(fftwf_complex *vel_c, fftwf_complex *phi_c, int axis){
  long int i,j,p;
  long int indi,indj,idx,kint;
  long int nhalf;
  double kk;

  nhalf=global_N_halo/2;
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(global_N_halo,global_dk,vel_c,phi_c,axis,nhalf) private(i,indi,j,indj,p,kk,idx,kint)
#endif
  for(i=0;i<global_N_halo;i++) {
    /* Large frequencies are equivalent to smaller negative ones */
    indi=(i>nhalf) ? -(global_N_halo-i) : i;
    for(j=0;j<global_N_halo;j++) {
      indj=(j>nhalf) ? -(global_N_halo-j) : j;
      for(p=0;p<=nhalf;p++) {
        idx=(i*global_N_halo+j)*(nhalf+1)+p;
        kk=global_dk*sqrt(indi*indi+indj*indj+p*p);
        if(kk>0){
          kint=(axis==0) ? indi : ((axis==1) ? indj : p);
          vel_c[idx]=kint*phi_c[idx];
        }else{
          vel_c[idx]=0;
        }
      }
    }
  }
}

/* get_velocityfield: reads the z=0 density box from <work_dir>/delta, and
 * writes the three velocity component boxes to <work_dir>/Velocity.
 *
 * Usage: get_velocityfield work_dir
 * Exits with status 0 on success and 1 on any error. */
int main(int argc, char **argv){

  FILE *fid;
  size_t elem, n3, nk;
  long int i,j,p;
  long int indi, indj, idx, nhalf;
  double kk;
  fftwf_complex  *map_vel_c;
  float *map;
  fftwf_complex *map_in_c;
  fftwf_plan pc2r;
  fftwf_plan pr2c;
  char fname[256];
  DIR* dir;
  int axis;
  const char axis_name[3]={'x','y','z'};


  if(argc != 2) {
    printf("Usage: get_velocityfield work_dir\n");
    printf("work_dir - directory containing simfast21.ini \n");
    exit(1);
  }

  get_Simfast21_params(argv[1]);

  nhalf=global_N_halo/2;
  n3=(size_t)global_N_halo*global_N_halo*global_N_halo;       /* real-space cells */
  nk=(size_t)global_N_halo*global_N_halo*(global_N_halo/2+1); /* stored k-space cells */

#ifdef _OMPTHREAD_
  omp_set_num_threads(global_nthreads);
  /* fftwf_init_threads returns zero on failure */
  if(!fftwf_init_threads()) {
    printf("Error initializing FFTW threads\n");
    exit(1);
  }
  fftwf_plan_with_nthreads(global_nthreads);
  printf("Using %d threads\n",global_nthreads);
#endif

  if(!(map=(float *) fftwf_malloc(n3*sizeof(float)))) {
    printf("Problem...\n");
    exit(1);
  }
  if(!(map_in_c = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nk))) {
    printf(" Out of memory...\n");
    exit(1);
  }
  if(!(map_vel_c = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nk))) {
    printf("Problem allocating memory for x velocity field in k-space...\n");
    exit(1);
  }
  /* Inverse (complex-to-real) transform used for each velocity component box */
  if(!(pc2r=fftwf_plan_dft_c2r_3d(global_N_halo, global_N_halo, global_N_halo, map_vel_c, map, FFTW_ESTIMATE))) {
    printf("Problem...\n");
    exit(1);
  }
  /* Forward (real-to-complex) transform of the density map */
  if(!(pr2c=fftwf_plan_dft_r2c_3d(global_N_halo, global_N_halo, global_N_halo, map , map_in_c, FFTW_ESTIMATE))) {
    printf("Problem...\n");
    exit(1);
  }

  /* Read the real-space density field.
   * snprintf bounds the write: the path is built from the user-supplied argv[1]. */
  velfield_check_path(snprintf(fname, sizeof(fname), "%s/delta/delta_z0_N%ld_L%d.dat", argv[1],global_N_halo,(int)(global_L)), sizeof(fname));
  fid=fopen(fname,"rb");
  if (fid==NULL) {
    printf("\n Density file path is not correct or the file does not exit...\n");
    exit (1);
  }
  elem=fread(map,sizeof(float),n3,fid);
  fclose(fid);
  if(elem!=n3) {
    /* a short read would leave part of the box uninitialised */
    printf("\n Error reading density file: %s\n",fname);
    exit(1);
  }

  /***********************************************************************************/
  /* Convert the density map from real to complex (k-space) */
  fftwf_execute(pr2c);

  /********************************************************************/
  /* Computing velocity fields */

  /* Create directory Velocity */
  velfield_check_path(snprintf(fname, sizeof(fname), "%s/Velocity",argv[1]), sizeof(fname));
  if((dir=opendir(fname))==NULL) {
    printf("Creating Velocity directory\n");
    if(mkdir(fname,(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH))!=0) {
      printf("Error creating directory!\n");
      exit(1);
    }
  }else{
    /* the directory already exists: release the handle opened for the probe */
    closedir(dir);
  }

  /********************************************************************/
  /* Scale the density modes once, in place: multiply by I*global_dk/kk^2 and
   * by dx^3/L^3 (normalisation, because the fftw3 library does not include dx
   * or global_dk in the integrals). The kk==0 mode is left untouched. */
#ifdef _OMPTHREAD_
#pragma omp parallel for shared(global_N_halo,global_dk,map_in_c,global_dx_halo,nhalf) private(i,indi,j,indj,p,kk,idx)
#endif
  for(i=0;i<global_N_halo;i++) {
    /* Large frequencies are equivalent to smaller negative ones */
    indi=(i>nhalf) ? -(global_N_halo-i) : i;
    for(j=0;j<global_N_halo;j++) {
      indj=(j>nhalf) ? -(global_N_halo-j) : j;
      for(p=0;p<=nhalf;p++) {
        kk=global_dk*sqrt(indi*indi+indj*indj+p*p);
        if(kk>0){
          idx=(i*global_N_halo+j)*(nhalf+1)+p;
          map_in_c[idx]=I*(global_dk)*(1/(kk*kk))*map_in_c[idx]*global_dx_halo*global_dx_halo*global_dx_halo/global_L3;
        }
      }
    }
  }

  /* One pass per component: build the k-space box, apply box_symmetriesf,
   * transform back to real space and write the result. */
  for(axis=0;axis<3;axis++) {
    printf("\nComputing v_%c field...\n",axis_name[axis]);fflush(0);

    velfield_fill_component(map_vel_c,map_in_c,axis);

    box_symmetriesf(map_vel_c,global_N_halo);

    /* Executes FFT */
    fftwf_execute(pc2r);

    printf("\nWriting v_%c field to file...\n",axis_name[axis]);fflush(0);

    velfield_check_path(snprintf(fname, sizeof(fname), "%s/Velocity/vel_%c_z0_N%ld_L%d.dat", argv[1],axis_name[axis],global_N_halo,(int)(global_L)), sizeof(fname));
    velfield_write_box(fname,map,n3);
  }

  fftwf_free(map);
  fftwf_free(map_in_c);
  fftwf_free(map_vel_c);
  fftwf_destroy_plan(pc2r);
  fftwf_destroy_plan(pr2c);


  exit(0);


}