Пример #1
0
/** discrete mpolar FFT */
static int mpolar_dft(fftw_complex *f_hat, int NN, fftw_complex *f, int T, int R, int m)
{
  ticks t0, t1;
  int j,k;                              /**< index for nodes and freqencies   */
  nfft_plan my_nfft_plan;               /**< plan for the nfft-2D             */

  double *x, *w;                        /**< knots and associated weights     */

  int N[2],n[2];
  int M;                                /**< number of knots                  */

  N[0]=NN; n[0]=2*N[0];                 /**< oversampling factor sigma=2      */
  N[1]=NN; n[1]=2*N[1];                 /**< oversampling factor sigma=2      */

  x = (double *)nfft_malloc(5*(T/2)*R*(sizeof(double)));
  if (x==NULL)
    return -1;

  w = (double *)nfft_malloc(5*(T*R)/4*(sizeof(double)));
  if (w==NULL)
    return -1;

  /** init two dimensional NFFT plan */
  M=mpolar_grid(T,R,x,w);
  nfft_init_guru(&my_nfft_plan, 2, N, M, n, m,
                  PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
                  FFTW_MEASURE| FFTW_DESTROY_INPUT);

  /** init nodes from mpolar grid*/
  for(j=0;j<my_nfft_plan.M_total;j++)
  {
    my_nfft_plan.x[2*j+0] = x[2*j+0];
    my_nfft_plan.x[2*j+1] = x[2*j+1];
  }

  /** init Fourier coefficients from given image */
  for(k=0;k<my_nfft_plan.N_total;k++)
    my_nfft_plan.f_hat[k] = f_hat[k];

  t0 = getticks();

  /** NDFT-2D */
  nfft_trafo_direct(&my_nfft_plan);

  t1 = getticks();
  GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0);

  /** copy result */
  for(j=0;j<my_nfft_plan.M_total;j++)
    f[j] = my_nfft_plan.f[j];

  /** finalise the plans and free the variables */
  nfft_finalize(&my_nfft_plan);
  nfft_free(x);
  nfft_free(w);

  return EXIT_SUCCESS;
}
Пример #2
0
static void simple_test_nfft_1d(void)
{
  nfft_plan p;
  double t;

  int N=14;
  int M=19;
  ticks t0, t1;

  /** init an one dimensional plan */
  nfft_init_1d(&p,N,M);

  /** init pseudo random nodes */
  nfft_vrand_shifted_unit_double(p.x,p.M_total);
 
  /** precompute psi, the entries of the matrix B */
  if(p.nfft_flags & PRE_ONE_PSI)
      nfft_precompute_one_psi(&p);

  /** init pseudo random Fourier coefficients and show them */
  nfft_vrand_unit_complex(p.f_hat,p.N_total);
  nfft_vpr_complex(p.f_hat,p.N_total,"given Fourier coefficients, vector f_hat");

  /** direct trafo and show the result */
  t0 = getticks();
  nfft_trafo_direct(&p);
  t1 = getticks();
  t = nfft_elapsed_seconds(t1,t0);
  nfft_vpr_complex(p.f,p.M_total,"ndft, vector f");
  printf(" took %e seconds.\n",t);

  /** approx. trafo and show the result */
  nfft_trafo(&p);
  nfft_vpr_complex(p.f,p.M_total,"nfft, vector f");

  /** approx. adjoint and show the result */
  nfft_adjoint_direct(&p);
  nfft_vpr_complex(p.f_hat,p.N_total,"adjoint ndft, vector f_hat");

  /** approx. adjoint and show the result */
  nfft_adjoint(&p);
  nfft_vpr_complex(p.f_hat,p.N_total,"adjoint nfft, vector f_hat");

  /** finalise the one dimensional plan */
  nfft_finalize(&p);
}
Пример #3
0
/**
 * Compares execution times for the fast and discrete Gauss transform.
 *
 * \arg ths The pointer to the fgt plan
 * \arg dgt If this parameter is set \ref dgt_trafo is called as well
 *
 * \author Stefan Kunis
 */
double fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
{
  int r;
  ticks t0, t1;
  double t_out;
  double tau=0.01;

  t_out=0;
  r=0;
  while(t_out<tau)
    {
      r++;
      t0 = getticks();
      if (dgt)
        dgt_trafo(ths);
      else
        fgt_trafo(ths);
      t1 = getticks();
      t_out += nfft_elapsed_seconds(t1,t0);
    }
  t_out/=r;

  return t_out;
}
Пример #4
0
int main(int argc, char **argv)
{
  int j,k,t;                                         /**< indices                 */
  int d;                                             /**< number of dimensions    */
  int N;                                             /**< number of source nodes  */
  int M;                                             /**< number of target nodes  */
  int n;                                             /**< expansion degree        */
  int m;                                             /**< cut-off parameter       */
  int p;                                             /**< degree of smoothness    */
  char *s;                                           /**< name of kernel          */
  double _Complex (*kernel)(double , int , const double *);  /**< kernel function         */
  double c;                                          /**< parameter for kernel    */
  fastsum_plan my_fastsum_plan;                      /**< plan for fast summation */
  double _Complex *direct;                                   /**< array for direct computation */
  ticks t0, t1;                                      /**< for time measurement    */
  double time;                                       /**< for time measurement    */
  double error=0.0;                                  /**< for error computation   */
  double eps_I;                                      /**< inner boundary          */
  double eps_B;                                      /**< outer boundary          */

  if (argc!=11)
  {
    printf("\nfastsum_test d N M n m p kernel c eps_I eps_B\n\n");
    printf("  d       dimension                 \n");
    printf("  N       number of source nodes    \n");
    printf("  M       number of target nodes    \n");
    printf("  n       expansion degree          \n");
    printf("  m       cut-off parameter         \n");
    printf("  p       degree of smoothness      \n");
    printf("  kernel  kernel function  (e.g., gaussian)\n");
    printf("  c       kernel parameter          \n");
    printf("  eps_I   inner boundary            \n");
    printf("  eps_B   outer boundary            \n\n");
    exit(-1);
  }
  else
  {
    d=atoi(argv[1]);
    N=atoi(argv[2]); c=1.0/pow((double)N,1.0/(double)d);
    M=atoi(argv[3]);
    n=atoi(argv[4]);
    m=atoi(argv[5]);
    p=atoi(argv[6]);
    s=argv[7];
    c=atof(argv[8]);
    eps_I=atof(argv[9]);
    eps_B=atof(argv[10]);
    if (strcmp(s,"gaussian")==0)
      kernel = gaussian;
    else if (strcmp(s,"multiquadric")==0)
      kernel = multiquadric;
    else if (strcmp(s,"inverse_multiquadric")==0)
      kernel = inverse_multiquadric;
    else if (strcmp(s,"logarithm")==0)
      kernel = logarithm;
    else if (strcmp(s,"thinplate_spline")==0)
      kernel = thinplate_spline;
    else if (strcmp(s,"one_over_square")==0)
      kernel = one_over_square;
    else if (strcmp(s,"one_over_modulus")==0)
      kernel = one_over_modulus;
    else if (strcmp(s,"one_over_x")==0)
      kernel = one_over_x;
    else if (strcmp(s,"inverse_multiquadric3")==0)
      kernel = inverse_multiquadric3;
    else if (strcmp(s,"sinc_kernel")==0)
      kernel = sinc_kernel;
    else if (strcmp(s,"cosc")==0)
      kernel = cosc;
    else if (strcmp(s,"cot")==0)
      kernel = kcot;
    else
    {
      s="multiquadric";
      kernel = multiquadric;
    }
  }
  printf("d=%d, N=%d, M=%d, n=%d, m=%d, p=%d, kernel=%s, c=%g, eps_I=%g, eps_B=%g \n",d,N,M,n,m,p,s,c,eps_I,eps_B);
#ifdef NF_KUB
  printf("nearfield correction using piecewise cubic Lagrange interpolation\n");
#elif defined(NF_QUADR)
  printf("nearfield correction using piecewise quadratic Lagrange interpolation\n");
#elif defined(NF_LIN)
  printf("nearfield correction using piecewise linear Lagrange interpolation\n");
#endif

#ifdef _OPENMP
  #pragma omp parallel
  {
    #pragma omp single
    {
      printf("nthreads=%d\n", omp_get_max_threads());
    }
  }

  fftw_init_threads();
#endif

  /** init d-dimensional fastsum plan */
  fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, 0, n, m, p, eps_I, eps_B);
  //fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, NEARFIELD_BOXES, n, m, p, eps_I, eps_B);

  if (my_fastsum_plan.flags & NEARFIELD_BOXES)
    printf("determination of nearfield candidates based on partitioning into boxes\n");
  else
    printf("determination of nearfield candidates based on search tree\n");

  /** init source knots in a d-ball with radius 0.25-eps_b/2 */
  k = 0;
  while (k < N)
  {
    double r_max = 0.25 - my_fastsum_plan.eps_B/2.0;
    double r2 = 0.0;

    for (j=0; j<d; j++)
      my_fastsum_plan.x[k*d+j] = 2.0 * r_max * (double)rand()/(double)RAND_MAX - r_max;

    for (j=0; j<d; j++)
      r2 += my_fastsum_plan.x[k*d+j] * my_fastsum_plan.x[k*d+j];

    if (r2 >= r_max * r_max)
      continue;

    k++;
  }

  for (k=0; k<N; k++)
  {
/*    double r=(0.25-my_fastsum_plan.eps_B/2.0)*pow((double)rand()/(double)RAND_MAX,1.0/d);
    my_fastsum_plan.x[k*d+0] = r;
    for (j=1; j<d; j++)
    {
      double phi=2.0*KPI*(double)rand()/(double)RAND_MAX;
      my_fastsum_plan.x[k*d+j] = r;
      for (t=0; t<j; t++)
      {
        my_fastsum_plan.x[k*d+t] *= cos(phi);
      }
      my_fastsum_plan.x[k*d+j] *= sin(phi);
    }
*/
    my_fastsum_plan.alpha[k] = (double)rand()/(double)RAND_MAX + _Complex_I*(double)rand()/(double)RAND_MAX;
  }

  /** init target knots in a d-ball with radius 0.25-eps_b/2 */
  k = 0;
  while (k < M)
  {
    double r_max = 0.25 - my_fastsum_plan.eps_B/2.0;
    double r2 = 0.0;

    for (j=0; j<d; j++)
      my_fastsum_plan.y[k*d+j] = 2.0 * r_max * (double)rand()/(double)RAND_MAX - r_max;

    for (j=0; j<d; j++)
      r2 += my_fastsum_plan.y[k*d+j] * my_fastsum_plan.y[k*d+j];

    if (r2 >= r_max * r_max)
      continue;

    k++;
  }
/*  for (k=0; k<M; k++)
  {
    double r=(0.25-my_fastsum_plan.eps_B/2.0)*pow((double)rand()/(double)RAND_MAX,1.0/d);
    my_fastsum_plan.y[k*d+0] = r;
    for (j=1; j<d; j++)
    {
      double phi=2.0*KPI*(double)rand()/(double)RAND_MAX;
      my_fastsum_plan.y[k*d+j] = r;
      for (t=0; t<j; t++)
      {
        my_fastsum_plan.y[k*d+t] *= cos(phi);
      }
      my_fastsum_plan.y[k*d+j] *= sin(phi);
    }
  } */

  /** direct computation */
  printf("direct computation: "); fflush(NULL);
  t0 = getticks();
  fastsum_exact(&my_fastsum_plan);
  t1 = getticks();
  time=nfft_elapsed_seconds(t1,t0);
  printf("%fsec\n",time);

  /** copy result */
  direct = (double _Complex *)nfft_malloc(my_fastsum_plan.M_total*(sizeof(double _Complex)));
  for (j=0; j<my_fastsum_plan.M_total; j++)
    direct[j]=my_fastsum_plan.f[j];

  /** precomputation */
  printf("pre-computation:    "); fflush(NULL);
  t0 = getticks();
  fastsum_precompute(&my_fastsum_plan);
  t1 = getticks();
  time=nfft_elapsed_seconds(t1,t0);
  printf("%fsec\n",time);

  /** fast computation */
  printf("fast computation:   "); fflush(NULL);
  t0 = getticks();
  fastsum_trafo(&my_fastsum_plan);
  t1 = getticks();
  time=nfft_elapsed_seconds(t1,t0);
  printf("%fsec\n",time);

  /** compute max error */
  error=0.0;
  for (j=0; j<my_fastsum_plan.M_total; j++)
  {
    if (cabs(direct[j]-my_fastsum_plan.f[j])/cabs(direct[j])>error)
      error=cabs(direct[j]-my_fastsum_plan.f[j])/cabs(direct[j]);
  }
  printf("max relative error: %e\n",error);

  /** finalise the plan */
  fastsum_finalize(&my_fastsum_plan);

  return 0;
}
Пример #5
0
void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags)
{
  nfsft_plan plan;
  int k, n;
//  int N, M, trafo_adjoint;
  int t, j;
  ticks t0, t1;
  double tt_total, tt_pre;

//  fscanf(infile, "%d %d %d", &trafo_adjoint, &N, &M);

/*#ifdef _OPENMP
  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
#else
  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
#endif*/

  /* precomputation (for fast polynomial transform) */
//  nfsft_precompute(N,1000.0,0U,0U);

  /* Initialize transform plan using the guru interface. All input and output
   * arrays are allocated by nfsft_init_guru(). Computations are performed with
   * respect to L^2-normalized spherical harmonics Y_k^n. The array of spherical
   * Fourier coefficients is preserved during transformations. The NFFT uses a
   * cut-off parameter m = 6. See the NFFT 3 manual for details.
   */
  nfsft_init_guru(&plan, N, M, nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F |
    NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
    PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE, m);

/*#ifdef _OPENMP
  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
#else
  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
#endif*/

  for (j=0; j < plan.M_total; j++)
  {
    for (t=0; t < 2; t++)
 //     fscanf(infile, "%lg", plan.x+2*j+t);
      plan.x[2*j+t] = x[2*j+t];
  }

  if (trafo_adjoint==0)
  {
    memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
    for (k = 0; k <= plan.N; k++)
      for (n = -k; n <= k; n++)
      {
//        fscanf(infile, "%lg %lg", &re, &im);
//        plan.f_hat[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
        plan.f_hat[NFSFT_INDEX(k,n,&plan)] = f_hat[NFSFT_INDEX(k,n,&plan)];
      }
  }
  else
  {
    for (j=0; j < plan.M_total; j++)
    {
//      fscanf(infile, "%lg %lg", &re, &im);
//      plan.f[j] = re + _Complex_I * im;
      plan.f[j] = f[j];
    }

    memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
  }

  t0 = getticks();
  /* precomputation (for NFFT, node-dependent) */
  nfsft_precompute_x(&plan);
  t1 = getticks();
  tt_pre = nfft_elapsed_seconds(t1,t0);

  if (trafo_adjoint==0)
    nfsft_trafo(&plan);
  else
    nfsft_adjoint(&plan);
  t1 = getticks();
  tt_total = nfft_elapsed_seconds(t1,t0);

#ifndef MEASURE_TIME
  plan.MEASURE_TIME_t[0] = 0.0;
  plan.MEASURE_TIME_t[2] = 0.0;
#endif

#ifndef MEASURE_TIME_FFTW
  plan.MEASURE_TIME_t[1] = 0.0;
#endif

  printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_pre, plan.MEASURE_TIME_t[0], plan.MEASURE_TIME_t[1], plan.MEASURE_TIME_t[2], tt_total-tt_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2], tt_total);

  /** finalise the one dimensional plan */
  nfsft_finalize(&plan);
}
Пример #6
0
/**
 * The main program.
 *
 * \param argc The number of arguments
 * \param argv An array containing the arguments as C-strings
 *
 * \return Exit code
 */
int main (int argc, char **argv)
{
  int tc;                      /**< The index variable for testcases          */
  int tc_max;                  /**< The number of testcases                   */

  int *NQ;                     /**< The array containing the cut-off degrees  *
                                    \f$N\f$                                   */
  int NQ_max;                  /**< The maximum cut-off degree \f$N\f$ for the*
                                    current testcase                          */
  int *SQ;                     /**< The array containing the grid size
                                    parameters                                */
  int SQ_max;                  /**< The maximum grid size parameter           */
  int *RQ;                     /**< The array containing the grid size
                                    parameters                                */
  int iNQ;                     /**< Index variable for cut-off degrees        */
  int iNQ_max;                 /**< The maximum number of cut-off degrees     */
  int testfunction;            /**< The testfunction                          */
  int N;                       /**< The test function's bandwidth             */

  int use_nfsft;               /**< Whether to use the NFSFT algorithm or not */
  int use_nfft;                /**< Whether to use the NFFT algorithm or not  */
  int use_fpt;                 /**< Whether to use the FPT algorithm or not   */
  int cutoff;                  /**< The current NFFT cut-off parameter        */
  double threshold;            /**< The current NFSFT threshold parameter     */

  int gridtype;                /**< The type of quadrature grid to be used    */
  int repetitions;             /**< The number of repetitions to be performed */
  int mode;                    /**< The number of repetitions to be performed */

  double *w;                   /**< The quadrature weights                    */
  double *x_grid;              /**< The quadrature nodes                      */
  double *x_compare;           /**< The quadrature nodes                      */
  double _Complex *f_grid;             /**< The reference function values             */
  double _Complex *f_compare;          /**< The function values                       */
  double _Complex *f;                  /**< The function values                       */
  double _Complex *f_hat_gen;         /**< The reference spherical Fourier           *
                                    coefficients                              */
  double _Complex *f_hat;              /**< The spherical Fourier coefficients        */

  nfsft_plan plan_adjoint;     /**< The NFSFT plan                            */
  nfsft_plan plan;             /**< The NFSFT plan                            */
  nfsft_plan plan_gen;         /**< The NFSFT plan                            */

  double t_avg;                /**< The average computation time needed       */
  double err_infty_avg;        /**< The average error \f$E_\infty\f$          */
  double err_2_avg;            /**< The average error \f$E_2\f$               */

  int i;                       /**< A loop variable                           */
  int k;                       /**< A loop variable                           */
  int n;                       /**< A loop variable                           */
  int d;                       /**< A loop variable                           */

  int m_theta;                 /**< The current number of different           *
                                    colatitudinal angles (for grids)          */
  int m_phi;                   /**< The current number of different           *
                                    longitudinal angles (for grids).          */
  int m_total;                 /**< The total number nodes.                   */
  double *theta;               /**< An array for saving the angles theta of a *
                                    grid                                      */
  double *phi;                 /**< An array for saving the angles phi of a   *
                                    grid                                      */
  fftw_plan fplan;             /**< An FFTW plan for computing Clenshaw-Curtis
                                    quadrature weights                        */
  //int nside;                   /**< The size parameter for the HEALPix grid   */
  int d2;
  int M;
  double theta_s;
  double x1,x2,x3,temp;
  int m_compare;
  nfsft_plan *plan_adjoint_ptr;
  nfsft_plan *plan_ptr;
  double *w_temp;
  int testmode;
  ticks t0, t1;

  /* Read the number of testcases. */
  fscanf(stdin,"testcases=%d\n",&tc_max);
  fprintf(stdout,"%d\n",tc_max);

  /* Process each testcase. */
  for (tc = 0; tc < tc_max; tc++)
  {
    /* Check if the fast transform shall be used. */
    fscanf(stdin,"nfsft=%d\n",&use_nfsft);
    fprintf(stdout,"%d\n",use_nfsft);
    if (use_nfsft != NO)
    {
      /* Check if the NFFT shall be used. */
      fscanf(stdin,"nfft=%d\n",&use_nfft);
      fprintf(stdout,"%d\n",use_nfsft);
      if (use_nfft != NO)
      {
        /* Read the cut-off parameter. */
        fscanf(stdin,"cutoff=%d\n",&cutoff);
        fprintf(stdout,"%d\n",cutoff);
      }
      else
      {
        /* TODO remove this */
        /* Initialize unused variable with dummy value. */
        cutoff = 1;
      }
      /* Check if the fast polynomial transform shall be used. */
      fscanf(stdin,"fpt=%d\n",&use_fpt);
      fprintf(stdout,"%d\n",use_fpt);
      if (use_fpt != NO)
      {
        /* Read the NFSFT threshold parameter. */
        fscanf(stdin,"threshold=%lf\n",&threshold);
        fprintf(stdout,"%lf\n",threshold);
      }
      else
      {
        /* TODO remove this */
        /* Initialize unused variable with dummy value. */
        threshold = 1000.0;
      }
    }
    else
    {
      /* TODO remove this */
      /* Set dummy values. */
      use_nfft = NO;
      use_fpt = NO;
      cutoff = 3;
      threshold = 1000.0;
    }

    /* Read the testmode type. */
    fscanf(stdin,"testmode=%d\n",&testmode);
    fprintf(stdout,"%d\n",testmode);

    if (testmode == ERROR)
    {
      /* Read the quadrature grid type. */
      fscanf(stdin,"gridtype=%d\n",&gridtype);
      fprintf(stdout,"%d\n",gridtype);

      /* Read the test function. */
      fscanf(stdin,"testfunction=%d\n",&testfunction);
      fprintf(stdout,"%d\n",testfunction);

      /* Check if random bandlimited function has been chosen. */
      if (testfunction == FUNCTION_RANDOM_BANDLIMITED)
      {
        /* Read the bandwidht. */
        fscanf(stdin,"bandlimit=%d\n",&N);
        fprintf(stdout,"%d\n",N);
      }
      else
      {
        N = 1;
      }

      /* Read the number of repetitions. */
      fscanf(stdin,"repetitions=%d\n",&repetitions);
      fprintf(stdout,"%d\n",repetitions);

      fscanf(stdin,"mode=%d\n",&mode);
      fprintf(stdout,"%d\n",mode);

      if (mode == RANDOM)
      {
        /* Read the bandwidht. */
        fscanf(stdin,"points=%d\n",&m_compare);
        fprintf(stdout,"%d\n",m_compare);
        x_compare = (double*) nfft_malloc(2*m_compare*sizeof(double));
        d = 0;
        while (d < m_compare)
        {
          x1 = 2.0*(((double)rand())/RAND_MAX) - 1.0;
          x2 = 2.0*(((double)rand())/RAND_MAX) - 1.0;
          x3 = 2.0*(((double)rand())/RAND_MAX) - 1.0;
          temp = sqrt(x1*x1+x2*x2+x3*x3);
          if (temp <= 1)
          {
            x_compare[2*d+1] = acos(x3);
            if (x_compare[2*d+1] == 0 || x_compare[2*d+1] == KPI)
            {
              x_compare[2*d] = 0.0;
            }
            else
            {
              x_compare[2*d] = atan2(x2/sin(x_compare[2*d+1]),x1/sin(x_compare[2*d+1]));
            }
            x_compare[2*d] *= 1.0/(2.0*KPI);
            x_compare[2*d+1] *= 1.0/(2.0*KPI);
            d++;
          }
        }
        f_compare = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex));
        f = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex));
      }
    }

    /* Initialize maximum cut-off degree and grid size parameter. */
    NQ_max = 0;
    SQ_max = 0;

    /* Read the number of cut-off degrees. */
    fscanf(stdin,"bandwidths=%d\n",&iNQ_max);
    fprintf(stdout,"%d\n",iNQ_max);

    /* Allocate memory for the cut-off degrees and grid size parameters. */
    NQ = (int*) nfft_malloc(iNQ_max*sizeof(int));
    SQ = (int*) nfft_malloc(iNQ_max*sizeof(int));
    if (testmode == TIMING)
    {
      RQ = (int*) nfft_malloc(iNQ_max*sizeof(int));
    }

    /* Read the cut-off degrees and grid size parameters. */
    for (iNQ = 0; iNQ < iNQ_max; iNQ++)
    {
      if (testmode == TIMING)
      {
        /* Read cut-off degree and grid size parameter. */
        fscanf(stdin,"%d %d %d\n",&NQ[iNQ],&SQ[iNQ],&RQ[iNQ]);
        fprintf(stdout,"%d %d %d\n",NQ[iNQ],SQ[iNQ],RQ[iNQ]);
        NQ_max = MAX(NQ_max,NQ[iNQ]);
        SQ_max = MAX(SQ_max,SQ[iNQ]);
      }
      else
      {
        /* Read cut-off degree and grid size parameter. */
        fscanf(stdin,"%d %d\n",&NQ[iNQ],&SQ[iNQ]);
        fprintf(stdout,"%d %d\n",NQ[iNQ],SQ[iNQ]);
        NQ_max = MAX(NQ_max,NQ[iNQ]);
        SQ_max = MAX(SQ_max,SQ[iNQ]);
      }
    }

    /* Do precomputation. */
    //fprintf(stderr,"NFSFT Precomputation\n");
    //fflush(stderr);
    nfsft_precompute(NQ_max, threshold,
      ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U)), 0U);

    if (testmode == TIMING)
    {
      /* Allocate data structures. */
      f_hat = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(NQ_max)*sizeof(double _Complex));
      f = (double _Complex*) nfft_malloc(SQ_max*sizeof(double _Complex));
      x_grid = (double*) nfft_malloc(2*SQ_max*sizeof(double));
      for (d = 0; d < SQ_max; d++)
      {
        f[d] = (((double)rand())/RAND_MAX)-0.5 + _Complex_I*((((double)rand())/RAND_MAX)-0.5);
        x_grid[2*d] = (((double)rand())/RAND_MAX) - 0.5;
        x_grid[2*d+1] = (((double)rand())/RAND_MAX) * 0.5;
      }
    }

    //fprintf(stderr,"Entering loop\n");
    //fflush(stderr);
    /* Process all cut-off bandwidths. */
    for (iNQ = 0; iNQ < iNQ_max; iNQ++)
    {
      if (testmode == TIMING)
      {
        nfsft_init_guru(&plan,NQ[iNQ],SQ[iNQ], NFSFT_NORMALIZED |
          ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) |
          ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)),
          PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFTW_MEASURE | FFT_OUT_OF_PLACE,
          cutoff);

        plan.f_hat = f_hat;
        plan.x = x_grid;
        plan.f = f;

        nfsft_precompute_x(&plan);

        t_avg = 0.0;

        for (i = 0; i < RQ[iNQ]; i++)
        {
          t0 = getticks();

          if (use_nfsft != NO)
          {
            /* Execute the adjoint NFSFT transformation. */
            nfsft_adjoint(&plan);
          }
          else
          {
            /* Execute the adjoint direct NDSFT transformation. */
            nfsft_adjoint_direct(&plan);
          }

          t1 = getticks();
          t_avg += nfft_elapsed_seconds(t1,t0);
        }

        t_avg = t_avg/((double)RQ[iNQ]);

        nfsft_finalize(&plan);

        fprintf(stdout,"%+le\n", t_avg);
        fprintf(stderr,"%d: %4d %4d %+le\n", tc, NQ[iNQ], SQ[iNQ], t_avg);
      }
      else
      {
        /* Determine the maximum number of nodes. */
        switch (gridtype)
        {
          case GRID_GAUSS_LEGENDRE:
            /* Calculate grid dimensions. */
            m_theta = SQ[iNQ] + 1;
            m_phi = 2*SQ[iNQ] + 2;
            m_total = m_theta*m_phi;
            break;
          case GRID_CLENSHAW_CURTIS:
            /* Calculate grid dimensions. */
            m_theta = 2*SQ[iNQ] + 1;
            m_phi = 2*SQ[iNQ] + 2;
            m_total = m_theta*m_phi;
            break;
          case GRID_HEALPIX:
            m_theta = 1;
            m_phi = 12*SQ[iNQ]*SQ[iNQ];
            m_total = m_theta * m_phi;
            //fprintf("HEALPix: SQ = %d, m_theta = %d, m_phi= %d, m");
            break;
          case GRID_EQUIDISTRIBUTION:
          case GRID_EQUIDISTRIBUTION_UNIFORM:
            m_theta = 2;
            //fprintf(stderr,"ed: m_theta = %d\n",m_theta);
            for (k = 1; k < SQ[iNQ]; k++)
            {
              m_theta += (int)floor((2*KPI)/acos((cos(KPI/(double)SQ[iNQ])-
                cos(k*KPI/(double)SQ[iNQ])*cos(k*KPI/(double)SQ[iNQ]))/
                (sin(k*KPI/(double)SQ[iNQ])*sin(k*KPI/(double)SQ[iNQ]))));
              //fprintf(stderr,"ed: m_theta = %d\n",m_theta);
            }
            //fprintf(stderr,"ed: m_theta final = %d\n",m_theta);
            m_phi = 1;
            m_total = m_theta * m_phi;
            break;
        }

        /* Allocate memory for data structures. */
        w = (double*) nfft_malloc(m_theta*sizeof(double));
        x_grid = (double*) nfft_malloc(2*m_total*sizeof(double));

        //fprintf(stderr,"NQ = %d\n",NQ[iNQ]);
        //fflush(stderr);
        switch (gridtype)
        {
          case GRID_GAUSS_LEGENDRE:
            //fprintf(stderr,"Generating grid for NQ = %d, SQ = %d\n",NQ[iNQ],SQ[iNQ]);
            //fflush(stderr);

            /* Read quadrature weights. */
            for (k = 0; k < m_theta; k++)
            {
              fscanf(stdin,"%le\n",&w[k]);
              w[k] *= (2.0*KPI)/((double)m_phi);
            }

            //fprintf(stderr,"Allocating theta and phi\n");
            //fflush(stderr);
            /* Allocate memory to store the grid's angles. */
            theta = (double*) nfft_malloc(m_theta*sizeof(double));
            phi = (double*) nfft_malloc(m_phi*sizeof(double));

            //if (theta == NULL || phi == NULL)
            //{
              //fprintf(stderr,"Couldn't allocate theta and phi\n");
              //fflush(stderr);
            //}


            /* Read angles theta. */
            for (k = 0; k < m_theta; k++)
            {
              fscanf(stdin,"%le\n",&theta[k]);
            }

            /* Generate the grid angles phi. */
            for (n = 0; n < m_phi; n++)
            {
              phi[n] = n/((double)m_phi);
              phi[n] -= ((phi[n]>=0.5)?(1.0):(0.0));
            }

            //fprintf(stderr,"Generating grid nodes\n");
            //fflush(stderr);

            /* Generate the grid's nodes. */
            d = 0;
            for (k = 0; k < m_theta; k++)
            {
              for (n = 0; n < m_phi; n++)
              {
                x_grid[2*d] = phi[n];
                x_grid[2*d+1] = theta[k];
                d++;
              }
            }

            //fprintf(stderr,"Freeing theta and phi\n");
            //fflush(stderr);
            /* Free the arrays for the grid's angles. */
            nfft_free(theta);
            nfft_free(phi);

            break;

          case GRID_CLENSHAW_CURTIS:

            /* Allocate memory to store the grid's angles. */
            theta = (double*) nfft_malloc(m_theta*sizeof(double));
            phi = (double*) nfft_malloc(m_phi*sizeof(double));

            /* Generate the grid angles theta. */
            for (k = 0; k < m_theta; k++)
            {
              theta[k] = k/((double)2*(m_theta-1));
            }

            /* Generate the grid angles phi. */
            for (n = 0; n < m_phi; n++)
            {
              phi[n] = n/((double)m_phi);
              phi[n] -= ((phi[n]>=0.5)?(1.0):(0.0));
            }

            /* Generate quadrature weights. */
            fplan = fftw_plan_r2r_1d(SQ[iNQ]+1, w, w, FFTW_REDFT00, 0U);
            for (k = 0; k < SQ[iNQ]+1; k++)
            {
              w[k] = -2.0/(4*k*k-1);
            }
            fftw_execute(fplan);
            w[0] *= 0.5;

            for (k = 0; k < SQ[iNQ]+1; k++)
            {
              w[k] *= (2.0*KPI)/((double)(m_theta-1)*m_phi);
              w[m_theta-1-k] = w[k];
            }
            fftw_destroy_plan(fplan);

            /* Generate the grid's nodes. */
            d = 0;
            for (k = 0; k < m_theta; k++)
            {
              for (n = 0; n < m_phi; n++)
              {
                x_grid[2*d] = phi[n];
                x_grid[2*d+1] = theta[k];
                d++;
              }
            }

            /* Free the arrays for the grid's angles. */
            nfft_free(theta);
            nfft_free(phi);

            break;

          case GRID_HEALPIX:

            d = 0;
            for (k = 1; k <= SQ[iNQ]-1; k++)
            {
              for (n = 0; n <= 4*k-1; n++)
              {
                x_grid[2*d+1] = 1 - (k*k)/((double)(3.0*SQ[iNQ]*SQ[iNQ]));
                x_grid[2*d] =  ((n+0.5)/(4*k));
                x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0);
                d++;
              }
            }

            d2 = d-1;

            for (k = SQ[iNQ]; k <= 3*SQ[iNQ]; k++)
            {
              for (n = 0; n <= 4*SQ[iNQ]-1; n++)
              {
                x_grid[2*d+1] = 2.0/(3*SQ[iNQ])*(2*SQ[iNQ]-k);
                x_grid[2*d] = (n+((k%2==0)?(0.5):(0.0)))/(4*SQ[iNQ]);
                x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0);
                d++;
              }
            }

            for (k = 1; k <= SQ[iNQ]-1; k++)
            {
              for (n = 0; n <= 4*k-1; n++)
              {
                x_grid[2*d+1] = -x_grid[2*d2+1];
                x_grid[2*d] =  x_grid[2*d2];
                d++;
                d2--;
              }
            }

            for (d = 0; d < m_total; d++)
            {
              x_grid[2*d+1] = acos(x_grid[2*d+1])/(2.0*KPI);
            }

            w[0] = (4.0*KPI)/(m_total);
            break;

          case GRID_EQUIDISTRIBUTION:
          case GRID_EQUIDISTRIBUTION_UNIFORM:
            /* TODO Compute the weights. */

            if (gridtype == GRID_EQUIDISTRIBUTION)
            {
              w_temp = (double*) nfft_malloc((SQ[iNQ]+1)*sizeof(double));
              fplan = fftw_plan_r2r_1d(SQ[iNQ]/2+1, w_temp, w_temp, FFTW_REDFT00, 0U);
              for (k = 0; k < SQ[iNQ]/2+1; k++)
              {
                w_temp[k] = -2.0/(4*k*k-1);
              }
              fftw_execute(fplan);
              w_temp[0] *= 0.5;

              for (k = 0; k < SQ[iNQ]/2+1; k++)
              {
                w_temp[k] *= (2.0*KPI)/((double)(SQ[iNQ]));
                w_temp[SQ[iNQ]-k] = w_temp[k];
              }
              fftw_destroy_plan(fplan);
            }

            d = 0;
            x_grid[2*d] = -0.5;
            x_grid[2*d+1] = 0.0;
            if (gridtype == GRID_EQUIDISTRIBUTION)
            {
              w[d] = w_temp[0];
            }
            else
            {
              w[d] = (4.0*KPI)/(m_total);
            }
            d = 1;
            x_grid[2*d] = -0.5;
            x_grid[2*d+1] = 0.5;
            if (gridtype == GRID_EQUIDISTRIBUTION)
            {
              w[d] = w_temp[SQ[iNQ]];
            }
            else
            {
              w[d] = (4.0*KPI)/(m_total);
            }
            d = 2;

            for (k = 1; k < SQ[iNQ]; k++)
            {
              theta_s = (double)k*KPI/(double)SQ[iNQ];
              M = (int)floor((2.0*KPI)/acos((cos(KPI/(double)SQ[iNQ])-
                cos(theta_s)*cos(theta_s))/(sin(theta_s)*sin(theta_s))));

              for (n = 0; n < M; n++)
              {
                x_grid[2*d] = (n + 0.5)/M;
                x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0);
                x_grid[2*d+1] = theta_s/(2.0*KPI);
                if (gridtype == GRID_EQUIDISTRIBUTION)
                {
                  w[d] = w_temp[k]/((double)(M));
                }
                else
                {
                  w[d] = (4.0*KPI)/(m_total);
                }
                d++;
              }
            }

            if (gridtype == GRID_EQUIDISTRIBUTION)
            {
              nfft_free(w_temp);
            }
            break;

          default:
            break;
        }

        /* Allocate memory for grid values. */
        f_grid = (double _Complex*) nfft_malloc(m_total*sizeof(double _Complex));

        if (mode == RANDOM)
        {
        }
        else
        {
          m_compare = m_total;
          f_compare = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex));
          x_compare = x_grid;
          f = f_grid;
        }

        //fprintf(stderr,"Generating test function\n");
        //fflush(stderr);
        switch (testfunction)
        {
          case FUNCTION_RANDOM_BANDLIMITED:
            f_hat_gen = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(N)*sizeof(double _Complex));
            //fprintf(stderr,"Generating random test function\n");
            //fflush(stderr);
            /* Generate random function samples by sampling a bandlimited
             * function. */
            nfsft_init_guru(&plan_gen,N,m_total, NFSFT_NORMALIZED |
              ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) |
              ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)),
              ((N>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT |
              FFT_OUT_OF_PLACE, cutoff);

            plan_gen.f_hat = f_hat_gen;
            plan_gen.x = x_grid;
            plan_gen.f = f_grid;

            nfsft_precompute_x(&plan_gen);

            for (k = 0; k < plan_gen.N_total; k++)
            {
              f_hat_gen[k] = 0.0;
            }

            for (k = 0; k <= N; k++)
            {
              for (n = -k; n <= k; n++)
              {
                f_hat_gen[NFSFT_INDEX(k,n,&plan_gen)] =
                (((double)rand())/RAND_MAX)-0.5 + _Complex_I*((((double)rand())/RAND_MAX)-0.5);
              }
            }

            if (use_nfsft != NO)
            {
              /* Execute the NFSFT transformation. */
              nfsft_trafo(&plan_gen);
            }
            else
            {
              /* Execute the direct NDSFT transformation. */
              nfsft_trafo_direct(&plan_gen);
            }

            nfsft_finalize(&plan_gen);

            if (mode == RANDOM)
            {
              nfsft_init_guru(&plan_gen,N,m_compare, NFSFT_NORMALIZED |
                ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) |
                ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)),
                ((N>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT |
                FFT_OUT_OF_PLACE, cutoff);

              plan_gen.f_hat = f_hat_gen;
              plan_gen.x = x_compare;
              plan_gen.f = f_compare;

              nfsft_precompute_x(&plan_gen);

              if (use_nfsft != NO)
              {
                /* Execute the NFSFT transformation. */
                nfsft_trafo(&plan_gen);
              }
              else
              {
                /* Execute the direct NDSFT transformation. */
                nfsft_trafo_direct(&plan_gen);
              }

              nfsft_finalize(&plan_gen);
            }
            else
            {
              memcpy(f_compare,f_grid,m_total*sizeof(double _Complex));
            }

            nfft_free(f_hat_gen);

            break;

          case FUNCTION_F1:
            for (d = 0; d < m_total; d++)
            {
              x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI);
              x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI);
              x3 = cos(x_grid[2*d+1]*2.0*KPI);
              f_grid[d] = x1*x2*x3;
            }
            if (mode == RANDOM)
            {
              for (d = 0; d < m_compare; d++)
              {
                x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI);
                x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI);
                x3 = cos(x_compare[2*d+1]*2.0*KPI);
                f_compare[d] = x1*x2*x3;
              }
            }
            else
            {
              memcpy(f_compare,f_grid,m_total*sizeof(double _Complex));
            }
            break;
          case FUNCTION_F2:
            for (d = 0; d < m_total; d++)
            {
              x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI);
              x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI);
              x3 = cos(x_grid[2*d+1]*2.0*KPI);
              f_grid[d] = 0.1*exp(x1+x2+x3);
            }
            if (mode == RANDOM)
            {
              for (d = 0; d < m_compare; d++)
              {
                x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI);
                x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI);
                x3 = cos(x_compare[2*d+1]*2.0*KPI);
                f_compare[d] = 0.1*exp(x1+x2+x3);
              }
            }
            else
            {
              memcpy(f_compare,f_grid,m_total*sizeof(double _Complex));
            }
            break;
          case FUNCTION_F3:
            for (d = 0; d < m_total; d++)
            {
              x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI);
              x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI);
              x3 = cos(x_grid[2*d+1]*2.0*KPI);
              temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3);
              f_grid[d] = 0.1*temp;
            }
            if (mode == RANDOM)
            {
              for (d = 0; d < m_compare; d++)
              {
                x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI);
                x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI);
                x3 = cos(x_compare[2*d+1]*2.0*KPI);
                temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3);
                f_compare[d] = 0.1*temp;
              }
            }
            else
            {
              memcpy(f_compare,f_grid,m_total*sizeof(double _Complex));
            }
            break;
          case FUNCTION_F4:
            for (d = 0; d < m_total; d++)
            {
              x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI);
              x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI);
              x3 = cos(x_grid[2*d+1]*2.0*KPI);
              temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3);
              f_grid[d] = 1.0/(temp);
            }
            if (mode == RANDOM)
            {
              for (d = 0; d < m_compare; d++)
              {
                x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI);
                x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI);
                x3 = cos(x_compare[2*d+1]*2.0*KPI);
                temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3);
                f_compare[d] = 1.0/(temp);
              }
            }
            else
            {
              memcpy(f_compare,f_grid,m_total*sizeof(double _Complex));
            }
            break;
          case FUNCTION_F5:
            for (d = 0; d < m_total; d++)
            {
              x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI);
              x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI);
              x3 = cos(x_grid[2*d+1]*2.0*KPI);
              temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3);
              f_grid[d] = 0.1*sin(1+temp)*sin(1+temp);
            }
            if (mode == RANDOM)
            {
              for (d = 0; d < m_compare; d++)
              {
                x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI);
                x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI);
                x3 = cos(x_compare[2*d+1]*2.0*KPI);
                temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3);
                f_compare[d] = 0.1*sin(1+temp)*sin(1+temp);
              }
            }
            else
            {
              memcpy(f_compare,f_grid,m_total*sizeof(double _Complex));
            }
            break;
          case FUNCTION_F6:
            for (d = 0; d < m_total; d++)
            {
              if (x_grid[2*d+1] <= 0.25)
              {
                f_grid[d] = 1.0;
              }
              else
              {
                f_grid[d] = 1.0/(sqrt(1+3*cos(2.0*KPI*x_grid[2*d+1])*cos(2.0*KPI*x_grid[2*d+1])));
              }
            }
            if (mode == RANDOM)
            {
              for (d = 0; d < m_compare; d++)
              {
                if (x_compare[2*d+1] <= 0.25)
                {
                  f_compare[d] = 1.0;
                }
                else
                {
                  f_compare[d] = 1.0/(sqrt(1+3*cos(2.0*KPI*x_compare[2*d+1])*cos(2.0*KPI*x_compare[2*d+1])));
                }
              }
            }
            else
            {
              memcpy(f_compare,f_grid,m_total*sizeof(double _Complex));
            }
            break;
          default:
            //fprintf(stderr,"Generating one function\n");
            //fflush(stderr);
            for (d = 0; d < m_total; d++)
            {
              f_grid[d] = 1.0;
            }
            if (mode == RANDOM)
            {
              for (d = 0; d < m_compare; d++)
              {
                f_compare[d] = 1.0;
              }
            }
            else
            {
              memcpy(f_compare,f_grid,m_total*sizeof(double _Complex));
            }
            break;
        }

        //fprintf(stderr,"Initializing trafo\n");
        //fflush(stderr);
        /* Init transform plan. */
        nfsft_init_guru(&plan_adjoint,NQ[iNQ],m_total, NFSFT_NORMALIZED |
          ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) |
          ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)),
          ((NQ[iNQ]>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT |
          FFT_OUT_OF_PLACE, cutoff);

        plan_adjoint_ptr = &plan_adjoint;

        if (mode == RANDOM)
        {
          nfsft_init_guru(&plan,NQ[iNQ],m_compare, NFSFT_NORMALIZED |
            ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) |
            ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)),
            ((NQ[iNQ]>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT |
            FFT_OUT_OF_PLACE, cutoff);
          plan_ptr = &plan;
        }
        else
        {
          plan_ptr = &plan_adjoint;
        }

        f_hat = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(NQ[iNQ])*sizeof(double _Complex));

        plan_adjoint_ptr->f_hat = f_hat;
        plan_adjoint_ptr->x = x_grid;
        plan_adjoint_ptr->f = f_grid;

        plan_ptr->f_hat = f_hat;
        plan_ptr->x = x_compare;
        plan_ptr->f = f;

        //fprintf(stderr,"Precomputing for x\n");
        //fflush(stderr);
        nfsft_precompute_x(plan_adjoint_ptr);
        if (plan_adjoint_ptr != plan_ptr)
        {
          nfsft_precompute_x(plan_ptr);
        }

        /* Initialize cumulative time variable. */
        t_avg = 0.0;
        err_infty_avg = 0.0;
        err_2_avg = 0.0;

        /* Cycle through all runs. */
        for (i = 0; i < 1/*repetitions*/; i++)
        {
          //fprintf(stderr,"Copying original values\n");
          //fflush(stderr);
          /* Copy exact funtion values to working array. */
          //memcpy(f,f_grid,m_total*sizeof(double _Complex));

          /* Initialize time measurement. */
          t0 = getticks();

          //fprintf(stderr,"Multiplying with quadrature weights\n");
          //fflush(stderr);
          /* Multiplication with the quadrature weights. */
          /*fprintf(stderr,"\n");*/
          d = 0;
          for (k = 0; k < m_theta; k++)
          {
            for (n = 0; n < m_phi; n++)
            {
              /*fprintf(stderr,"f_ref[%d] = %le + I*%le,\t f[%d] = %le + I*%le,  \t w[%d] = %le\n",
              d,creal(f_ref[d]),cimag(f_ref[d]),d,creal(f[d]),cimag(f[d]),k,
              w[k]);*/
              f_grid[d] *= w[k];
              d++;
            }
          }

          t1 = getticks();
          t_avg += nfft_elapsed_seconds(t1,t0);

          nfft_free(w);

          t0 = getticks();

          /*fprintf(stderr,"\n");
          d = 0;
          for (d = 0; d < grid_total; d++)
          {
            fprintf(stderr,"f[%d] = %le + I*%le, theta[%d] = %le, phi[%d] = %le\n",
                    d,creal(f[d]),cimag(f[d]),d,x[2*d+1],d,x[2*d]);
          }*/

          //fprintf(stderr,"Executing adjoint\n");
          //fflush(stderr);
          /* Check if the fast NFSFT algorithm shall be tested. */
          if (use_nfsft != NO)
          {
            /* Execute the adjoint NFSFT transformation. */
            nfsft_adjoint(plan_adjoint_ptr);
          }
          else
          {
            /* Execute the adjoint direct NDSFT transformation. */
            nfsft_adjoint_direct(plan_adjoint_ptr);
          }

          /* Multiplication with the Fourier-Legendre coefficients. */
          /*for (k = 0; k <= m[im]; k++)
          {
            for (n = -k; n <= k; n++)
            {
              fprintf(stderr,"f_hat[%d,%d] = %le\t + I*%le\n",k,n,
                      creal(f_hat[NFSFT_INDEX(k,n,&plan_adjoint)]),
                      cimag(f_hat[NFSFT_INDEX(k,n,&plan_adjoint)]));
            }
          }*/

          //fprintf(stderr,"Executing trafo\n");
          //fflush(stderr);
          if (use_nfsft != NO)
          {
            /* Execute the NFSFT transformation. */
            nfsft_trafo(plan_ptr);
          }
          else
          {
            /* Execute the direct NDSFT transformation. */
            nfsft_trafo_direct(plan_ptr);
          }

          t1 = getticks();
          t_avg += nfft_elapsed_seconds(t1,t0);

          //fprintf(stderr,"Finalizing\n");
          //fflush(stderr);
          /* Finalize the NFSFT plans */
          nfsft_finalize(plan_adjoint_ptr);
          if (plan_ptr != plan_adjoint_ptr)
          {
            nfsft_finalize(plan_ptr);
          }

          /* Free data arrays. */
          nfft_free(f_hat);
          nfft_free(x_grid);

          err_infty_avg += X(error_l_infty_complex)(f, f_compare, m_compare);
          err_2_avg += X(error_l_2_complex)(f, f_compare, m_compare);

          nfft_free(f_grid);

          if (mode == RANDOM)
          {
          }
          else
          {
            nfft_free(f_compare);
          }

          /*for (d = 0; d < m_total; d++)
          {
            fprintf(stderr,"f_ref[%d] = %le + I*%le,\t f[%d] = %le + I*%le\n",
              d,creal(f_ref[d]),cimag(f_ref[d]),d,creal(f[d]),cimag(f[d]));
          }*/
        }

        //fprintf(stderr,"Calculating the error\n");
        //fflush(stderr);
        /* Calculate average time needed. */
        t_avg = t_avg/((double)repetitions);

        /* Calculate the average error. */
        err_infty_avg = err_infty_avg/((double)repetitions);

        /* Calculate the average error. */
        err_2_avg = err_2_avg/((double)repetitions);

        /* Print out the error measurements. */
        fprintf(stdout,"%+le %+le %+le\n", t_avg, err_infty_avg, err_2_avg);
        fprintf(stderr,"%d: %4d %4d %+le %+le %+le\n", tc, NQ[iNQ], SQ[iNQ],
          t_avg, err_infty_avg, err_2_avg);
      }
    } /* for (im = 0; im < im_max; im++) - Process all cut-off
       * bandwidths.*/
    fprintf(stderr,"\n");

    /* Delete precomputed data. */
    nfsft_forget();

    /* Free memory for cut-off bandwidths and grid size parameters. */
    nfft_free(NQ);
    nfft_free(SQ);
    if (testmode == TIMING)
    {
      nfft_free(RQ);
    }

    if (mode == RANDOM)
    {
      nfft_free(x_compare);
      nfft_free(f_compare);
      nfft_free(f);
    }

    if (testmode == TIMING)
    {
      /* Allocate data structures. */
      nfft_free(f_hat);
      nfft_free(f);
      nfft_free(x_grid);
    }

  } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */

  /* Return exit code for successful run. */
  return EXIT_SUCCESS;
}
Пример #7
0
/** fast NFFT-based summation */
void fastsum_trafo(fastsum_plan *ths)
{
  int j,k,t;
  ticks t0, t1;

  ths->MEASURE_TIME_t[4] = 0.0; 
  ths->MEASURE_TIME_t[5] = 0.0;
  ths->MEASURE_TIME_t[6] = 0.0;
  ths->MEASURE_TIME_t[7] = 0.0;

#ifdef MEASURE_TIME
  t0 = getticks();
#endif
  /** first step of algorithm */
  nfft_adjoint(&(ths->mv1));
#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[4] += nfft_elapsed_seconds(t1,t0);
#endif


#ifdef MEASURE_TIME
  t0 = getticks();
#endif
  /** second step of algorithm */
  #pragma omp parallel for default(shared) private(k)
  for (k=0; k<ths->mv2.N_total; k++)
    ths->mv2.f_hat[k] = ths->b[k] * ths->mv1.f_hat[k];
#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[5] += nfft_elapsed_seconds(t1,t0);
#endif


#ifdef MEASURE_TIME
  t0 = getticks();
#endif
  /** third step of algorithm */
  nfft_trafo(&(ths->mv2));
#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[6] += nfft_elapsed_seconds(t1,t0);
#endif


#ifdef MEASURE_TIME
  t0 = getticks();
#endif
  /** add near field */
  #pragma omp parallel for default(shared) private(j,k,t)
  for (j=0; j<ths->M_total; j++)
  {
    double ymin[ths->d], ymax[ths->d]; /** limits for d-dimensional near field box */

    if (ths->flags & NEARFIELD_BOXES)
    {
      ths->f[j] = ths->mv2.f[j] + SearchBox(ths->y + ths->d*j, ths);
    }
    else
    {
      for (t=0; t<ths->d; t++)
      {
        ymin[t] = ths->y[ths->d*j+t] - ths->eps_I;
        ymax[t] = ths->y[ths->d*j+t] + ths->eps_I;
      }
      ths->f[j] = ths->mv2.f[j] + SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax, ths->N_total, ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags);
    }
    /* ths->f[j] = ths->mv2.f[j]; */
    /* ths->f[j] = SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax, ths->N_total, ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags); */
  }

#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[7] += nfft_elapsed_seconds(t1,t0);
#endif
}
Пример #8
0
/** precomputation for fastsum */
void fastsum_precompute(fastsum_plan *ths)
{
  int j,k,t;
  int n_total;
  ticks t0, t1;

  ths->MEASURE_TIME_t[0] = 0.0;
  ths->MEASURE_TIME_t[1] = 0.0;
  ths->MEASURE_TIME_t[2] = 0.0;
  ths->MEASURE_TIME_t[3] = 0.0;

#ifdef MEASURE_TIME
  t0 = getticks();
#endif


  if (ths->flags & NEARFIELD_BOXES)
  {
    BuildBox(ths);
  }
  else
  {
    /** sort source knots */
    BuildTree(ths->d,0,ths->x,ths->alpha,ths->N_total);
  }

#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[3] += nfft_elapsed_seconds(t1,t0);
#endif


#ifdef MEASURE_TIME
  t0 = getticks();
#endif
  /** precompute spline values for near field*/
  if (!(ths->flags & EXACT_NEARFIELD))
  {
    if (ths->d==1)
      #pragma omp parallel for default(shared) private(k)
      for (k=-ths->Ad/2-2; k <= ths->Ad/2+2; k++)
        ths->Add[k+ths->Ad/2+2] = regkern1(ths->k, ths->eps_I*(double)k/ths->Ad*2, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B);
    else
      #pragma omp parallel for default(shared) private(k)
      for (k=0; k <= ths->Ad+2; k++)
        ths->Add[k] = regkern3(ths->k, ths->eps_I*(double)k/ths->Ad, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B);
  }
#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
#endif


#ifdef MEASURE_TIME
  t0 = getticks();
#endif
  /** init NFFT plan for transposed transform in first step*/
  for (k=0; k<ths->mv1.M_total; k++)
    for (t=0; t<ths->mv1.d; t++)
      ths->mv1.x[ths->mv1.d*k+t] = - ths->x[ths->mv1.d*k+t];  /* note the factor -1 for transposed transform instead of adjoint*/

  /** precompute psi, the entries of the matrix B */
  if(ths->mv1.nfft_flags & PRE_LIN_PSI)
    nfft_precompute_lin_psi(&(ths->mv1));

  if(ths->mv1.nfft_flags & PRE_PSI)
    nfft_precompute_psi(&(ths->mv1));

  if(ths->mv1.nfft_flags & PRE_FULL_PSI)
    nfft_precompute_full_psi(&(ths->mv1));
#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[1] += nfft_elapsed_seconds(t1,t0);
#endif

  /** init Fourier coefficients */
  for(k=0; k<ths->mv1.M_total;k++)
    ths->mv1.f[k] = ths->alpha[k];

#ifdef MEASURE_TIME
  t0 = getticks();
#endif
  /** init NFFT plan for transform in third step*/
  for (j=0; j<ths->mv2.M_total; j++)
    for (t=0; t<ths->mv2.d; t++)
      ths->mv2.x[ths->mv2.d*j+t] = - ths->y[ths->mv2.d*j+t];  /* note the factor -1 for conjugated transform instead of standard*/

  /** precompute psi, the entries of the matrix B */
  if(ths->mv2.nfft_flags & PRE_LIN_PSI)
    nfft_precompute_lin_psi(&(ths->mv2));

  if(ths->mv2.nfft_flags & PRE_PSI)
    nfft_precompute_psi(&(ths->mv2));

  if(ths->mv2.nfft_flags & PRE_FULL_PSI)
    nfft_precompute_full_psi(&(ths->mv2));
#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[2] += nfft_elapsed_seconds(t1,t0);
#endif


#ifdef MEASURE_TIME
  t0 = getticks();
#endif
  /** precompute Fourier coefficients of regularised kernel*/
  n_total = 1;
  for (t=0; t<ths->d; t++)
    n_total *= ths->n;

  #pragma omp parallel for default(shared) private(j,k,t)
  for (j=0; j<n_total; j++)
  {
    if (ths->d==1)
      ths->b[j] = regkern1(ths->k, (double)j / (ths->n) - 0.5, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B)/n_total;
    else
    {
      k=j;
      ths->b[j]=0.0;
      for (t=0; t<ths->d; t++)
      {
        ths->b[j] += ((double)(k % (ths->n)) / (ths->n) - 0.5) * ((double)(k % (ths->n)) / (ths->n) - 0.5);
        k = k / (ths->n);
      }
      ths->b[j] = regkern3(ths->k, sqrt(ths->b[j]), ths->p, ths->kernel_param, ths->eps_I, ths->eps_B)/n_total;
    }
  }

  nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);
  fftw_execute(ths->fft_plan);
  nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);
#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
#endif
}
Пример #9
0
static void simple_test_nfft_2d(void)
{
  int K,N[2],n[2],M;
  double t;
  ticks t0, t1;

  nfft_plan p;

  N[0]=32; n[0]=64;
  N[1]=14; n[1]=32;
  M=N[0]*N[1];
  K=16;

  t0 = getticks();
  /** init a two dimensional plan */
  nfft_init_guru(&p, 2, N, M, n, 7,
		 PRE_PHI_HUT| PRE_FULL_PSI| MALLOC_F_HAT| MALLOC_X| MALLOC_F |
		 FFTW_INIT| FFT_OUT_OF_PLACE,
		 FFTW_ESTIMATE| FFTW_DESTROY_INPUT);

  /** init pseudo random nodes */
  nfft_vrand_shifted_unit_double(p.x,p.d*p.M_total);

  /** precompute psi, the entries of the matrix B */
  if(p.nfft_flags & PRE_ONE_PSI)
    nfft_precompute_one_psi(&p);

  /** init pseudo random Fourier coefficients and show them */
  nfft_vrand_unit_complex(p.f_hat,p.N_total);

  t1 = getticks();
  t = nfft_elapsed_seconds(t1,t0);
  nfft_vpr_complex(p.f_hat,K,
              "given Fourier coefficients, vector f_hat (first few entries)");
  printf(" ... initialisation took %e seconds.\n",t);

  /** direct trafo and show the result */
  t0 = getticks();
  nfft_trafo_direct(&p);
  t1 = getticks();
  t = nfft_elapsed_seconds(t1,t0);
  nfft_vpr_complex(p.f,K,"ndft, vector f (first few entries)");
  printf(" took %e seconds.\n",t);

  /** approx. trafo and show the result */
  t0 = getticks();
  nfft_trafo(&p);
  t1 = getticks();
  t = nfft_elapsed_seconds(t1,t0);
  nfft_vpr_complex(p.f,K,"nfft, vector f (first few entries)");
  printf(" took %e seconds.\n",t);

  /** direct adjoint and show the result */
  t0 = getticks();
  nfft_adjoint_direct(&p);
  t1 = getticks();
  t = nfft_elapsed_seconds(t1,t0);
  nfft_vpr_complex(p.f_hat,K,"adjoint ndft, vector f_hat (first few entries)");
  printf(" took %e seconds.\n",t);

  /** approx. adjoint and show the result */
  t0 = getticks();
  nfft_adjoint(&p);
  t1 = getticks();
  t = nfft_elapsed_seconds(t1,t0);
  nfft_vpr_complex(p.f_hat,K,"adjoint nfft, vector f_hat (first few entries)");
  printf(" took %e seconds.\n",t);

  /** finalise the two dimensional plan */
  nfft_finalize(&p);
}
static void reconstruct(char* filename,int N,int M,int iteration , int weight)
{
  int j,k,l;
  double time,min_time,max_time,min_inh,max_inh;
  ticks t0, t1;
  double t,real,imag;
  double w,epsilon=0.0000003;     /* epsilon is a the break criterium for
                                   the iteration */;
  mri_inh_2d1d_plan my_plan;
  solver_plan_complex my_iplan;
  FILE* fp,*fw,*fout_real,*fout_imag,*finh,*ftime;
  int my_N[3],my_n[3];
  int flags = PRE_PHI_HUT| PRE_PSI |MALLOC_X| MALLOC_F_HAT|
                      MALLOC_F| FFTW_INIT| FFT_OUT_OF_PLACE;
  unsigned infft_flags = CGNR | PRECOMPUTE_DAMP;

  double Ts;
  double W,T;
  int N3;
  int m=2;
  double sigma = 1.25;

  ftime=fopen("readout_time.dat","r");
  finh=fopen("inh.dat","r");

  min_time=INT_MAX; max_time=INT_MIN;
  for(j=0;j<M;j++)
  {
    fscanf(ftime,"%le ",&time);
    if(time<min_time)
      min_time = time;
    if(time>max_time)
      max_time = time;
  }

  fclose(ftime);

  Ts=(min_time+max_time)/2.0;


  min_inh=INT_MAX; max_inh=INT_MIN;
  for(j=0;j<N*N;j++)
  {
    fscanf(finh,"%le ",&w);
    if(w<min_inh)
      min_inh = w;
    if(w>max_inh)
      max_inh = w;
  }
  fclose(finh);

  N3=ceil((MAX(fabs(min_inh),fabs(max_inh))*(max_time-min_time)/2.0+(m)/(2*sigma))*4*sigma);
	/* N3 has to be even */
	if(N3%2!=0)
	  N3++;

	T=((max_time-min_time)/2.0)/(0.5-((double) (m))/N3);
  W=N3/T;

  my_N[0]=N; my_n[0]=ceil(N*sigma);
  my_N[1]=N; my_n[1]=ceil(N*sigma);
  my_N[2]=N3; my_n[2]=N3;

  /* initialise nfft */
  mri_inh_2d1d_init_guru(&my_plan, my_N, M, my_n, m, sigma, flags,
                      FFTW_MEASURE| FFTW_DESTROY_INPUT);


  /* precompute lin psi if set */
  if(my_plan.plan.nfft_flags & PRE_LIN_PSI)
    nfft_precompute_lin_psi(&my_plan.plan);

  if (weight)
    infft_flags = infft_flags | PRECOMPUTE_WEIGHT;

  /* initialise my_iplan, advanced */
  solver_init_advanced_complex(&my_iplan,(nfft_mv_plan_complex*)(&my_plan), infft_flags );

  /* get the weights */
  if(my_iplan.flags & PRECOMPUTE_WEIGHT)
  {
    fw=fopen("weights.dat","r");
    for(j=0;j<my_plan.M_total;j++)
    {
        fscanf(fw,"%le ",&my_iplan.w[j]);
    }
    fclose(fw);
  }

  /* get the damping factors */
  if(my_iplan.flags & PRECOMPUTE_DAMP)
  {
    for(j=0;j<N;j++){
      for(k=0;k<N;k++) {
        int j2= j-N/2;
        int k2= k-N/2;
        double r=sqrt(j2*j2+k2*k2);
        if(r>(double) N/2)
          my_iplan.w_hat[j*N+k]=0.0;
        else
          my_iplan.w_hat[j*N+k]=1.0;
      }
    }
  }

  fp=fopen(filename,"r");
  ftime=fopen("readout_time.dat","r");

  for(j=0;j<my_plan.M_total;j++)
  {
    fscanf(fp,"%le %le %le %le",&my_plan.plan.x[2*j+0],&my_plan.plan.x[2*j+1],&real,&imag);
    my_iplan.y[j]=real+ _Complex_I*imag;
    fscanf(ftime,"%le ",&my_plan.t[j]);

    my_plan.t[j] = (my_plan.t[j]-Ts)/T;
  }
  fclose(fp);
  fclose(ftime);


  finh=fopen("inh.dat","r");
  for(j=0;j<N*N;j++)
  {
    fscanf(finh,"%le ",&my_plan.w[j]);
    my_plan.w[j]/=W;
  }
  fclose(finh);


  if(my_plan.plan.nfft_flags & PRE_PSI) {
    nfft_precompute_psi(&my_plan.plan);
  }
  if(my_plan.plan.nfft_flags & PRE_FULL_PSI) {
      nfft_precompute_full_psi(&my_plan.plan);
  }

  /* init some guess */
  for(j=0;j<my_plan.N_total;j++)
  {
    my_iplan.f_hat_iter[j]=0.0;
  }

  t0 = getticks();

  /* inverse trafo */
  solver_before_loop_complex(&my_iplan);
  for(l=0;l<iteration;l++)
  {
    /* break if dot_r_iter is smaller than epsilon*/
    if(my_iplan.dot_r_iter<epsilon)
    break;
    fprintf(stderr,"%e,  %i of %i\n",sqrt(my_iplan.dot_r_iter),
    l+1,iteration);
    solver_loop_one_step_complex(&my_iplan);
  }

  t1 = getticks();
  t = nfft_elapsed_seconds(t1,t0);

  fout_real=fopen("output_real.dat","w");
  fout_imag=fopen("output_imag.dat","w");

  for (j=0;j<N*N;j++) {
    /* Verschiebung wieder herausrechnen */
    my_iplan.f_hat_iter[j]*=cexp(-2.0*_Complex_I*KPI*Ts*my_plan.w[j]*W);

    fprintf(fout_real,"%le ",creal(my_iplan.f_hat_iter[j]));
    fprintf(fout_imag,"%le ",cimag(my_iplan.f_hat_iter[j]));
  }

  fclose(fout_real);
  fclose(fout_imag);
  solver_finalize_complex(&my_iplan);
  mri_inh_2d1d_finalize(&my_plan);
}
Пример #11
0
/** Comparison of the FFTW, mpolar FFT, and inverse mpolar FFT */
static int comparison_fft(FILE *fp, int N, int T, int R)
{
  ticks t0, t1;
  fftw_plan my_fftw_plan;
  fftw_complex *f_hat,*f;
  int m,k;
  double t_fft, t_dft_mpolar;

  f_hat = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*N*N);
  f     = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*(T*R/4)*5);

  my_fftw_plan = fftw_plan_dft_2d(N,N,f_hat,f,FFTW_BACKWARD,FFTW_MEASURE);

  for(k=0; k<N*N; k++)
    f_hat[k] = (((double)rand())/RAND_MAX) + _Complex_I* (((double)rand())/RAND_MAX);

  t0 = getticks();
  for(m=0;m<65536/N;m++)
    {
      fftw_execute(my_fftw_plan);
      /* touch */
      f_hat[2]=2*f_hat[0];
    }
  t1 = getticks();
  GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0);
  t_fft=N*GLOBAL_elapsed_time/65536;

  if(N<256)
    {
      mpolar_dft(f_hat,N,f,T,R,1);
      t_dft_mpolar=GLOBAL_elapsed_time;
    }

  for (m=3; m<=9; m+=3)
    {
      if((m==3)&&(N<256))
        fprintf(fp,"%d\t&\t&\t%1.1e&\t%1.1e&\t%d\t",N,t_fft,t_dft_mpolar,m);
      else
        if(m==3)
	  fprintf(fp,"%d\t&\t&\t%1.1e&\t       &\t%d\t",N,t_fft,m);
	else
	  fprintf(fp,"  \t&\t&\t       &\t       &\t%d\t",m);

      printf("N=%d\tt_fft=%1.1e\tt_dft_mpolar=%1.1e\tm=%d\t",N,t_fft,t_dft_mpolar,m);

      mpolar_fft(f_hat,N,f,T,R,m);
      fprintf(fp,"%1.1e&\t",GLOBAL_elapsed_time);
      printf("t_mpolar=%1.1e\t",GLOBAL_elapsed_time);
      inverse_mpolar_fft(f,T,R,f_hat,N,2*m,m);
      if(m==9)
	fprintf(fp,"%1.1e\\\\\\hline\n",GLOBAL_elapsed_time);
      else
	fprintf(fp,"%1.1e\\\\\n",GLOBAL_elapsed_time);
      printf("t_impolar=%1.1e\n",GLOBAL_elapsed_time);
    }

  fflush(fp);

  nfft_free(f);
  nfft_free(f_hat);

  return EXIT_SUCCESS;
}
Пример #12
0
/** inverse NFFT-based mpolar FFT */
static int inverse_mpolar_fft(fftw_complex *f, int T, int R, fftw_complex *f_hat, int NN, int max_i, int m)
{
  ticks t0, t1;
  int j,k;                              /**< index for nodes and freqencies   */
  nfft_plan my_nfft_plan;               /**< plan for the nfft-2D             */
  solver_plan_complex my_infft_plan;             /**< plan for the inverse nfft        */

  double *x, *w;                        /**< knots and associated weights     */
  int l;                                /**< index for iterations             */

  int N[2],n[2];
  int M;                                /**< number of knots                  */

  N[0]=NN; n[0]=2*N[0];                 /**< oversampling factor sigma=2      */
  N[1]=NN; n[1]=2*N[1];                 /**< oversampling factor sigma=2      */

  x = (double *)nfft_malloc(5*T*R/2*(sizeof(double)));
  if (x==NULL)
    return -1;

  w = (double *)nfft_malloc(5*T*R/4*(sizeof(double)));
  if (w==NULL)
    return -1;

  /** init two dimensional NFFT plan */
  M=mpolar_grid(T,R,x,w);
  nfft_init_guru(&my_nfft_plan, 2, N, M, n, m,
                  PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
                  FFTW_MEASURE| FFTW_DESTROY_INPUT);

  /** init two dimensional infft plan */
    solver_init_advanced_complex(&my_infft_plan,(nfft_mv_plan_complex*)(&my_nfft_plan), CGNR | PRECOMPUTE_WEIGHT );

  /** init nodes, given samples and weights */
  for(j=0;j<my_nfft_plan.M_total;j++)
  {
    my_nfft_plan.x[2*j+0] = x[2*j+0];
    my_nfft_plan.x[2*j+1] = x[2*j+1];
    my_infft_plan.y[j]    = f[j];
    my_infft_plan.w[j]    = w[j];
  }

  /** precompute psi, the entries of the matrix B */
  if(my_nfft_plan.nfft_flags & PRE_LIN_PSI)
    nfft_precompute_lin_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_PSI)
    nfft_precompute_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_FULL_PSI)
    nfft_precompute_full_psi(&my_nfft_plan);


 /** initialise damping factors */
 if(my_infft_plan.flags & PRECOMPUTE_DAMP)
   for(j=0;j<my_nfft_plan.N[0];j++)
     for(k=0;k<my_nfft_plan.N[1];k++)
     {
        my_infft_plan.w_hat[j*my_nfft_plan.N[1]+k]=
          (sqrt(pow(j-my_nfft_plan.N[0]/2,2)+pow(k-my_nfft_plan.N[1]/2,2))>(my_nfft_plan.N[0]/2)?0:1);
     }

  /** initialise some guess f_hat_0 */
  for(k=0;k<my_nfft_plan.N_total;k++)
      my_infft_plan.f_hat_iter[k] = 0.0 + _Complex_I*0.0;

  t0 = getticks();

  /** solve the system */
  solver_before_loop_complex(&my_infft_plan);

  if (max_i<1)
  {
    l=1;
    for(k=0;k<my_nfft_plan.N_total;k++)
      my_infft_plan.f_hat_iter[k] = my_infft_plan.p_hat_iter[k];
  }
  else
  {
    for(l=1;l<=max_i;l++)
    {
      solver_loop_one_step_complex(&my_infft_plan);
    }
  }

  t1 = getticks();
  GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0);

  /** copy result */
  for(k=0;k<my_nfft_plan.N_total;k++)
    f_hat[k] = my_infft_plan.f_hat_iter[k];

  /** finalise the plans and free the variables */
  solver_finalize_complex(&my_infft_plan);
  nfft_finalize(&my_nfft_plan);
  nfft_free(x);
  nfft_free(w);

  return EXIT_SUCCESS;
}
Пример #13
0
/**
 * The main program.
 *
 * \param argc The number of arguments
 * \param argv An array containing the arguments as C-strings
 *
 * \return Exit code
 *
 * \author Jens Keiner
 */
int main (int argc, char **argv)
{
  double **p;                  /* The array containing the parameter sets     *
                                * for the kernel functions                    */
  int *m;                      /* The array containing the cut-off degrees M  */
  int **ld;                    /* The array containing the numbers of source  *
                                * and target nodes, L and D                   */
  int ip;                      /* Index variable for p                        */
  int im;                      /* Index variable for m                        */
  int ild;                     /* Index variable for l                        */
  int ipp;                     /* Index for kernel parameters                 */
  int ip_max;                  /* The maximum index for p                     */
  int im_max;                  /* The maximum index for m                     */
  int ild_max;                 /* The maximum index for l                     */
  int ipp_max;                 /* The maximum index for ip                    */
  int tc_max;                  /* The number of testcases                     */
  int m_max;                   /* The maximum cut-off degree M for the        *
                                * current dataset                             */
  int l_max;                   /* The maximum number of source nodes L for    *
                                * the current dataset                         */
  int d_max;                   /* The maximum number of target nodes D for    *
                                * the current dataset                         */
  long ld_max_prec;            /* The maximum number of source and target     *
                                * nodes for precomputation multiplied         */
  long l_max_prec;             /* The maximum number of source nodes for      *
                                * precomputation                              */
  int tc;                      /* Index variable for testcases                */
  int kt;                      /* The kernel function                         */
  int cutoff;                  /* The current NFFT cut-off parameter          */
  double threshold;            /* The current NFSFT threshold parameter       */
  double t_d;                  /* Time for direct algorithm in seconds        */
  double t_dp;                 /* Time for direct algorithm with              *
                                  precomputation in seconds                   */
  double t_fd;                 /* Time for fast direct algorithm in seconds   */
  double t_f;                  /* Time for fast algorithm in seconds          */
  double temp;                 /*                                             */
  double err_f;                /* Error E_infty for fast algorithm            */
  double err_fd;               /* Error E_\infty for fast direct algorithm    */
  ticks t0, t1;                /*                                             */
  int precompute = NO;         /*                                             */
  fftw_complex *ptr;         /*                                             */
  double* steed;               /*                                             */
  fftw_complex *b;           /* The weights (b_l)_{l=0}^{L-1}               */
  fftw_complex *f_hat;       /* The spherical Fourier coefficients          */
  fftw_complex *a;           /* The Fourier-Legendre coefficients           */
  double *xi;                  /* Target nodes                                */
  double *eta;                 /* Source nodes                                */
  fftw_complex *f_m;         /* Approximate function values                 */
  fftw_complex *f;           /* Exact function values                       */
  fftw_complex *prec = NULL; /*                                             */
  nfsft_plan plan;             /* NFSFT plan                                  */
  nfsft_plan plan_adjoint;     /* adjoint NFSFT plan                          */
  int i;                       /*                                             */
  int k;                       /*                                             */
  int n;                       /*                                             */
  int d;                       /*                                             */
  int l;                       /*                                             */
  int use_nfsft;               /*                                             */
  int use_nfft;                /*                                             */
  int use_fpt;                 /*                                             */
  int rinc;                    /*                                             */
  double constant;             /*                                             */

  /* Read the number of testcases. */
  fscanf(stdin,"testcases=%d\n",&tc_max);
  fprintf(stdout,"%d\n",tc_max);

  /* Process each testcase. */
  for (tc = 0; tc < tc_max; tc++)
  {
    /* Check if the fast transform shall be used. */
    fscanf(stdin,"nfsft=%d\n",&use_nfsft);
    fprintf(stdout,"%d\n",use_nfsft);
    if (use_nfsft != NO)
    {
      /* Check if the NFFT shall be used. */
      fscanf(stdin,"nfft=%d\n",&use_nfft);
      fprintf(stdout,"%d\n",use_nfft);
      if (use_nfft != NO)
      {
        /* Read the cut-off parameter. */
        fscanf(stdin,"cutoff=%d\n",&cutoff);
        fprintf(stdout,"%d\n",cutoff);
      }
      else
      {
        /* TODO remove this */
        /* Initialize unused variable with dummy value. */
        cutoff = 1;
      }
      /* Check if the fast polynomial transform shall be used. */
      fscanf(stdin,"fpt=%d\n",&use_fpt);
      fprintf(stdout,"%d\n",use_fpt);
      /* Read the NFSFT threshold parameter. */
      fscanf(stdin,"threshold=%lf\n",&threshold);
      fprintf(stdout,"%lf\n",threshold);
    }
    else
    {
      /* TODO remove this */
      /* Set dummy values. */
      cutoff = 3;
      threshold = 1000000000000.0;
    }

    /* Initialize bandwidth bound. */
    m_max = 0;
    /* Initialize source nodes bound. */
    l_max = 0;
    /* Initialize target nodes bound. */
    d_max = 0;
    /* Initialize source nodes bound for precomputation. */
    l_max_prec = 0;
    /* Initialize source and target nodes bound for precomputation. */
    ld_max_prec = 0;

    /* Read the kernel type. This is one of KT_ABEL_POISSON, KT_SINGULARITY,
     * KT_LOC_SUPP and KT_GAUSSIAN. */
    fscanf(stdin,"kernel=%d\n",&kt);
    fprintf(stdout,"%d\n",kt);

    /* Read the number of parameter sets. */
    fscanf(stdin,"parameter_sets=%d\n",&ip_max);
    fprintf(stdout,"%d\n",ip_max);

    /* Allocate memory for pointers to parameter sets. */
    p = (double**) nfft_malloc(ip_max*sizeof(double*));

    /* We now read in the parameter sets. */

    /* Read number of parameters. */
    fscanf(stdin,"parameters=%d\n",&ipp_max);
    fprintf(stdout,"%d\n",ipp_max);

    for (ip = 0; ip < ip_max; ip++)
    {
      /* Allocate memory for the parameters. */
      p[ip] = (double*) nfft_malloc(ipp_max*sizeof(double));

      /* Read the parameters. */
      for (ipp = 0; ipp < ipp_max; ipp++)
      {
        /* Read the next parameter. */
        fscanf(stdin,"%lf\n",&p[ip][ipp]);
        fprintf(stdout,"%lf\n",p[ip][ipp]);
      }
    }

    /* Read the number of cut-off degrees. */
    fscanf(stdin,"bandwidths=%d\n",&im_max);
    fprintf(stdout,"%d\n",im_max);
    m = (int*) nfft_malloc(im_max*sizeof(int));

    /* Read the cut-off degrees. */
    for (im = 0; im < im_max; im++)
    {
      /* Read cut-off degree. */
      fscanf(stdin,"%d\n",&m[im]);
      fprintf(stdout,"%d\n",m[im]);
      m_max = MAX(m_max,m[im]);
    }

    /* Read number of node specifications. */
    fscanf(stdin,"node_sets=%d\n",&ild_max);
    fprintf(stdout,"%d\n",ild_max);
    ld = (int**) nfft_malloc(ild_max*sizeof(int*));

    /* Read the run specification. */
    for (ild = 0; ild < ild_max; ild++)
    {
      /* Allocate memory for the run parameters. */
      ld[ild] = (int*) nfft_malloc(5*sizeof(int));

      /* Read number of source nodes. */
      fscanf(stdin,"L=%d ",&ld[ild][0]);
      fprintf(stdout,"%d\n",ld[ild][0]);
      l_max = MAX(l_max,ld[ild][0]);

      /* Read number of target nodes. */
      fscanf(stdin,"D=%d ",&ld[ild][1]);
      fprintf(stdout,"%d\n",ld[ild][1]);
      d_max = MAX(d_max,ld[ild][1]);

      /* Determine whether direct and fast algorithm shall be compared. */
      fscanf(stdin,"compare=%d ",&ld[ild][2]);
      fprintf(stdout,"%d\n",ld[ild][2]);

      /* Check if precomputation for the direct algorithm is used. */
      if (ld[ild][2] == YES)
      {
        /* Read whether the precomputed version shall also be used. */
        fscanf(stdin,"precomputed=%d\n",&ld[ild][3]);
        fprintf(stdout,"%d\n",ld[ild][3]);

        /* Read the number of repetitions over which measurements are
         * averaged. */
        fscanf(stdin,"repetitions=%d\n",&ld[ild][4]);
        fprintf(stdout,"%d\n",ld[ild][4]);

        /* Update ld_max_prec and l_max_prec. */
        if (ld[ild][3] == YES)
        {
          /* Update ld_max_prec. */
          ld_max_prec = MAX(ld_max_prec,ld[ild][0]*ld[ild][1]);
          /* Update l_max_prec. */
          l_max_prec = MAX(l_max_prec,ld[ild][0]);
          /* Turn on the precomputation for the direct algorithm. */
          precompute = YES;
        }
      }
      else
      {
        /* Set default value for the number of repetitions. */
        ld[ild][4] = 1;
      }
    }

    /* Allocate memory for data structures. */
    b = (fftw_complex*) nfft_malloc(l_max*sizeof(fftw_complex));
    eta = (double*) nfft_malloc(2*l_max*sizeof(double));
    f_hat = (fftw_complex*) nfft_malloc(NFSFT_F_HAT_SIZE(m_max)*sizeof(fftw_complex));
    a = (fftw_complex*) nfft_malloc((m_max+1)*sizeof(fftw_complex));
    xi = (double*) nfft_malloc(2*d_max*sizeof(double));
    f_m = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));
    f = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));

    /* Allocate memory for precomputed data. */
    if (precompute == YES)
    {
      prec = (fftw_complex*) nfft_malloc(ld_max_prec*sizeof(fftw_complex));
    }

    /* Generate random source nodes and weights. */
    for (l = 0; l < l_max; l++)
    {
      b[l] = (((double)rand())/RAND_MAX) - 0.5;
      eta[2*l] = (((double)rand())/RAND_MAX) - 0.5;
      eta[2*l+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI);
    }

    /* Generate random target nodes. */
    for (d = 0; d < d_max; d++)
    {
      xi[2*d] = (((double)rand())/RAND_MAX) - 0.5;
      xi[2*d+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI);
    }

    /* Do precomputation. */
    nfsft_precompute(m_max,threshold,
      ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U/*NFSFT_NO_DIRECT_ALGORITHM*/)), 0U);

    /* Process all parameter sets. */
    for (ip = 0; ip < ip_max; ip++)
    {
      /* Compute kernel coeffcients up to the maximum cut-off degree m_max. */
      switch (kt)
      {
        case KT_ABEL_POISSON:
          /* Compute Fourier-Legendre coefficients for the Poisson kernel. */
          for (k = 0; k <= m_max; k++)
            a[k] = SYMBOL_ABEL_POISSON(k,p[ip][0]);
          break;

        case KT_SINGULARITY:
          /* Compute Fourier-Legendre coefficients for the singularity
           * kernel. */
          for (k = 0; k <= m_max; k++)
            a[k] = SYMBOL_SINGULARITY(k,p[ip][0]);
          break;

        case KT_LOC_SUPP:
          /* Compute Fourier-Legendre coefficients for the locally supported
           * kernel. */
          a[0] = 1.0;
          if (1 <= m_max)
            a[1] = ((p[ip][1]+1+p[ip][0])/(p[ip][1]+2.0))*a[0];
          for (k = 2; k <= m_max; k++)
            a[k] = (1.0/(k+p[ip][1]+1))*((2*k-1)*p[ip][0]*a[k-1] -
              (k-p[ip][1]-2)*a[k-2]);
          break;

        case KT_GAUSSIAN:
          /* Fourier-Legendre coefficients */
          steed = (double*) nfft_malloc((m_max+1)*sizeof(double));
          smbi(2.0*p[ip][0],0.5,m_max+1,2,steed);
          for (k = 0; k <= m_max; k++)
            a[k] = K2PI*(sqrt(KPI/p[ip][0]))*steed[k];

          nfft_free(steed);
          break;
      }

      /* Normalize Fourier-Legendre coefficients. */
      for (k = 0; k <= m_max; k++)
        a[k] *= (2*k+1)/(K4PI);

      /* Process all node sets. */
      for (ild = 0; ild < ild_max; ild++)
      {
        /* Check if the fast algorithm shall be used. */
        if (ld[ild][2] != NO)
        {
          /* Check if the direct algorithm with precomputation should be
           * tested. */
          if (ld[ild][3] != NO)
          {
            /* Get pointer to start of data. */
            ptr = prec;
            /* Calculate increment from one row to the next. */
            rinc = l_max_prec-ld[ild][0];

            /* Process al target nodes. */
            for (d = 0; d < ld[ild][1]; d++)
            {
              /* Process all source nodes. */
              for (l = 0; l < ld[ild][0]; l++)
              {
                /* Compute inner product between current source and target
                 * node. */
                temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);

                /* Switch by the kernel type. */
                switch (kt)
                {
                  case KT_ABEL_POISSON:
                    /* Evaluate the Poisson kernel for the current value. */
                    *ptr++ = poissonKernel(temp,p[ip][0]);
                   break;

                  case KT_SINGULARITY:
                    /* Evaluate the singularity kernel for the current
                     * value. */
                    *ptr++ = singularityKernel(temp,p[ip][0]);
                    break;

                  case KT_LOC_SUPP:
                     /* Evaluate the localized kernel for the current
                      * value. */
                    *ptr++ = locallySupportedKernel(temp,p[ip][0],p[ip][1]);
                    break;

                    case KT_GAUSSIAN:
                       /* Evaluate the spherical Gaussian kernel for the current
                        * value. */
                      *ptr++ = gaussianKernel(temp,p[ip][0]);
                       break;
                }
              }
              /* Increment pointer for next row. */
              ptr += rinc;
            }

            /* Initialize cumulative time variable. */
            t_dp = 0.0;

            /* Initialize time measurement. */
            t0 = getticks();

            /* Cycle through all runs. */
            for (i = 0; i < ld[ild][4]; i++)
            {

              /* Reset pointer to start of precomputed data. */
              ptr = prec;
              /* Calculate increment from one row to the next. */
              rinc = l_max_prec-ld[ild][0];

              /* Check if the localized kernel is used. */
              if (kt == KT_LOC_SUPP)
              {
                /* Perform final summation */

                /* Calculate the multiplicative constant. */
                constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1)));

                /* Process all target nodes. */
                for (d = 0; d < ld[ild][1]; d++)
                {
                  /* Initialize function value. */
                  f[d] = 0.0;

                  /* Process all source nodes. */
                  for (l = 0; l < ld[ild][0]; l++)
                    f[d] += b[l]*(*ptr++);

                  /* Multiply with the constant. */
                  f[d] *= constant;

                  /* Proceed to next row. */
                  ptr += rinc;
                }
              }
              else
              {
                /* Process all target nodes. */
                for (d = 0; d < ld[ild][1]; d++)
                {
                  /* Initialize function value. */
                  f[d] = 0.0;

                  /* Process all source nodes. */
                  for (l = 0; l < ld[ild][0]; l++)
                    f[d] += b[l]*(*ptr++);

                  /* Proceed to next row. */
                  ptr += rinc;
                }
              }
            }

            /* Calculate the time needed. */
            t1 = getticks();
            t_dp = nfft_elapsed_seconds(t1,t0);

            /* Calculate average time needed. */
            t_dp = t_dp/((double)ld[ild][4]);
          }
          else
          {
            /* Initialize cumulative time variable with dummy value. */
            t_dp = -1.0;
          }

          /* Initialize cumulative time variable. */
          t_d = 0.0;

          /* Initialize time measurement. */
          t0 = getticks();

          /* Cycle through all runs. */
          for (i = 0; i < ld[ild][4]; i++)
          {
            /* Switch by the kernel type. */
            switch (kt)
            {
              case KT_ABEL_POISSON:

                /* Process all target nodes. */
                for (d = 0; d < ld[ild][1]; d++)
                {
                  /* Initialize function value. */
                  f[d] = 0.0;

                  /* Process all source nodes. */
                  for (l = 0; l < ld[ild][0]; l++)
                  {
                    /* Compute the inner product for the current source and
                     * target nodes. */
                    temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);

                    /* Evaluate the Poisson kernel for the current value and add
                     * to the result. */
                    f[d] += b[l]*poissonKernel(temp,p[ip][0]);
                  }
                }
                break;

              case KT_SINGULARITY:
                /* Process all target nodes. */
                for (d = 0; d < ld[ild][1]; d++)
                {
                  /* Initialize function value. */
                  f[d] = 0.0;

                  /* Process all source nodes. */
                  for (l = 0; l < ld[ild][0]; l++)
                  {
                    /* Compute the inner product for the current source and
                     * target nodes. */
                    temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);

                    /* Evaluate the Poisson kernel for the current value and add
                     * to the result. */
                    f[d] += b[l]*singularityKernel(temp,p[ip][0]);
                  }
                }
                break;

              case KT_LOC_SUPP:
                /* Calculate the multiplicative constant. */
                constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1)));

                /* Process all target nodes. */
                for (d = 0; d < ld[ild][1]; d++)
                {
                  /* Initialize function value. */
                  f[d] = 0.0;

                  /* Process all source nodes. */
                  for (l = 0; l < ld[ild][0]; l++)
                  {
                    /* Compute the inner product for the current source and
                     * target nodes. */
                    temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);

                    /* Evaluate the Poisson kernel for the current value and add
                     * to the result. */
                    f[d] += b[l]*locallySupportedKernel(temp,p[ip][0],p[ip][1]);
                  }

                  /* Multiply result with constant. */
                  f[d] *= constant;
                }
                break;

                case KT_GAUSSIAN:
                  /* Process all target nodes. */
                  for (d = 0; d < ld[ild][1]; d++)
                  {
                    /* Initialize function value. */
                    f[d] = 0.0;

                    /* Process all source nodes. */
                    for (l = 0; l < ld[ild][0]; l++)
                    {
                      /* Compute the inner product for the current source and
                       * target nodes. */
                      temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
                      /* Evaluate the Poisson kernel for the current value and add
                       * to the result. */
                      f[d] += b[l]*gaussianKernel(temp,p[ip][0]);
                    }
                  }
                  break;
            }
          }

          /* Calculate and add the time needed. */
          t1 = getticks();
          t_d = nfft_elapsed_seconds(t1,t0);
          /* Calculate average time needed. */
          t_d = t_d/((double)ld[ild][4]);
        }
        else
        {
          /* Initialize cumulative time variable with dummy value. */
          t_d = -1.0;
          t_dp = -1.0;
        }

        /* Initialize error and cumulative time variables for the fast
         * algorithm. */
        err_fd = -1.0;
        err_f = -1.0;
        t_fd = -1.0;
        t_f = -1.0;

        /* Process all cut-off bandwidths. */
        for (im = 0; im < im_max; im++)
        {
          /* Init transform plans. */
          nfsft_init_guru(&plan_adjoint, m[im],ld[ild][0],
            ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
            ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
            PRE_PHI_HUT | PRE_PSI | FFTW_INIT |
            FFT_OUT_OF_PLACE, cutoff);
          nfsft_init_guru(&plan,m[im],ld[ild][1],
            ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
            ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
            PRE_PHI_HUT | PRE_PSI | FFTW_INIT |
            FFT_OUT_OF_PLACE,
             cutoff);
          plan_adjoint.f_hat = f_hat;
          plan_adjoint.x = eta;
          plan_adjoint.f = b;
          plan.f_hat = f_hat;
          plan.x = xi;
          plan.f = f_m;
          nfsft_precompute_x(&plan_adjoint);
          nfsft_precompute_x(&plan);

          /* Check if direct algorithm shall also be tested. */
          if (use_nfsft == BOTH)
          {
            /* Initialize cumulative time variable. */
            t_fd = 0.0;

            /* Initialize time measurement. */
            t0 = getticks();

            /* Cycle through all runs. */
            for (i = 0; i < ld[ild][4]; i++)
            {

              /* Execute adjoint direct NDSFT transformation. */
              nfsft_adjoint_direct(&plan_adjoint);

              /* Multiplication with the Fourier-Legendre coefficients. */
              for (k = 0; k <= m[im]; k++)
                for (n = -k; n <= k; n++)
                  f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];

              /* Execute direct NDSFT transformation. */
              nfsft_trafo_direct(&plan);

            }

            /* Calculate and add the time needed. */
            t1 = getticks();
            t_fd = nfft_elapsed_seconds(t1,t0);

            /* Calculate average time needed. */
            t_fd = t_fd/((double)ld[ild][4]);

            /* Check if error E_infty should be computed. */
            if (ld[ild][2] != NO)
            {
              /* Compute the error E_infinity. */
              err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
                ld[ild][0]);
            }
          }

          /* Check if the fast NFSFT algorithm shall also be tested. */
          if (use_nfsft != NO)
          {
            /* Initialize cumulative time variable for the NFSFT algorithm. */
            t_f = 0.0;
          }
          else
          {
            /* Initialize cumulative time variable for the direct NDSFT
             * algorithm. */
            t_fd = 0.0;
          }

          /* Initialize time measurement. */
          t0 = getticks();

          /* Cycle through all runs. */
          for (i = 0; i < ld[ild][4]; i++)
          {
            /* Check if the fast NFSFT algorithm shall also be tested. */
            if (use_nfsft != NO)
            {
              /* Execute the adjoint NFSFT transformation. */
              nfsft_adjoint(&plan_adjoint);
            }
            else
            {
              /* Execute the adjoint direct NDSFT transformation. */
              nfsft_adjoint_direct(&plan_adjoint);
            }

            /* Multiplication with the Fourier-Legendre coefficients. */
            for (k = 0; k <= m[im]; k++)
              for (n = -k; n <= k; n++)
                f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];

            /* Check if the fast NFSFT algorithm shall also be tested. */
            if (use_nfsft != NO)
            {
              /* Execute the NFSFT transformation. */
              nfsft_trafo(&plan);
            }
            else
            {
              /* Execute the NDSFT transformation. */
              nfsft_trafo_direct(&plan);
            }
          }

          /* Check if the fast NFSFT algorithm has been used. */
          t1 = getticks();

          if (use_nfsft != NO)
            t_f = nfft_elapsed_seconds(t1,t0);
          else
            t_fd = nfft_elapsed_seconds(t1,t0);

          /* Check if the fast NFSFT algorithm has been used. */
          if (use_nfsft != NO)
          {
            /* Calculate average time needed. */
            t_f = t_f/((double)ld[ild][4]);
          }
          else
          {
            /* Calculate average time needed. */
            t_fd = t_fd/((double)ld[ild][4]);
          }

          /* Check if error E_infty should be computed. */
          if (ld[ild][2] != NO)
          {
            /* Check if the fast NFSFT algorithm has been used. */
            if (use_nfsft != NO)
            {
              /* Compute the error E_infinity. */
              err_f = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
                ld[ild][0]);
            }
            else
            {
              /* Compute the error E_infinity. */
              err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
                ld[ild][0]);
            }
          }

          /* Print out the error measurements. */
          fprintf(stdout,"%e\n%e\n%e\n%e\n%e\n%e\n\n",t_d,t_dp,t_fd,t_f,err_fd,
            err_f);

          /* Finalize the NFSFT plans */
          nfsft_finalize(&plan_adjoint);
          nfsft_finalize(&plan);
        } /* for (im = 0; im < im_max; im++) - Process all cut-off
           * bandwidths.*/
      } /* for (ild = 0; ild < ild_max; ild++) - Process all node sets. */
    } /* for (ip = 0; ip < ip_max; ip++) - Process all parameter sets. */

    /* Delete precomputed data. */
    nfsft_forget();

    /* Check if memory for precomputed data of the matrix K has been
     * allocated. */
    if (precompute == YES)
    {
      /* Free memory for precomputed matrix K. */
      nfft_free(prec);
    }
    /* Free data arrays. */
    nfft_free(f);
    nfft_free(f_m);
    nfft_free(xi);
    nfft_free(eta);
    nfft_free(a);
    nfft_free(f_hat);
    nfft_free(b);

    /* Free memory for node sets. */
    for (ild = 0; ild < ild_max; ild++)
      nfft_free(ld[ild]);
    nfft_free(ld);

    /* Free memory for cut-off bandwidths. */
    nfft_free(m);

    /* Free memory for parameter sets. */
    for (ip = 0; ip < ip_max; ip++)
      nfft_free(p[ip]);
    nfft_free(p);
  } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */

  /* Return exit code for successful run. */
  return EXIT_SUCCESS;
}
Пример #14
0
void bench_openmp(FILE *infile, int m, int psi_flag)
{
  nfft_plan p;
  int *N;
  int *n;
  int M, d, trafo_adjoint;
  int t, j;
  double re,im;
  ticks t0, t1;
  double tt_total, tt_preonepsi;

  fscanf(infile, "%d %d", &d, &trafo_adjoint);

  N = malloc(d*sizeof(int));
  n = malloc(d*sizeof(int));

  for (t=0; t<d; t++)
    fscanf(infile, "%d", N+t);

  for (t=0; t<d; t++)
    fscanf(infile, "%d", n+t);

  fscanf(infile, "%d", &M);

#ifdef _OPENMP
  fftw_import_wisdom_from_filename("nfft_benchomp_detail_threads.plan");
#else
  fftw_import_wisdom_from_filename("nfft_benchomp_detail_single.plan");
#endif

  /** init an d-dimensional plan */
  nfft_init_guru(&p, d, N, M, n, m,
                   PRE_PHI_HUT| psi_flag | MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
                   FFTW_MEASURE| FFTW_DESTROY_INPUT);

#ifdef _OPENMP
  fftw_export_wisdom_to_filename("nfft_benchomp_detail_threads.plan");
#else
  fftw_export_wisdom_to_filename("nfft_benchomp_detail_single.plan");
#endif

  for (j=0; j < p.M_total; j++)
  {
    for (t=0; t < p.d; t++)
      fscanf(infile, "%lg", p.x+p.d*j+t);
  }

  if (trafo_adjoint==0)
  {
    for (j=0; j < p.N_total; j++)
    {
      fscanf(infile, "%lg %lg", &re, &im);
      p.f_hat[j] = re + _Complex_I * im;
    }
  }
  else
  {
    for (j=0; j < p.M_total; j++)
    {
      fscanf(infile, "%lg %lg", &re, &im);
      p.f[j] = re + _Complex_I * im;
    }
  }

  t0 = getticks();
  /** precompute psi, the entries of the matrix B */
  if(p.nfft_flags & PRE_ONE_PSI)
      nfft_precompute_one_psi(&p);
  t1 = getticks();
  tt_preonepsi = nfft_elapsed_seconds(t1,t0);

  if (trafo_adjoint==0)
    nfft_trafo(&p);
  else
    nfft_adjoint(&p);
  t1 = getticks();
  tt_total = nfft_elapsed_seconds(t1,t0);

#ifndef MEASURE_TIME
  p.MEASURE_TIME_t[0] = 0.0;
  p.MEASURE_TIME_t[2] = 0.0;
#endif

#ifndef MEASURE_TIME_FFTW
  p.MEASURE_TIME_t[1] = 0.0;
#endif

  printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_preonepsi, p.MEASURE_TIME_t[0], p.MEASURE_TIME_t[1], p.MEASURE_TIME_t[2], tt_total-tt_preonepsi-p.MEASURE_TIME_t[0]-p.MEASURE_TIME_t[1]-p.MEASURE_TIME_t[2], tt_total);
//  printf("%.6e\n", tt);

  free(N);
  free(n);

  /** finalise the one dimensional plan */
  nfft_finalize(&p);
}