void so3CombineCoef_fftw( int bwIn,
			  int bwOut,
			  int degLim,
			  REAL *sigCoefR, REAL *sigCoefI,
			  REAL *patCoefR, REAL *patCoefI,
			  fftw_complex *so3Coef )
{
  int l, m1, m2 ;
  int fudge, dummy ;
  REAL tmpSigCoefR, tmpSigCoefI ;
  REAL tmpPatCoefR, tmpPatCoefI ;

  /* for sanity, set all so3coefs to 0 */
  memset( so3Coef, 0, sizeof(fftw_complex) * totalCoeffs_so3( bwOut ) );

  for( l = 0 ; l <= degLim ; l ++ )
    {
      for( m1 = -l ; m1 <= l ; m1 ++ )
	{
	  /* grab signal coefficient, at this degree and order */
	  dummy = seanindex(-m1, l, bwIn) ;
	  tmpSigCoefR = sigCoefR[ dummy ];
	  tmpSigCoefI = sigCoefI[ dummy ];

	  /* need to reset the -1 fudge factor */
	  if ( (m1 + l) % 2 )
	    fudge = -1 ;
	  else
	    fudge = 1 ;

	  for( m2 = -l ; m2 <= l ; m2 ++ )
	    {
	      dummy = seanindex( -m2, l, bwIn );

	      /*
		first take the CONJUGATE of the pattern coef
		and multiply it by the fudge factor
	      */
	      tmpPatCoefR = fudge *   patCoefR[ dummy ];
	      tmpPatCoefI = fudge * (-patCoefI[ dummy ]);

	      /* now multiply the signal coef by the pattern coef,
		 and save it in the so3 coefficient array */
	      
	      dummy = so3CoefLoc( m1, m2, l, bwOut ) ;

	      so3Coef[ dummy ][0] =
		tmpSigCoefR*tmpPatCoefR -
		tmpSigCoefI*tmpPatCoefI ;

	      so3Coef[ dummy ][1] =
		tmpSigCoefR*tmpPatCoefI +
		tmpSigCoefI*tmpPatCoefR ;

	      /* multiply fudge factor by -1 */
	      fudge *= -1 ;
	    }
	}
    }
}
Exemple #2
0
void so3CombineCoef( int bwIn,
		     int bwOut,
		     int degLim,
		     double *sigCoefR, double *sigCoefI,
		     double *patCoefR, double *patCoefI,
		     double *so3CoefR, double *so3CoefI )
{
  int l, m1, m2 ;
  int fudge, dummy ;
  double tmpSigCoefR, tmpSigCoefI ;
  double tmpPatCoefR, tmpPatCoefI ;
  double wigNorm ;

  /* for sanity, set all so3coefs to 0 */
  memset( so3CoefR, 0, sizeof(double) * totalCoeffs_so3( bwOut ) );
  memset( so3CoefI, 0, sizeof(double) * totalCoeffs_so3( bwOut ) );

  for( l = 0 ; l <= degLim ; l ++ )
    {
      wigNorm = 2.*M_PI*sqrt(2./(2.*l+1)) ;
      for( m1 = -l ; m1 <= l ; m1 ++ )
	{
	  /* grab signal coefficient, at this degree and order */
	  dummy = seanindex(-m1, l, bwIn) ;
	  tmpSigCoefR = sigCoefR[ dummy ];
	  tmpSigCoefI = sigCoefI[ dummy ];

	  /* need to reset the -1 fudge factor */
	  if ( (m1 + l) % 2 )
	    fudge = -1 ;
	  else
	    fudge = 1 ;

	  for( m2 = -l ; m2 <= l ; m2 ++ )
	    {
	      dummy = seanindex( -m2, l, bwIn );

	      /*
		first take the CONJUGATE of the pattern coef
		and multiply it by the fudge factor
	      */
	      tmpPatCoefR = fudge *   patCoefR[ dummy ];
	      tmpPatCoefI = fudge * (-patCoefI[ dummy ]);

	      /* now multiply the signal coef by the pattern coef,
		 and save it in the so3 coefficient array */
	      
	      dummy = so3CoefLoc( m1, m2, l, bwOut ) ;
	      so3CoefR[ dummy ] = wigNorm *
		(
		 tmpSigCoefR*tmpPatCoefR -
		 tmpSigCoefI*tmpPatCoefI ) ;

	      so3CoefI[ dummy ] = wigNorm *
		(
		 tmpSigCoefR*tmpPatCoefI +
		 tmpSigCoefI*tmpPatCoefR ) ;

	      /* multiply fudge factor by -1 */
	      fudge *= -1 ;
	    }
	}
    }
}
void FST_semi_memo(double *rdata, double *idata,
		   double *rcoeffs, double *icoeffs, 
		   int bw,
		   double **seminaive_naive_table,
		   double *workspace,
		   int dataformat,
		   int cutoff,
		   fftw_plan *dctPlan,
		   fftw_plan *fftPlan,
		   double *weights )
{
  int size, m, i, j;
  int dummy0, dummy1 ;
  double *rres, *ires;
  double *rdataptr, *idataptr;
  double *fltres, *scratchpad;
  double *eval_pts;
  double pow_one;
  double tmpA, tmpSize ;

  size = 2*bw ;
  tmpSize = 1./ ((double ) size);
  tmpA = sqrt( 2. * M_PI ) ;

  rres = workspace;  /* needs (size * size) = (4 * bw^2) */
  ires = rres + (size * size); /* needs (size * size) = (4 * bw^2) */ 
  fltres = ires + (size * size) ; /* needs bw  */
  eval_pts = fltres + bw; /* needs (2*bw)  */
  scratchpad = eval_pts + (2*bw); /* needs (4 * bw)  */
 
  /* total workspace is (8 * bw^2) + (7 * bw) */

  /* do the FFTs along phi */
  fftw_execute_split_dft( *fftPlan,
			  rdata, idata,
			  rres, ires ) ; 
  /*
    normalize

    note that I'm getting the sqrt(2pi) in there at
    this point ... to account for the fact that the spherical
    harmonics are of norm 1: I need to account for
    the fact that the associated Legendres are
    of norm 1
  */
  tmpSize *= tmpA ;
  for( j = 0 ; j < size*size ; j ++ )
    {
      rres[j] *= tmpSize  ;
      ires[j] *= tmpSize  ;
    }
  
  /* point to start of output data buffers */
  rdataptr = rcoeffs;
  idataptr = icoeffs;
  
  for (m=0; m<bw; m++) {
    /*
      fprintf(stderr,"m = %d\n",m);
    */
    
    /*** test to see if before cutoff or after ***/
    if (m < cutoff){
      
      /* do the real part */
      SemiNaiveReduced(rres+(m*size), 
		       bw, 
		       m, 
		       fltres,
		       scratchpad,
		       seminaive_naive_table[m],
		       weights,
		       dctPlan);
      
      /* now load real part of coefficients into output space */  
      memcpy(rdataptr, fltres, sizeof(double) * (bw - m));
      
      rdataptr += bw-m;
      
      /* do imaginary part */
      SemiNaiveReduced(ires+(m*size),
		       bw, 
		       m, 
		       fltres,
		       scratchpad,
		       seminaive_naive_table[m],
		       weights,
		       dctPlan);

      /* now load imaginary part of coefficients into output space */  
      memcpy(idataptr, fltres, sizeof(double) * (bw - m));
      
      idataptr += bw-m;
      
    }
    else{
      /* do real part */      
      Naive_AnalysisX(rres+(m*size),
		      bw,
		      m,
		      weights,
		      fltres,
		      seminaive_naive_table[m],
		      scratchpad );
      memcpy(rdataptr, fltres, sizeof(double) * (bw - m));
      rdataptr += bw-m;
      
      /* do imaginary part */
      Naive_AnalysisX(ires+(m*size),
		      bw,
		      m,
		      weights,
		      fltres,
		      seminaive_naive_table[m],
		      scratchpad );
      memcpy(idataptr, fltres, sizeof(double) * (bw - m));
      idataptr += bw-m;
    }
  }
  
  /*** now do upper coefficients ****/
  
  /* now if the data is real, we don't have to compute the
     coefficients whose order is less than 0, i.e. since
     the data is real, we know that
     f-hat(l,-m) = (-1)^m * conjugate(f-hat(l,m)),
     so use that to get the rest of the coefficients
     
     dataformat =0 -> samples are complex, =1 -> samples real
     
  */
  
  if( dataformat == 0 ){
    
    /* note that m is greater than bw here, but this is for
       purposes of indexing the input data arrays.  
       The "true" value of m as a parameter for Pml is
       size - m  */
    
    for (m=bw+1; m<size; m++) {
      /*
	fprintf(stderr,"m = %d\n",-(size-m));
      */
      
      if ( (size-m) < cutoff )
	{
	  /* do real part */
	  SemiNaiveReduced(rres+(m*size), 
			   bw, 
			   size-m, 
			   fltres,
			   scratchpad,
			   seminaive_naive_table[size-m],
			   weights,
			   dctPlan );
      
	  /* now load real part of coefficients into output space */  
	  if ((m % 2) != 0) {
	    for (i=0; i<m-bw; i++)
	      rdataptr[i] = -fltres[i];
	  }
	  else {
	    memcpy(rdataptr, fltres, sizeof(double) * (m - bw));
	  }
	  rdataptr += m-bw;
      
	  /* do imaginary part */
	  SemiNaiveReduced(ires+(m*size), 
			   bw, 
			   size-m, 
			   fltres,
			   scratchpad,
			   seminaive_naive_table[size-m],
			   weights,
			   dctPlan);
      
	  /* now load imag part of coefficients into output space */  
	  if ((m % 2) != 0) {
	    for (i=0; i<m-bw; i++)
	      idataptr[i] = -fltres[i];
	  }
	  else {
	    memcpy(idataptr, fltres, sizeof(double) * (m - bw));
	  }
	  idataptr += m-bw;
	}
      else
	{
	  Naive_AnalysisX(rres+(m*size),
			  bw,
			  size-m,
			  weights,
			  fltres,
			  seminaive_naive_table[size-m],
			  scratchpad);

	  /* now load real part of coefficients into output space */  
	  if ((m % 2) != 0) {
	    for (i=0; i<m-bw; i++)
	      rdataptr[i] = -fltres[i];
	  }
	  else {
	    memcpy(rdataptr, fltres, sizeof(double) * (m - bw));
	  }
	  rdataptr += m-bw;

	  /* do imaginary part */
	  Naive_AnalysisX(ires+(m*size),
			  bw,
			  size-m,
			  weights,
			  fltres,
			  seminaive_naive_table[size-m],
			  scratchpad);

	  /* now load imag part of coefficients into output space */  
	  if ((m % 2) != 0) {
	    for (i=0; i<m-bw; i++)
	      idataptr[i] = -fltres[i];
	  }
	  else {
	    memcpy(idataptr, fltres, sizeof(double) * (m - bw));
	  }
	  idataptr += m-bw;

	}

    }
  }
  else        /**** if the data is real ****/
    {            
      pow_one = 1.0;
      for(i = 1; i < bw; i++){
	pow_one *= -1.0;
	for( j = i; j < bw; j++){
	  /*
	  SEANINDEXP(dummy0,i,j,bw);
	  SEANINDEXN(dummy1,-i,j,bw);
	  */

	  dummy0 = seanindex(i, j, bw);
	  dummy1 = seanindex(-i, j, bw);

	  rcoeffs[dummy1] =
	    pow_one * rcoeffs[dummy0];
	  icoeffs[dummy1] =
	    -pow_one * icoeffs[dummy0];
	}
      }
    }
  
}
Exemple #4
0
int main(int argc, char **argv)
{
  FILE *errorsfp;
  int i, j, bw, size, loops;
  int l, m, dummy, cutoff ;
  int rank, howmany_rank ;
  double *rcoeffs, *icoeffs, *rdata, *idata, *rresult, *iresult;
  double *workspace, *weights;
  double dumx, dumy ;
  double *relerror, *curmax, granderror, grandrelerror;
  double realtmp, imagtmp,origmag, tmpmag;
  double ave_error, ave_relerror, stddev_error, stddev_relerror;
  double total_time, for_time, inv_time;
  double tstart, tstop;
  time_t seed;
  fftw_plan dctPlan, idctPlan ;
  fftw_plan fftPlan, ifftPlan ;
  fftw_iodim dims[1], howmany_dims[1];

  if (argc < 3)
    {
      fprintf(stdout,"Usage: test_s2_semi_fly bw loops [error_file]\n");
      exit(0);
    }

  bw = atoi(argv[1]);
  loops = atoi(argv[2]);

  /*** ASSUMING WILL SEMINAIVE ALL ORDERS ***/
  cutoff = bw ;

  size = 2*bw;
  total_time = 0.0;
  for_time = 0.0;
  inv_time = 0.0;
  granderror = 0.0;
  grandrelerror = 0.0; 

  /*
    allocate memory
  */

  rcoeffs = (double *) malloc(sizeof(double) * (bw * bw));
  icoeffs = (double *) malloc(sizeof(double) * (bw * bw));
  rdata = (double *) malloc(sizeof(double) * (size * size));
  idata = (double *) malloc(sizeof(double) * (size * size));
  rresult = (double *) malloc(sizeof(double) * (bw * bw));
  iresult = (double *) malloc(sizeof(double) * (bw * bw));
  workspace = (double *) malloc(sizeof(double) * 
				((10 * (bw*bw)) + 
				 (24 * bw)));

  /** space for errors **/
  relerror = (double *) malloc(sizeof(double) * loops);
  curmax = (double *) malloc(sizeof(double) * loops);

  /* make array for weights */
  weights = (double *) malloc(sizeof(double) * 4 * bw);


  /****
    At this point, check to see if all the memory has been
    allocated. If it has not, there's no point in going further.
    ****/
  if ( (rdata == NULL) || (idata == NULL) ||
       (rresult == NULL) || (iresult == NULL) ||
       (rcoeffs == NULL) || (icoeffs == NULL) ||
       (workspace == NULL) || (weights == NULL) )
    {
      perror("Error in allocating memory");
      exit( 1 ) ;
    }

  /*** generate a seed, needed to generate random data ***/
  time(&seed);
  srand48( seed );

  /*
    construct fftw plans
  */

  /* make DCT plans -> note that I will be using the GURU
     interface to execute these plans within the routines*/

  /* forward DCT */
  dctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata,
			      FFTW_REDFT10, FFTW_ESTIMATE ) ;
      
  /* inverse DCT */
  idctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata,
			       FFTW_REDFT01, FFTW_ESTIMATE );

  /*
    fftw "preamble" ;
    note that this plan places the output in a transposed array
  */
  rank = 1 ;
  dims[0].n = 2*bw ;
  dims[0].is = 1 ;
  dims[0].os = 2*bw ;
  howmany_rank = 1 ;
  howmany_dims[0].n = 2*bw ;
  howmany_dims[0].is = 2*bw ;
  howmany_dims[0].os = 1 ;
  
  /* forward fft */
  fftPlan = fftw_plan_guru_split_dft( rank, dims,
				      howmany_rank, howmany_dims,
				      rdata, idata,
				      workspace, workspace+(4*bw*bw),
				      FFTW_ESTIMATE );
  
  /*
    now plan for inverse fft - note that this plans assumes
    that I'm working with a transposed array, e.g. the inputs
    for a length 2*bw transform are placed every 2*bw apart,
    the output will be consecutive entries in the array
  */
  rank = 1 ;
  dims[0].n = 2*bw ;
  dims[0].is = 2*bw ;
  dims[0].os = 1 ;
  howmany_rank = 1 ;
  howmany_dims[0].n = 2*bw ;
  howmany_dims[0].is = 1 ;
  howmany_dims[0].os = 2*bw ;

  /* inverse fft */
  ifftPlan = fftw_plan_guru_split_dft( rank, dims,
				       howmany_rank, howmany_dims,
				       rdata, idata,
				       workspace, workspace+(4*bw*bw),
				       FFTW_ESTIMATE );

  /* now make the weights */
  makeweights( bw, weights );

  /*
    now start the looping
  */
  fprintf(stdout,"about to enter loop\n\n");
  for(i=0; i<loops; i++){

    /****
	 loop to generate spherical harmonic coefficients
	 of a real-valued function
    *****/
    for(m=0;m<bw;m++)
      for(l=m;l<bw;l++){
	dumx = 2.0 * (drand48()-0.5);
	dumy = 2.0 * (drand48()-0.5);
	dummy = seanindex(m,l,bw);
	rcoeffs[dummy] = dumx;
	icoeffs[dummy] = dumy;
	dummy = seanindex(-m,l,bw);
	rcoeffs[dummy] = ((double) pow(-1.0, (double) m)) * dumx;
	icoeffs[dummy] = ((double) pow(-1.0, (double) (m + 1))) * dumy;
      }

    /* have to zero out the m=0 coefficients, since those are real */
    for(m=0;m<bw;m++)
      icoeffs[m] = 0.0;

    /* do the inverse spherical transform */
    tstart = csecond();    

    InvFST_semi_fly(rcoeffs,icoeffs,
		    rdata, idata,
		    bw,
		    workspace,
		    1,
		    cutoff,
		    &idctPlan,
		    &ifftPlan );

    tstop = csecond();
    inv_time += (tstop - tstart);
    
    fprintf(stdout,"inv time \t = %.4e\n", tstop - tstart);
    
    /* now do the forward spherical transform */
    tstart = csecond();

    FST_semi_fly(rdata, idata,
		 rresult, iresult,
		 bw,
		 workspace,
		 1,
		 cutoff,
		 &dctPlan,
		 &fftPlan,
		 weights ) ;

    tstop = csecond();    
    for_time += (tstop - tstart);
    
    fprintf(stdout,"forward time \t = %.4e\n", tstop - tstart);

    /* now to compute the error */
    relerror[i] = 0.0;
    curmax[i] = 0.0;
    for(j=0;j<(bw*bw);j++){
      realtmp = rresult[j]-rcoeffs[j];
      imagtmp = iresult[j]-icoeffs[j];
      origmag = sqrt((rcoeffs[j]*rcoeffs[j]) + (icoeffs[j]*icoeffs[j]));
      tmpmag  = sqrt((realtmp*realtmp) + (imagtmp*imagtmp));
      relerror[i] = max(relerror[i],tmpmag/(origmag + pow(10.0, -50.0)));
      curmax[i]  = max(curmax[i],tmpmag);
    }

    fprintf(stdout,"r-o error\t = %.12f\n", curmax[i]);
    fprintf(stdout,"(r-o)/o error\t = %.12f\n\n", relerror[i]);

    granderror += curmax[i];
    grandrelerror += relerror[i];
    
  }

  total_time = inv_time + for_time;

  ave_error = granderror / ( (double) loops );
  ave_relerror = grandrelerror / ( (double) loops );
  stddev_error = 0.0 ; stddev_relerror = 0.0;
  for( i = 0 ; i < loops ; i ++ )
    {
      stddev_error += pow( ave_error - curmax[i] , 2.0 );
      stddev_relerror += pow( ave_relerror - relerror[i] , 2.0 );
    }
  /*** this won't work if loops == 1 ***/
  if( loops != 1 )
    {
      stddev_error = sqrt(stddev_error / ( (double) (loops - 1) ) );
      stddev_relerror = sqrt(stddev_relerror / ( (double) (loops - 1) ) );
    }

  fprintf(stdout,"Program: test_s2_semi_fly\n");
  fprintf(stdout,"Bandwidth = %d\n", bw);

#ifndef WALLCLOCK
  fprintf(stdout,"Total elapsed cpu time :\t\t %.4e seconds.\n",
	  total_time);
  fprintf(stdout,"Average cpu forward per iteration:\t %.4e seconds.\n",
	  for_time/((double) loops));  
  fprintf(stdout,"Average cpu inverse per iteration:\t %.4e seconds.\n",
	  inv_time/((double) loops));
#else
  fprintf(stdout,"Total elapsed wall time :\t\t %.4e seconds.\n",
	  total_time);
  fprintf(stdout,"Average wall forward per iteration:\t %.4e seconds.\n",
	  for_time/((double) loops));  
  fprintf(stdout,"Average wall inverse per iteration:\t %.4e seconds.\n",
	  inv_time/((double) loops));
#endif

  fprintf(stdout,"Average r-o error:\t\t %.4e\t",
	  granderror/((double) loops));
  fprintf(stdout,"std dev: %.4e\n",stddev_error);
  fprintf(stdout,"Average (r-o)/o error:\t\t %.4e\t",
	  grandrelerror/((double) loops));
  fprintf(stdout,"std dev: %.4e\n\n",stddev_relerror);

  if (argc == 4)
    {
      errorsfp = fopen(argv[3],"w");
      for(m = 0 ; m < bw ; m++ )
	{
	  for(l = m ; l< bw ; l++ )
	    {
	      dummy = seanindex(m,l,bw);
	      fprintf(errorsfp,
		      "dummy = %d\t m = %d\tl = %d\t%.10f  %.10f\n",
		      dummy, m, l,
		      fabs(rcoeffs[dummy] - rresult[dummy]),
		      fabs(icoeffs[dummy] - iresult[dummy]));
	      
	      dummy = seanindex(-m,l,bw);	      
	      fprintf(errorsfp,
		      "dummy = %d\t m = %d\tl = %d\t%.10f  %.10f\n",
		      dummy, -m, l,
		      fabs(rcoeffs[dummy] - rresult[dummy]),
		      fabs(icoeffs[dummy] - iresult[dummy]));
	    }
	}
      fclose(errorsfp);
    }

  /* destroy fftw plans */
  fftw_destroy_plan( ifftPlan );
  fftw_destroy_plan( fftPlan );
  fftw_destroy_plan( idctPlan );
  fftw_destroy_plan( dctPlan );

  /* free memory */
  free( weights );
  free(curmax);
  free(relerror);
  free(workspace);
  free(iresult);
  free(rresult);
  free(idata);
  free(rdata);
  free(icoeffs);
  free(rcoeffs);

  return 0 ;

}