Example #1
0
int ranhprob(int n, int a, int m) 
// hypergeometric sampling by rejection
// Devroye.  Computing (1987) General method for log-concave densities 
// where mode is known
/** 
 Urn with n balls, a of them black.  Pick m without replacement.
 Returns the number of black balls picked.

 Each trial proposes an offset from the mode (modehprob): with probability
 w/(1+w) the magnitude is DRAND()*w/pm, otherwise it is (w+ranexp())/pm
 (ranexp is presumably a unit exponential deviate -- confirm).  The magnitude
 is rounded with nnint, given a random sign, and accepted when
 U * min(1, exp(w - pm*y)) <= p(candidate)/p(mode).
 Candidates outside [0, a] are discarded before the density is evaluated.
*/
{
 double pm, logpm, w ;
 double pick, accept, y, rat ;
 int mode, step, zans ;

 mode = modehprob(n, a, m) ;
 logpm = loghprob(n, a, m, mode) ;
 pm = exp(logpm) ;
 w = 1 + pm ;

 while (1) {
  // both uniforms are drawn up front, before the branch on pick
  pick = DRAND() ;
  accept = DRAND() ;
  if (pick <= w/(1+w)) {
   y = DRAND()*w/pm ;
  }
  else {
   y = (w+ranexp())/pm ;
  }
  step = nnint(y) ;
  if (ranmod(2)==0) step = -step ;
  zans = mode+step ;
  if ((zans>=0) && (zans<=a)) {
   rat = exp(loghprob(n, a, m, zans)-logpm) ;
   accept *= MIN(1, exp(w-pm*y)) ;
   if (accept <= rat) break ;
  }
 }
 return zans ;

}
Example #2
0
/*
 * Out-of-place test of the threaded multi-dimensional transform.
 *
 * rank, n         dimensions of the transform (n holds rank sizes)
 * dir             transform direction used to build the plan under test
 * validated_plan  plan whose output is used as the reference answer
 *
 * For every input stride 1..MAX_STRIDE a random signal is generated in
 * in2 and replicated istride times (interleaved) into in1.  For every
 * output stride, howmany = min(istride, ostride) interleaved transforms
 * are run and each one is compared with the reference via CHECK.
 */
void testnd_out_of_place(int rank, int *n, fftw_direction dir,
			 fftwnd_plan validated_plan)
{
     int istride, ostride;
     int N, dim, i;
     fftw_complex *in1, *in2, *out1, *out2;
     fftwnd_plan p;
     int flags = measure_flag | wisdom_flag;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     /* total number of points in one transform */
     N = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];

     in1 = (fftw_complex *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_complex));
     out1 = (fftw_complex *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_complex));
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     p = fftwnd_create_plan(rank, n, dir, flags);

     /* fail with a message instead of executing a NULL plan */
     CHECK(p != NULL, "can't create plan");

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* generate random inputs */
	  for (i = 0; i < N; ++i) {
	       int j;
	       c_re(in2[i]) = DRAND();
	       c_im(in2[i]) = DRAND();
	       /* every interleaved copy carries the same signal as in2 */
	       for (j = 0; j < istride; ++j) {
		    c_re(in1[i * istride + j]) = c_re(in2[i]);
		    c_im(in1[i * istride + j]) = c_im(in2[i]);
	       }
	  }

	  for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) {
	       int howmany = (istride < ostride) ? istride : ostride;

	       /* the single-transform entry point is only tried (at random)
		  in the unit-stride, single-transform case */
	       if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
		    fftwnd_threads(nthreads, p, howmany, in1, istride, 1,
				   out1, ostride, 1);
	       else
		    fftwnd_threads_one(nthreads, p, in1, out1);

	       /* reference answer */
	       fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	       for (i = 0; i < howmany; ++i)
		    CHECK(compute_error_complex(out1 + i, ostride, out2, 1, N)
			  < TOLERANCE,
			  "testnd_out_of_place: wrong answer");
	  }
     }

     fftwnd_destroy_plan(p);

     fftw_free(out2);
     fftw_free(in2);
     fftw_free(out1);
     fftw_free(in1);
}
Example #3
0
/*************************************************
 * correctness tests
 *************************************************/
/*
 * Overwrite the first n elements of a with values from DRAND().
 * For each element the real part is drawn first, then the imaginary part.
 * Nothing is written when n <= 0.
 */
void fill_random(fftw_complex *a, int n)
{
     int k = 0;

     while (k < n) {
	  c_re(a[k]) = DRAND();
	  c_im(a[k]) = DRAND();
	  ++k;
     }
}
/*
 * Store n values from DRAND() into a, one every `stride` elements
 * (positions 0, stride, 2*stride, ...).  Elements in between are left
 * untouched.
 */
void fill_random(fftw_real * a, int n, int stride)
{
     int k = 0;

     while (k < n) {
	  a[k * stride] = DRAND();
	  ++k;
     }
}
Example #5
0
/* Pick a random list level in [0, maxlevel]: the level keeps growing
 * while DRAND() comes up below 0.5 and the cap has not been reached.
 * Note that a DRAND() draw is made before the cap is tested. */
static int random_level (int maxlevel)
{
  int level;

  for (level = 0; ; level++) {
    if (!(DRAND() < 0.5))
      break;
    if (level >= maxlevel)
      break;
  }

  return level;
}
Example #6
0
/**
 * Fill \m with 16 random floats (between 0 and 1, per DRAND), select the
 * matrix stack \pname and load \m into it.  The values are returned to the
 * caller in \m.
 */
static void
load_matrix(float m[16], const GLenum pname)
{
	int i = 0;

	glMatrixMode(pname);
	while (i < 16) {
		m[i] = DRAND();
		++i;
	}
	glLoadMatrixf(m);
}
/*************************************************
 * Ergun's test for real->complex transforms
 *************************************************/
/* Overwrite a[0..n-1] with successive DRAND() values (no-op for n <= 0). */
static void rfill_random(fftw_real *a, int n)
{
     int k = 0;

     while (k < n) {
	  a[k] = DRAND();
	  ++k;
     }
}
Example #8
0
double drand2()  
{
/** 
 DRAND is quantized 1/2^31 (per the original author's note),
 so two draws are combined to fill in the low-order bits:
 the result is coarse + fine * eps, where eps is the gap between
 1-DBL_EPSILON and (BIGINT-1)/BIGINT.
 eps is computed on the first call and cached in a static.
*/
  static double eps = -1.0 ;
  double top_double, top_drand ;
  double coarse, fine ;

  if (eps < 0.0) {
   top_double = 1.0-DBL_EPSILON  ;
   top_drand = (double) (BIGINT-1) / (double) BIGINT ;
   eps = top_double - top_drand ;
  }

  coarse = DRAND() ;
  fine = DRAND() ;
  return coarse + fine * eps ;
}
Example #9
0
int ranhprob(int n, int a, int m) 
// hypergeometric sampling by rejection.  Devroye
/** 
 Urn with n balls, a of them black.  Pick m without replacement.
 Returns the number of black balls picked.

 The sampler proposes an offset from the mode: with probability w/(1+w)
 the magnitude is uniform on [0, w/pm], otherwise it is (w+ranexp())/pm.
 A candidate is accepted when
   U * min(1, exp(w - pm*y)) <= p(candidate)/p(mode).
 The envelope used in the acceptance test must be the one the proposal
 was built from, hence exp(w - pm*y).
*/
{
 double v, y ;
 double pm, logpm, w, ru, rw, rat ;
 int mode, x, zans ;

 // mode of the hypergeometric law is floor((a+1)(m+1)/(n+2));
 // dividing by n+1 can leave the support (e.g. n = a = m gives n+1)
 v = ((double) (a+1) * (double) (m+1)) / ((double) n + 2.0) ; 
 mode = (int) v ;

 logpm = loghprob(n, a, m, mode) ;
 pm = exp(logpm) ;              
 w = 1 + pm ; 
 for (;;) { 
  ru = DRAND() ;
  rw = DRAND() ;
  if (ru <= w/(1+w)) y = DRAND()*w/pm ;
  else y = (w+ranexp())/pm ;
  x = nnint(y) ; 
  if (ranmod(2)==0) x = -x ;
  zans = mode+x ;
  // candidates outside [0, a] are rejected before evaluating the density
  if (zans<0) continue ;
  if (zans>a) continue ;
  rat = exp(loghprob(n, a, m, zans)-logpm) ; 
  rw *= MIN(1, exp(w-pm*y)) ;
  if (rw <= rat) break ;
 }
 return zans ;
 
}
Example #10
0
/**
 * Set fixed-function GL state (materials, lights, light model, texgen,
 * fog, clip planes, point parameters) to random values and, after each
 * change, call check_prg_param() with the value expected for the named
 * "state.*" program binding.
 *
 * \return PIGLIT_PASS if every check_prg_param() call returned true,
 * PIGLIT_FAIL otherwise.  All checks are executed even after a failure
 * because each result is and-ed into \c pass on the right-hand side.
 */
enum piglit_result
piglit_display(void)
{
	bool pass = true;
	float val[4];

	/* Material Property Bindings */
	for (int s = 0; s < 2; ++s) {
		for (int p = 0; p < 4; ++p) {
			const GLenum pname[] = {GL_EMISSION, GL_AMBIENT,
						GL_DIFFUSE, GL_SPECULAR};

			random_vec4(val);
			glMaterialfv(GL_FRONT + s, pname[p], val);
			pass = check_prg_param(val, "state.material.%s.%s",
					       s ? "back" : "front",
					       enum2program(pname[p])) &&
			       pass;

			/* The front material bindings are also accessible
			 * without ".front.".
			 */
			if (s == 0)
				pass = check_prg_param(
					       val, "state.material.%s",
					       enum2program(pname[p])) &&
				       pass;
		}

		/* Shininess is a scalar; the expected binding value is
		 * (shininess, 0, 0, 1).
		 */
		val[0] = DRAND();
		val[1] = 0;
		val[2] = 0;
		val[3] = 1;
		glMaterialf(GL_FRONT + s, GL_SHININESS, val[0]);
		pass = check_prg_param(val, "state.material.%s.shininess",
				       s ? "back" : "front") && pass;

		if (s == 0)
			pass = check_prg_param(val,
					       "state.material.shininess") &&
			       pass;
	}

	/* Light Property Bindings */
	int max_lights;
	glGetIntegerv(GL_MAX_LIGHTS, &max_lights);
	for (int l = 0; l < max_lights; ++l) {
		for (int p = 0; p < 4; ++p) {
			const GLenum pname[] = {GL_AMBIENT, GL_DIFFUSE,
						GL_SPECULAR, GL_POSITION};
			random_vec4(val);
			glLightfv(GL_LIGHT0 + l, pname[p], val);
			pass = check_prg_param(val, "state.light[%d].%s", l,
					       enum2program(pname[p])) &&
			       pass;
		}

		/* Expected attenuation vector: (constant, linear, quadratic,
		 * spot exponent).
		 */
		random_vec4(val);
		glLightf(GL_LIGHT0 + l, GL_CONSTANT_ATTENUATION, val[0]);
		glLightf(GL_LIGHT0 + l, GL_LINEAR_ATTENUATION, val[1]);
		glLightf(GL_LIGHT0 + l, GL_QUADRATIC_ATTENUATION, val[2]);
		glLightf(GL_LIGHT0 + l, GL_SPOT_EXPONENT, val[3]);
		pass = check_prg_param(val, "state.light[%d].attenuation",
				       l) && pass;

		/* Expected spot direction vector: xyz is the direction, w is
		 * the cosine of the cutoff angle (cutoff given in degrees).
		 */
		random_vec4(val);
		glLightfv(GL_LIGHT0 + l, GL_SPOT_DIRECTION, val);
		glLightf(GL_LIGHT0 + l, GL_SPOT_CUTOFF, val[3]);
		val[3] = cosf(val[3] / 180 * M_PI);
		pass = check_prg_param(val, "state.light[%d].spot.direction",
				       l) && pass;

		/* Expected half vector: the normalized light position with 1
		 * added to z, normalized again.
		 */
		for (int c = 0; c < 3; ++c)
			val[c] = DRAND();
		val[3] = 1;
		glLightfv(GL_LIGHT0 + l, GL_POSITION, val);
		normalize(val);
		val[2] += 1;
		normalize(val);
		pass = check_prg_param(val, "state.light[%d].half", l) &&
		       pass;
	}

	random_vec4(val);
	glLightModelfv(GL_LIGHT_MODEL_AMBIENT, val);
	pass = check_prg_param(val, "state.lightmodel.ambient") && pass;

	/* Scene color, accumulated component-wise while the state is set:
	 * material ambient * light model ambient + material emission.
	 */
	for (int s = 0; s < 2; ++s) {
		float scene_color[4];

		for (int c = 0; c < 4; ++c)
			scene_color[c] = val[c] = DRAND();
		glMaterialfv(GL_FRONT + s, GL_AMBIENT, val);
		for (int c = 0; c < 4; ++c)
			scene_color[c] *= val[c] = DRAND();
		glLightModelfv(GL_LIGHT_MODEL_AMBIENT, val);
		for (int c = 0; c < 4; ++c)
			scene_color[c] += val[c] = DRAND();
		glMaterialfv(GL_FRONT + s, GL_EMISSION, val);

		/* Page 63 (77 of the PDF) of the OpenGL 2.0 spec says:
		 *
		 *      "The value of A produced by lighting is the alpha
		 *      value associated with d_{cm}."
		 *
		 * I'm not sure if this applies to the scene color, but both
		 * Mesa and the NVIDIA driver do this.
		 */
		random_vec4(val);
		glMaterialfv(GL_FRONT + s, GL_DIFFUSE, val);
		scene_color[3] = val[3];

		pass = check_prg_param(scene_color,
				       "state.lightmodel.%s.scenecolor",
				       s ? "back" : "front") && pass;

		if (s == 0)
			pass = check_prg_param(
				       scene_color,
				       "state.lightmodel.scenecolor") && pass;
	}

	/* Light products: light color * material color per component, for
	 * each face, light and color kind.
	 */
	for (int s = 0; s < 2; ++s) {
		for (int l = 0; l < max_lights; ++l) {
			const GLenum pname[] = {GL_AMBIENT, GL_DIFFUSE,
						GL_SPECULAR};
			for (int p = 0; p < 3; ++p) {
				float light_product[4];
				for (int c = 0; c < 4; ++c)
					light_product[c] = val[c] = DRAND();
				glLightfv(GL_LIGHT0 + l, pname[p], val);
				for (int c = 0; c < 4; ++c)
					light_product[c] *= val[c] = DRAND();
				glMaterialfv(GL_FRONT + s, pname[p], val);
				/* XXX: I have no Idea where the spec says the
				 * alpha value of the light product is the
				 * material's alpha value, but both Mesa and
				 * the NVIDIA driver do this.
				 */
				light_product[3] = val[3];

				pass = check_prg_param(
					       light_product,
					       "state.lightprod[%d].%s.%s", l,
					       s ? "back" : "front",
					       enum2program(pname[p])) &&
				       pass;

				if (s == 0)
					pass = check_prg_param(
						       light_product,
						       "state.lightprod[%d]."
						       "%s",
						       l,
						       enum2program(
							       pname[p])) &&
					       pass;
			}
		}
	}

	/* Texture Coordinate Generation Property Bindings */
	int max_texture_coords;
	glGetIntegerv(GL_MAX_TEXTURE_COORDS, &max_texture_coords);
	for (int t = 0; t < max_texture_coords; ++t) {
		const GLenum coord[] = {GL_S, GL_T, GL_R, GL_Q};
		glActiveTexture(GL_TEXTURE0 + t);

		for (int co = 0; co < 4; ++co) {
			const GLenum plane[] = {GL_EYE_PLANE,
						GL_OBJECT_PLANE};
			const char *plane_name[] = {"eye", "object"};
			for (int pl = 0; pl < 2; ++pl) {
				random_vec4(val);
				glTexGenfv(coord[co], plane[pl], val);
				pass = check_prg_param(
					       val, "state.texgen[%d].%s.%s",
					       t, plane_name[pl],
					       enum2program(coord[co])) &&
				       pass;
				/* Unit 0 is also checked without an index. */
				if (t == 0)
					pass = check_prg_param(
						       val,
						       "state.texgen.%s.%s",
						       plane_name[pl],
						       enum2program(
							       coord[co])) &&
					       pass;
			}
		}
	}

	/* Fog Property Bindings */
	random_vec4(val);
	glFogfv(GL_FOG_COLOR, val);
	pass = check_prg_param(val, "state.fog.color") && pass;

	/* Expected fog params: (density, start, end, 1 / (end - start)). */
	random_vec4(val);
	glFogf(GL_FOG_DENSITY, val[0]);
	glFogf(GL_FOG_START, val[1]);
	glFogf(GL_FOG_END, val[2]);
	val[3] = 1 / (val[2] - val[1]);
	pass = check_prg_param(val, "state.fog.params") && pass;

	/* Clip Plane Property Bindings */
	int max_clip_planes;
	glGetIntegerv(GL_MAX_CLIP_PLANES, &max_clip_planes);
	for (int cp = 0; cp < max_clip_planes; ++cp) {
		/* glClipPlane takes doubles; val keeps the float copy that
		 * is passed to the check.
		 */
		double vald[4];
		for (int c = 0; c < 4; ++c)
			vald[c] = val[c] = DRAND();
		glClipPlane(GL_CLIP_PLANE0 + cp, vald);
		pass = check_prg_param(val, "state.clip[%d].plane", cp) &&
		       pass;
	}

	/* Point Property Bindings */
	/* Expected point size vector: (size, min, max, fade threshold). */
	random_vec4(val);
	glPointSize(val[0]);
	glPointParameterf(GL_POINT_SIZE_MIN, val[1]);
	glPointParameterf(GL_POINT_SIZE_MAX, val[2]);
	glPointParameterf(GL_POINT_FADE_THRESHOLD_SIZE, val[3]);
	pass = check_prg_param(val, "state.point.size") && pass;

	/* The attenuation is set from val[0..2]; w of the expected value
	 * is 1.
	 */
	random_vec4(val);
	val[3] = 1;
	glPointParameterfv(GL_POINT_DISTANCE_ATTENUATION, val);
	pass = check_prg_param(&val[0], "state.point.attenuation") && pass;

	return pass ? PIGLIT_PASS : PIGLIT_FAIL;
}
Example #11
0
/**
 * Store four consecutive DRAND() values in v[0] through v[3], in that order.
 */
static void
random_vec4(float *v)
{
	v[0] = DRAND();
	v[1] = DRAND();
	v[2] = DRAND();
	v[3] = DRAND();
}
Example #12
0
/*
 * In-place test of the multi-dimensional complex transform.
 *
 * rank, n         dimensions of the transform (n holds rank sizes)
 * dir             transform direction used to build the plan under test
 * validated_plan  plan whose output is used as the reference answer
 * alternate_api   if nonzero and rank is 2 or 3, create the plan through
 *                 the fftw2d / fftw3d entry points instead of fftwnd
 * specific        if nonzero, use the *_create_plan_specific variants
 * force_buffered  if nonzero, add FFTWND_FORCE_BUFFERED to the plan flags
 *
 * For every stride 1..MAX_STRIDE a random signal is generated in in2 and
 * replicated istride times (interleaved) into in1, which is transformed in
 * place; each interleaved result is compared with the reference via CHECK.
 */
void testnd_in_place(int rank, int *n, fftw_direction dir,
		     fftwnd_plan validated_plan,
		     int alternate_api, int specific, int force_buffered)
{
     int istride;
     int N, dim, i;
     fftw_complex *in1, *in2, *out2;
     fftwnd_plan p;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     if (force_buffered)
	  flags |= FFTWND_FORCE_BUFFERED;

     /* total number of points in one transform */
     N = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];

     in1 = (fftw_complex *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_complex));
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     if (!specific) {
	  if (alternate_api && (rank == 2 || rank == 3)) {
	       if (rank == 2)
		    p = fftw2d_create_plan(n[0], n[1], dir, flags);
	       else
		    p = fftw3d_create_plan(n[0], n[1], n[2], dir, flags);
	  } else		/* standard api */
	       p = fftwnd_create_plan(rank, n, dir, flags);
     } else {			/* specific plan creation */
	  if (alternate_api && (rank == 2 || rank == 3)) {
	       if (rank == 2)
		    p = fftw2d_create_plan_specific(n[0], n[1], dir, flags,
						    in1, 1,
					       (fftw_complex *) NULL, 1);
	       else
		    p = fftw3d_create_plan_specific(n[0], n[1], n[2], dir, flags,
						    in1, 1,
					       (fftw_complex *) NULL, 1);
	  } else		/* standard api */
	       p = fftwnd_create_plan_specific(rank, n, dir, flags,
					       in1, 1,
					       (fftw_complex *) NULL, 1);

     }

     /* fail with a message instead of executing a NULL plan */
     CHECK(p != NULL, "can't create plan");

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* 
	   * generate random inputs */
	  for (i = 0; i < N; ++i) {
	       int j;
	       c_re(in2[i]) = DRAND();
	       c_im(in2[i]) = DRAND();
	       /* every interleaved copy carries the same signal as in2 */
	       for (j = 0; j < istride; ++j) {
		    c_re(in1[i * istride + j]) = c_re(in2[i]);
		    c_im(in1[i * istride + j]) = c_im(in2[i]);
	       }
	  }

	  /* the single-transform entry point is only tried (at random)
	     when the stride is 1 */
	  if (istride != 1 || coinflip())
	       fftwnd(p, istride, in1, istride, 1, (fftw_complex *) NULL, 1, 1);
	  else
	       fftwnd_one(p, in1, NULL);

	  /* reference answer */
	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  for (i = 0; i < istride; ++i)
	       CHECK(compute_error_complex(in1 + i, istride, out2, 1, N) < TOLERANCE,
		     "testnd_in_place: wrong answer");
     }

     fftwnd_destroy_plan(p);

     fftw_free(out2);
     fftw_free(in2);
     fftw_free(in1);
}
Example #13
0
/*
 * In-place test of the one-dimensional complex transform.
 *
 * n               transform size
 * istride         distance between consecutive elements in the work array
 * howmany         number of back-to-back transforms
 * dir             direction used to build the plan under test
 * validated_plan  plan whose output is used as the reference answer
 * specific        if nonzero, build the plan with fftw_create_plan_specific
 *
 * The slots between strided elements are filled with a known pattern and
 * checked afterwards to make sure the transform left them alone.
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
		   fftw_plan validated_plan, int specific)
{
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;
     fftw_complex *work, *ref_in, *ref_out;
     fftw_plan plan;
     int idx, pad, batch;

     if (coinflip())
          flags |= FFTW_THREADSAFE;

     work = (fftw_complex *) fftw_malloc(istride * n * sizeof(fftw_complex) * howmany);
     ref_in = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
     ref_out = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

     plan = specific
          ? fftw_create_plan_specific(n, dir, flags,
                                      work, istride,
                                      (fftw_complex *) NULL, 0)
          : fftw_create_plan(n, dir, flags);

     /* random signal: stored in the reference input, mirrored into the
        strided work array (real part drawn before imaginary part) */
     for (idx = 0; idx < n * howmany; ++idx) {
          c_re(ref_in[idx]) = DRAND();
          c_re(work[idx * istride]) = c_re(ref_in[idx]);
          c_im(ref_in[idx]) = DRAND();
          c_im(work[idx * istride]) = c_im(ref_in[idx]);
     }

     /* sentinel pattern in the gaps between strided elements */
     for (pad = 1; pad < istride; ++pad) {
          for (idx = 0; idx < n * howmany; ++idx) {
               c_re(work[idx * istride + pad]) = idx * istride + pad;
               c_im(work[idx * istride + pad]) = idx * istride - pad;
          }
     }

     CHECK(plan != NULL, "can't create plan");
     WHEN_VERBOSE(2, fftw_print_plan(plan));

     /* transform; fftw_one is only tried (at random) for a single
        unit-stride transform */
     if (howmany == 1 && istride == 1 && !coinflip())
          fftw_one(plan, work, NULL);
     else
          fftw(plan, howmany, work, istride, n * istride,
               (fftw_complex *) NULL, 0, 0);

     fftw_destroy_plan(plan);

     /* the sentinels must have survived */
     for (pad = 1; pad < istride; ++pad) {
          for (idx = 0; idx < n * howmany; ++idx) {
               CHECK(c_re(work[idx * istride + pad]) == idx * istride + pad &&
                     c_im(work[idx * istride + pad]) == idx * istride - pad,
                     "input has been overwritten");
          }
     }

     /* reference answer, one transform at a time */
     for (batch = 0; batch < howmany; ++batch)
          fftw(validated_plan, 1, ref_in + n * batch, 1, n,
               ref_out + n * batch, 1, n);

     CHECK(compute_error_complex(work, istride, ref_out, 1, n * howmany) < TOLERANCE,
           "test_in_place: wrong answer");
     WHEN_VERBOSE(2, printf("OK\n"));

     fftw_free(work);
     fftw_free(ref_in);
     fftw_free(ref_out);
}
Example #14
0
/*
 * Multi-dimensional counterpart of test_ergun.  Runs three randomized
 * checks (20 rounds each) on the given plan: linearity, the transform of
 * a real and of an imaginary unit impulse, and the time-shift property
 * along every dimension.  Mismatches are reported by array_compare.
 */
void testnd_ergun(int rank, int *n, fftw_direction dir, fftwnd_plan plan)
{
     fftw_complex *inA, *inB, *inC, *outA, *outB, *outC;
     fftw_complex *tmp;
     fftw_complex impulse;
     FFTW_TRIG_REAL twopin;
     const int rounds = 20;
     int N = 1;
     int n_before, n_after;
     int d, r, k, which_impulse;

     /* total number of points */
     for (d = 0; d < rank; ++d)
          N *= n[d];

     inA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     inB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     inC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     outA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     outB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     outC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     tmp = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     WHEN_VERBOSE(2,
                  printf("Validating plan, N = %d, dir = %s\n", N,
                         dir == FFTW_FORWARD ? "FORWARD" : "BACKWARD"));

     /* test 1: linearity -- F(alpha*A + beta*B) against
        alpha*F(A) + beta*F(B) */
     for (r = 0; r < rounds; ++r) {
          fftw_complex alpha, beta;

          c_re(alpha) = DRAND();
          c_im(alpha) = DRAND();
          c_re(beta) = DRAND();
          c_im(beta) = DRAND();
          fill_random(inA, N);
          fill_random(inB, N);

          fftwnd(plan, 1, inA, 1, N, outA, 1, N);
          fftwnd(plan, 1, inB, 1, N, outB, 1, N);
          array_scale(outA, alpha, N);
          array_scale(outB, beta, N);
          array_add(tmp, outA, outB, N);

          array_scale(inA, alpha, N);
          array_scale(inB, beta, N);
          array_add(inC, inA, inB, N);
          fftwnd(plan, 1, inC, 1, N, outC, 1, N);

          array_compare(outC, tmp, N);
     }

     /*
      * test 2: unit impulse, once real and once imaginary.  The expected
      * transform has every element equal to the impulse value.
      */
     for (which_impulse = 0; which_impulse < 2; ++which_impulse) {
          const int real_case = (which_impulse == 0);

          c_re(impulse) = real_case ? 1.0 : 0.0;
          c_im(impulse) = real_case ? 0.0 : 1.0;

          for (k = 0; k < N; ++k) {
               c_re(inA[k]) = 0.0;
               c_im(inA[k]) = 0.0;
               outA[k] = impulse;
          }
          inA[0] = impulse;

          /* split the impulse as B + (A - B) with random B and compare
             F(B) + F(A - B) with the expected transform */
          for (r = 0; r < rounds; ++r) {
               fill_random(inB, N);
               array_sub(inC, inA, inB, N);
               fftwnd(plan, 1, inB, 1, N, outB, 1, N);
               fftwnd(plan, 1, inC, 1, N, outC, 1, N);
               array_add(tmp, outB, outC, N);
               array_compare(tmp, outA, N);
          }
     }

     /* test 3: time-shift property, checked along each dimension */
     /* the paper performs more tests, but this code should be fine too */
     n_before = 1;
     n_after = N;
     for (d = 0; d < rank; ++d) {
          const int n_cur = n[d];

          n_after /= n_cur;
          twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n_cur;

          for (r = 0; r < rounds; ++r) {
               int jb, j, ja;

               fill_random(inA, N);
               array_rol(inB, inA, n_cur, n_before, n_after);
               fftwnd(plan, 1, inA, 1, N, outA, 1, N);
               fftwnd(plan, 1, inB, 1, N, outB, 1, N);

               /* multiply F(shifted) by the phase factor c + i*s that
                  belongs to index j of the current dimension */
               for (jb = 0; jb < n_before; ++jb) {
                    for (j = 0; j < n_cur; ++j) {
                         const FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
                         const FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);
                         const int base = (jb * n_cur + j) * n_after;

                         for (ja = 0; ja < n_after; ++ja) {
                              const int at = base + ja;

                              c_re(tmp[at]) =
                                  c_re(outB[at]) * c - c_im(outB[at]) * s;
                              c_im(tmp[at]) =
                                  c_re(outB[at]) * s + c_im(outB[at]) * c;
                         }
                    }
               }

               array_compare(tmp, outA, N);
          }

          n_before *= n_cur;
     }

     WHEN_VERBOSE(2, printf("Validation done\n"));

     fftw_free(tmp);
     fftw_free(outC);
     fftw_free(outB);
     fftw_free(outA);
     fftw_free(inC);
     fftw_free(inB);
     fftw_free(inA);
}
Example #15
0
/*
 * Randomized FFT self-test after
 *
 * Funda Ergün. Testing multivariate linear functions: Overcoming the
 * generator bottleneck. In Proceedings of the Twenty-Seventh Annual
 * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
 * Nevada, 29 May--1 June 1995.
 *
 * Three checks of 20 rounds each are run on the plan: linearity, the
 * transform of the unit impulse, and the time-shift property.
 * Mismatches are reported by array_compare.
 */
void test_ergun(int n, fftw_direction dir, fftw_plan plan)
{
     const FFTW_TRIG_REAL twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n;
     const int rounds = 20;
     fftw_complex *inA, *inB, *inC, *outA, *outB, *outC;
     fftw_complex *tmp;
     fftw_complex impulse;
     int r, k;

     inA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     inB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     inC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     outA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     outB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     outC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     tmp = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));

     WHEN_VERBOSE(2,
                  printf("Validating plan, n = %d, dir = %s\n", n,
                         dir == FFTW_FORWARD ? "FORWARD" : "BACKWARD"));

     /* test 1: linearity -- F(alpha*A + beta*B) against
        alpha*F(A) + beta*F(B) */
     for (r = 0; r < rounds; ++r) {
          fftw_complex alpha, beta;

          c_re(alpha) = DRAND();
          c_im(alpha) = DRAND();
          c_re(beta) = DRAND();
          c_im(beta) = DRAND();
          fill_random(inA, n);
          fill_random(inB, n);

          fftw_out_of_place(plan, n, inA, outA);
          fftw_out_of_place(plan, n, inB, outB);
          array_scale(outA, alpha, n);
          array_scale(outB, beta, n);
          array_add(tmp, outA, outB, n);

          array_scale(inA, alpha, n);
          array_scale(inB, beta, n);
          array_add(inC, inA, inB, n);
          fftw_out_of_place(plan, n, inC, outC);

          array_compare(outC, tmp, n);
     }

     /* test 2: unit impulse; the expected transform is all ones */
     c_re(impulse) = 1.0;
     c_im(impulse) = 0.0;

     for (k = 0; k < n; ++k) {
          c_re(inA[k]) = 0.0;
          c_im(inA[k]) = 0.0;
          outA[k] = impulse;
     }
     inA[0] = impulse;

     /* split the impulse as B + (A - B) with random B and compare
        F(B) + F(A - B) with the expected transform */
     for (r = 0; r < rounds; ++r) {
          fill_random(inB, n);
          array_sub(inC, inA, inB, n);
          fftw_out_of_place(plan, n, inB, outB);
          fftw_out_of_place(plan, n, inC, outC);
          array_add(tmp, outB, outC, n);
          array_compare(tmp, outA, n);
     }

     /* test 3: time-shift property */
     /* the paper performs more tests, but this code should be fine too */
     for (r = 0; r < rounds; ++r) {
          fill_random(inA, n);
          array_rol(inB, inA, n, 1, 1);
          fftw_out_of_place(plan, n, inA, outA);
          fftw_out_of_place(plan, n, inB, outB);

          /* multiply F(shifted) by the phase factor c + i*s of index k */
          for (k = 0; k < n; ++k) {
               const FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(k * twopin);
               const FFTW_TRIG_REAL c = FFTW_TRIG_COS(k * twopin);

               c_re(tmp[k]) = c_re(outB[k]) * c - c_im(outB[k]) * s;
               c_im(tmp[k]) = c_re(outB[k]) * s + c_im(outB[k]) * c;
          }

          array_compare(tmp, outA, n);
     }

     WHEN_VERBOSE(2, printf("Validation done\n"));

     fftw_free(tmp);
     fftw_free(outC);
     fftw_free(outB);
     fftw_free(outA);
     fftw_free(inC);
     fftw_free(inB);
     fftw_free(inA);
}
/*
 * In-place test of the multi-dimensional real<->complex transforms.
 *
 * rank, n         dimensions of the transform (n holds rank sizes)
 * validated_plan  complex plan whose output is used as the reference
 * alternate_api   if nonzero and rank is 2 or 3, create the plans through
 *                 the rfftw2d / rfftw3d entry points
 * specific        if nonzero, use the *_create_plan_specific variants
 *
 * One buffer (in1) is viewed three ways: as the real input (in1), as the
 * complex output of the forward transform (out1) and as the real output
 * of the inverse transform (out3).  Each row of nr reals is stored in a
 * row of 2 * nhc reals, nhc = nr / 2 + 1.  For every stride
 * 1..MAX_STRIDE the forward result is compared with the first nhc
 * elements of each row of the reference complex transform, and the
 * scaled inverse result is compared with the original real input.
 */
void testnd_in_place(int rank, int *n, fftwnd_plan validated_plan,
		     int alternate_api, int specific)
{
     int istride, ostride, howmany;
     int N, dim, i, j, k;
     int nc, nhc, nr;
     fftw_real *in1, *out3;
     fftw_complex *in2, *out1, *out2;
     fftwnd_plan p, ip;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     /* N: total points; nr: size of the last dimension; nc: product of
        the other dimensions; nhc: complex elements per output row */
     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];
     if (rank > 0) {
	  nr = n[rank - 1];
	  nc = N / nr;
	  nhc = nr / 2 + 1;
     }
     in1 = (fftw_real *) fftw_malloc(2 * nhc * nc * MAX_STRIDE * sizeof(fftw_real));
     /* out3 and out1 alias in1: the transforms run in place */
     out3 = in1;
     out1 = (fftw_complex *) in1;
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     /* p is the real->complex plan, ip the complex->real plan */
     if (alternate_api && specific && (rank == 2 || rank == 3)) {
	  if (rank == 2) {
	       p = rfftw2d_create_plan_specific(n[0], n[1],
					     FFTW_REAL_TO_COMPLEX, flags,
						in1, MAX_STRIDE, 0, 0);
	       ip = rfftw2d_create_plan_specific(n[0], n[1],
					     FFTW_COMPLEX_TO_REAL, flags,
						 in1, MAX_STRIDE, 0, 0);
	  } else {
	       p = rfftw3d_create_plan_specific(n[0], n[1], n[2],
					     FFTW_REAL_TO_COMPLEX, flags,
						in1, MAX_STRIDE, 0, 0);
	       ip = rfftw3d_create_plan_specific(n[0], n[1], n[2],
					     FFTW_COMPLEX_TO_REAL, flags,
						 in1, MAX_STRIDE, 0, 0);
	  }
     } else if (specific) {
	  p = rfftwnd_create_plan_specific(rank, n, FFTW_REAL_TO_COMPLEX,
					   flags,
				       in1, MAX_STRIDE, in1, MAX_STRIDE);
	  ip = rfftwnd_create_plan_specific(rank, n, FFTW_COMPLEX_TO_REAL,
					    flags,
				       in1, MAX_STRIDE, in1, MAX_STRIDE);
     } else if (alternate_api && (rank == 2 || rank == 3)) {
	  if (rank == 2) {
	       p = rfftw2d_create_plan(n[0], n[1], FFTW_REAL_TO_COMPLEX,
				       flags);
	       ip = rfftw2d_create_plan(n[0], n[1], FFTW_COMPLEX_TO_REAL,
					flags);
	  } else {
	       p = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_REAL_TO_COMPLEX,
				       flags);
	       ip = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_COMPLEX_TO_REAL,
					flags);
	  }
     } else {
	  p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
	  ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     }

     CHECK(p != NULL && ip != NULL, "can't create plan");

     /* clear the whole shared buffer, padding included */
     for (i = 0; i < nc * nhc * 2 * MAX_STRIDE; ++i)
	  out3[i] = 0;

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* generate random inputs */
	  /* in2 gets the real signal with zero imaginary part; in1 gets
	     istride interleaved copies laid out in padded rows */
	  for (i = 0; i < nc; ++i)
	       for (j = 0; j < nr; ++j) {
		    c_re(in2[i * nr + j]) = DRAND();
		    c_im(in2[i * nr + j]) = 0.0;
		    for (k = 0; k < istride; ++k)
			 in1[(i * nhc * 2 + j) * istride + k]
			     = c_re(in2[i * nr + j]);
	       }

	  /* reference answer */
	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  howmany = ostride = istride;

	  WHEN_VERBOSE(2, printf("\n    testing in-place stride %d...",
				 istride));

	  /* the single-transform entry point is only tried (at random)
	     when the stride is 1 */
	  if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
	       rfftwnd_real_to_complex(p, howmany, in1, istride, 1,
				       out1, ostride, 1);
	  else
	       rfftwnd_one_real_to_complex(p, in1, NULL);

	  /* compare nhc complex outputs per row with the reference row */
	  for (i = 0; i < nc; ++i)
	       for (k = 0; k < howmany; ++k)
		    CHECK(compute_error_complex(out1 + i * nhc * ostride + k,
						ostride,
						out2 + i * nr, 1,
						nhc) < TOLERANCE,
			  "in-place (r2c): wrong answer");

	  if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
	       rfftwnd_complex_to_real(ip, howmany, out1, ostride, 1,
				       out3, istride, 1);
	  else
	       rfftwnd_one_complex_to_real(ip, out1, NULL);

	  /* scale the round trip by 1/N before comparing with the input */
	  for (i = 0; i < nc * nhc * 2 * istride; ++i)
	       out3[i] *= 1.0 / N;

	  /* in2 is read as reals with stride 2, i.e. its real parts only */
	  for (i = 0; i < nc; ++i)
	       for (k = 0; k < howmany; ++k)
		    CHECK(compute_error(out3 + i * nhc * 2 * istride + k,
					istride,
					(fftw_real *) (in2 + i * nr), 2,
					nr) < TOLERANCE,
			  "in-place (c2r): wrong answer (check 2)");
     }

     rfftwnd_destroy_plan(p);
     rfftwnd_destroy_plan(ip);

     fftw_free(out2);
     fftw_free(in2);
     fftw_free(in1);
}
Example #17
0
/*
 * Write `count` random amino-acid sequences, one per line, to
 * "GeneratedSequences.txt" in the current directory.
 *
 * For each sequence the length is (int) exp(mean_log_length +
 * stddev_log_length * gen_norm()) and the residue composition is drawn
 * with gen_dirch_mix().  Each residue is then picked by binary search of
 * a DRAND() value in the cumulative composition.
 *
 * Returns 0 on success, 1 if the file cannot be opened, written or
 * closed.  The file is closed on every path once it has been opened.
 */
int generate(int count)
{
    FILE *ofp;
    int length;     /* length of sequence */
    double probs[20];   /* probability mass function for AAs */
    double cum_probs[20]; /* probability distribution
                           * (cumulative sum of probs)
                           */
    int s;  /* counter for sequences */
    int p;  /* counter for position in sequence */
    int a;  /* counter for amino acid number */
    int alo, ahi, amid; /* variables for binary search to get amino acid */

    double sum; /* temporary for computing cum_probs */

    double x;   /* uniform random number */
    gen_dirch_mix_param comp_gen; /* composition generator */
    int status = 0; /* value returned to the caller */

    ofp = fopen("GeneratedSequences.txt", "w");
    if (ofp == NULL) {
        fprintf(stderr, "Can't open output file\n");
        return 1;
    }

    srandom(getpid());
    gen_dirch_mix_initialize(&comp_gen, 20, 6,  mix_coeff,
        (const double **)comps);

    for (s=0; s<count; s++)
    {
        length = (int) exp(mean_log_length +stddev_log_length*gen_norm());
        gen_dirch_mix(&comp_gen, probs);

        sum = 0.;
        /* convert probs to cumulative probabilities */
        for (a=0; a<20; a++)
        {
            sum += probs[a];
            cum_probs[a] =sum;
        }

        for (p=0; p< length; p++)
        {
            x = DRAND();
            /* do binary search to determine amino acid */
            alo = -1;    ahi=19;
            while (alo<ahi-1)
            {
                /* invariant:
                 *    cum_probs[alo] < x <=  cum_probs[ahi]
                 */
                assert (x <= cum_probs[ahi]);
                amid = (alo+ahi+1)/2;
                if (x > cum_probs[amid]) alo=amid;
                else ahi=amid;
            }
            if (fputc(AA[ahi], ofp) == EOF) {
                status = 1;
                goto done;
            }
        }
        if (fputc('\n', ofp) == EOF) {
            status = 1;
            goto done;
        }
    }

done:
    /* close on the error paths too, so the stream is never leaked */
    if (fclose(ofp) == EOF)
        status = 1;
    return status;
}
Example #18
0
/*
 * Fill vh[0..n3-1] with random velocities and rescale them.
 *
 * Steps:
 *   1. draw pairs of normal deviates with the polar (Marsaglia) method;
 *   2. split vh into three parts (n3/3 entries for the first two, the
 *      remainder for the third) and subtract each part's mean;
 *   3. accumulate the sum of squares in ekin and multiply every entry by
 *      h * SQRT(TEMPERATURE / (tscale * ekin)), with
 *      tscale = 16 / (numMoles - 1).
 *
 * h is the factor applied in the final scaling.  Always returns 0.
 */
int  InitVelocities(double h)
{
   int i, nmoles1, nmoles2, iseed;
   double ts, sp, sc, r, s;
   double u1, u2, v1, v2, ujunk,tscale;
   double DRAND(double);

   // first call is made with 4711 (presumably seeds the generator --
   // confirm against DRAND); later calls pass 0
   iseed = 4711;
   ujunk = DRAND(iseed);
   (void)ujunk;       //  explicitly turn off unused variable warning
   iseed = 0;
   tscale = (16.0)/(1.0*numMoles - 1.0);

   for ( i =0; i< n3; i=i+2) {
     // reject points outside the unit disc and the origin itself:
     // s == 0 would give log(0)/0 below and NaN velocities
     do {
       u1 = DRAND(iseed);
       u2 = DRAND(iseed);
       v1 = 2.0 * u1   - 1.0;
       v2 = 2.0 * u2   - 1.0;
       s  = v1*v1  + v2*v2 ;
     } while( s >= 1.0 || s == 0.0 );

     r = SQRT( -2.0*log(s)/s );

     vh[i]    = v1 * r;
     // when n3 is odd the last pair has no second slot
     if ( i+1 < n3 ) {
       vh[i+1]  = v2 * r;
     }
   }

   // There are three parts - repeat for each part
   nmoles1 = n3/3 ;
   nmoles2 = nmoles1 * 2;

   //  Find the average speed  for the 1st part
   sp   = 0.0 ;
   for ( i=0; i<nmoles1; i++) {
     sp = sp + vh[i];
   }
   sp   = sp/nmoles1;

   //  Subtract average from all velocities of 1st part
   for ( i=0; i<nmoles1; i++) {
     vh[i] = vh[i] - sp;
   }

   //  Find the average speed for 2nd part
   sp   = 0.0 ;
   for ( i=nmoles1; i<nmoles2; i++) {
     sp = sp + vh[i];
   }
   sp   = sp/(nmoles2-nmoles1);

   //  Subtract average from all velocities of 2nd part
   for ( i=nmoles1; i<nmoles2; i++) {
     vh[i] = vh[i] - sp;
   }

   //  Find the average speed for 3rd part
   sp   = 0.0 ;
   for ( i=nmoles2; i<n3; i++) {
     sp = sp + vh[i];
   }
   sp   = sp/(n3-nmoles2);

   //  Subtract average from all velocities of 3rd part
   for ( i=nmoles2; i<n3; i++) {
     vh[i] = vh[i] - sp;
   }

   // Determine total kinetic energy
   ekin = 0.0 ;

   for ( i=0 ; i< n3; i++ ) {
     ekin  = ekin  + vh[i]*vh[i] ;
   }

   // rescale so that the velocities match TEMPERATURE
   ts = tscale * ekin ;
   sc = h * SQRT(TEMPERATURE/ts);
   for ( i=0; i< n3; i++) {
     vh[i] = vh[i] * sc ;
   }

   return 0;
}
/*
 * Real-data (as opposed to complex) variant of the FFT self-test
 * described in
 *
 * Funda Ergün. Testing multivariate linear functions: Overcoming the
 * generator bottleneck. In Proceedings of the Twenty-Seventh Annual
 * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
 * Nevada, 29 May--1 June 1995.
 *
 * Runs three randomized checks of `plan` on arrays of n reals, 20
 * rounds each: linearity, the transform of a unit impulse, and the
 * time-shift property.  `dir` decides which side of the transform is
 * the randomly generated one in checks 2 and 3.  Every comparison is
 * delegated to rarray_compare (defined elsewhere).
 */
void test_ergun(int n, fftw_direction dir, fftw_plan plan)
{
     const int rounds = 20;
     const int real_input = (dir == FFTW_REAL_TO_COMPLEX);
     FFTW_TRIG_REAL twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n;
     fftw_real *inA, *inB, *inA1, *inB1, *inC;
     fftw_real *outA, *outB, *outC;
     fftw_real *tmp;
     int pass, k;

     inA = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inB = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inA1 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inB1 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inC = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     outA = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     outB = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     outC = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));

     WHEN_VERBOSE(2,
                  printf("Validating plan, n = %d, dir = %s\n", n,
                         real_input ? "REAL_TO_COMPLEX" : "COMPLEX_TO_REAL"));

     /*
      * Check 1 -- linearity:
      * T(alpha*A + beta*B) must match alpha*T(A) + beta*T(B).
      */
     for (pass = 0; pass < rounds; ++pass) {
          fftw_real alpha = DRAND();
          fftw_real beta = DRAND();

          rfill_random(inA, n);
          rfill_random(inB, n);

          /* inC = alpha*inA + beta*inB */
          rarray_scale(inA1, inA, alpha, n);
          rarray_scale(inB1, inB, beta, n);
          rarray_add(inC, inA1, inB1, n);

          /* tmp = alpha*T(inA) + beta*T(inB) */
          rfftw_out_of_place(plan, n, inA, outA);
          rfftw_out_of_place(plan, n, inB, outB);
          rarray_scale(outA, outA, alpha, n);
          rarray_scale(outB, outB, beta, n);
          rarray_add(tmp, outA, outB, n);

          rfftw_out_of_place(plan, n, inC, outC);
          rarray_compare(outC, tmp, n);
     }

     /*
      * Check 2 -- unit impulse.  inA holds the impulse (1 at index 0,
      * 0 elsewhere); outA holds its expected transform: 1 for every
      * index k with 2*k <= n and 0 for the rest.
      */
     for (k = 0; k < n; ++k) {
          inA[k] = 0.0;
          outA[k] = (2 * k <= n) ? 1.0 : 0.0;
     }
     inA[0] = 1.0;

     for (pass = 0; pass < rounds; ++pass) {
          if (real_input) {
               /* split the impulse as B + (impulse - B), transform both
                  parts and add the results */
               rfill_random(inB, n);
               rarray_sub(inC, inA, inB, n);
               rfftw_out_of_place(plan, n, inB, outB);
               rfftw_out_of_place(plan, n, inC, outC);
               rarray_add(tmp, outB, outC, n);
               rarray_compare(tmp, outA, n);
          } else {
               /* same idea starting from the transformed side; the sum
                  is scaled by 1/n before comparing with the impulse */
               rfill_random(outB, n);
               rarray_sub(outC, outA, outB, n);
               rfftw_out_of_place(plan, n, outB, inB);
               rfftw_out_of_place(plan, n, outC, inC);
               rarray_add(tmp, inB, inC, n);
               rarray_scale(tmp, tmp, 1.0 / ((double) n), n);
               rarray_compare(tmp, inA, n);
          }
     }

     /*
      * Check 3 -- time shift.  (The paper performs more tests, but
      * this code should be fine too.)  Elements k and n - k are
      * combined with cos/sin of k * 2*pi/n; for even n the middle
      * element is simply negated.
      */
     for (pass = 0; pass < rounds; ++pass) {
          rfill_random(inA, n);

          if (real_input) {
               /* transform A and its rarray_rol'ed copy, then apply the
                  phase factors to the second output */
               rarray_rol(inB, inA, n, 1, 1);
               rfftw_out_of_place(plan, n, inA, outA);
               rfftw_out_of_place(plan, n, inB, outB);

               tmp[0] = outB[0];
               for (k = 1; 2 * k < n; ++k) {
                    FFTW_TRIG_REAL sn = dir * FFTW_TRIG_SIN(k * twopin);
                    FFTW_TRIG_REAL cs = FFTW_TRIG_COS(k * twopin);
                    tmp[k] = outB[k] * cs - outB[n - k] * sn;
                    tmp[n - k] = outB[k] * sn + outB[n - k] * cs;
               }
               if (2 * k == n)
                    tmp[k] = -outB[k];

               rarray_compare(tmp, outA, n);
          } else {
               /* apply the phase factors to the input, transform both
                  versions, and compare against the rarray_rol'ed output */
               inB[0] = inA[0];
               for (k = 1; 2 * k < n; ++k) {
                    FFTW_TRIG_REAL sn = dir * FFTW_TRIG_SIN(k * twopin);
                    FFTW_TRIG_REAL cs = FFTW_TRIG_COS(k * twopin);
                    inB[k] = inA[k] * cs - inA[n - k] * sn;
                    inB[n - k] = inA[k] * sn + inA[n - k] * cs;
               }
               if (2 * k == n)
                    inB[k] = -inA[k];

               rfftw_out_of_place(plan, n, inA, outA);
               rfftw_out_of_place(plan, n, inB, outB);
               rarray_rol(tmp, outA, n, 1, 1);
               rarray_compare(tmp, outB, n);
          }
     }

     WHEN_VERBOSE(2, printf("Validation done\n"));

     /* release the buffers in reverse order of allocation */
     fftw_free(tmp);
     fftw_free(outC);
     fftw_free(outB);
     fftw_free(outA);
     fftw_free(inC);
     fftw_free(inB1);
     fftw_free(inA1);
     fftw_free(inB);
     fftw_free(inA);
}
Example #20
0
/*
 * Checks a distributed in-place 1-D transform of length n (howmany
 * interleaved transforms) against validated_plan.
 *
 * Each process builds the full random input, copies its own local
 * portion into the distributed buffer, runs fftw_mpi, and compares the
 * result with the matching section of the output of validated_plan.
 * A scratch buffer is passed to fftw_mpi on a coin flip, NULL otherwise.
 * Nothing is tested when `specific` is nonzero.  `istride` is not used.
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
                   fftw_plan validated_plan, int specific)
{
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;
     int local_n, local_start;
     int local_n_after_transform, local_start_after_transform;
     int total_local_size;
     int idx;
     fftw_complex *local_data, *scratch = NULL;
     fftw_complex *global_in, *reference;
     fftw_complex *my_slab;
     fftw_mpi_plan plan;

     if (specific) {
          WHEN_VERBOSE(2, my_printf("N/A\n"));
          return;
     }

     if (coinflip())
          flags |= FFTW_THREADSAFE;

     plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, flags);

     fftw_mpi_local_sizes(plan, &local_n, &local_start,
                          &local_n_after_transform,
                          &local_start_after_transform,
                          &total_local_size);

     local_data = (fftw_complex *) fftw_malloc(total_local_size
                                               * sizeof(fftw_complex) * howmany);

     /* randomly exercise the variant that is given a work array */
     if (coinflip()) {
          WHEN_VERBOSE(2, my_printf("w/work..."));
          scratch = (fftw_complex *) fftw_malloc(total_local_size
                                                 * sizeof(fftw_complex) * howmany);
     }

     global_in = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
     reference = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

     /* random data for the whole (global) input */
     for (idx = 0; idx < n * howmany; ++idx) {
          c_re(global_in[idx]) = DRAND();
          c_im(global_in[idx]) = DRAND();
     }

     /* this process's share starts local_start*howmany elements in */
     my_slab = global_in + local_start*howmany;
     for (idx = 0; idx < local_n * howmany; ++idx) {
          c_re(local_data[idx]) = c_re(my_slab[idx]);
          c_im(local_data[idx]) = c_im(my_slab[idx]);
     }

     /* distributed transform, in place */
     fftw_mpi(plan, howmany, local_data, scratch);

     fftw_mpi_destroy_plan(plan);

     /* reference transform of the complete input */
     fftw(validated_plan, howmany, global_in, howmany, 1,
          reference, howmany, 1);

     CHECK(compute_error_complex(local_data, 1,
                                 reference + local_start_after_transform*howmany, 1,
                                 howmany*local_n_after_transform) < TOLERANCE,
           "test_in_place: wrong answer");

     WHEN_VERBOSE(2, my_printf("OK\n"));

     fftw_free(local_data);
     fftw_free(scratch);
     fftw_free(global_in);
     fftw_free(reference);
}
Example #21
0
/*
 * Draw one random variate from the generator described by *gen.
 *
 * The sampling method is chosen from gen->min_ab and gen->max_ab
 * (presumably min(a,b) and max(a,b); gen->sum_ab presumably a+b --
 * TODO confirm against the code that fills gen_beta_param):
 *
 *   max_ab <  0.5              Joehnk's algorithm
 *   min_ab >  1.0              Algorithm BB
 *   max_ab >= 1.0 (otherwise)  Atkinson's switching method
 *   everything else            Atkinson's algorithm
 *
 * The last three methods read two precomputed constants from
 * gen->param[0] and gen->param[1].  Uniform variates come from DRAND();
 * every rejection loop draws two of them per iteration.
 *
 * Returns the accepted variate.
 */
double gen_beta(const gen_beta_param *gen)
{
    
    /* The following temporaries are recomputed on each iteration,
     * or restored from gen->param array.
     */
    double c,r,s,t,u1,u2,v,w,z,lambda;
    double logv, logw, log_sum;
    
    /* local copies of the generator parameters */
    double a = gen->a;
    double b = gen->b;
    double min_ab = gen->min_ab;
    double max_ab = gen->max_ab;
    double sum_ab = gen->sum_ab;
    
    
    
    if (max_ab<0.5)
    {   /* Use Joehnk's algorithm.
	 * Use logv and logw, rather than v and w, to avoid
	 * floating-point underflow with very small a or b values.
	 * Here v = u1^(1/a), w = u2^(1/b), and log_sum = log(v+w),
	 * evaluated by factoring out the larger of the two terms.
	 */
        do 
        {   u1 = DRAND();
            u2 = DRAND();
            logv = log(u1)/a;
            logw = log(u2)/b;
            log_sum = logv>logw? 
            logv + log(1+ exp(logw-logv))
            :  logw + log(1+ exp(logv-logw));
        } while (log_sum>0.0);	/* reject while v+w > 1 */
        assert(logv<=log_sum);
        /* accepted: return v/(v+w) */
        return exp(logv - log_sum);
    }
    
    if (min_ab > 1.0)
    {    /* use Algorithm BB
	 * lambda stored as gen->param[0], c as gen->param[1]
	 */
    	lambda = gen->param[0];
        c = gen->param[1];
        do 
        {
            u1 = DRAND();
            u2 = DRAND();
            v = lambda*log(u1/(1.0-u1));
            /* aexp is defined elsewhere -- presumably min_ab*exp(v)
             * with overflow protection; TODO confirm */
            w = aexp(min_ab,v);
            z = u1*u1*u2;
            r = c*v-1.38629436112;	/* constant is ln(4) */
            s = min_ab+r-w;
            /* quick acceptance test that avoids log(z);
             * 2.609438 is 1 + ln(5) */
            if(s+2.609438 >= 5.0*z) break;
            t = log(z);
        } while ( /* s<=t && */
                 r+sum_ab*log(sum_ab/(max_ab+w)) < t);
        /* ret is defined elsewhere -- presumably maps w to the final
         * variate depending on whether a is min_ab; TODO confirm */
    	return ret(a,min_ab, max_ab, w); 
    }
    
    if (max_ab>= 1.0)
    {   /* use Atkinson's switching method, as
	 * described in Dagpunar's book
	 * p=min_ab, q=max_ab
	 * t stored as gen->param[0], r as gen->param[1]
	 */
        t = gen->param[0];
        r = gen->param[1]; 
        for(;;) 
        {   u1 = DRAND();
            u2 = DRAND(); 
            if (u1 < r)
            {   /* candidate w from the lower piece; w/t = (u1/r)^(1/min_ab) */
                w = t*pow(u1/r, 1/min_ab);
                if (log(u2) < (max_ab -1)*log(1-w)) break;
            }
            else
            {   /* candidate w from the upper piece */
                w = 1- (1-t)*pow((1-u1)/(1-r), 1/max_ab);
                if (log(u2) < (min_ab -1) * log(w/t)) break;
            }
        } 
        /* w was generated with min_ab in the first-parameter role;
         * mirror it when a is not min_ab */
        return (a==min_ab)? w : 1-w;
    }
    else
    {	/* use Atkinson's Algorithm
	 * t stored as gen->param[0], r as gen->param[1]
	 * Same structure as the switching method above; the only
	 * difference is the acceptance test of the first branch, which
	 * uses log((1-w)/(1-t)) instead of log(1-w).
	 */
        t = gen->param[0];
        r = gen->param[1];
        for(;;)
        {
            u1 = DRAND();
            u2 = DRAND();
            if (u1 < r)
            {   w = t*pow(u1/r, 1/min_ab);
                if (log(u2) < (max_ab -1)*log((1-w)/(1-t))) break;
            }
            else
            {   w = 1- (1-t)*pow((1-u1)/(1-r), 1/max_ab);
                if (log(u2) < (min_ab -1) * log(w/t)) break;
            }
        } 
        /* mirror the result when a is not min_ab */
        return (a==min_ab)? w : 1-w;
    }
}
Example #22
0
/*
 * Checks a distributed in-place N-dimensional transform against
 * validated_plan, for every stride from 1 to MAX_STRIDE.
 *
 * rank/n give the dimensions; alternate_api selects the 2d/3d plan
 * constructors when rank is 2 or 3; force_buffered adds
 * FFTWND_FORCE_BUFFERED to the plan flags.  Returns immediately when
 * `specific` is nonzero or rank < 2.  A work array is handed to
 * fftwnd_mpi on a coin flip, a null pointer otherwise.
 */
void testnd_in_place(int rank, int *n, fftw_direction dir,
                     fftwnd_plan validated_plan,
                     int alternate_api, int specific, int force_buffered)
{
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;
     int local_nx, local_x_start;
     int local_ny_after_transpose, local_y_start_after_transpose;
     int total_local_size;
     int istride;
     int N, dim, idx;
     fftw_complex *local_data, *scratch = 0, *global_data;
     fftwnd_mpi_plan p = 0;

     if (specific || rank < 2)
          return;

     if (coinflip())
          flags |= FFTW_THREADSAFE;

     if (force_buffered)
          flags |= FFTWND_FORCE_BUFFERED;

     /* total number of elements */
     N = 1;
     for (dim = 0; dim < rank; ++dim)
          N *= n[dim];

     if (alternate_api && (rank == 2 || rank == 3)) {
          /* dimension-specific constructors */
          if (rank == 2)
               p = fftw2d_mpi_create_plan(MPI_COMM_WORLD,
                                          n[0], n[1], dir, flags);
          else
               p = fftw3d_mpi_create_plan(MPI_COMM_WORLD,
                                          n[0], n[1], n[2], dir, flags);
     }
     else {
          /* standard api */
          p = fftwnd_mpi_create_plan(MPI_COMM_WORLD, rank, n, dir, flags);
     }

     fftwnd_mpi_local_sizes(p, &local_nx, &local_x_start,
                            &local_ny_after_transpose,
                            &local_y_start_after_transpose,
                            &total_local_size);

     local_data = (fftw_complex *) fftw_malloc(total_local_size * MAX_STRIDE
                                               * sizeof(fftw_complex));
     if (coinflip()) {
          WHEN_VERBOSE(1, my_printf("w/work..."));
          scratch = (fftw_complex *) fftw_malloc(total_local_size * MAX_STRIDE
                                                 * sizeof(fftw_complex));
     }
     global_data = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
          /* elements per index of the first dimension, and this
             process's portion of the global array */
          const int plane = N / n[0];
          fftw_complex *my_slab = global_data + local_x_start * plane;
          const int local_count = local_nx * plane;
          int lane;

          /* generate random inputs */
          for (idx = 0; idx < N; ++idx) {
               c_re(global_data[idx]) = DRAND();
               c_im(global_data[idx]) = DRAND();
          }

          /* replicate every local element istride times */
          for (idx = 0; idx < local_count; ++idx) {
               for (lane = 0; lane < istride; ++lane) {
                    c_re(local_data[idx * istride + lane]) = c_re(my_slab[idx]);
                    c_im(local_data[idx * istride + lane]) = c_im(my_slab[idx]);
               }
          }

          fftwnd_mpi(p, istride, local_data, scratch, FFTW_NORMAL_ORDER);

          /* reference: in-place transform of the whole array */
          fftwnd(validated_plan, 1, global_data, 1, 1, NULL, 0, 0);

          /* each of the istride interleaved copies must match */
          for (lane = 0; lane < istride; ++lane)
               CHECK(compute_error_complex(local_data + lane, istride,
                                           my_slab,
                                           1, local_count) < TOLERANCE,
                     "testnd_in_place: wrong answer");
     }

     fftwnd_mpi_destroy_plan(p);

     fftw_free(global_data);
     fftw_free(scratch);
     fftw_free(local_data);
}
/*
 * Checks the out-of-place real N-dimensional transforms (real-to-complex
 * and complex-to-real) against validated_plan for every pair of input
 * and output strides from 1 to MAX_STRIDE.
 *
 * The real input is viewed as nc rows of nr reals (nr is the last
 * dimension); each row of the real-to-complex output has nr/2 + 1
 * complex entries.
 */
void testnd_out_of_place(int rank, int *n, fftwnd_plan validated_plan)
{
     int flags = measure_flag | wisdom_flag;
     int istride, ostride;
     int N, dim, idx, row, col, lane;
     int nc, nhc, nr;
     fftw_real *real_in, *real_back;
     fftw_complex *cplx_in, *half_out, *cplx_ref;
     fftwnd_plan p, ip;

     if (coinflip())
          flags |= FFTW_THREADSAFE;

     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
          N *= n[dim];
     if (rank > 0) {
          nr = n[rank - 1];
          nc = N / nr;
          nhc = nr / 2 + 1;
     }

     real_in = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
     real_back = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
     half_out = (fftw_complex *) fftw_malloc(nhc * nc * MAX_STRIDE
                                             * sizeof(fftw_complex));
     cplx_in = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     cplx_ref = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
     ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     CHECK(p != NULL && ip != NULL, "can't create plan");

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
          /* generate random inputs: a complex copy with zero imaginary
             part for the reference, and the real data replicated
             istride times */
          for (row = 0; row < nc; ++row)
               for (col = 0; col < nr; ++col) {
                    int pos = row * nr + col;

                    c_re(cplx_in[pos]) = DRAND();
                    c_im(cplx_in[pos]) = 0.0;
                    for (lane = 0; lane < istride; ++lane)
                         real_in[pos * istride + lane] = c_re(cplx_in[pos]);
               }
          for (idx = 0; idx < N * istride; ++idx)
               real_back[idx] = 0.0;

          /* reference complex transform */
          fftwnd(validated_plan, 1, cplx_in, 1, 1, cplx_ref, 1, 1);

          for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) {
               int howmany = (istride < ostride) ? istride : ostride;

               WHEN_VERBOSE(2, printf("\n    testing stride %d/%d...",
                                      istride, ostride));

               /* the single-transform entry point is only eligible in the
                  unit-stride case, and then only on a coin flip */
               if (howmany == 1 && istride == 1 && ostride == 1 && !coinflip())
                    rfftwnd_one_real_to_complex(p, real_in, half_out);
               else
                    rfftwnd_real_to_complex(p, howmany, real_in, istride, 1,
                                            half_out, ostride, 1);

               /* compare the first nhc entries of every row with the
                  reference */
               for (row = 0; row < nc; ++row)
                    for (lane = 0; lane < howmany; ++lane)
                         CHECK(compute_error_complex(half_out + row * nhc * ostride + lane,
                                                     ostride,
                                                     cplx_ref + row * nr, 1,
                                                     nhc) < TOLERANCE,
                               "out-of-place (r2c): wrong answer");

               if (howmany == 1 && istride == 1 && ostride == 1 && !coinflip())
                    rfftwnd_one_complex_to_real(ip, half_out, real_back);
               else
                    rfftwnd_complex_to_real(ip, howmany, half_out, ostride, 1,
                                            real_back, istride, 1);

               /* scale the round trip by 1/N */
               for (idx = 0; idx < N * istride; ++idx)
                    real_back[idx] *= 1.0 / N;

               /* when every input lane was transformed, the whole buffer
                  must match the input */
               if (istride == howmany)
                    CHECK(compute_error(real_back, 1, real_in, 1, N * istride)
                          < TOLERANCE, "out-of-place (c2r): wrong answer");

               /* per lane, compare with the real parts of the original
                  complex input (stride 2 skips the imaginary parts) */
               for (row = 0; row < nc; ++row)
                    for (lane = 0; lane < howmany; ++lane)
                         CHECK(compute_error(real_back + row * nr * istride + lane,
                                             istride,
                                             (fftw_real *) (cplx_in + row * nr), 2,
                                             nr) < TOLERANCE,
                               "out-of-place (c2r): wrong answer (check 2)");
          }
     }

     rfftwnd_destroy_plan(p);
     rfftwnd_destroy_plan(ip);

     fftw_free(real_back);
     fftw_free(cplx_ref);
     fftw_free(cplx_in);
     fftw_free(half_out);
     fftw_free(real_in);
}
Example #24
0
/*
 * Benchmark driver for the dynamic load balancers.
 *
 * Usage: test_dynlb max_points_per_rank [num_time_steps = 100] [time_step = 0.001]
 *
 * Every MPI rank creates between 1 and max_points_per_rank random points
 * with random velocities, then times num_time_steps iterations of
 * "balance + unit_cube_step": first with dynlb_morton_balance, then with a
 * balancer created by dynlb_create (DYNLB_RCB_TREE) and refreshed with
 * dynlb_update.  Timings are summed over ranks with MPI_Allreduce, averaged
 * over ranks and steps, and printed by rank 0.
 *
 * Returns 0 on success (including the no-argument case, which only prints
 * the synopsis) and 1 when max_points_per_rank is not a positive integer.
 */
int main (int argc, char *argv[])
{
  int max_points_per_rank = 100;
  int num_time_steps = 100;
  REAL time_step = 0.001;
  int n, i, rank, size, *ranks;
  REAL *point[3], *velo[3];
  struct timing t;
  double dt[2], gt[2]; /* [0]: balancing time, [1]: unit_cube_step time; dt local, gt summed over ranks */

  if (argc == 1)
  {
    printf ("SYNOPSIS: test_dynlb max_points_per_rank [num_time_steps = 100] [time_step = 0.001]\n");
    return 0;
  }

  if (argc >= 2) max_points_per_rank = atoi(argv[1]);

  if (argc >= 3) num_time_steps = atoi(argv[2]);

  if (argc >= 4) time_step = atof(argv[3]);

  /* n is drawn below as rand() % max_points_per_rank + 1; a zero modulus
   * (which is also what atoi yields for non-numeric input) is undefined
   * behaviour, so reject non-positive values before MPI is started */
  if (max_points_per_rank < 1)
  {
    fprintf (stderr, "ERROR: max_points_per_rank must be a positive integer\n");
    return 1;
  }

  MPI_Init (&argc, &argv);

  MPI_Comm_rank (MPI_COMM_WORLD, &rank);

  MPI_Comm_size (MPI_COMM_WORLD, &size);

  /* different seed on every rank so that ranks get different point sets */
  srand(time(NULL) + rank);

  n = rand() % max_points_per_rank + 1;

  if (rank == 0) printf ("Generating random points in unit cube...\n");

  timerstart (&t);

  printf ("Generating %d points on rank %d.\n", n, rank);

  /* coordinates and velocities are stored as three separate arrays (one per axis) */
  ERRMEM (point[0] = malloc (n * sizeof (REAL)));
  ERRMEM (point[1] = malloc (n * sizeof (REAL)));
  ERRMEM (point[2] = malloc (n * sizeof (REAL)));
  ERRMEM (velo[0] = malloc (n * sizeof (REAL)));
  ERRMEM (velo[1] = malloc (n * sizeof (REAL)));
  ERRMEM (velo[2] = malloc (n * sizeof (REAL)));
  ERRMEM (ranks = malloc (n * sizeof (int)));

  for (i = 0; i < n; i ++)
  {
    point[0][i] = DRAND();
    point[1][i] = DRAND();
    point[2][i] = DRAND();
    velo[0][i] = DRAND()-0.5;
    velo[1][i] = DRAND()-0.5;
    velo[2][i] = DRAND()-0.5;
  }

  dt[0] = ptimerend (&t);

  if (rank == 0) printf ("Generating points took %g sec.\n", dt[0]);

  if (rank == 0) printf ("Timing %d simple morton based balancing steps...\n", num_time_steps);

  for (i = 0, dt[0] = 0.0, dt[1] = 0.0; i < num_time_steps; i ++)
  {
    timerstart (&t);

    dynlb_morton_balance (n, point, ranks);

    dt[0] += timerend (&t);

    if (rank == 0) printf ("."), fflush (stdout);

    timerstart (&t);

    unit_cube_step (0, n, point, velo, time_step);

    dt[1] += timerend (&t);
  }

  MPI_Allreduce (dt, gt, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

  /* average over ranks and steps */
  gt[0] /= (double)size * (double)num_time_steps;
  gt[1] /= (double)size * (double)num_time_steps;

  /* NOTE(review): gt[0] is the balancing time and gt[1] the unit_cube_step
   * time, yet the message labels gt[0]+gt[1] as "integration" and gt[1] as
   * "balancing" -- confirm the intended labels; output left unchanged */
  if (rank == 0) printf ("\nMORTON: avg. integration: %g sec. per step, avg. balancing: %g sec. per step; ratio: %g\n", gt[0]+gt[1], gt[1], gt[1]/(gt[0]+gt[1]));

  if (rank == 0) printf ("Creating partitioning tree based balancer ...\n");

  timerstart (&t);

  struct dynlb *lb = dynlb_create (0, n, point, 0, 0.5, DYNLB_RCB_TREE);

  /* plain assignment: dt[0] still holds the balancing time accumulated by
   * the Morton loop above, which must not leak into the creation time */
  dt[0] = ptimerend (&t);

  if (rank == 0) printf ("Took %g sec.\nInitial imbalance %g\n", dt[0], lb->imbalance);

  if (rank == 0) printf ("Timing %d partitioning tree based balancing steps...\n", num_time_steps);

  for (i = 0, dt[0] = 0.0, dt[1] = 0.0; i < num_time_steps; i ++)
  {
    timerstart (&t);

    dynlb_update (lb, n, point);

    dt[0] += timerend (&t);

    if (rank == 0) printf ("Step %d imbalance %g\n", i, lb->imbalance);

    timerstart (&t);

    unit_cube_step (0, n, point, velo, time_step);

    dt[1] += timerend (&t);
  }

  MPI_Allreduce (dt, gt, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

  gt[0] /= (double)size * (double)num_time_steps;
  gt[1] /= (double)size * (double)num_time_steps;

  /* NOTE(review): same label/value question as for the MORTON line above */
  if (rank == 0) printf ("TREE: avg. integration: %g sec. per step, avg. balancing: %g sec. per step; ratio: %g\n", gt[0]+gt[1], gt[1], gt[1]/(gt[0]+gt[1]));

  /* NOTE(review): lb is not released here; the process exits right after */
  free (point[0]);
  free (point[1]);
  free (point[2]);
  free (velo[0]);
  free (velo[1]);
  free (velo[2]);
  free (ranks);

  MPI_Finalize ();

  return 0;
}
Example #25
0
/* generate test
 *
 * Builds one random test configuration for the GJK variant selected by the
 * file-level `mode`, runs the matching gjk_* routine with p and q as its
 * last two arguments, and prints the value LEN computes for p - q as
 * "|p-q|".
 *
 * All shapes live in file-level variables (apoint/bpoint, a/b, center,
 * radius, el1_*, el2_*).  SETRAND, ADD, SUB, COPY, EXPMAP and LEN are macros
 * defined elsewhere; the comments below describe what they presumably do --
 * TODO confirm against their definitions.
 *
 * In every case a random vector `move` is drawn and used to shift the
 * shapes it is applied to: ADD for the first shape, SUB for the second.
 *
 * NOTE(review): the switch has no default, so for an unlisted mode p and q
 * keep whatever values they had before the call. */
static void gen ()
{
  double d [3], move [3];

  switch (mode)
  {
  case GJK_CONVEX_CONVEX: /* two random convex hulls */
  {
    asize = bsize = 0;

    /* redraw until the point count reaches minim; this never terminates
     * unless limit is greater than minim */
    while (asize < minim) asize = rand () % limit;

    while (bsize < minim) bsize = rand () % limit;

    SETRAND (move, 1.0);

    /* point cloud A, shifted by +move */
    for (int n = 0; n < asize; n ++)
    { SETRAND (apoint [n], 0.75);
      ADD (apoint [n], move, apoint [n]); }

    /* point cloud B, shifted by -move */
    for (int n = 0; n < bsize; n ++)
    { SETRAND (bpoint [n], 0.75);
      SUB (bpoint [n], move, bpoint [n]); }

    /* drop the hulls of the previous test before building new ones */
    free (a);
    free (b);
    a = hull ((double*)apoint, asize, &alength);
    b = hull ((double*)bpoint, bsize, &blength);

    double *va, *vb;
    int nva, nvb;

    /* vertex arrays extracted from the hulls; freed below after use */
    va = TRI_Vertices (a, alength, &nva);
    vb = TRI_Vertices (b, blength, &nvb);

    gjk (va, nva, vb, nvb, p, q);

    free (va);
    free (vb);
  }
  break;
  case GJK_CONVEX_SPHERE: /* random convex hull against a random sphere */
  {
    asize = bsize = 0;

    while (asize < minim) asize = rand () % limit;

    SETRAND (move, 1.0);

    for (int n = 0; n < asize; n ++)
    { SETRAND (apoint [n], 0.75);
      ADD (apoint [n], move, apoint [n]); }

    free (a);
    a = hull ((double*)apoint, asize, &alength);

    /* sphere: random center shifted by -move, radius 0.75 * DRAND () */
    SETRAND (center, 1.0);
    SUB (center, move, center);
    radius = 0.75 * DRAND ();

    double *va;
    int nva;

    va = TRI_Vertices (a, alength, &nva);

    gjk_convex_sphere (va, nva, center, radius, p, q);

    free (va);
  }
  break;
  case GJK_CONVEX_POINT: /* random convex hull against a single point */
  {
    asize = bsize = 0;

    while (asize < minim) asize = rand () % limit;

    SETRAND (move, 1.0);

    for (int n = 0; n < asize; n ++)
    { SETRAND (apoint [n], 0.75);
      ADD (apoint [n], move, apoint [n]); }

    free (a);
    a = hull ((double*)apoint, asize, &alength);

    /* the point is stored as center with radius 0 */
    SETRAND (center, 1.0);
    SUB (center, move, center);
    radius = 0.0;

    double *va;
    int nva;

    va = TRI_Vertices (a, alength, &nva);

    /* p carries the query point in (presumably copied from center) and is
     * passed together with q to gjk_convex_point */
    COPY (center, p);
    gjk_convex_point (va, nva, p, q);

    free (va);
  }
  break;
  case GJK_CONVEX_ELLIP: /* random convex hull against a random ellipsoid */
  {
    asize = bsize = 0;

    while (asize < minim) asize = rand () % limit;

    SETRAND (move, 1.0);

    for (int n = 0; n < asize; n ++)
    { SETRAND (apoint [n], 0.75);
      ADD (apoint [n], move, apoint [n]); }

    free (a);
    a = hull ((double*)apoint, asize, &alength);

    /* ellipsoid 1: random center shifted by -move, three values of
     * 0.75 * DRAND () in el1_sca */
    SETRAND (el1_center, 1.0);
    SUB (el1_center, move, el1_center);
    el1_sca [0] = 0.75 * DRAND ();
    el1_sca [1] = 0.75 * DRAND ();
    el1_sca [2] = 0.75 * DRAND ();
    /* el1_rot is derived from el1_sca by EXPMAP (the same three values are
     * also passed as the scaling) */
    EXPMAP (el1_sca, el1_rot);

    double *va;
    int nva;

    va = TRI_Vertices (a, alength, &nva);

    gjk_convex_ellip (va, nva, el1_center, el1_sca, el1_rot, p, q);

    free (va);
  }
  break;
  case GJK_SPHERE_ELLIP: /* random sphere against a random ellipsoid */
  {
    SETRAND (move, 1.0);

    /* the sphere center is not shifted by move in this case */
    SETRAND (center, 1.0);
    radius = 0.75 * DRAND ();

    SETRAND (el1_center, 1.0);
    SUB (el1_center, move, el1_center);
    el1_sca [0] = 0.75 * DRAND ();
    el1_sca [1] = 0.75 * DRAND ();
    el1_sca [2] = 0.75 * DRAND ();
    EXPMAP (el1_sca, el1_rot);

    gjk_sphere_ellip (center, radius, el1_center, el1_sca, el1_rot, p, q);
  }
  break;
  case GJK_ELLIP_ELLIP: /* two random ellipsoids */
  {
    SETRAND (move, 1.0);

    /* ellipsoid 1 is not shifted by move */
    SETRAND (el1_center, 1.0);
    el1_sca [0] = 0.75 * DRAND ();
    el1_sca [1] = 0.75 * DRAND ();
    el1_sca [2] = 0.75 * DRAND ();
    EXPMAP (el1_sca, el1_rot);

    /* ellipsoid 2 is shifted by -move */
    SETRAND (el2_center, 1.0);
    SUB (el2_center, move, el2_center);
    el2_sca [0] = 0.75 * DRAND ();
    el2_sca [1] = 0.75 * DRAND ();
    el2_sca [2] = 0.75 * DRAND ();
    EXPMAP (el2_sca, el2_rot);

    gjk_ellip_ellip (el1_center, el1_sca, el1_rot, el2_center, el2_sca, el2_rot, p, q);
  }
  break;
  case GJK_ELLIP_POINT: /* random ellipsoid against a single point */
  {
    SETRAND (move, 1.0);

    SETRAND (el1_center, 1.0);
    el1_sca [0] = 0.75 * DRAND ();
    el1_sca [1] = 0.75 * DRAND ();
    el1_sca [2] = 0.75 * DRAND ();
    EXPMAP (el1_sca, el1_rot);

    /* the point is stored as center with radius 0 */
    SETRAND (center, 1.0);
    SUB (center, move, center);
    radius = 0.0;

    /* p carries the query point in, as in GJK_CONVEX_POINT */
    COPY (center, p);
    gjk_ellip_point (el1_center, el1_sca, el1_rot, p, q);
  }
  break;
  }

  /* report the length of p - q */
  SUB (p, q, d);
  printf ("|p-q|=%g\n", LEN (d));
}