/*
 * Approximates a 1-D Gaussian convolution by K successive box-filter passes
 * followed by a single multiplication with a constant scale factor.
 *
 *   dest_data    output array, written with step `stride`
 *   buffer_data  scratch array, must differ from dest_data; accessed with
 *                unit stride unless it aliases src, in which case it is
 *                accessed with `stride`
 *   src          input array, read with step `stride`; may alias dest_data
 *                (in-place operation) or buffer_data
 *   N            number of samples, > 0
 *   stride       element step between consecutive samples in src and dest
 *   sigma        Gaussian width parameter fed into the radius formula, > 0
 *   K            number of box-filter passes, > 0
 *
 * Preconditions are only checked through assert(), so they vanish when
 * NDEBUG is defined.
 *
 * NOTE(review): box_filter() is defined elsewhere; the call sites here assume
 * the signature (dst, dst_stride, src, src_stride, N, r) and that it yields an
 * unnormalized box sum of width 2r + 1 -- confirm against its definition.
 */
void box_gaussian_conv_(T *dest_data, T *buffer_data, const T *src, long N, long stride, T sigma, int K)
{
	struct
	{
		T *data;
		long stride;
	} dest, buffer, cur, next;
	T scale;
	long r;
	int step;

	assert(dest_data && buffer_data && src && dest_data != buffer_data && N > 0 && sigma > 0 && K > 0);

	/* Compute the box radius according to Wells' formula. */
	r = (long)(0.5 * sqrt((12.0 * sigma * sigma) / K + 1.0));
	/* One factor of 1 / (2r + 1) per pass, applied once at the end. */
	scale = (T)(1.0 / (double)pow(2.0*r + 1.0, K));

	dest.data = dest_data;
	dest.stride = stride;
	buffer.data = buffer_data;
	/* A buffer that aliases src shares the layout of src. */
	buffer.stride = (buffer_data == src) ? stride : 1;

	/* Here we decide whether dest or buffer should be the first output array.
	   If K is even, then buffer is the better choice so that the result is in
	   dest after K iterations, e.g. for K = 4,

	   src -> buffer -> dest -> buffer -> dest.

	   If K is odd, we would like to choose dest, e.g. for K = 3,

	   src -> dest -> buffer -> dest.

	   However, if src and dest point to the same memory (i.e., in-place
	   computation), then we must select buffer as the first output array.
	   Conversely, if src and buffer point to the same memory, the first
	   output must be dest. */
	if (buffer_data == src || (dest_data != src && K % 2 == 1))
		next = dest;
	else
		next = buffer;

	/* Perform the first step of box filtering. */
	box_filter(next.data, next.stride, src, stride, N, r);

	/* Perform another (K - 1) steps of box filtering, alternating the roles
	   of the dest and buffer arrays. */
	for (step = 1; step < K; ++step)
	{
		cur = next;
		next = (cur.data == buffer_data) ? dest : buffer;
		box_filter(next.data, next.stride, cur.data, cur.stride, N, r);
	}

	/* Multiply by the constant scale factor. */
	if (next.data != dest_data)
	{
		/* The result ended up in buffer: scale while copying it to dest.
		   The buffer must be walked with buffer.stride, which differs from 1
		   when buffer aliases src; a unit step would pick the wrong samples
		   in that case. */
		long n, i, j;

		for (n = i = j = 0; n < N; ++n, i += stride, j += buffer.stride)
			dest_data[i] = buffer_data[j] * scale;
	}
	else
	{
		/* The result is already in dest: scale it in place. */
		long i, i_end = stride * N;

		for (i = 0; i < i_end; i += stride)
			dest_data[i] *= scale;
	}

	return;
}
Example #2
0
// ---------------------------------------------------------------------------
/// Creates piece of plasma (velocity distribution to be set later).
// ---------------------------------------------------------------------------
double
tag_plasma (FILE *fp)
{
   // Cell sub-sampling and velocity space resolution.
   plasma_nx     = cfg_readInt (fp);
   plasma_ny     = cfg_readInt (fp);
   plasma_nz     = cfg_readInt (fp);
   plasma_layers = cfg_readInt (fp);

   // Reads all exctrusion elements.
   while (cfg_isParameter (fp)) {
      const char *element = cfg_readWord (fp);
      if (!strcmp (element, "planes")) {
         convex_read (fp);			// Reads set of planes.
      } else if (!strcmp (element, "spheres")) {
         sphere_read (fp);			// Reads set of spheres.
      } else if (!strcmp (element, "cylinders")) {
         cylinder_read (fp);			// Reads set of cylinders.
      } else if (!strcmp (element, "boxes")) {
         box_read (fp);				// Reads set of boxes.
      } else {
         DIE ("bad shape '%s' ('planes', 'spheres', or 'cylinders' expected)",
              element);
      }
   }

   // Deactivates axises if necessary.
   plasma_nx  = mc_have_x ? plasma_nx : 1;
   plasma_ny  = mc_have_y ? plasma_ny : 1;
   plasma_nz  = mc_have_z ? plasma_nz : 1;
   plasma_PPC = plasma_nx*plasma_ny*plasma_nz*plasma_layers;

   // Checks parameters.
   ENSURE (plasma_nx > 0 && plasma_ny > 0 && plasma_nz > 0 &&
           plasma_layers > 0,
           "bad nx(%d), ny(%d), nz(%d) or number of velocity layers (%d)",
           plasma_nx, plasma_ny, plasma_nz, plasma_layers);

   say ("tag_plasma: ");
   say ("  - %d x %d x %d in-cell spacing", plasma_nx, plasma_ny, plasma_nz);
   say ("  - %d velocity layers", plasma_layers);
   say ("  - %d particles per cell", plasma_PPC);

   marker_t cell[plasma_PPC], filtered[plasma_PPC];
   double dx = h1*mc_have_x/(double) plasma_nx,
          dy = h2*mc_have_y/(double) plasma_ny,
          dz = h3*mc_have_z/(double) plasma_nz;

   // 'p' enumerates particles in cell.
   int p = 0;
   for (int i = 0 ; i < plasma_nx ; ++i)
   for (int j = 0 ; j < plasma_ny ; ++j)
   for (int k = 0 ; k < plasma_nz ; ++k) {
      // Fills sample (NAN) to catch unitialized variables.
      marker_t m = {.x = (i + 0.5)*dx,
                    .y = (j + 0.5)*dy,
                    .z = (k + 0.5)*dz,
                    .vx = NAN,
                    .vy = NAN,
                    .vz = NAN,
                    .rho = NAN,
                    .qDivM = NAN};
      for (int l = 0 ; l < plasma_layers ; ++l) {
         cell[p]    = m;
         cell[p].vx = p++;	// Stores enumeration position.
      }
   }

   double memUsage = 0;
   plasma_newObject ();
   for (int I = cpu_min[0] ; I < cpu_max[0] + 1 - mc_have_x ; ++I)
   for (int J = cpu_min[1] ; J < cpu_max[1] + 1 - mc_have_y ; ++J)
   for (int K = cpu_min[2] ; K < cpu_max[2] + 1 - mc_have_z ; ++K) {
      // Adds cell data into current cell.
      for (int p = 0 ; p < plasma_PPC ; ++p) {
         filtered[p] = cell[p];
         filtered[p].x += I*h1*mc_have_x;
         filtered[p].y += J*h2*mc_have_y;
         filtered[p].z += K*h3*mc_have_z;
      }

      // Set of filter extrusions.
      int PPC = plasma_PPC;
      for (convex_t *b = polys, *b2 = b + polysN ; b < b2 ; ++b) {
         PPC = convex_filter (filtered, PPC, b);
      }

      for (sphere_t *s = spheres, *s2 = s + spheresN ; s < s2 ; ++s) {
         PPC = sphere_filter (filtered, PPC, s);
      }

      for (cylinder_t *c = cylinders, *c2 = c + cylindersN ; c < c2 ; ++c) {
         PPC = cylinder_filter (filtered, PPC, c);
      }

      for (box_t *b = boxes, *b2 = b + boxesN ; b < b2 ; ++b) {
         PPC = box_filter (filtered, PPC, b);
      }

      // Adds particles to the storage.
      for (int p = 0 ; p < PPC && (!memEstimateOnly) ; ++p) {
         *plasma_marker () = filtered[p];
      }

      memUsage += PPC;
   }

   say ("  - %.3e particles on cpu %d", memUsage, cpu_here);

   // Turns off all filters.
   box_free      ();
   convex_free   ();
   cylinder_free ();
   sphere_free   ();

   return memUsage*sizeof (marker_t);						// Returns memory usage.
}