Пример #1
0
bool SmoothConstrainedInterpolator::ProjectVelocity(const Config& x,Config& v)
{
  constraint->PreEval(x);
  Matrix J;
  constraint->Jacobian(x,J);
  if(!xmin.empty()) {
    //look through active contraints, set that column to 0
    for(int i=0;i<x.n;i++) {
      if(x(i)==xmin(i) || x(i) == xmax(i)) { 
	v(i) = 0;
	for(int j=0;j<J.m;j++)
	  J(j,i) = 0;
      }
    }
  }
  RobustSVD<Real> svd;
  bool res=svd.set(J);
  if(!res) {
    fprintf(stderr,"SmoothConstrainedInterpolator: Numerical error projecting velocity?\n");
    return false;
  }
  Vector temp;
  svd.nullspaceComponent(v,temp);
  v -= temp;
  return true;
}
Пример #2
0
// Rebuilds this instance's bounding box for every frame 0..maxTime.
//
// For each frame the eight corners of the referenced object's box are
// transformed with the first matrix returned by makeMatrices(frame), and the
// per-axis minimum/maximum of the transformed corners is stored as that
// frame's extent.  All eight corners are needed: transforming only on-axis
// points such as (xmin,0,0) misses the true extremes as soon as the
// transform rotates or translates the box.
void Instance::transformBoundingBox()
{
    auto b = i->getBoundingBox();

    BoundingBox bb;

    // 'frame' rather than 'i', so the member 'i' used above is not shadowed.
    for(int frame = 0; frame <= RayTracer::getInstance()->maxTime; ++frame)
    {
        auto m = makeMatrices(frame);

        // Untransformed extents at this frame, indexed 0 = min, 1 = max.
        double xs[2], ys[2], zs[2];
        xs[0] = b.xmin(frame); xs[1] = b.xmax(frame);
        ys[0] = b.ymin(frame); ys[1] = b.ymax(frame);
        zs[0] = b.zmin(frame); zs[1] = b.zmax(frame);

        double lo[3] = {0, 0, 0};
        double hi[3] = {0, 0, 0};

        // Bits 0/1/2 of 'corner' select min or max on the x/y/z axis.
        for(int corner = 0; corner < 8; ++corner)
        {
            auto p = Vector(xs[corner & 1],
                            ys[(corner >> 1) & 1],
                            zs[(corner >> 2) & 1]);
            p = transformLoc(m.first, p);

            double c[3];
            c[0] = p.x;
            c[1] = p.y;
            c[2] = p.z;

            for(int axis = 0; axis < 3; ++axis)
            {
                // The first corner seeds the running extremes.
                if(corner == 0 || c[axis] < lo[axis]) lo[axis] = c[axis];
                if(corner == 0 || c[axis] > hi[axis]) hi[axis] = c[axis];
            }
        }

        bb.xmin.addFrame(frame, lo[0]);
        bb.xmax.addFrame(frame, hi[0]);
        bb.ymin.addFrame(frame, lo[1]);
        bb.ymax.addFrame(frame, hi[1]);
        bb.zmin.addFrame(frame, lo[2]);
        bb.zmax.addFrame(frame, hi[2]);
    }
    bbox = bb;
}
Пример #3
0
// Meta-call dispatcher for QDeclarativeDrag.
//
// The call is first offered to QObject::qt_metacall; a negative id coming
// back is returned unchanged.  Otherwise the id is handled locally: ids 0..7
// of an InvokeMetaMethod call go to qt_static_metacall, and (unless
// QT_NO_PROPERTIES is defined) ids 0..7 of the property calls map to the
// target/axis/xmin/xmax/ymin/ymax/active/filterChildren accessors.  In every
// handled call kind the id is reduced by 8 before being returned.
int QDeclarativeDrag::qt_metacall(QMetaObject::Call _c, int _id, void **_a)
{
    _id = QObject::qt_metacall(_c, _id, _a);
    if (_id < 0)
        return _id;

    if (_c == QMetaObject::InvokeMetaMethod) {
        if (_id < 8)
            qt_static_metacall(this, _c, _id, _a);
        return _id - 8;
    }

#ifndef QT_NO_PROPERTIES
    switch (_c) {
    case QMetaObject::ReadProperty: {
        void *_v = _a[0];
        switch (_id) {
        case 0: *reinterpret_cast< QGraphicsObject**>(_v) = target(); break;
        case 1: *reinterpret_cast< Axis*>(_v) = axis(); break;
        case 2: *reinterpret_cast< qreal*>(_v) = xmin(); break;
        case 3: *reinterpret_cast< qreal*>(_v) = xmax(); break;
        case 4: *reinterpret_cast< qreal*>(_v) = ymin(); break;
        case 5: *reinterpret_cast< qreal*>(_v) = ymax(); break;
        case 6: *reinterpret_cast< bool*>(_v) = active(); break;
        case 7: *reinterpret_cast< bool*>(_v) = filterChildren(); break;
        }
        _id -= 8;
        break;
    }
    case QMetaObject::WriteProperty: {
        // Property 6 (active) has no setter branch here.
        void *_v = _a[0];
        switch (_id) {
        case 0: setTarget(*reinterpret_cast< QGraphicsObject**>(_v)); break;
        case 1: setAxis(*reinterpret_cast< Axis*>(_v)); break;
        case 2: setXmin(*reinterpret_cast< qreal*>(_v)); break;
        case 3: setXmax(*reinterpret_cast< qreal*>(_v)); break;
        case 4: setYmin(*reinterpret_cast< qreal*>(_v)); break;
        case 5: setYmax(*reinterpret_cast< qreal*>(_v)); break;
        case 7: setFilterChildren(*reinterpret_cast< bool*>(_v)); break;
        }
        _id -= 8;
        break;
    }
    case QMetaObject::ResetProperty:
        // Only property 0 (target) can be reset.
        if (_id == 0)
            resetTarget();
        _id -= 8;
        break;
    case QMetaObject::QueryPropertyDesignable:
    case QMetaObject::QueryPropertyScriptable:
    case QMetaObject::QueryPropertyStored:
    case QMetaObject::QueryPropertyEditable:
    case QMetaObject::QueryPropertyUser:
        // Nothing to answer locally; just consume this class's id range.
        _id -= 8;
        break;
    default:
        break;
    }
#endif // QT_NO_PROPERTIES
    return _id;
}
Пример #4
0
 // Returns width() * height(), or 0 when the box is invalid, i.e. when
 // xmax() < xmin() or ymax() < ymin().
 float size() const
 {
     const bool xOrdered = !(xmax() < xmin());
     if (xOrdered && !(ymax() < ymin()))
     {
         return width() * height();
     }
     return 0.0f;
 }
Пример #5
0
// For every bootstrap slot, finds the element of y with the largest value in
// that slot and stores the matching entry of x in the result.
// Ties go to the later element because the comparison is >=.
// NOTE(review): the slot loop runs through index y.nboot inclusive, so
// nboot+1 entries are written -- presumably the containers hold one extra
// slot; confirm against the boot/bvec definitions.
boot find_xmax(double *x,bvec y)
{
  boot xmax(y.nboot,y.njack);

  int slot=0;
  while(slot<=y.nboot)
    {
      int best=0;
      for(int cand=0;cand<y.nel;cand++)
	{
	  if(!(y[cand][slot]>=y[best][slot])) continue;
	  best=cand;
	}
      xmax.data[slot]=x[best];
      slot++;
    }

  return xmax;
}
Пример #6
0
// Centers the view horizontally on the midpoint of [a, b] and then steps the
// zoom level one unit at a time until the visible x extent (xmax()-xmin())
// is no smaller than b-a, finishing with a call to update().
//
// If the view starts out wider than the range, the zoom level is decreased
// while it is still wider, then increased once if that went too far.
// Otherwise the zoom level is increased while the view is narrower.
void CartesianWidget::setXRange(double a, double b)
{
    const double wanted = b - a;

    setCenter((a+b)/2.0, centerY());

    const bool startsTooWide = (xmax()-xmin() > wanted);
    if (!startsTooWide)
    {
        while (xmax()-xmin() < wanted)
            setZoomLevel(zoomLevel()+1);
        update();
        return;
    }

    while (xmax()-xmin() > wanted)
        setZoomLevel(zoomLevel()-1);

    // The last step may have made the view too narrow; back off one level.
    if (xmax()-xmin() < wanted)
        setZoomLevel(zoomLevel()+1);

    update();
}
Пример #7
0
/* single even-stage
 *
 * Returns -1 ('finished') when rank is out of range or stage is too large,
 * the number of stages (1) when stage < 0, and otherwise rank + 1 for odd
 * ranks or rank - 1 for even ranks, clamped to [0, size - 1].
 * When up is non-NULL, *up is set to 0.  snp is not used.
 */
slint sn_even(slint size, slint rank, slint stage, void *snp, slint *up) /* sl_proto, sl_func sn_even */
{
  const slint stages = 1;
  slint partner;

  /* a rank outside the network is 'finished' */
  if (rank >= size) return -1;
  /* a negative stage asks for the number of stages */
  if (stage < 0) return stages;
  /* a stage past the last one is 'finished' */
  if (stage >= stages) return -1;

  if (up != NULL) *up = 0;

  if (1 == rank % 2) partner = rank + 1;
  else partner = rank - 1;

  return xmax(0, xmin(size - 1, partner));
}
// Default constructor: zero-initializes the listed members, then calls set()
// with dimensions (1,1,1) and the two points (0,0,0) and (1,1,1)
// (presumably the grid's min and max corners -- confirm against set()).
AccelerationGrid::AccelerationGrid() :
m_cells(0,0,0),
m_elementidxs(0),
m_elementxmins(0),
m_elementxmaxs(0),
m_elementquery(0),
m_lastquery(0),
m_gridxmin(0,0,0),
m_gridxmax(0,0,0),
m_cellsize(0,0,0),
m_invcellsize(0,0,0)
{
    Vec3d lower(0,0,0);
    Vec3d upper(1,1,1);
    Vec3st unitDims(1,1,1);
    set(unitDims, lower, upper);
}
Пример #9
0
/* Returns a newly allocated, NUL-terminated copy of part of src, or NULL if
 * malloc_w() fails.
 *
 * begin: start offset; a negative value counts back from the end of src
 *        (clamped to 0), a non-negative value is clamped to strlen(src).
 * len:   maximum number of characters to copy; -1 means strlen(src).
 *
 * The buffer is sized for min(strlen(src), len) characters plus the NUL.
 */
static char *xstrsub(const char *src, int begin, int len)
{
	char *ret;
	int ind;
	int l;
	size_t s_full;
	size_t span;

	s_full=strlen(src);
	l=(len==-1)?(int)s_full:len;
	span=xmin(s_full, l);

	ret=(char *)malloc_w((span+1)*sizeof(char), __func__);
	if(!ret)
		return NULL;

	if(begin<0)
		ind=xmax((int) s_full+begin, 0);
	else
		ind=xmin(s_full, begin);

	/* strncpy stops at src's NUL and zero-fills the rest of the span. */
	strncpy(ret, src+ind, span);
	ret[span] = '\0';

	return ret;
}
/* Appends len bytes from data to the stream's buffer.

   When the free space (allocated - buflen) is too small, the buffer is
   reallocated to the larger of buflen + len and twice the current
   allocation; if that size computation overflows, error() is called with
   EXIT_FAILURE.  A zero len is a no-op.  */
static void
memory_ostream::write_mem (memory_ostream_t stream,
                           const void *data, size_t len)
{
  if (len == 0)
    return;

  if (len > stream->allocated - stream->buflen)
    {
      size_t doubled = xsum (stream->allocated, stream->allocated);
      size_t needed = xsum (stream->buflen, len);
      size_t new_allocated = xmax (needed, doubled);

      if (size_overflow_p (new_allocated))
        error (EXIT_FAILURE, 0,
               _("%s: too much output, buffer size overflow"),
               "memory_ostream");

      stream->buffer = (char *) xrealloc (stream->buffer, new_allocated);
      stream->allocated = new_allocated;
    }

  memcpy (stream->buffer + stream->buflen, data, len);
  stream->buflen += len;
}
Пример #11
0
//------------------------------------------------------------------------------------------------------------------------------------
// called when we want to draw the 3D data in our app.
//------------------------------------------------------------------------------------------------------------------------------------
void draw3D()
{
	const float DEG_TO_RAD = PI / 180.0f;
	const Vec3 xAxis(1.0f, 0, 0);
	const Vec3 yAxis(0, 1.0f, 0);

	translate(0, 0, -g_zoom);
	translate(g_tx, g_ty, 0);
	rotate(g_rotx * DEG_TO_RAD, xAxis);
	rotate(g_roty * DEG_TO_RAD, yAxis);

	// draw the grid on the floor
	setColour(0.25f, 0.25f, 0.25f);
	for(float i = -10.0f; i <= 10.1f; i += 1.0f)
	{
		Vec3 zmin(i, 0, -10);
		Vec3 zmax(i, 0,  10);
		Vec3 xmin(-10, 0, i);
		Vec3 xmax(10, 0, i);
		drawLine(xmin, xmax);
		drawLine(zmin, zmax);
	}
}
Пример #12
0
/*
   Parses optarg-style text as an integer index (base auto-detected by
   strtoll) and stores it in *out.  Returns 0 on success and -1 when the
   text contains no digits, has trailing characters, or strtoll sets errno.
 */
static int parse_index(const char *text, long *out)
{
        char *end = 0;
        long long value = 0;

        errno = 0;
        value = strtoll(text, &end, 0);

        if (errno || end == text || *end != '\0') {
                return -1;
        }

        *out = value;

        return 0;
}

/*
   Usage: -i START -j END -f sin|tan

   Echoes the command line, then calls find_triple_64() for every index in
   [START, END], printing a message for each index with no candidate.
   START and END default to 1 and N and are clamped into [1, N].
   Returns 1 on a bad option value or when -f is missing, 0 otherwise.
 */
int main(int argc, char **argv)
{
        int c = 0;
        long i_start_arg = 1;
        long i_end_arg = N;
        int i_start = 1;
        int i_end = N;
        mpfr_fn sin_fn = 0;
        mpfr_fn cos_fn = 0;

        for (int k = 0; k < argc; ++k) {
                printf("%s ", argv[k]);
        }

        printf("\n");

        while ((c = getopt(argc, argv, "i:j:f:")) != -1) {
                switch (c) {
                case 'i':

                        if (parse_index(optarg, &i_start_arg) < 0) {
                                fprintf(stderr, "bad start index %s\n", optarg);

                                return 1;
                        }

                        break;
                case 'j':

                        if (parse_index(optarg, &i_end_arg) < 0) {
                                fprintf(stderr, "bad end index %s\n", optarg);

                                return 1;
                        }

                        break;
                case 'f':

                        if (!strcmp(optarg, "sin")) {
                                sin_fn = mpfr_sin;
                                cos_fn = mpfr_cos;
                        } else if (!strcmp(optarg, "tan")) {
                                sin_fn = mpfr_tan;
                                cos_fn = mpfr_cot;
                        } else {
                                fprintf(stderr, "unknown function %s\n",
                                        optarg);

                                return 1;
                        }

                        break;
                default:
                        usage();
                        break;
                }
        }

        /* The start index must be checked against its own value, not the
           end index. */
        if (i_start_arg <= 0 ||
            i_start_arg > N) {
                printf("truncating start to (0, %d]\n", N);
                i_start_arg = xmin(xmax(i_start_arg, 1), N);
        }

        if (i_end_arg <= 0 ||
            i_end_arg > N) {
                printf("truncating end to (0, %d]\n", N);
                i_end_arg = xmin(xmax(i_end_arg, 1), N);
        }

        /* Both values are now within [1, N], so they fit in an int. */
        i_start = i_start_arg;
        i_end = i_end_arg;

        if (!sin_fn ||
            !cos_fn) {
                fprintf(stderr, "-f required\n");

                return 1;
        }

        for (int i = i_start; i <= i_end; ++i) {
                if (find_triple_64(i, 11, 20, sin_fn, cos_fn) < 0) {
                        /*
                           This indicates you should drop the range
                           limitations on r, re-run, and come back
                           in a week.
                         */
                        printf("CANNOT FIND SUITABLE CANDIDATE FOR i = %03d\n",
                               i);
                }
        }

        return 0;
}
// Sets up the NPSOL work arrays from the function-evaluation object and
// calls npsol_().  The whole body is compiled only when
// MAST_ENABLE_NPSOL == 1; otherwise this function does nothing.
//
// Design-variable bounds come from _feval->init_dvar(); every nonlinear
// constraint is bounded as -1e20 <= g_i(x) <= 0.  Linear constraints are
// not used (NCLIN = 0).
//
// Array arguments are passed with std::vector::data() rather than &v[0]:
// C is empty when there are no nonlinear constraints, and indexing an empty
// vector is undefined behaviour.
void
MAST::NPSOLOptimizationInterface::optimize() {
    
#if MAST_ENABLE_NPSOL == 1
    // make sure that functions have been provided
    libmesh_assert(_funobj);
    libmesh_assert(_funcon);
    
    int
    N      =  _feval->n_vars(),
    NCLIN  =  0,
    NCNLN  =  _feval->n_eq()+_feval->n_ineq(),
    NCTOTL =  N+NCLIN+NCNLN,
    LDA    =  std::max(NCLIN, 1),
    LDJ    =  std::max(NCNLN, 1),
    LDR    =  N,
    INFORM =  0,           // on exit: Reports result of call to NPSOL
                           // < 0 either funobj or funcon has set this to -ve
                           // 0 => converged to point x
                           // 1 => x satisfies optimality conditions, but sequence of iterates has not converged
                           // 2 => Linear constraints and bounds cannot be satisfied. No feasible solution
                           // 3 => Nonlinear constraints and bounds cannot be satisfied. No feasible solution
                           // 4 => Major iter limit was reached
                           // 6 => x does not satisfy first-order optimality to required accuracy
                           // 7 => function derivatives seem to be incorrect
                           // 9 => input parameter invalid
    ITER   = 0,            // iter count
    LENIW  = 3*N + NCLIN + 2*NCNLN,
    LENW   = 2*N*N + N*NCLIN + 2*N*NCNLN + 20*N + 11*NCLIN + 21*NCNLN;
    
    Real
    F      =  0.;          // on exit: final objective

    std::vector<int>
    IW      (LENIW,  0),
    ISTATE  (NCTOTL, 0);    // status of constraints l <= r(x) <= u,
                            // -2 => lower bound is violated by more than delta
                            // -1 => upper bound is violated by more than delta
                            // 0  => both bounds are satisfied by more than delta
                            // 1  => lower bound is active (to within delta)
                            // 2  => upper bound is active (to within delta)
                            // 3  => bounds are equal and equality constraint is satisfied
    
    std::vector<Real>
    A       (LDA,    0.),   // this is used for linear constraints, not currently handled
    BL      (NCTOTL, 0.),
    BU      (NCTOTL, 0.),
    C       (NCNLN,  0.),   // on exit: nonlinear constraints (empty when NCNLN == 0)
    CJAC    (LDJ* N, 0.),   //
                            // on exit: CJAC(i,j) is the partial derivative of ith nonlinear constraint
    CLAMBDA (NCTOTL, 0.),   // on entry: need not be initialized for cold start
                            // on exit: QP multiplier from the QP subproblem, >=0 if istate(j)=1, <0 if istate(j)=2
    G       (N,      0.),   // on exit: objective gradient
    R       (LDR*N,  0.),   // on entry: need not be initialized if called with Cold Start
                            // on exit: information about Hessian, if Hessian=Yes, R is upper Cholesky factor of approx H
    X       (N,      0.),   // on entry: initial point
                            // on exit: final estimate of solution
    W       (LENW,   0.),   // workspace
    xmin    (N,      0.),
    xmax    (N,      0.);
    
    
    // now setup the lower and upper limits for the variables and constraints
    _feval->init_dvar(X, xmin, xmax);
    for (int i=0; i<N; i++) {
        BL[i] = xmin[i];
        BU[i] = xmax[i];
    }
    
    // all constraints are assumed to be g_i(x) <= 0, so that the upper
    // bound is 0 and lower bound is -infinity
    for (int i=0; i<NCNLN; i++) {
        BL[i+N] = -1.e20;
        BU[i+N] =     0.;
    }
    
    std::string nm;
//    nm = "List";
//    npoptn_(nm.c_str(), (int)nm.length());
//    nm = "Verify level 3";
//    npoptn_(nm.c_str(), (int)nm.length());
    
    npsol_(&N,
           &NCLIN,
           &NCNLN,
           &LDA,
           &LDJ,
           &LDR,
           A.data(),
           BL.data(),
           BU.data(),
           _funcon,
           _funobj,
           &INFORM,
           &ITER,
           ISTATE.data(),
           C.data(),
           CJAC.data(),
           CLAMBDA.data(),
           &F,
           G.data(),
           R.data(),
           X.data(),
           IW.data(),
           &LENIW,
           W.data(),
           &LENW);
    
#endif // MAST_ENABLE_NPSOL 1
}
Пример #14
0
/* Formats FORMAT with the arguments in ARGS into a NUL-terminated CHAR_T
   string and returns it.

   If RESULTBUF is non-NULL it is used as the initial output buffer and
   *LENGTHP is read as its capacity in CHAR_T units; otherwise the output is
   allocated with malloc.  The returned pointer is RESULTBUF itself when the
   output fit there, and malloc/realloc-obtained memory otherwise.  On
   success *LENGTHP is set to the number of CHAR_T units produced, not
   counting the terminating NUL.

   Returns NULL with errno set to EINVAL when the format cannot be parsed,
   the arguments cannot be fetched, or the underlying sprintf/snprintf call
   reports a negative count; returns NULL with errno set to ENOMEM when a
   size computation overflows or an allocation fails.  */
CHAR_T *
VASNPRINTF (CHAR_T *resultbuf, size_t *lengthp, const CHAR_T *format, va_list args)
{
  DIRECTIVES d;
  arguments a;

  if (PRINTF_PARSE (format, &d, &a) < 0)
    {
      errno = EINVAL;
      return NULL;
    }

/* Frees the parse results.  Note that this expands to two statements, so it
   must only be used as a full statement inside a braced block.  */
#define CLEANUP() \
  free (d.dir);								\
  if (a.arg)								\
    free (a.arg);

  if (printf_fetchargs (args, &a) < 0)
    {
      CLEANUP ();
      errno = EINVAL;
      return NULL;
    }

  {
    size_t buf_neededlength;
    CHAR_T *buf;
    CHAR_T *buf_malloced;
    const CHAR_T *cp;
    size_t i;
    DIRECTIVE *dp;
    /* Output string accumulator.  */
    CHAR_T *result;
    size_t allocated;
    size_t length;

    /* Allocate a small buffer that will hold a directive passed to
       sprintf or snprintf.  */
    buf_neededlength =
      xsum4 (7, d.max_width_length, d.max_precision_length, 6);
#if HAVE_ALLOCA
    if (buf_neededlength < 4000 / sizeof (CHAR_T))
      {
	buf = (CHAR_T *) alloca (buf_neededlength * sizeof (CHAR_T));
	buf_malloced = NULL;
      }
    else
#endif
      {
	size_t buf_memsize = xtimes (buf_neededlength, sizeof (CHAR_T));
	if (size_overflow_p (buf_memsize))
	  goto out_of_memory_1;
	buf = (CHAR_T *) malloc (buf_memsize);
	if (buf == NULL)
	  goto out_of_memory_1;
	buf_malloced = buf;
      }

    if (resultbuf != NULL)
      {
	result = resultbuf;
	allocated = *lengthp;
      }
    else
      {
	result = NULL;
	allocated = 0;
      }
    length = 0;
    /* Invariants:
       result is either == resultbuf or == NULL or malloc-allocated.
       If length > 0, then result != NULL.  */

    /* Ensures that allocated >= needed.  Aborts through a jump to
       out_of_memory if needed is SIZE_MAX or otherwise too big.  */
#define ENSURE_ALLOCATION(needed) \
    if ((needed) > allocated)						     \
      {									     \
	size_t memory_size;						     \
	CHAR_T *memory;							     \
									     \
	allocated = (allocated > 0 ? xtimes (allocated, 2) : 12);	     \
	if ((needed) > allocated)					     \
	  allocated = (needed);						     \
	memory_size = xtimes (allocated, sizeof (CHAR_T));		     \
	if (size_overflow_p (memory_size))				     \
	  goto out_of_memory;						     \
	if (result == resultbuf || result == NULL)			     \
	  memory = (CHAR_T *) malloc (memory_size);			     \
	else								     \
	  memory = (CHAR_T *) realloc (result, memory_size);		     \
	if (memory == NULL)						     \
	  goto out_of_memory;						     \
	if (result == resultbuf && length > 0)				     \
	  memcpy (memory, result, length * sizeof (CHAR_T));		     \
	result = memory;						     \
      }

    /* Walk the format string: first copy the literal text that precedes the
       current directive, then expand the directive itself.  The loop is left
       once the literal text following the last directive has been copied.  */
    for (cp = format, i = 0, dp = &d.dir[0]; ; cp = dp->dir_end, i++, dp++)
      {
	if (cp != dp->dir_start)
	  {
	    size_t n = dp->dir_start - cp;
	    size_t augmented_length = xsum (length, n);

	    ENSURE_ALLOCATION (augmented_length);
	    memcpy (result + length, cp, n * sizeof (CHAR_T));
	    length = augmented_length;
	  }
	if (i == d.count)
	  break;

	/* Execute a single directive.  */
	if (dp->conversion == '%')
	  {
	    size_t augmented_length;

	    if (!(dp->arg_index == ARG_NONE))
	      abort ();
	    augmented_length = xsum (length, 1);
	    ENSURE_ALLOCATION (augmented_length);
	    result[length] = '%';
	    length = augmented_length;
	  }
	else
	  {
	    if (!(dp->arg_index != ARG_NONE))
	      abort ();

	    if (dp->conversion == 'n')
	      {
		/* %n: store the number of units produced so far through the
		   pointer argument; nothing is appended to the output.  */
		switch (a.arg[dp->arg_index].type)
		  {
		  case TYPE_COUNT_SCHAR_POINTER:
		    *a.arg[dp->arg_index].a.a_count_schar_pointer = length;
		    break;
		  case TYPE_COUNT_SHORT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_short_pointer = length;
		    break;
		  case TYPE_COUNT_INT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_int_pointer = length;
		    break;
		  case TYPE_COUNT_LONGINT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_longint_pointer = length;
		    break;
#ifdef HAVE_LONG_LONG
		  case TYPE_COUNT_LONGLONGINT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_longlongint_pointer = length;
		    break;
#endif
		  default:
		    abort ();
		  }
	      }
	    else
	      {
		arg_type type = a.arg[dp->arg_index].type;
		CHAR_T *p;
		unsigned int prefix_count;
		int prefixes[2];
#if !USE_SNPRINTF
		size_t tmp_length;
		CHAR_T tmpbuf[700];
		CHAR_T *tmp;

		/* Allocate a temporary buffer of sufficient size for calling
		   sprintf.  */
		{
		  size_t width;
		  size_t precision;

		  width = 0;
		  if (dp->width_start != dp->width_end)
		    {
		      if (dp->width_arg_index != ARG_NONE)
			{
			  int arg;

			  if (!(a.arg[dp->width_arg_index].type == TYPE_INT))
			    abort ();
			  arg = a.arg[dp->width_arg_index].a.a_int;
			  width = (arg < 0 ? (unsigned int) (-arg) : arg);
			}
		      else
			{
			  const CHAR_T *digitp = dp->width_start;

			  do
			    width = xsum (xtimes (width, 10), *digitp++ - '0');
			  while (digitp != dp->width_end);
			}
		    }

		  precision = 6;
		  if (dp->precision_start != dp->precision_end)
		    {
		      if (dp->precision_arg_index != ARG_NONE)
			{
			  int arg;

			  if (!(a.arg[dp->precision_arg_index].type == TYPE_INT))
			    abort ();
			  arg = a.arg[dp->precision_arg_index].a.a_int;
			  precision = (arg < 0 ? 0 : arg);
			}
		      else
			{
			  const CHAR_T *digitp = dp->precision_start + 1;

			  precision = 0;
			  do
			    precision = xsum (xtimes (precision, 10), *digitp++ - '0');
			  while (digitp != dp->precision_end);
			}
		    }

		  switch (dp->conversion)
		    {

		    case 'd': case 'i': case 'u':
# ifdef HAVE_LONG_LONG
		      if (type == TYPE_LONGLONGINT || type == TYPE_ULONGLONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long long) * CHAR_BIT
					  * 0.30103 /* binary -> decimal */
					  * 2 /* estimate for FLAG_GROUP */
					 )
			  + 1 /* turn floor into ceil */
			  + 1; /* account for leading sign */
		      else
# endif
		      if (type == TYPE_LONGINT || type == TYPE_ULONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long) * CHAR_BIT
					  * 0.30103 /* binary -> decimal */
					  * 2 /* estimate for FLAG_GROUP */
					 )
			  + 1 /* turn floor into ceil */
			  + 1; /* account for leading sign */
		      else
			tmp_length =
			  (unsigned int) (sizeof (unsigned int) * CHAR_BIT
					  * 0.30103 /* binary -> decimal */
					  * 2 /* estimate for FLAG_GROUP */
					 )
			  + 1 /* turn floor into ceil */
			  + 1; /* account for leading sign */
		      break;

		    case 'o':
# ifdef HAVE_LONG_LONG
		      if (type == TYPE_LONGLONGINT || type == TYPE_ULONGLONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long long) * CHAR_BIT
					  * 0.333334 /* binary -> octal */
					 )
			  + 1 /* turn floor into ceil */
			  + 1; /* account for leading sign */
		      else
# endif
		      if (type == TYPE_LONGINT || type == TYPE_ULONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long) * CHAR_BIT
					  * 0.333334 /* binary -> octal */
					 )
			  + 1 /* turn floor into ceil */
			  + 1; /* account for leading sign */
		      else
			tmp_length =
			  (unsigned int) (sizeof (unsigned int) * CHAR_BIT
					  * 0.333334 /* binary -> octal */
					 )
			  + 1 /* turn floor into ceil */
			  + 1; /* account for leading sign */
		      break;

		    case 'x': case 'X':
# ifdef HAVE_LONG_LONG
		      if (type == TYPE_LONGLONGINT || type == TYPE_ULONGLONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long long) * CHAR_BIT
					  * 0.25 /* binary -> hexadecimal */
					 )
			  + 1 /* turn floor into ceil */
			  + 2; /* account for leading sign or alternate form */
		      else
# endif
		      if (type == TYPE_LONGINT || type == TYPE_ULONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long) * CHAR_BIT
					  * 0.25 /* binary -> hexadecimal */
					 )
			  + 1 /* turn floor into ceil */
			  + 2; /* account for leading sign or alternate form */
		      else
			tmp_length =
			  (unsigned int) (sizeof (unsigned int) * CHAR_BIT
					  * 0.25 /* binary -> hexadecimal */
					 )
			  + 1 /* turn floor into ceil */
			  + 2; /* account for leading sign or alternate form */
		      break;

		    case 'f': case 'F':
# ifdef HAVE_LONG_DOUBLE
		      if (type == TYPE_LONGDOUBLE)
			tmp_length =
			  (unsigned int) (LDBL_MAX_EXP
					  * 0.30103 /* binary -> decimal */
					  * 2 /* estimate for FLAG_GROUP */
					 )
			  + 1 /* turn floor into ceil */
			  + 10; /* sign, decimal point etc. */
		      else
# endif
			tmp_length =
			  (unsigned int) (DBL_MAX_EXP
					  * 0.30103 /* binary -> decimal */
					  * 2 /* estimate for FLAG_GROUP */
					 )
			  + 1 /* turn floor into ceil */
			  + 10; /* sign, decimal point etc. */
		      tmp_length = xsum (tmp_length, precision);
		      break;

		    case 'e': case 'E': case 'g': case 'G':
		    case 'a': case 'A':
		      tmp_length =
			12; /* sign, decimal point, exponent etc. */
		      tmp_length = xsum (tmp_length, precision);
		      break;

		    case 'c':
# if defined HAVE_WINT_T && !WIDE_CHAR_VERSION
		      if (type == TYPE_WIDE_CHAR)
			tmp_length = MB_CUR_MAX;
		      else
# endif
			tmp_length = 1;
		      break;

		    case 's':
# ifdef HAVE_WCHAR_T
		      if (type == TYPE_WIDE_STRING)
			{
			  tmp_length =
			    local_wcslen (a.arg[dp->arg_index].a.a_wide_string);

#  if !WIDE_CHAR_VERSION
			  tmp_length = xtimes (tmp_length, MB_CUR_MAX);
#  endif
			}
		      else
# endif
			tmp_length = strlen (a.arg[dp->arg_index].a.a_string);
		      break;

		    case 'p':
		      tmp_length =
			(unsigned int) (sizeof (void *) * CHAR_BIT
					* 0.25 /* binary -> hexadecimal */
				       )
			  + 1 /* turn floor into ceil */
			  + 2; /* account for leading 0x */
		      break;

		    default:
		      abort ();
		    }

		  if (tmp_length < width)
		    tmp_length = width;

		  tmp_length = xsum (tmp_length, 1); /* account for trailing NUL */
		}

		if (tmp_length <= sizeof (tmpbuf) / sizeof (CHAR_T))
		  tmp = tmpbuf;
		else
		  {
		    size_t tmp_memsize = xtimes (tmp_length, sizeof (CHAR_T));

		    if (size_overflow_p (tmp_memsize))
		      /* Overflow, would lead to out of memory.  */
		      goto out_of_memory;
		    tmp = (CHAR_T *) malloc (tmp_memsize);
		    if (tmp == NULL)
		      /* Out of memory.  */
		      goto out_of_memory;
		  }
#endif

		/* Construct the format string for calling snprintf or
		   sprintf.  */
		p = buf;
		*p++ = '%';
		if (dp->flags & FLAG_GROUP)
		  *p++ = '\'';
		if (dp->flags & FLAG_LEFT)
		  *p++ = '-';
		if (dp->flags & FLAG_SHOWSIGN)
		  *p++ = '+';
		if (dp->flags & FLAG_SPACE)
		  *p++ = ' ';
		if (dp->flags & FLAG_ALT)
		  *p++ = '#';
		if (dp->flags & FLAG_ZERO)
		  *p++ = '0';
		if (dp->width_start != dp->width_end)
		  {
		    size_t n = dp->width_end - dp->width_start;
		    memcpy (p, dp->width_start, n * sizeof (CHAR_T));
		    p += n;
		  }
		if (dp->precision_start != dp->precision_end)
		  {
		    size_t n = dp->precision_end - dp->precision_start;
		    memcpy (p, dp->precision_start, n * sizeof (CHAR_T));
		    p += n;
		  }

		switch (type)
		  {
#ifdef HAVE_LONG_LONG
		  case TYPE_LONGLONGINT:
		  case TYPE_ULONGLONGINT:
		    *p++ = 'l';
		    /*FALLTHROUGH*/
#endif
		  case TYPE_LONGINT:
		  case TYPE_ULONGINT:
#ifdef HAVE_WINT_T
		  case TYPE_WIDE_CHAR:
#endif
#ifdef HAVE_WCHAR_T
		  case TYPE_WIDE_STRING:
#endif
		    *p++ = 'l';
		    break;
#ifdef HAVE_LONG_DOUBLE
		  case TYPE_LONGDOUBLE:
		    *p++ = 'L';
		    break;
#endif
		  default:
		    break;
		  }
		*p = dp->conversion;
#if USE_SNPRINTF
		p[1] = '%';
		p[2] = 'n';
		p[3] = '\0';
#else
		p[1] = '\0';
#endif

		/* Construct the arguments for calling snprintf or sprintf.  */
		prefix_count = 0;
		if (dp->width_arg_index != ARG_NONE)
		  {
		    if (!(a.arg[dp->width_arg_index].type == TYPE_INT))
		      abort ();
		    prefixes[prefix_count++] = a.arg[dp->width_arg_index].a.a_int;
		  }
		if (dp->precision_arg_index != ARG_NONE)
		  {
		    if (!(a.arg[dp->precision_arg_index].type == TYPE_INT))
		      abort ();
		    prefixes[prefix_count++] = a.arg[dp->precision_arg_index].a.a_int;
		  }

#if USE_SNPRINTF
		/* Prepare checking whether snprintf returns the count
		   via %n.  */
		ENSURE_ALLOCATION (xsum (length, 1));
		result[length] = '\0';
#endif

		/* Retry loop: in the snprintf build, an attempt that finds the
		   buffer too small grows it and tries again; the loop is left
		   through the break once the output has been appended.  */
		for (;;)
		  {
		    size_t maxlen;
		    int count;
		    int retcount;

		    maxlen = allocated - length;
		    count = -1;
		    retcount = 0;

#if USE_SNPRINTF
# define SNPRINTF_BUF(arg) \
		    switch (prefix_count)				    \
		      {							    \
		      case 0:						    \
			retcount = SNPRINTF (result + length, maxlen, buf,  \
					     arg, &count);		    \
			break;						    \
		      case 1:						    \
			retcount = SNPRINTF (result + length, maxlen, buf,  \
					     prefixes[0], arg, &count);	    \
			break;						    \
		      case 2:						    \
			retcount = SNPRINTF (result + length, maxlen, buf,  \
					     prefixes[0], prefixes[1], arg, \
					     &count);			    \
			break;						    \
		      default:						    \
			abort ();					    \
		      }
#else
# define SNPRINTF_BUF(arg) \
		    switch (prefix_count)				    \
		      {							    \
		      case 0:						    \
			count = sprintf (tmp, buf, arg);		    \
			break;						    \
		      case 1:						    \
			count = sprintf (tmp, buf, prefixes[0], arg);	    \
			break;						    \
		      case 2:						    \
			count = sprintf (tmp, buf, prefixes[0], prefixes[1],\
					 arg);				    \
			break;						    \
		      default:						    \
			abort ();					    \
		      }
#endif

		    switch (type)
		      {
		      case TYPE_SCHAR:
			{
			  int arg = a.arg[dp->arg_index].a.a_schar;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_UCHAR:
			{
			  unsigned int arg = a.arg[dp->arg_index].a.a_uchar;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_SHORT:
			{
			  int arg = a.arg[dp->arg_index].a.a_short;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_USHORT:
			{
			  unsigned int arg = a.arg[dp->arg_index].a.a_ushort;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_INT:
			{
			  int arg = a.arg[dp->arg_index].a.a_int;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_UINT:
			{
			  unsigned int arg = a.arg[dp->arg_index].a.a_uint;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_LONGINT:
			{
			  long int arg = a.arg[dp->arg_index].a.a_longint;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_ULONGINT:
			{
			  unsigned long int arg = a.arg[dp->arg_index].a.a_ulongint;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_LONG_LONG
		      case TYPE_LONGLONGINT:
			{
			  long long int arg = a.arg[dp->arg_index].a.a_longlongint;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_ULONGLONGINT:
			{
			  unsigned long long int arg = a.arg[dp->arg_index].a.a_ulonglongint;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_DOUBLE:
			{
			  double arg = a.arg[dp->arg_index].a.a_double;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_LONG_DOUBLE
		      case TYPE_LONGDOUBLE:
			{
			  long double arg = a.arg[dp->arg_index].a.a_longdouble;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_CHAR:
			{
			  int arg = a.arg[dp->arg_index].a.a_char;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_WINT_T
		      case TYPE_WIDE_CHAR:
			{
			  wint_t arg = a.arg[dp->arg_index].a.a_wide_char;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_STRING:
			{
			  const char *arg = a.arg[dp->arg_index].a.a_string;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_WCHAR_T
		      case TYPE_WIDE_STRING:
			{
			  const wchar_t *arg = a.arg[dp->arg_index].a.a_wide_string;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_POINTER:
			{
			  void *arg = a.arg[dp->arg_index].a.a_pointer;
			  SNPRINTF_BUF (arg);
			}
			break;
		      default:
			abort ();
		      }

#if USE_SNPRINTF
		    /* Portability: Not all implementations of snprintf()
		       are ISO C 99 compliant.  Determine the number of
		       bytes that snprintf() has produced or would have
		       produced.  */
		    if (count >= 0)
		      {
			/* Verify that snprintf() has NUL-terminated its
			   result.  */
			if (count < maxlen && result[length + count] != '\0')
			  abort ();
			/* Portability hack.  */
			if (retcount > count)
			  count = retcount;
		      }
		    else
		      {
			/* snprintf() doesn't understand the '%n'
			   directive.  */
			if (p[1] != '\0')
			  {
			    /* Don't use the '%n' directive; instead, look
			       at the snprintf() return value.  */
			    p[1] = '\0';
			    continue;
			  }
			else
			  {
			    /* Look at the snprintf() return value.  */
			    if (retcount < 0)
			      {
				/* HP-UX 10.20 snprintf() is doubly deficient:
				   It doesn't understand the '%n' directive,
				   *and* it returns -1 (rather than the length
				   that would have been required) when the
				   buffer is too small.  */
				size_t bigger_need =
				  xsum (xtimes (allocated, 2), 12);
				ENSURE_ALLOCATION (bigger_need);
				continue;
			      }
			    else
			      count = retcount;
			  }
		      }
#endif

		    /* Attempt to handle failure.  */
		    if (count < 0)
		      {
			if (!(result == resultbuf || result == NULL))
			  free (result);
			if (buf_malloced != NULL)
			  free (buf_malloced);
			CLEANUP ();
			errno = EINVAL;
			return NULL;
		      }

#if !USE_SNPRINTF
		    if (count >= tmp_length)
		      /* tmp_length was incorrectly calculated - fix the
			 code above!  */
		      abort ();
#endif

		    /* Make room for the result.  */
		    if (count >= maxlen)
		      {
			/* Need at least count bytes.  But allocate
			   proportionally, to avoid looping eternally if
			   snprintf() reports a too small count.  */
			size_t n =
			  xmax (xsum (length, count), xtimes (allocated, 2));

			ENSURE_ALLOCATION (n);
#if USE_SNPRINTF
			continue;
#endif
		      }

#if USE_SNPRINTF
		    /* The snprintf() result did fit.  */
#else
		    /* Append the sprintf() result.  */
		    memcpy (result + length, tmp, count * sizeof (CHAR_T));
		    if (tmp != tmpbuf)
		      free (tmp);
#endif

		    length += count;
		    break;
		  }
	      }
	  }
      }

    /* Add the final NUL.  */
    ENSURE_ALLOCATION (xsum (length, 1));
    result[length] = '\0';

    if (result != resultbuf && length + 1 < allocated)
      {
	/* Shrink the allocated memory if possible.  */
	CHAR_T *memory;

	memory = (CHAR_T *) realloc (result, (length + 1) * sizeof (CHAR_T));
	if (memory != NULL)
	  result = memory;
      }

    if (buf_malloced != NULL)
      free (buf_malloced);
    CLEANUP ();
    *lengthp = length;
    return result;

  /* Error exits: out_of_memory also releases the output accumulator and
     the directive buffer; out_of_memory_1 is used before either exists.  */
  out_of_memory:
    if (!(result == resultbuf || result == NULL))
      free (result);
    if (buf_malloced != NULL)
      free (buf_malloced);
  out_of_memory_1:
    CLEANUP ();
    errno = ENOMEM;
    return NULL;
  }
}
Пример #15
0
//------------------------------------------------------------------------------------------------------------------------------------
// called when we want to draw the 3D data in our app.
//------------------------------------------------------------------------------------------------------------------------------------
void draw3D()
{
	// draw the grid on the floor
	setColour(0.25f, 0.25f, 0.25f);
	for(float i = -10.0f; i <= 10.1f; i += 1.0f)
	{
		Vec3 zmin(i, 0, -10);
		Vec3 zmax(i, 0,  10);
		Vec3 xmin(-10, 0, i);
		Vec3 xmax(10, 0, i);
		drawLine(xmin, xmax);
		drawLine(zmin, zmax);
	}

	// If using the GPU to compute the lighting (which is what you want to do!)
	if(!g_manualLighting)
	{
		// turn on lighting
		enableLighting();

		// set the diffuse material colour (this will be modulated by the effect of the lighting)
		setColour(1.0f, 1.0f, 1.0f);

		// draw the cube geometry
		drawPrimitives(g_pointsVN, 24, kQuads);

		// turn off lighting
		disableLighting();
	}
	else
	{
		// otherwise, compute the lighting manually. Don't ever do this in practice! It's insane! (But it may be useful for educational purposes)

		// The direction from the vertex to the light (effectively sunlight in this case!).
		Vec3 L(-0.6f, 1.0f, -0.2f);

		// make sure L has been normalised!
		L = normalize(L);

		// start drawing some quads
		begin(kQuads);

		// loop through each vertex normal
		for(int i = 0; i < 24; ++i)
		{
			// compute N.L
			// Make sure we clamp this to zero (so that we ignore any negative values).
			float N_dot_L = std::max(dot(L, g_pointsVN[i].n), 0.0f);

			// the ambient material colour (always gets added to the final colour)
			Vec3 Ka(0.2f, 0.2f, 0.2f);

			// the diffuse material colour
			Vec3 Kd(1.0f, 1.0f, 1.0f);

			// Compute the final colour
			Vec3 colour = Ka + (Kd * N_dot_L);

			// set the vertex colour 
			setColour(colour);

			// specify the vertex
			addVertex(g_pointsVN[i].v);
		}

		// finish drawing our quads
		end();
	}

	// if we are displaying normals
	if(g_displayNormals)
	{
		// make colour pink
		setColour(1.0f, 0.0f, 1.0f);

		// loop through each vertex
		for(int i = 0; i < 24; ++i)
		{
			// compute an offset (along the normal direction) from the vertex
			Vec3 pn = g_pointsVN[i].v + (g_pointsVN[i].n * 0.2f);

			// draw a line to show the normal
			drawLine(g_pointsVN[i].v, pn);
		}
	}
}
slint_t mpi_partition_radix2(elements_t *s, partcond2_t *pc, slint_t rhigh, slint_t rlow, slint_t rwidth, int *scounts, int *sdispls, int size, int rank, MPI_Comm comm) /* sl_proto, sl_func mpi_partition_radix2 */
{
  slkey_pure_t max_nclasses;
  slkey_pure_t nclasses, bit_mask;
  slkey_pure_t k;

  const slint_t max_nareas = size - 1;
  slint_t nareas, nareas_new;
  elements_t areas0[max_nareas], areas1[max_nareas], *areas, *areas_new;

  double *locals, *globals;
  double *local_counts, *local_weights, *global_counts, *global_weights;

  const slint_t max_nparts = size - 1;
  slint_t parts_low, parts_high, nparts_removed;
  slint_t parts[max_nparts], part_areas[max_nparts];

  double parts_range_[2 * 2 * (1 + max_nparts + 1)];
  double *parts_range = parts_range_ + (2 * 2);
  double parts_minmax_[2 * 4 * (1 + max_nparts + 1)];
  double *parts_minmax = parts_minmax_ + (2 * 4);
  slint_t parts_update_[1 + max_nparts + 1];
  slint_t *parts_update = parts_update_ + 1;

  double parts_minmax_new[2 * 4];
  double current_minmax[2 * 2];
  
  double final_locals[2 * max_nparts];

  slint_t i, j, jp1, jm1, l, lp1, lm1;
  slint_t current_width;

  double minmax[2 * 4 * size];
  
  slint_t last_new_area, last_new_class;

#ifdef HAVENT_MPI_IN_PLACE
  double local_minmax[2 * 4];
#endif

  slint_t lc, lcs, gc, gcs;
  double lw, gw, lws, gws;
  double d, m;

  elements_t xi, end;

  slint_t round = 0;
  slint_t direction = 1;

  slint_t refine, finalize;

#ifdef RCOUNTS_RDISPLS
  int *rcounts, *rdispls;
#endif

#ifdef WEIGHT_STATS
  slint_t total_count = 0, partial_counts[size + 1];
  double total_weight = 0.0, partial_weights[size + 1];
  double vmin, vmax;
# ifdef HAVENT_MPI_IN_PLACE
  slint_t partial_counts2[size + 1];
  double partial_weights2[size + 1];
# endif
#endif

  rti_treset(rti_tid_mpi_partition_radix2_while);                   /* sl_tid */
  rti_treset(rti_tid_mpi_partition_radix2_while_count);             /* sl_tid */
  rti_treset(rti_tid_mpi_partition_radix2_while_allreduce);         /* sl_tid */
  rti_treset(rti_tid_mpi_partition_radix2_while_round1);            /* sl_tid */
  rti_treset(rti_tid_mpi_partition_radix2_while_round1_allgather);  /* sl_tid */
  rti_treset(rti_tid_mpi_partition_radix2_while_exscan);            /* sl_tid */
  rti_treset(rti_tid_mpi_partition_radix2_while_check);             /* sl_tid */
  rti_treset(rti_tid_mpi_partition_radix2_while_check_pre);         /* sl_tid */
  rti_treset(rti_tid_mpi_partition_radix2_while_check_classes);     /* sl_tid */
  rti_treset(rti_tid_mpi_partition_radix2_while_check_final);       /* sl_tid */
  rti_treset(rti_tid_mpi_partition_radix2_while_check_post);        /* sl_tid */

  rti_tstart(rti_tid_mpi_partition_radix2_sync);
#ifdef SYNC_ON_INIT
  MPI_Barrier(comm);
#endif
  rti_tstop(rti_tid_mpi_partition_radix2_sync);

  rti_tstart(rti_tid_mpi_partition_radix2);

  if (rhigh < 0) rhigh = radix_high;
  if (rlow < 0) rlow = radix_low;
  if (rwidth < 0) rwidth = sort_radix_width_default;
  
  max_nclasses = powof2_typed(rwidth, slkey_pure_t);

  locals = sl_alloc(2 * (max_nareas * max_nclasses + max_nareas), sizeof(double));
  globals = sl_alloc(2 * (max_nareas * max_nclasses + max_nareas), sizeof(double));

  areas = areas0;
  areas_new = areas1;

  /* init the first area (all elements) */
  nareas = 1;
  elem_assign(s, &areas[0]);

  /* init all parts */
  parts_low = 0;
  parts_high = max_nparts - 1;
  for (i = parts_low; i <= parts_high; ++i)
  {
    parts[i] = i;
    part_areas[i] = 0;
  }

  /* init sdispls */
  for (i = 0; i < size; ++i) sdispls[i] = 0;

  rti_tstart(rti_tid_mpi_partition_radix2_while);

  while (parts_low <= parts_high)
  {
    ++round;

    /* setup bitmask */
    current_width = xmin(rwidth, rhigh - rlow + 1);
    rhigh -= (current_width > 0)?current_width - 1:rhigh;

    nclasses = (current_width > 0)?powof2_typed(current_width, slkey_pure_t):1;
    bit_mask = nclasses - 1;
    
    SL_TRACE_IF(DEBUG_OR_NOT, "ROUND: %" sl_int_type_fmt ", rhigh: %" sl_int_type_fmt ", current_width: %" sl_int_type_fmt ", nclasses: %" sl_key_pure_type_fmt, round, rhigh, current_width, nclasses);

    finalize = (current_width <= 0);

    if (!finalize || round == 1)
    {
      /* init counters */
      local_counts = locals;
      global_counts = globals;
      local_weights = locals + (nareas * nclasses) + nareas;
      global_weights = globals + (nareas * nclasses) + nareas;

      /* zero all counter */
      for (i = 0; i < nareas; ++i)
      for (k = 0; k < nclasses; ++k) local_counts[i * nclasses + k] = local_weights[i * nclasses + k] = 0.0;

      rti_tstart(rti_tid_mpi_partition_radix2_while_count);

      /* for every area */
      for (i = 0; i < nareas; ++i)
      {
        elem_assign_at(&areas[i], areas[i].size, &end);

        if (nclasses > 1)
        {
          /* counts and weights in every class */
          for (elem_assign(&areas[i], &xi); xi.keys < end.keys; elem_inc(&xi))
          {
            k = radix_key2class(key_purify(*xi.keys), rhigh, bit_mask);
            local_counts[i * nclasses + k] += 1;
            local_weights[i * nclasses + k] += elem_weight_one(&xi, 0);
          }

        } else
        {
          /* total counts and weights */
          local_counts[i * nclasses + 0] = areas[i].size;

          for (elem_assign(&areas[i], &xi); xi.keys < end.keys; elem_inc(&xi)) local_weights[i * nclasses + 0] += elem_weight_one(&xi, 0);
        }

        /* total counts and weights in this area */
        local_counts[nareas * nclasses + i] = areas[i].size;

        local_weights[nareas * nclasses + i] = 0.0;
        for (k = 0; k < nclasses; ++k) local_weights[nareas * nclasses + i] += local_weights[i * nclasses + k];
      }

      rti_tstop(rti_tid_mpi_partition_radix2_while_count);

      --rhigh;

      rti_tstart(rti_tid_mpi_partition_radix2_while_allreduce);

      /* create global counts and weights */
#ifdef MPI_PARTITION_RADIX_REDUCEBCAST_THRESHOLD
      if (size >= MPI_PARTITION_RADIX_REDUCEBCAST_THRESHOLD)
      {
        MPI_Reduce(locals, globals, (1 + 1) * (nareas * nclasses + nareas), MPI_DOUBLE, MPI_SUM, REDUCEBCAST_ROOT, comm);
        MPI_Bcast(globals, (1 + 1) * (nareas * nclasses + nareas), MPI_DOUBLE, REDUCEBCAST_ROOT, comm);

      } else
#endif
        MPI_Allreduce(locals, globals, (1 + 1) * (nareas * nclasses + nareas), MPI_DOUBLE, MPI_SUM, comm);

      rti_tstop(rti_tid_mpi_partition_radix2_while_allreduce);
    }

#ifdef TIMING
    SL_TRACE_IF(DEBUG_OR_NOT, "allreduce: %f, nareas: %" sl_int_type_fmt ", nclasses: %" sl_key_type_fmt ", doubles: %" sl_int_type_fmt, rti_tlast(rti_tid_mpi_partition_radix2_while_allreduce), nareas, nclasses, (1 + 1) * (nareas * nclasses + nareas));
#endif

/*    if (DEBUG_OR_NOT)
    {
      printf("%d: locals\n", rank);
      for (i = 0; i < nareas; ++i)
      {
        printf("%d: %" sl_int_type_fmt ":", rank, i);
        for (k = 0; k < nclasses; ++k) printf("  %f", local_counts[i * nclasses + k]);
        printf(" = %f\n", local_counts[nareas * nclasses + i]);
        printf("%d: %" sl_int_type_fmt ":", rank, i);
        for (k = 0; k < nclasses; ++k) printf("  %f", local_weights[i * nclasses + k]);
        printf(" = %f\n", local_weights[nareas * nclasses + i]);
      }
      printf("%d: globals\n", rank);
      for (i = 0; i < nareas; ++i)
      {
        printf("%d: %" sl_int_type_fmt ":", rank, i);
        for (k = 0; k < nclasses; ++k) printf("  %f", global_counts[i * nclasses + k]);
        printf(" = %f\n", global_counts[nareas * nclasses + i]);
        printf("%d: %" sl_int_type_fmt ":", rank, i);
        for (k = 0; k < nclasses; ++k) printf("  %f", global_weights[i * nclasses + k]);
        printf(" = %f\n", global_weights[nareas * nclasses + i]);
      }
    }*/

    /* do some initializations */
    if (round == 1)
    {
      rti_tstart(rti_tid_mpi_partition_radix2_while_round1);
    
      /* distribute min/max counts and weights */
      minmax[rank * 2 * 4 + 0 + 0] = (pc->min_count >= 0)?pc->min_count:(-pc->min_count * global_counts[nareas * nclasses + 0] / size);
      minmax[rank * 2 * 4 + 0 + 1] = (pc->max_count >= 0)?pc->max_count:(-pc->max_count * global_counts[nareas * nclasses + 0] / size);
      minmax[rank * 2 * 4 + 0 + 2] = (pc->min_cpart >= 0)?pc->min_cpart:(-pc->min_cpart * global_counts[nareas * nclasses + 0]);
      minmax[rank * 2 * 4 + 0 + 3] = (pc->max_cpart >= 0)?pc->max_cpart:(-pc->max_cpart * global_counts[nareas * nclasses + 0]);

      minmax[rank * 2 * 4 + 4 + 0] = (pc->min_weight >= 0)?pc->min_weight:(-pc->min_weight * global_weights[nareas * nclasses + 0] / size);
      minmax[rank * 2 * 4 + 4 + 1] = (pc->max_weight >= 0)?pc->max_weight:(-pc->max_weight * global_weights[nareas * nclasses + 0] / size);
      minmax[rank * 2 * 4 + 4 + 2] = (pc->min_wpart >= 0)?pc->min_wpart:(-pc->min_wpart * global_weights[nareas * nclasses + 0]);
      minmax[rank * 2 * 4 + 4 + 3] = (pc->max_wpart >= 0)?pc->max_wpart:(-pc->max_wpart * global_weights[nareas * nclasses + 0]);

      rti_tstart(rti_tid_mpi_partition_radix2_while_round1_allgather);
#ifdef HAVENT_MPI_IN_PLACE
      local_minmax[0 + 0] = minmax[rank * 2 * 4 + 0 + 0];
      local_minmax[0 + 1] = minmax[rank * 2 * 4 + 0 + 1];
      local_minmax[0 + 2] = minmax[rank * 2 * 4 + 0 + 2];
      local_minmax[0 + 3] = minmax[rank * 2 * 4 + 0 + 3];
      local_minmax[4 + 0] = minmax[rank * 2 * 4 + 4 + 0];
      local_minmax[4 + 1] = minmax[rank * 2 * 4 + 4 + 1];
      local_minmax[4 + 2] = minmax[rank * 2 * 4 + 4 + 2];
      local_minmax[4 + 3] = minmax[rank * 2 * 4 + 4 + 3];
      MPI_Allgather(local_minmax, 2 * 4, MPI_DOUBLE, minmax, 2 * 4, MPI_DOUBLE, comm);
/*      MPI_Gather(local_minmax_weights, 2 * 4, MPI_DOUBLE, minmax_weights, 2 * 4, MPI_DOUBLE, 0, comm);
      MPI_Bcast(minmax_weights, 2 * 4 * size, MPI_DOUBLE, 0, comm);*/
#else
      MPI_Allgather(MPI_IN_PLACE, 2 * 4, MPI_DOUBLE, minmax_weights, 2 * 4, MPI_DOUBLE, comm);
#endif
      rti_tstop(rti_tid_mpi_partition_radix2_while_round1_allgather);

#ifdef WEIGHT_STATS
      total_count = global_counts[nareas * nclasses + 0];
      total_weight = global_weights[nareas * nclasses + 0];
#endif

      parts_minmax[2 * 4 * (parts_low - 1) + 0 + 0] = parts_minmax[2 * 4 * (parts_low - 1) + 0 + 2] = 0;
      parts_minmax[2 * 4 * (parts_low - 1) + 0 + 1] = parts_minmax[2 * 4 * (parts_low - 1) + 0 + 3] = 0;
      parts_minmax[2 * 4 * (parts_low - 1) + 4 + 0] = parts_minmax[2 * 4 * (parts_low - 1) + 4 + 2] = 0;
      parts_minmax[2 * 4 * (parts_low - 1) + 4 + 1] = parts_minmax[2 * 4 * (parts_low - 1) + 4 + 3] = 0;

      parts_minmax[2 * 4 * (parts_high + 1) + 0 + 0] = parts_minmax[2 * 4 * (parts_high + 1) + 0 + 2] = 0;
      parts_minmax[2 * 4 * (parts_high + 1) + 0 + 1] = parts_minmax[2 * 4 * (parts_high + 1) + 0 + 3] = global_counts[nareas * nclasses + 0];
      parts_minmax[2 * 4 * (parts_high + 1) + 4 + 0] = parts_minmax[2 * 4 * (parts_high + 1) + 4 + 2] = 0;
      parts_minmax[2 * 4 * (parts_high + 1) + 4 + 1] = parts_minmax[2 * 4 * (parts_high + 1) + 4 + 3] = global_weights[nareas * nclasses + 0];

      parts_range[2 * 2 * (parts_low - 1) + 0 + 0] = parts_range[2 * 2 * (parts_high + 1) + 0 + 0] = 0.0;
      parts_range[2 * 2 * (parts_low - 1) + 0 + 1] = parts_range[2 * 2 * (parts_high + 1) + 0 + 1] = global_counts[nareas * nclasses + 0];
      parts_range[2 * 2 * (parts_low - 1) + 2 + 0] = parts_range[2 * 2 * (parts_high + 1) + 2 + 0] = 0.0;
      parts_range[2 * 2 * (parts_low - 1) + 2 + 1] = parts_range[2 * 2 * (parts_high + 1) + 2 + 1] = global_weights[nareas * nclasses + 0];

      for (i = parts_high; i >= parts_low; --i)
      {
        parts_minmax[2 * 4 * parts[i] + 0 + 1] = parts_minmax[2 * 4 * (parts[i] + 1) + 0 + 1] - minmax[2 * 4 * (parts[i] + 1) + 0 + 0];
        parts_minmax[2 * 4 * parts[i] + 0 + 3] = parts_minmax[2 * 4 * (parts[i] + 1) + 0 + 3] - minmax[2 * 4 * (parts[i] + 1) + 0 + 1];
        parts_minmax[2 * 4 * parts[i] + 4 + 1] = parts_minmax[2 * 4 * (parts[i] + 1) + 4 + 1] - minmax[2 * 4 * (parts[i] + 1) + 4 + 0];
        parts_minmax[2 * 4 * parts[i] + 4 + 3] = parts_minmax[2 * 4 * (parts[i] + 1) + 4 + 3] - minmax[2 * 4 * (parts[i] + 1) + 4 + 1];
        
        parts_minmax[2 * 4 * parts[i] + 0 + 0] = parts_minmax[2 * 4 * parts[i] + 0 + 2] = parts_minmax[2 * 4 * parts[i] + 4 + 0] = parts_minmax[2 * 4 * parts[i] + 4 + 2] = -1;

        parts_range[2 * 2 * parts[i] + 0 + 0] = 0.0;
        parts_range[2 * 2 * parts[i] + 0 + 1] = global_counts[nareas * nclasses + 0];
        parts_range[2 * 2 * parts[i] + 2 + 0] = 0.0;
        parts_range[2 * 2 * parts[i] + 2 + 1] = global_weights[nareas * nclasses + 0];
/*        SL_ASSERT(minmax[2 * 4 * (parts[i] + 1) + 0 + 2] <= minmax[2 * 4 * (parts[i] + 0) + 0 + 3]);*/
/*        SL_ASSERT(minmax[2 * 4 * (parts[i] + 1) + 4 + 2] <= minmax[2 * 4 * (parts[i] + 0) + 4 + 3]);*/

        parts_update[parts[i]] = 1;

        if (finalize)
        {
          final_locals[2 * i + 0] = local_counts[nareas * nclasses + 0];
          final_locals[2 * i + 1] = local_weights[nareas * nclasses + 0];
        }
      }

      rti_tstop(rti_tid_mpi_partition_radix2_while_round1);
    }

    if (finalize)
    {
      j = parts_high - parts_low + 1;
    
      SL_TRACE_IF(DEBUG_OR_NOT, "Exscan: finalizing %" sl_int_type_fmt " parts", j);

      rti_tstart(rti_tid_mpi_partition_radix2_while_exscan);

      MPI_Exscan(&final_locals[2 * parts_low], &locals[2 * parts_low], 2 * j, MPI_DOUBLE, MPI_SUM, comm);
      if (rank == 0) for (i = parts_low; i <= parts_high; ++i) locals[2 * i + 0] = locals[2 * i + 1] = 0;

      rti_tstop(rti_tid_mpi_partition_radix2_while_exscan);
    }

    nareas_new = 0;
    last_new_area = last_new_class = -1;

    /* check all remaining parts */

    SL_TRACE_IF(DEBUG_OR_NOT, "ROUND: %" sl_int_type_fmt ", %s", round, (direction > 0)?"forward":"backward");

    nparts_removed = 0;

    rti_tstart(rti_tid_mpi_partition_radix2_while_check);

    i = (direction > 0)?parts_low:parts_high;
    while ((direction > 0)?(i <= parts_high):(i >= parts_low))
    {
      rti_tstart(rti_tid_mpi_partition_radix2_while_check_pre);
    
      SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ": PART: %" sl_int_type_fmt ",%" sl_int_type_fmt, round, i, parts[i]);

      j = 2 * 4 * parts[i];
      jp1 = 2 * 4 * (parts[i] + 1);
      jm1 = 2 * 4 * (parts[i] - 1);
      l = 2 * 2 * parts[i];
      lp1 = 2 * 2 * (parts[i] + 1);
      lm1 = 2 * 2 * (parts[i] - 1);

      if (parts_update[parts[i]])
      {
        if (direction > 0)
        {
          parts_minmax_new[0 + 0] = parts_minmax[jm1 + 0 + 0] + minmax[j + 0 + 0];
          parts_minmax_new[0 + 2] = parts_minmax[jm1 + 0 + 2] + minmax[j + 0 + 1];
          parts_minmax_new[4 + 0] = parts_minmax[jm1 + 4 + 0] + minmax[j + 4 + 0];
          parts_minmax_new[4 + 2] = parts_minmax[jm1 + 4 + 2] + minmax[j + 4 + 1];

          SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ",%" sl_int_type_fmt ": %f + %f, %f + %f  /  %f + %f, %f + %f", i, parts[i],
            parts_minmax[jm1 + 0 + 0], minmax[j + 0 + 0],
            parts_minmax[jm1 + 0 + 2], minmax[j + 0 + 1],
            parts_minmax[jm1 + 4 + 0], minmax[j + 4 + 0],
            parts_minmax[jm1 + 4 + 2], minmax[j + 4 + 1]);

          SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ": 0. parts_minmax_new: %f  %f  %f  %f  /  %f  %f  %f  %f", parts[i], parts_minmax_new[0 + 0], parts_minmax_new[0 + 1], parts_minmax_new[0 + 2], parts_minmax_new[0 + 3], parts_minmax_new[4 + 0], parts_minmax_new[4 + 1], parts_minmax_new[4 + 2], parts_minmax_new[4 + 3]);

          if (parts_minmax_new[0 + 0] < minmax[jp1 + 0 + 2]) parts_minmax_new[0 + 0] = minmax[jp1 + 0 + 2];
          if (parts_minmax_new[0 + 2] > minmax[j   + 0 + 3]) parts_minmax_new[0 + 2] = minmax[j   + 0 + 3];
          if (parts_minmax_new[4 + 0] < minmax[jp1 + 4 + 2]) parts_minmax_new[4 + 0] = minmax[jp1 + 4 + 2];
          if (parts_minmax_new[4 + 2] > minmax[j   + 4 + 3]) parts_minmax_new[4 + 2] = minmax[j   + 4 + 3];

          parts_minmax_new[0 + 1] = parts_minmax[j + 0 + 1];
          parts_minmax_new[0 + 3] = parts_minmax[j + 0 + 3];
          parts_minmax_new[4 + 1] = parts_minmax[j + 4 + 1];
          parts_minmax_new[4 + 3] = parts_minmax[j + 4 + 3];

        } else
        {
          parts_minmax_new[0 + 1] = parts_minmax[jp1 + 0 + 1] - minmax[jp1 + 0 + 0];
          parts_minmax_new[0 + 3] = parts_minmax[jp1 + 0 + 3] - minmax[jp1 + 0 + 1];
          parts_minmax_new[4 + 1] = parts_minmax[jp1 + 4 + 1] - minmax[jp1 + 4 + 0];
          parts_minmax_new[4 + 3] = parts_minmax[jp1 + 4 + 3] - minmax[jp1 + 4 + 1];

          SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ",%" sl_int_type_fmt ": %f - %f, %f - %f  /  %f - %f, %f - %f", i, parts[i],
            parts_minmax[jp1 + 0 + 1], minmax[jp1 + 0 + 0],
            parts_minmax[jp1 + 0 + 3], minmax[jp1 + 0 + 1],
            parts_minmax[jp1 + 4 + 1], minmax[jp1 + 4 + 0],
            parts_minmax[jp1 + 4 + 3], minmax[jp1 + 4 + 1]);

          SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ": 0. parts_minmax_new: %f  %f  %f  %f  /  %f  %f  %f  %f", parts[i], parts_minmax_new[0 + 0], parts_minmax_new[0 + 1], parts_minmax_new[0 + 2], parts_minmax_new[0 + 3], parts_minmax_new[4 + 0], parts_minmax_new[4 + 1], parts_minmax_new[4 + 2], parts_minmax_new[4 + 3]);

          if (parts_minmax_new[0 + 3] < minmax[jp1 + 0 + 2]) parts_minmax_new[0 + 3] = minmax[jp1 + 0 + 2];
          if (parts_minmax_new[0 + 1] > minmax[j   + 0 + 3]) parts_minmax_new[0 + 1] = minmax[j   + 0 + 3];
          if (parts_minmax_new[4 + 3] < minmax[jp1 + 4 + 2]) parts_minmax_new[4 + 3] = minmax[jp1 + 4 + 2];
          if (parts_minmax_new[4 + 1] > minmax[j   + 4 + 3]) parts_minmax_new[4 + 1] = minmax[j   + 4 + 3];

          parts_minmax_new[0 + 0] = parts_minmax[j + 0 + 0];
          parts_minmax_new[0 + 2] = parts_minmax[j + 0 + 2];
          parts_minmax_new[4 + 0] = parts_minmax[j + 4 + 0];
          parts_minmax_new[4 + 2] = parts_minmax[j + 4 + 2];
        }

        SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ": 1. parts_minmax_new: %f  %f  %f  %f  /  %f  %f  %f  %f", parts[i], parts_minmax_new[0 + 0], parts_minmax_new[0 + 1], parts_minmax_new[0 + 2], parts_minmax_new[0 + 3], parts_minmax_new[4 + 0], parts_minmax_new[4 + 1], parts_minmax_new[4 + 2], parts_minmax_new[4 + 3]);
        SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ": minmax: %f  %f  /  %f  %f", parts[i], minmax[2 * 4 * (parts[i] + 1) + 0 + 2], minmax[2 * 4 * (parts[i] + 0) + 0 + 3], minmax[2 * 4 * (parts[i] + 1) + 4 + 2], minmax[2 * 4 * (parts[i] + 0) + 4 + 3]);

        if (parts_minmax_new[0 + 0] > parts_minmax_new[0 + 1]) parts_minmax_new[0 + 0] = parts_minmax_new[0 + 1] = (parts_minmax_new[0 + 0] + parts_minmax_new[0 + 1]) / 2;
        if (parts_minmax_new[0 + 2] < parts_minmax_new[0 + 3]) parts_minmax_new[0 + 2] = parts_minmax_new[0 + 3] = (parts_minmax_new[0 + 2] + parts_minmax_new[0 + 3]) / 2;

        if (parts_minmax_new[4 + 0] > parts_minmax_new[4 + 1]) parts_minmax_new[4 + 0] = parts_minmax_new[4 + 1] = (parts_minmax_new[4 + 0] + parts_minmax_new[4 + 1]) / 2;
        if (parts_minmax_new[4 + 2] < parts_minmax_new[4 + 3]) parts_minmax_new[4 + 2] = parts_minmax_new[4 + 3] = (parts_minmax_new[4 + 2] + parts_minmax_new[4 + 3]) / 2;

      } else
      {
        parts_minmax_new[0 + 0] = parts_minmax[j + 0 + 0];
        parts_minmax_new[0 + 1] = parts_minmax[j + 0 + 1];
        parts_minmax_new[0 + 2] = parts_minmax[j + 0 + 2];
        parts_minmax_new[0 + 3] = parts_minmax[j + 0 + 3];

        parts_minmax_new[4 + 0] = parts_minmax[j + 4 + 0];
        parts_minmax_new[4 + 1] = parts_minmax[j + 4 + 1];
        parts_minmax_new[4 + 2] = parts_minmax[j + 4 + 2];
        parts_minmax_new[4 + 3] = parts_minmax[j + 4 + 3];
      }

      SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ",%" sl_int_type_fmt ": 2. parts_minmax_new: %f  %f  %f  %f  /  %f  %f  %f  %f", i, parts[i], parts_minmax_new[0 + 0], parts_minmax_new[0 + 1], parts_minmax_new[0 + 2], parts_minmax_new[0 + 3], parts_minmax_new[4 + 0], parts_minmax_new[4 + 1], parts_minmax_new[4 + 2], parts_minmax_new[4 + 3]);

      current_minmax[0 + 0] = xmax(parts_minmax_new[0 + 0], parts_minmax_new[0 + 3]) - parts_range[l + 0 + 0];
      current_minmax[0 + 1] = xmin(parts_minmax_new[0 + 2], parts_minmax_new[0 + 1]) - parts_range[l + 0 + 0];

      current_minmax[2 + 0] = xmax(parts_minmax_new[4 + 0], parts_minmax_new[4 + 3]) - parts_range[l + 2 + 0];
      current_minmax[2 + 1] = xmin(parts_minmax_new[4 + 2], parts_minmax_new[4 + 1]) - parts_range[l + 2 + 0];

      SL_ASSERT(current_minmax[0 + 0] <= current_minmax[0 + 1]);
      SL_ASSERT(current_minmax[2 + 0] <= current_minmax[2 + 1]);

      rti_tstop(rti_tid_mpi_partition_radix2_while_check_pre);

      SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ": current_minmax: %f  %f / %f  %f", parts[i], current_minmax[0 + 0], current_minmax[0 + 1], current_minmax[2 + 0], current_minmax[2 + 1]);

      lcs = gcs = 0;
      lws = gws = 0;

      /* HIT is the default */
      refine = 0;

      if (!finalize)
      {
        rti_tstart(rti_tid_mpi_partition_radix2_while_check_classes);
      
        for (k = 0; k < nclasses; ++k)
        {
          lc = local_counts[part_areas[i] * nclasses + k];
          gc = global_counts[part_areas[i] * nclasses + k];
          lw = local_weights[part_areas[i] * nclasses + k];
          gw = global_weights[part_areas[i] * nclasses + k];

          current_minmax[0 + 0] -= gc;
          current_minmax[0 + 1] -= gc;

          current_minmax[2 + 0] -= gw;
          current_minmax[2 + 1] -= gw;

          SL_TRACE_IF(DEBUG_OR_NOT, "k = %" sl_key_pure_type_fmt ", current_minmax: %f  %f  / %f  %f", k, current_minmax[0], current_minmax[1], current_minmax[2], current_minmax[3]);

          /* stop and refine if max count is skipped OR min count AND max weight is skipped */
          if ((current_minmax[0 + 1] < 0) || (current_minmax[0 + 0] < 0 && current_minmax[2 + 1] < 0))
          {
            refine = 1;
            break;
          }

          lcs += lc;
          gcs += gc;
          lws += lw;
          gws += gw;

          gc = gw = 0.0;

          /* if between min/max counts */
          if (current_minmax[0 + 0] <= 0 && current_minmax[0 + 1] >= 0)
          {
            /* go to next if max count not reached AND min weight not reached */
            if (current_minmax[0 + 1] > 0 && current_minmax[2 + 0] > 0) continue;

            /* look ahead for a better stop */
            if (k + 1 < nclasses && current_minmax[0 + 1] - global_counts[part_areas[i] * nclasses + k + 1] >= 0)
            {
              /* continue if weights will improve */
              if (myabs(current_minmax[2 + 0] + current_minmax[2 + 1]) > myabs(current_minmax[2 + 0] + current_minmax[2 + 1] - 2 * global_weights[part_areas[i] * nclasses + k + 1])) continue;
            }

            /* stop */
            break;
          }
        }

        SL_ASSERT(k < nclasses);

        SL_TRACE_IF(DEBUG_OR_NOT, "%s k = %" sl_key_pure_type_fmt, (refine)?"REFINE":"HIT", k);
      
        rti_tstop(rti_tid_mpi_partition_radix2_while_check_classes);

      } else
      {
        rti_tstart(rti_tid_mpi_partition_radix2_while_check_final);

        /* middle of min/max weight */
        m = (current_minmax[2 + 0] + current_minmax[2 + 1]) / 2;

        /* min. part of weight to contribute */
        d = xmax(0, m - locals[i * 2 + 1]);

        /* contribute all? */
        if (d >= final_locals[i * 2 + 1])
        {
          lc = final_locals[i * 2 + 0];
          lw = final_locals[i * 2 + 1];

        } else
        {
          /* contribute only a part */
          lc = 0;
          lw = 0; /* not required */

          do
          {
            d -= elem_weight_one(s, sdispls[1 + parts[i]] + lc);
            ++lc;

          } while (d >= 0 && lc < final_locals[i * 2 + 0]);

          --lc;
        
          /* if unweighted, then m = middle of min/max count, d = ..., lc = d */
        }

        /* check mc against min/max count borders */
        lc = xminmax(current_minmax[0 + 0] - locals[i * 2 + 0], lc, current_minmax[0 + 1] - locals[i * 2 + 0]);

        /* check agains 0 (don't step back!) and the local contribution */
        lc = xminmax(0, lc, final_locals[i * 2 + 0]);

        /* the exact global counts/weights are unknown (set gc/gw so that parts_range is not changed) */
        gc = 0;
        gw = 0;

        lcs += lc;
        gcs += gc;
        lws += lw;
        gws += gw;
        
        gc = (parts_range[2 * 2 * parts[i] + 0 + 1] - parts_range[2 * 2 * parts[i] + 0 + 0]);
        gw = (parts_range[2 * 2 * parts[i] + 2 + 1] - parts_range[2 * 2 * parts[i] + 2 + 0]);

        rti_tstop(rti_tid_mpi_partition_radix2_while_check_final);
      }      

      rti_tstart(rti_tid_mpi_partition_radix2_while_check_post);
      
      SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ",%" sl_int_type_fmt ": sdispls[%" sl_int_type_fmt " + 1] = %d, lcs = %" sl_int_type_fmt, i, parts[i], parts[i], sdispls[parts[i] + 1], lcs);

      sdispls[parts[i] + 1] += lcs;

      if (gcs > 0 || gws > 0)
      {
        parts_range[l + 0 + 0] += gcs;
        parts_range[l + 0 + 1] = parts_range[l + 0 + 0] + gc;
        parts_range[l + 2 + 0] += gws;
        parts_range[l + 2 + 1] = parts_range[l + 2 + 0] + gw;

        SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ",%" sl_int_type_fmt ": 3. part_minmax_new: %f  %f  %f  %f  /  %f  %f  %f  %f", i, parts[i], parts_minmax_new[0 + 0], parts_minmax_new[0 + 1], parts_minmax_new[0 + 2], parts_minmax_new[0 + 3], parts_minmax_new[4 + 0], parts_minmax_new[4 + 1], parts_minmax_new[4 + 2], parts_minmax_new[4 + 3]);
        SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ",%" sl_int_type_fmt ": parts_range: %f  %f  /  %f  %f", i, parts[i], parts_range[2 * 2 * parts[i] + 0 + 0], parts_range[2 * 2 * parts[i] + 0 + 1], parts_range[2 * 2 * parts[i] + 2 + 0], parts_range[2 * 2 * parts[i] + 2 + 1]);

        parts_minmax_new[0 + 0] = xminmax(parts_range[l + 0 + 0], parts_minmax_new[0 + 0], parts_range[l + 0 + 1]);
        parts_minmax_new[0 + 2] = xminmax(parts_range[l + 0 + 0], parts_minmax_new[0 + 2], parts_range[l + 0 + 1]);
        parts_minmax_new[0 + 1] = xminmax(parts_range[l + 0 + 0], parts_minmax_new[0 + 1], parts_range[l + 0 + 1]);
        parts_minmax_new[0 + 3] = xminmax(parts_range[l + 0 + 0], parts_minmax_new[0 + 3], parts_range[l + 0 + 1]);
      
        parts_minmax_new[4 + 0] = xminmax(parts_range[l + 2 + 0], parts_minmax_new[4 + 0], parts_range[l + 2 + 1]);
        parts_minmax_new[4 + 2] = xminmax(parts_range[l + 2 + 0], parts_minmax_new[4 + 2], parts_range[l + 2 + 1]);
        parts_minmax_new[4 + 1] = xminmax(parts_range[l + 2 + 0], parts_minmax_new[4 + 1], parts_range[l + 2 + 1]);
        parts_minmax_new[4 + 3] = xminmax(parts_range[l + 2 + 0], parts_minmax_new[4 + 3], parts_range[l + 2 + 1]);
      }

      SL_TRACE_IF(DEBUG_OR_NOT, "%" sl_int_type_fmt ",%" sl_int_type_fmt ": 4. part_minmax_new: %f  %f  %f  %f  /  %f  %f  %f  %f", i, parts[i], parts_minmax_new[0 + 0], parts_minmax_new[0 + 1], parts_minmax_new[0 + 2], parts_minmax_new[0 + 3], parts_minmax_new[4 + 0], parts_minmax_new[4 + 1], parts_minmax_new[4 + 2], parts_minmax_new[4 + 3]);

      if (parts_minmax_new[0 + 0] != parts_minmax[j + 0 + 0] || parts_minmax_new[0 + 2] != parts_minmax[j + 0 + 2] || parts_minmax_new[4 + 0] != parts_minmax[j + 4 + 0] || parts_minmax_new[4 + 2] != parts_minmax[j + 4 + 2])
      {
        parts_minmax[j + 0 + 0] = parts_minmax_new[0 + 0];
        parts_minmax[j + 0 + 2] = parts_minmax_new[0 + 2];
        parts_minmax[j + 4 + 0] = parts_minmax_new[4 + 0];
        parts_minmax[j + 4 + 2] = parts_minmax_new[4 + 2];

        parts_update[parts[i] + 1] = 1;
      }

      if (parts_minmax_new[0 + 1] != parts_minmax[j + 0 + 1] || parts_minmax_new[0 + 3] != parts_minmax[j + 0 + 3] || parts_minmax_new[4 + 1] != parts_minmax[j + 4 + 1] || parts_minmax_new[4 + 3] != parts_minmax[j + 4 + 3])
      {
        parts_minmax[j + 0 + 1] = parts_minmax_new[0 + 1];
        parts_minmax[j + 0 + 3] = parts_minmax_new[0 + 3];
        parts_minmax[j + 4 + 1] = parts_minmax_new[4 + 1];
        parts_minmax[j + 4 + 3] = parts_minmax_new[4 + 3];

        parts_update[parts[i] - 1] = 1;
      }

      parts_update[parts[i]] = 0;

      /* refine or remove */
      if (refine)
      {
        /* bits left for partitioning? */
        if (rhigh >= rlow)
        {
          if (last_new_area == part_areas[i] && last_new_class == k) part_areas[i] = nareas_new - 1;
          else
          {
            /* update last_new_... */
            last_new_area = part_areas[i];
            last_new_class = k;

            /* create new area */
            elem_assign_at(&areas[part_areas[i]], lcs, &areas_new[nareas_new]);
            areas_new[nareas_new].size = local_counts[part_areas[i] * nclasses + k];
            part_areas[i] = nareas_new;
            ++nareas_new;
          }

        } else
        {
          /* save local count/weight for the later prefix calculations */
          final_locals[2 * (i - nparts_removed * direction) + 0] = lc;
          final_locals[2 * (i - nparts_removed * direction) + 1] = lw;
        }

        parts[i - nparts_removed * direction] = parts[i];
        part_areas[i - nparts_removed * direction] = part_areas[i];

      } else ++nparts_removed;

      rti_tstop(rti_tid_mpi_partition_radix2_while_check_post);
      
      i += direction;
    }

    if (direction > 0) parts_high -= nparts_removed;
    else parts_low += nparts_removed;

    direction *= -1;

/*    SL_NOTICE_IF(DEBUG_OR_NOT, "nparts = %" sl_int_type_fmt " vs. nareas_new = %" sl_int_type_fmt, nparts, nareas_new);*/

    rti_tstop(rti_tid_mpi_partition_radix2_while_check);
    
    /* switch areas */
    nareas = nareas_new;
    if (areas == areas0)
    {
      areas = areas1;
      areas_new = areas0;
    } else
    {
      areas = areas0;
      areas_new = areas1;
    }
  }

  rti_tstop(rti_tid_mpi_partition_radix2_while);

  /* create scounts */
  for (i = 0; i < size - 1; ++i) scounts[i] = sdispls[i + 1] - sdispls[i];
  scounts[size - 1] = s->size - sdispls[size - 1];

#ifdef SCOUNTS_SDISPLS
  printf("%d: scounts", rank);
  for (i = 0, j = 0; i < size; ++i) { printf("  %d", scounts[i]); j += scounts[i]; }
  printf(" = %" sl_int_type_fmt "\n", j);
  printf("%d: sdispls", rank);
  for (i = 0; i < size; ++i) printf("  %d", sdispls[i]);
  printf("\n");
#endif

#ifdef RCOUNTS_RDISPLS
  rcounts = sl_alloc(size, sizeof(int));
  rdispls = sl_alloc(size, sizeof(int));

  MPI_Alltoall(scounts, 1, MPI_INT, rcounts, 1, MPI_INT, comm);

  rdispls[0] = 0;
  for (i = 1; i < size; ++i) rdispls[i] = rdispls[i - 1] + rcounts[i - 1];

  printf("%d: rcounts", rank);
  for (i = 0; i < size; ++i) printf("  %d", rcounts[i]);
  printf("\n");
  printf("%d: rdispls", rank);
  for (i = 0; i < size; ++i) printf("  %d", rdispls[i]);
  printf("\n");

  sl_free(rcounts);
  sl_free(rdispls);
#endif

  sl_free(locals);
  sl_free(globals);

#ifdef WEIGHT_STATS
  partial_counts[size] = 0;
  partial_weights[size] = 0.0;
  for (i = 0; i < size; ++i)
  {
    partial_counts[i] = scounts[i];
    partial_weights[i] = 0.0;
    for (j = sdispls[i]; j < sdispls[i] + scounts[i]; ++j) partial_weights[i] += elem_weight_one(s, j);
    
    partial_counts[size] += partial_counts[i];
    partial_weights[size] += partial_weights[i];
  }

#ifdef HAVENT_MPI_IN_PLACE
  MPI_Reduce(partial_counts, partial_counts2, size + 1, int_mpi_datatype, MPI_SUM, 0, comm);
  MPI_Reduce(partial_weights, partial_weights2, size + 1, MPI_DOUBLE, MPI_SUM, 0, comm);
# define partial_counts   partial_counts2
# define partial_weights  partial_weights2
#else
  /* recvbuf requires workaround for an in-place/aliased-buffer-check-bug in mpich2 (fixed with rev 5518) */
  MPI_Reduce((rank == 0)?MPI_IN_PLACE:partial_counts, (rank == 0)?partial_counts:NULL, size + 1, int_mpi_datatype, MPI_SUM, 0, comm);
  MPI_Reduce((rank == 0)?MPI_IN_PLACE:partial_weights, (rank == 0)?partial_weights:NULL, size + 1, MPI_DOUBLE, MPI_SUM, 0, comm);
#endif

  if (rank == 0)
  {
    printf("%d: total_count: %" sl_int_type_fmt " vs. %" sl_int_type_fmt "\n", rank, total_count, partial_counts[size]);
    d = 0.0;
    vmin = 1.0;
    vmax = 0.0;
    for (i = 0; i < size; ++i)
    {
/*      printf("%d: %" sl_int_type_fmt " %" sl_int_type_fmt " / %f - %" sl_int_type_fmt " / %f\n", rank, i, partial_counts[i], (double) partial_counts[i] / partial_counts[size], (partial_counts[size] / size) - partial_counts[i], fabs(1.0 - ((double) partial_counts[i] * size / partial_counts[size])));*/
      d += fabs((partial_counts[size] / size) - partial_counts[i]);
      if (fabs(1.0 - ((double) partial_counts[i] * size / partial_counts[size])) < vmin) vmin = fabs(1.0 - ((double) partial_counts[i] * size / partial_counts[size]));
      if (fabs(1.0 - ((double) partial_counts[i] * size / partial_counts[size])) > vmax) vmax = fabs(1.0 - ((double) partial_counts[i] * size / partial_counts[size]));
    }
    printf("%d: min/max: %f / %f\n", rank, vmin, vmax);
    printf("%d: average_count: %" sl_int_type_fmt " - %f / %f\n", rank, partial_counts[size] / size, d / size, d / partial_counts[size]);

    printf("%d: total_weight: %f vs. %f\n", rank, total_weight, partial_weights[size]);
    d = 0.0;
    vmin = 1.0;
    vmax = 0.0;
    for (i = 0; i < size; ++i)
    {
/*      printf("%d: %" sl_int_type_fmt " %f / %f - %f / %f\n", rank, i, partial_weights[i], partial_weights[i] / partial_weights[size], (partial_weights[size] / size) - partial_weights[i], fabs(1.0 - (partial_weights[i] * size / partial_weights[size])));*/
      d += fabs((partial_weights[size] / size) - partial_weights[i]);
      if (fabs(1.0 - (partial_weights[i] * size / partial_weights[size])) < vmin) vmin = fabs(1.0 - (partial_weights[i] * size / partial_weights[size]));
      if (fabs(1.0 - (partial_weights[i] * size / partial_weights[size])) > vmax) vmax = fabs(1.0 - (partial_weights[i] * size / partial_weights[size]));
    }
    printf("%d: min/max: %f / %f\n", rank, vmin, vmax);
    printf("%d: average_weight: %f - %f / %f\n", rank, partial_weights[size] / size, d / size, d / partial_weights[size]);
  }
#endif

  rti_tstop(rti_tid_mpi_partition_radix2);

#if defined(TIMING_STATS) && defined(SL_USE_RTI_TIM)
  if (rank == 0)
  {
    printf("%d: mpi_partition_radix: %f\n", rank, rti_tlast(rti_tid_mpi_partition_radix2));
    printf("%d: mpi_partition_radix: sync: %f\n", rank, rti_tlast(rti_tid_mpi_partition_radix2_sync));
    printf("%d: mpi_partition_radix: while: %f\n", rank, rti_tlast(rti_tid_mpi_partition_radix2_while));
    printf("%d: mpi_partition_radix:   count: %f\n", rank, rti_tcumu(rti_tid_mpi_partition_radix2_while_count));
    printf("%d: mpi_partition_radix:   allreduce: %f\n", rank, rti_tcumu(rti_tid_mpi_partition_radix2_while_allreduce));
    printf("%d: mpi_partition_radix:   round1: %f\n", rank, rti_tcumu(rti_tid_mpi_partition_radix2_while_round1));
    printf("%d: mpi_partition_radix:     allgather: %f\n", rank, rti_tcumu(rti_tid_mpi_partition_radix2_while_round1_allgather));
    printf("%d: mpi_partition_radix:   exscan: %f\n", rank, rti_tlast(rti_tid_mpi_partition_radix2_while_exscan));
    printf("%d: mpi_partition_radix:   check: %f\n", rank, rti_tcumu(rti_tid_mpi_partition_radix2_while_check));
    printf("%d: mpi_partition_radix:     pre: %f\n", rank, rti_tlast(rti_tid_mpi_partition_radix2_while_check_pre));
    printf("%d: mpi_partition_radix:     classes: %f\n", rank, rti_tlast(rti_tid_mpi_partition_radix2_while_check_classes));
    printf("%d: mpi_partition_radix:     final: %f\n", rank, rti_tlast(rti_tid_mpi_partition_radix2_while_check_final));
    printf("%d: mpi_partition_radix:     post: %f\n", rank, rti_tlast(rti_tid_mpi_partition_radix2_while_check_post));
  }
#endif

  return 0;
}
Пример #17
0
// Sets the bit belonging to pixel (x, y) in the nhd_mem buffer.
// Coordinates greater than xmax() / ymax() are ignored without any error.
void Nhdc12832::setpixel(unsigned int x, unsigned int y){
	// Reject out-of-range coordinates up front; x is tested first, so
	// ymax() is only consulted once x has been accepted.
	if( x > xmax() ) return;
	if( y > ymax() ) return;

	// Eight consecutive y values map onto the same nhd_mem[x][...] element.
	const unsigned int cell = y / 8;
	// The low three bits of y pick the mask entry, in reversed order
	// (y % 8 == 0 selects mask[7]).
	// NOTE(review): presumably mask[] holds single-bit patterns - confirm.
	const unsigned int slot = 7 - (y % 8);

	nhd_mem[x][cell] |= mask[slot];
}
slint_t mpi_select_exact_radix_fixed(elements_t *s, slint_t nelements, slint_t nparts, partcond_t *pconds, slint_t rhigh, slint_t rlow, slint_t rwidth, int *sdispls, int size, int rank, MPI_Comm comm) /* sl_proto, sl_func mpi_select_exact_radix_fixed */
{
  slkey_pure_t max_nclasses, nclasses, bit_mask;
  slkey_pure_t k, l;

  typedef struct {
    slint_t count_min, count_max;
    slint_t count_low, count_hig;
#ifdef elem_weight
    double weight_min, weight_max;
    double weight_low, weight_hig;
#endif
  } mmlh_t;

  mmlh_t mmlh[nparts];

  const slint_t max_nborders = nparts - 1;
  slint_t border_lo, border_hi, nborders_removed;
  slint_t borders[max_nborders], border_areas[max_nborders];

#define MIN_LE  0
#define MIN_RI  1
#define MAX_LE  2
#define MAX_RI  3

  struct {
    slint_t update;
    slint_t crange[2], cmmlr[4];
#ifdef elem_weight
    double wrange[2], wmmlr[4];
#endif
  } border_infos_[1 + max_nborders + 1], *border_infos = border_infos_ + 1, border_info_old;

  const slint_t max_nareas = max_nborders;
  slint_t nareas, nareas_new;
  elements_t areas0[max_nareas * nelements], areas1[max_nareas * nelements], *areas, *areas_new;

  slint_t *area_counts, *current_counts;
  double *local_counts, *global_counts;
#ifdef elem_weight
  double *local_weights, *global_weights, *current_weights;
#endif

  slint_t current_cmm[2];
#ifdef elem_weight
  double current_wmm[2];
#endif

  slint_t final_areas[max_nborders * nelements];
  double final_locals[NCONDS * max_nborders], *final_globals;

  slint_t current_width;
  slint_t round, direction, refine, finalize;
  slint_t last_new_area, last_new_class;

  slint_t lc, lcs, gc, gcs, lcv[nelements], lcsv[nelements];
#ifdef elem_weight
  double lw, gw, lws, gws;
  double mw, dw;
  double mcw[4];
#else
  slint_t mc, dc;
#endif

  slint_t i, j;

  elements_t xi, end;

#ifdef VERIFY
  slint_t v;
#endif


  SL_TRACE_IF(DEBUG_OR_NOT, "starting mpi_select_exact_radix");

  /* sl_tid rti_tid_mpi_select_exact_radix rti_tid_mpi_select_exact_radix_sync */

  rti_treset(rti_tid_mpi_select_exact_radix_while);                   /* sl_tid */
  rti_treset(rti_tid_mpi_select_exact_radix_while_count);             /* sl_tid */
  rti_treset(rti_tid_mpi_select_exact_radix_while_allreduce);         /* sl_tid */
  rti_treset(rti_tid_mpi_select_exact_radix_while_round1);            /* sl_tid */
  rti_treset(rti_tid_mpi_select_exact_radix_while_round1_allgather);  /* sl_tid */
  rti_treset(rti_tid_mpi_select_exact_radix_while_exscan);            /* sl_tid */
  rti_treset(rti_tid_mpi_select_exact_radix_while_check);             /* sl_tid */
  rti_treset(rti_tid_mpi_select_exact_radix_while_check_pre);         /* sl_tid */
  rti_treset(rti_tid_mpi_select_exact_radix_while_check_classes);     /* sl_tid */
  rti_treset(rti_tid_mpi_select_exact_radix_while_check_final);       /* sl_tid */
  rti_treset(rti_tid_mpi_select_exact_radix_while_check_post);        /* sl_tid */

  rti_tstart(rti_tid_mpi_select_exact_radix_sync);
#ifdef SYNC_ON_INIT
  MPI_Barrier(comm);
#endif
  rti_tstop(rti_tid_mpi_select_exact_radix_sync);

#ifdef VERIFY
  v = elements_validate_order(s, 1);
  
  SL_TRACE_IF(DEBUG_OR_NOT, "elements order: %s (%" slint_fmt ")", (v > 0)?"FAILED":"SUCCESS", v);
#endif

  rti_tstart(rti_tid_mpi_select_exact_radix);

  if (rhigh < 0) rhigh = key_radix_high;
  if (rlow < 0) rlow = key_radix_low;
  if (rwidth < 0) rwidth = sort_radix_width_default;
  
  max_nclasses = powof2_typed(rwidth, slkey_pure_t);

/*  SL_TRACE_IF(DEBUG_OR_NOT, "alloc area_counts: %" slint_fmt " * %d", max_nareas * nelements * max_nclasses, sizeof(slint_t));
  SL_TRACE_IF(DEBUG_OR_NOT, "alloc local_counts: %" slint_fmt " * %d", NCONDS * (max_nareas * max_nclasses + max_nareas), sizeof(slint_t));
  SL_TRACE_IF(DEBUG_OR_NOT, "alloc global_counts: %" slint_fmt " * %d", NCONDS * (max_nareas * max_nclasses + max_nareas), sizeof(slint_t));*/

  area_counts = sl_alloc(max_nareas * nelements * max_nclasses, sizeof(slint_t));
  local_counts = sl_alloc(NCONDS * (max_nareas * max_nclasses + max_nareas), sizeof(double));
  global_counts = sl_alloc(NCONDS * (max_nareas * max_nclasses + max_nareas), sizeof(double));

  /* init areas (first area = all elements) */
  areas = areas0;
  areas_new = areas1;

  nareas = 1;
  for (j = 0; j < nelements; ++j) elem_assign(&s[j], &areas[0 * nelements + j]);

  /* init parts */
  border_lo = 0;
  border_hi = max_nborders - 1;
  for (i = border_lo; i <= border_hi; ++i)
  {
    borders[i] = i;
    border_areas[i] = 0;
  }

  /* init sdispls */
  for (i = 0; i < nparts; ++i)
  for (j = 0; j < nelements; ++j) sdispls[i * nelements + j] = 0;

  rti_tstart(rti_tid_mpi_select_exact_radix_while);

  round = 0;
  while (border_lo <= border_hi)
  {
    ++round;

    /* setup bitmask */
    current_width = xmin(rwidth, rhigh - rlow + 1);
    rhigh -= (current_width > 0)?current_width - 1:rhigh;

    nclasses = (current_width > 0)?powof2_typed(current_width, slkey_pure_t):1;
    bit_mask = nclasses - 1;

    SL_TRACE_IF(DEBUG_OR_NOT, "ROUND: %" slint_fmt ", rhigh: %" slint_fmt ", current_width: %" slint_fmt ", nclasses: %" sl_key_pure_type_fmt, round, rhigh, current_width, nclasses);

    finalize = (current_width <= 0);

    if (!finalize || round == 1)
    {
#ifdef elem_weight
      /* init weight counters */
      local_weights = local_counts + (nareas * nclasses) + nareas;
      global_weights = global_counts + (nareas * nclasses) + nareas;
#endif

      /* zero all counter */
      for (i = 0; i < nareas; ++i)
      for (k = 0; k < nclasses; ++k) local_counts[i * nclasses + k] = 
#ifdef elem_weight
        local_weights[i * nclasses + k] = 
#endif
        0.0;

      rti_tstart(rti_tid_mpi_select_exact_radix_while_count);

      /* for every area */
      for (i = 0; i < nareas; ++i)
      {
        local_counts[nareas * nclasses + i] = 0;
#ifdef elem_weight
        local_weights[nareas * nclasses + i] = 0.0;
#endif

        /* for every list of elements */
        for (j = 0; j < nelements; ++j)
        {
          SL_TRACE_IF(DEBUG_OR_NOT, "area %" slint_fmt ",%" slint_fmt ": size = %" slint_fmt, i, j, areas[i * nelements + j].size);

          elem_assign_at(&areas[i * nelements + j], areas[i * nelements + j].size, &end);
          
          current_counts = area_counts + ((i * nelements + j) * nclasses);
#ifdef elem_weight
          current_weights = local_weights + (i * nclasses);
#endif

          for (k = 0; k < nclasses; ++k) current_counts[k] = 0;

          if (nclasses > 1)
          {
            /* counts and weights in every class */
            for (elem_assign(&areas[i * nelements + j], &xi); xi.keys < end.keys; elem_inc(&xi))
            {
              k = key_radix_key2class(key_purify(*xi.keys), rhigh, bit_mask);
              current_counts[k] += 1;
/*              SL_TRACE_IF(DEBUG_OR_NOT, "key %" sl_key_pure_type_fmt " goes to bin %"  sl_key_pure_type_fmt, key_purify(*xi.keys), k);*/
#ifdef elem_weight
              current_weights[k] += elem_weight(&xi, 0);
#endif
            }

          } else
          {
            /* total counts and weights */
            current_counts[0] = areas[i * nelements + j].size;

#ifdef elem_weight
            for (elem_assign(&areas[i * nelements + j], &xi); xi.keys < end.keys; elem_inc(&xi)) current_weights[0] += elem_weight(&xi, 0);
#endif
          }
          
          for (k = 0; k < nclasses; ++k) local_counts[i * nclasses + k] += current_counts[k];

          /* total counts and weights in this area */
          local_counts[nareas * nclasses + i] += areas[i * nelements + j].size;
#ifdef elem_weight
          for (k = 0; k < nclasses; ++k) local_weights[nareas * nclasses + i] += current_weights[k];
#endif
        }

        SL_TRACE_ARRAY_IF(DEBUG_OR_NOT, "%" slint_fmt ": counts =", " %f", k, nclasses, (&local_counts[i * nclasses]), i);
      }

      rti_tstop(rti_tid_mpi_select_exact_radix_while_count);

      --rhigh;

      SL_TRACE_IF(DEBUG_OR_NOT, "all-reducing %" slint_fmt " doubles", (slint_t) (NCONDS * (nareas * nclasses + nareas)));

      rti_tstart(rti_tid_mpi_select_exact_radix_while_allreduce);

      /* create global counts and weights */
#ifdef MPI_SELECT_EXACT_RADIX_REDUCEBCAST_THRESHOLD
      if (size >= MPI_SELECT_EXACT_RADIX_REDUCEBCAST_THRESHOLD)
      {
        MPI_Reduce(local_counts, global_counts, NCONDS * (nareas * nclasses + nareas), MPI_DOUBLE, MPI_SUM, REDUCEBCAST_ROOT, comm);
        MPI_Bcast(global_counts, NCONDS * (nareas * nclasses + nareas), MPI_DOUBLE, REDUCEBCAST_ROOT, comm);

      } else
#endif
        MPI_Allreduce(local_counts, global_counts, NCONDS * (nareas * nclasses + nareas), MPI_DOUBLE, MPI_SUM, comm);

      rti_tstop(rti_tid_mpi_select_exact_radix_while_allreduce);
    }

    /* do initializations */
    if (round == 1)
    {
      rti_tstart(rti_tid_mpi_select_exact_radix_while_round1);

      for (i = 0; i < nparts; ++i)
      {
        /* truncate counts, set default values and determine local (count/weight) limits */
        init_partconds(1, &pconds[i], nparts, global_counts[nareas * nclasses + 0],
#ifdef elem_weight
          global_weights[nareas * nclasses + 0]
#else
          0
#endif
          );

        mmlh[i].count_min = pconds[i].count_min;
        mmlh[i].count_max = pconds[i].count_max;
        mmlh[i].count_low = pconds[i].count_low;
        mmlh[i].count_hig = pconds[i].count_high;

#ifdef elem_weight
        mmlh[i].weight_min = pconds[i].weight_min;
        mmlh[i].weight_max = pconds[i].weight_max;
        mmlh[i].weight_low = pconds[i].weight_low;
        mmlh[i].weight_hig = pconds[i].weight_high;
#endif
      }

      /* init lowest and highest part (sentinels) */
      border_infos[border_lo - 1].update = 0;
      border_infos[border_lo - 1].crange[0] = 0;
      border_infos[border_lo - 1].crange[1] = 0;
      border_infos[border_lo - 1].cmmlr[MIN_LE] = border_infos[border_lo - 1].cmmlr[MAX_LE] = 0;
      border_infos[border_lo - 1].cmmlr[MIN_RI] = border_infos[border_lo - 1].cmmlr[MAX_RI] = 0;

      SL_TRACE_IF(DEBUG_OR_NOT, "lowest: %" slint_fmt ": init count[min/max-left/right]: %" slint_fmt " / %" slint_fmt " - %" slint_fmt " / %" slint_fmt "", border_lo - 1,
        border_infos[border_lo - 1].cmmlr[MIN_LE], border_infos[border_lo - 1].cmmlr[MAX_LE], border_infos[border_lo - 1].cmmlr[MIN_RI], border_infos[border_lo - 1].cmmlr[MAX_RI]);

#ifdef elem_weight
      border_infos[border_lo - 1].wrange[0] = 0.0;
      border_infos[border_lo - 1].wrange[1] = 0.0;
      border_infos[border_lo - 1].wmmlr[MIN_LE] = border_infos[border_lo - 1].wmmlr[MAX_LE] = 0.0;
      border_infos[border_lo - 1].wmmlr[MIN_RI] = border_infos[border_lo - 1].wmmlr[MAX_RI] = 0.0;

      SL_TRACE_IF(DEBUG_OR_NOT, "lowest: %" slint_fmt ": init weight[min/max-left/right]: %f / %f - %f / %f", border_lo - 1,
        border_infos[border_lo - 1].wmmlr[MIN_LE], border_infos[border_lo - 1].wmmlr[MAX_LE], border_infos[border_lo - 1].wmmlr[MIN_RI], border_infos[border_lo - 1].wmmlr[MAX_RI]);
#endif

      /* init highest part (sentinel) */
      border_infos[border_hi + 1].update = 0;
      border_infos[border_hi + 1].crange[0] = global_counts[nareas * nclasses + 0];
      border_infos[border_hi + 1].crange[1] = global_counts[nareas * nclasses + 0];
      border_infos[border_hi + 1].cmmlr[MIN_LE] = border_infos[border_hi + 1].cmmlr[MAX_LE] = 0;
      border_infos[border_hi + 1].cmmlr[MIN_RI] = border_infos[border_hi + 1].cmmlr[MAX_RI] = global_counts[nareas * nclasses + 0];

      SL_TRACE_IF(DEBUG_OR_NOT, "highest: %" slint_fmt ": init count[min/max-left/right]: %" slint_fmt " / %" slint_fmt " - %" slint_fmt " / %" slint_fmt "", border_hi + 1,
        border_infos[border_hi + 1].cmmlr[MIN_LE], border_infos[border_hi + 1].cmmlr[MAX_LE], border_infos[border_hi + 1].cmmlr[MIN_RI], border_infos[border_hi + 1].cmmlr[MAX_RI]);

#ifdef elem_weight
      border_infos[border_hi + 1].wrange[0] = global_weights[nareas * nclasses + 0];
      border_infos[border_hi + 1].wrange[1] = global_weights[nareas * nclasses + 0];
      border_infos[border_hi + 1].wmmlr[MIN_LE] = border_infos[border_hi + 1].wmmlr[MAX_LE] = 0.0;
      border_infos[border_hi + 1].wmmlr[MIN_RI] = border_infos[border_hi + 1].wmmlr[MAX_RI] = global_weights[nareas * nclasses + 0];

      SL_TRACE_IF(DEBUG_OR_NOT, "highest: %" slint_fmt ": init weight[min/max-left/right]: %f / %f - %f / %f", border_hi + 1,
        border_infos[border_hi + 1].wmmlr[MIN_LE], border_infos[border_hi + 1].wmmlr[MAX_LE], border_infos[border_hi + 1].wmmlr[MIN_RI], border_infos[border_hi + 1].wmmlr[MAX_RI]);
#endif

      /* init regular parts (backwards) */
      for (i = border_hi; i >= border_lo; --i)
      {
        border_infos[borders[i]].update = 1;
        border_infos[borders[i]].crange[0] = 0;
        border_infos[borders[i]].crange[1] = global_counts[nareas * nclasses + 0];
        border_infos[borders[i]].cmmlr[MIN_LE] = -1;
        border_infos[borders[i]].cmmlr[MIN_RI] = border_infos[borders[i] + 1].cmmlr[MIN_RI] - mmlh[borders[i] + 1].count_min;
        border_infos[borders[i]].cmmlr[MAX_LE] = -1;
        border_infos[borders[i]].cmmlr[MAX_RI] = border_infos[borders[i] + 1].cmmlr[MAX_RI] - mmlh[borders[i] + 1].count_max;

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": init count[min/max-left/right]: %" slint_fmt " / %" slint_fmt " - %" slint_fmt " / %" slint_fmt "", i, borders[i],
          border_infos[borders[i]].cmmlr[MIN_LE], border_infos[borders[i]].cmmlr[MAX_LE], border_infos[borders[i]].cmmlr[MIN_RI], border_infos[borders[i]].cmmlr[MAX_RI]);

#ifdef elem_weight
        border_infos[borders[i]].wrange[0] = 0.0;
        border_infos[borders[i]].wrange[1] = global_weights[nareas * nclasses + 0];
        border_infos[borders[i]].wmmlr[MIN_LE] = -1.0;
        border_infos[borders[i]].wmmlr[MIN_RI] = border_infos[borders[i] + 1].wmmlr[MIN_RI] - mmlh[borders[i] + 1].weight_min;
        border_infos[borders[i]].wmmlr[MAX_LE] = -1.0;
        border_infos[borders[i]].wmmlr[MAX_RI] = border_infos[borders[i] + 1].wmmlr[MAX_RI] - mmlh[borders[i] + 1].weight_max;

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": init weight[min/max-left/right]: %f / %f - %f / %f", i, borders[i],
          border_infos[borders[i]].wmmlr[MIN_LE], border_infos[borders[i]].wmmlr[MAX_LE], border_infos[borders[i]].wmmlr[MIN_RI], border_infos[borders[i]].wmmlr[MAX_RI]);
#endif

        /* prepare for finalization in the 1st round */
        if (finalize)
        {
          for (j = 0; j < nelements; ++j) final_areas[i * nelements + j] = area_counts[(0 * nelements + j) * nclasses + 0];

          final_locals[NCONDS * i + 0] = local_counts[nareas * nclasses + 0];
#ifdef elem_weight
          final_locals[NCONDS * i + 1] = local_weights[nareas * nclasses + 0];
#endif
        }
      }
      
      /* first direction: forward */
      direction = 1;

      rti_tstop(rti_tid_mpi_select_exact_radix_while_round1);
    }

    /* compute prefixes for finalization */
    if (finalize)
    {
      /* determine number of parts to finalize */
      j = border_hi - border_lo + 1;
    
      SL_TRACE_IF(DEBUG_OR_NOT, "Exscan: finalizing %" slint_fmt " parts", j);

      rti_tstart(rti_tid_mpi_select_exact_radix_while_exscan);

      /* use local_counts to store the global prefix sums */      
      final_globals = local_counts;

      /* create global prefix sums (set rank 0 to zero) */
      MPI_Exscan(&final_locals[NCONDS * border_lo], &final_globals[NCONDS * border_lo], NCONDS * j, MPI_DOUBLE, MPI_SUM, comm);
      if (rank == 0) for (i = border_lo; i <= border_hi; ++i) final_globals[NCONDS * i + 0] = 
#ifdef elem_weight
        final_globals[NCONDS * i + 1] = 
#endif
        0.0;

      rti_tstop(rti_tid_mpi_select_exact_radix_while_exscan);
    }

    /* check all remaining parts */
    SL_TRACE_IF(DEBUG_OR_NOT, "ROUND: %" slint_fmt ", %s", round, (direction > 0)?"forward":"backward");

    nareas_new = 0;
    last_new_area = last_new_class = -1;
    nborders_removed = 0;

    rti_tstart(rti_tid_mpi_select_exact_radix_while_check);

    i = (direction > 0)?border_lo:border_hi;
    while ((direction > 0)?(i <= border_hi):(i >= border_lo))
    {
      /* check partition borders[i] */
      SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ": PART: %" slint_fmt ",%" slint_fmt, round, i, borders[i]);

      rti_tstart(rti_tid_mpi_select_exact_radix_while_check_pre);

      /* save to old limits */
      border_info_old = border_infos[borders[i]];

      /* is an update required? */
      if (border_infos[borders[i]].update)
      {
        /* forward */
        if (direction > 0)
        {
          /* init from min/max (always) */
          border_infos[borders[i]].cmmlr[MIN_LE] = border_infos[borders[i] - 1].cmmlr[MIN_LE] + mmlh[borders[i]].count_min;
          border_infos[borders[i]].cmmlr[MAX_LE] = border_infos[borders[i] - 1].cmmlr[MAX_LE] + mmlh[borders[i]].count_max;

          SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": count[min/max-left]: %" slint_fmt " + %" slint_fmt ", %" slint_fmt " + %" slint_fmt "", i, borders[i],
            border_infos[borders[i] - 1].cmmlr[MIN_LE], mmlh[borders[i]].count_min,
            border_infos[borders[i] - 1].cmmlr[MAX_LE], mmlh[borders[i]].count_max);

          /* check against low/high (on demand) */
          if (pconds->pcm & SLPC_COUNTS_LH)
          {
            if (border_infos[borders[i]].cmmlr[MIN_LE] < mmlh[borders[i] + 1].count_low) border_infos[borders[i]].cmmlr[MIN_LE] = mmlh[borders[i] + 1].count_low;
            if (border_infos[borders[i]].cmmlr[MAX_LE] > mmlh[borders[i]    ].count_hig) border_infos[borders[i]].cmmlr[MAX_LE] = mmlh[borders[i]    ].count_hig;
          }

#ifdef elem_weight
          /* init from min/max (always) */
          border_infos[borders[i]].wmmlr[MIN_LE] = border_infos[borders[i] - 1].wmmlr[MIN_LE] + mmlh[borders[i]].weight_min;
          border_infos[borders[i]].wmmlr[MAX_LE] = border_infos[borders[i] - 1].wmmlr[MAX_LE] + mmlh[borders[i]].weight_max;

          SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": weight[min/max-left]: %f + %f, %f + %f", i, borders[i],
            border_infos[borders[i] - 1].wmmlr[MIN_LE], mmlh[borders[i]].weight_min,
            border_infos[borders[i] - 1].wmmlr[MAX_LE], mmlh[borders[i]].weight_max);

          /* check against low/high (on demand) */
          if (pconds->pcm & SLPC_WEIGHTS_LH)
          {
            if (border_infos[borders[i]].wmmlr[MIN_LE] < mmlh[borders[i] + 1].weight_low) border_infos[borders[i]].wmmlr[MIN_LE] = mmlh[borders[i] + 1].weight_low;
            if (border_infos[borders[i]].wmmlr[MAX_LE] > mmlh[borders[i]    ].weight_hig) border_infos[borders[i]].wmmlr[MAX_LE] = mmlh[borders[i]    ].weight_hig;
          }
#endif
        } else /* backward */
        {
          /* init from min/max (always) */
          border_infos[borders[i]].cmmlr[MIN_RI] = border_infos[borders[i] + 1].cmmlr[MIN_RI] - mmlh[borders[i] + 1].count_min;
          border_infos[borders[i]].cmmlr[MAX_RI] = border_infos[borders[i] + 1].cmmlr[MAX_RI] - mmlh[borders[i] + 1].count_max;

          SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": count[min/max-right]: %" slint_fmt " - %" slint_fmt ", %" slint_fmt " - %" slint_fmt "", i, borders[i],
            border_infos[borders[i] + 1].cmmlr[MIN_RI], mmlh[borders[i] + 1].count_min,
            border_infos[borders[i] + 1].cmmlr[MAX_RI], mmlh[borders[i] + 1].count_max);

          /* check against low/high (on demand) */
          if (pconds->pcm & SLPC_COUNTS_LH)
          {
            if (border_infos[borders[i]].cmmlr[MAX_RI] < mmlh[borders[i] + 1].count_low) border_infos[borders[i]].cmmlr[MAX_RI] = mmlh[borders[i] + 1].count_low;
            if (border_infos[borders[i]].cmmlr[MIN_RI] > mmlh[borders[i]    ].count_hig) border_infos[borders[i]].cmmlr[MIN_RI] = mmlh[borders[i]    ].count_hig;
          }

#ifdef elem_weight
          /* init from min/max (always) */
          border_infos[borders[i]].wmmlr[MIN_RI] = border_infos[borders[i] + 1].wmmlr[MIN_RI] - mmlh[borders[i] + 1].weight_min;
          border_infos[borders[i]].wmmlr[MAX_RI] = border_infos[borders[i] + 1].wmmlr[MAX_RI] - mmlh[borders[i] + 1].weight_max;

          SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": weight[min/max-right]: %f - %f, %f - %f", i, borders[i],
            border_infos[borders[i] + 1].wmmlr[MIN_RI], mmlh[borders[i] + 1].weight_min,
            border_infos[borders[i] + 1].wmmlr[MAX_RI], mmlh[borders[i] + 1].weight_max);

          /* check against low/high (on demand) */
          if (pconds->pcm & SLPC_WEIGHTS_LH)
          {
            if (border_infos[borders[i]].wmmlr[MAX_RI] < mmlh[borders[i] + 1].weight_low) border_infos[borders[i]].wmmlr[MAX_RI] = mmlh[borders[i] + 1].weight_low;
            if (border_infos[borders[i]].wmmlr[MIN_RI] > mmlh[borders[i]    ].weight_hig) border_infos[borders[i]].wmmlr[MIN_RI] = mmlh[borders[i]    ].weight_hig;
          }
#endif
        }

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": count[min/max-left/right]: %" slint_fmt " / %" slint_fmt " - %" slint_fmt " / %" slint_fmt "", i, borders[i],
          border_infos[borders[i]].cmmlr[MIN_LE], border_infos[borders[i]].cmmlr[MAX_LE], border_infos[borders[i]].cmmlr[MIN_RI], border_infos[borders[i]].cmmlr[MAX_RI]);

        /* check against inconsistence */
        if (border_infos[borders[i]].cmmlr[MIN_LE] > border_infos[borders[i]].cmmlr[MIN_RI]) border_infos[borders[i]].cmmlr[MIN_LE] = border_infos[borders[i]].cmmlr[MIN_RI] = (border_infos[borders[i]].cmmlr[MIN_LE] + border_infos[borders[i]].cmmlr[MIN_RI]) / 2;
        if (border_infos[borders[i]].cmmlr[MAX_LE] < border_infos[borders[i]].cmmlr[MAX_RI]) border_infos[borders[i]].cmmlr[MAX_LE] = border_infos[borders[i]].cmmlr[MAX_RI] = (border_infos[borders[i]].cmmlr[MAX_LE] + border_infos[borders[i]].cmmlr[MAX_RI]) / 2;

#ifdef elem_weight
        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": weight[min/max-left/right]: %f / %f - %f / %f", i, borders[i],
          border_infos[borders[i]].wmmlr[MIN_LE], border_infos[borders[i]].wmmlr[MAX_LE], border_infos[borders[i]].wmmlr[MIN_RI], border_infos[borders[i]].wmmlr[MAX_RI]);

        /* check against inconsistence */
        if (border_infos[borders[i]].wmmlr[MIN_LE] > border_infos[borders[i]].wmmlr[MIN_RI]) border_infos[borders[i]].wmmlr[MIN_LE] = border_infos[borders[i]].wmmlr[MIN_RI] = (border_infos[borders[i]].wmmlr[MIN_LE] + border_infos[borders[i]].wmmlr[MIN_RI]) / 2;
        if (border_infos[borders[i]].wmmlr[MAX_LE] < border_infos[borders[i]].wmmlr[MAX_RI]) border_infos[borders[i]].wmmlr[MAX_LE] = border_infos[borders[i]].wmmlr[MAX_RI] = (border_infos[borders[i]].wmmlr[MAX_LE] + border_infos[borders[i]].wmmlr[MAX_RI]) / 2;
#endif
      }

      SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": count[min/max-left/right]: %" slint_fmt " / %" slint_fmt " - %" slint_fmt " / %" slint_fmt "", i, borders[i],
        border_infos[borders[i]].cmmlr[MIN_LE], border_infos[borders[i]].cmmlr[MAX_LE], border_infos[borders[i]].cmmlr[MIN_RI], border_infos[borders[i]].cmmlr[MAX_RI]);

      SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": crange: %" slint_fmt " - %" slint_fmt "", i, borders[i], border_infos[borders[i]].crange[0], border_infos[borders[i]].crange[1]);

      /* select highest min and lowest max */
      current_cmm[0] = xmax(border_infos[borders[i]].cmmlr[MIN_LE], border_infos[borders[i]].cmmlr[MAX_RI]) - border_infos[borders[i]].crange[0];
      current_cmm[1] = xmin(border_infos[borders[i]].cmmlr[MAX_LE], border_infos[borders[i]].cmmlr[MIN_RI]) - border_infos[borders[i]].crange[0];

      if (rank == 0) SL_ASSERT(current_cmm[0] <= current_cmm[1]);
      
      if (rank == 0) SL_ASSERT(0 <= current_cmm[0]);

      SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": current_count: %" slint_fmt " - %" slint_fmt "", i, borders[i], current_cmm[0], current_cmm[1]);

#ifdef elem_weight
      SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": weight[min/max-left/right]: %f / %f - %f / %f", i, borders[i],
        border_infos[borders[i]].wmmlr[MIN_LE], border_infos[borders[i]].wmmlr[MAX_LE], border_infos[borders[i]].wmmlr[MIN_RI], border_infos[borders[i]].wmmlr[MAX_RI]);

      SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": wrange: %f - %f", i, borders[i], border_infos[borders[i]].wrange[0], border_infos[borders[i]].wrange[1]);

      /* select highest min and lowest max */
      current_wmm[0] = xmax(border_infos[borders[i]].wmmlr[MIN_LE], border_infos[borders[i]].wmmlr[MAX_RI]) - border_infos[borders[i]].wrange[0];
      current_wmm[1] = xmin(border_infos[borders[i]].wmmlr[MAX_LE], border_infos[borders[i]].wmmlr[MIN_RI]) - border_infos[borders[i]].wrange[0];

      if (rank == 0) SL_ASSERT(current_wmm[0] <= current_wmm[1]);

      SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": current_weight: %f - %f", i, borders[i], current_wmm[0], current_wmm[1]);
#endif

      rti_tstop(rti_tid_mpi_select_exact_radix_while_check_pre);

      /* HIT is the default */
      refine = 0;

      if (!finalize)
      {
        rti_tstart(rti_tid_mpi_select_exact_radix_while_check_classes);

        lcs = gcs = 0;
#ifdef elem_weight
        lws = gws = 0.0;
#endif

        for (k = 0; k < nclasses; ++k)
        {
          lc = local_counts[border_areas[i] * nclasses + k];
          gc = global_counts[border_areas[i] * nclasses + k];

          current_cmm[0] -= gc;
          current_cmm[1] -= gc;

          SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": k = %" sl_key_pure_type_fmt ", current_count: %" slint_fmt " - %" slint_fmt ", lc = %" slint_fmt ", lcs = %" slint_fmt ", gc = %" slint_fmt ", gcs = %" slint_fmt,
            i, borders[i], k, current_cmm[0], current_cmm[1], lc, lcs, gc, gcs);

#ifdef elem_weight
          lw = local_weights[border_areas[i] * nclasses + k];
          gw = global_weights[border_areas[i] * nclasses + k];

          current_wmm[0] -= gw;
          current_wmm[1] -= gw;

          SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": k = %" sl_key_pure_type_fmt ", current_weight: %e - %e", i, borders[i], k, current_wmm[0], current_wmm[1]);
#endif

          /* stop and refine if max count is skipped OR min count AND max weight is skipped */
          if ((current_cmm[1] < 0)
#ifdef elem_weight
            || (current_cmm[0] < 0 && current_wmm[1] < 0.0)
#endif
            )
          {
            refine = 1;
            break;
          }

          lcs += lc;
          gcs += gc;
          gc = 0;

#ifdef elem_weight
          lws += lw;
          gws += gw;
          gw = 0.0;
#endif

          /* if between min/max counts */
          if (current_cmm[0] <= 0 && current_cmm[1] >= 0)
          {
#ifdef elem_weight
            SL_TRACE_IF(DEBUG_OR_NOT, "got to next: %d && %d", (current_cmm[1] > 0), (current_wmm[0] > 0));

            /* go to next if max count not reached AND min weight not reached */
            if (current_cmm[1] > 0 && current_wmm[0] > 0) continue;
#endif

            /* look ahead for a better stop */
            if (k + 1 < nclasses && current_cmm[1] - global_counts[border_areas[i] * nclasses + k + 1] >= 0)
            {
#ifdef elem_weight
              /* continue if weights will improve */
              if (myabs(current_wmm[0] + current_wmm[1]) > myabs(current_wmm[0] + current_wmm[1] - 2 * global_weights[border_areas[i] * nclasses + k + 1])) continue;
#else
              /* continue if counts will improve */
              if (myabs(current_cmm[0] + current_cmm[1]) > myabs(current_cmm[0] + current_cmm[1] - 2 * global_counts[border_areas[i] * nclasses + k + 1])) continue;
#endif
            }

            /* stop */
            break;
          }
        }

        SL_ASSERT_IF((rank == 0), k < nclasses);

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": %s k = %" sl_key_pure_type_fmt ", lcs = %" slint_fmt, i, borders[i], (refine)?"REFINE":"HIT", k, lcs);

        /* make sure k is safe (it is used as index later) */
        if (k >= nclasses) k = nclasses - 1;

        /* break the local contribution into contributions for the lists of elements */
        for (j = 0; j < nelements; ++j)
        {
          lcsv[j] = 0;
          for (l = 0; l < k; ++l) lcsv[j] += area_counts[((border_areas[i] * nelements + j) * nclasses) + l];

          if (refine) lcv[j] = area_counts[((border_areas[i] * nelements + j) * nclasses) + k];
          else
          {
            lcv[j] = 0;
            lcsv[j] += area_counts[((border_areas[i] * nelements + j) * nclasses) + k];
          }

          lcs -= lcsv[j];
        }

        rti_tstop(rti_tid_mpi_select_exact_radix_while_check_classes);

      } else
      {
        rti_tstart(rti_tid_mpi_select_exact_radix_while_check_final);
        
        k = 0;

#ifdef elem_weight
        /* middle of min/max weight */
        mw = (current_wmm[0] + current_wmm[1]) / 2.0;

        /* min. part of weight to contribute */
        dw = xmax(0, mw - final_globals[NCONDS * i + 1]);

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": mw = %e, dw = %e", i, borders[i], mw, dw);
#else
        /* middle of min/max count */
        mc = (current_cmm[0] + current_cmm[1]) / 2;

        /* min. part of count to contribute */
        dc = xmax(0, mc - final_globals[NCONDS * i + 0]);

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": mc = %" slint_fmt ", dc = %" slint_fmt, i, borders[i], mc, dc);
#endif

        /* contribute all? */
        if (
#ifdef elem_weight
          dw >= final_locals[NCONDS * i + 1]
#else
          dc >= final_locals[NCONDS * i + 0]
#endif
        )
        {
          lc = final_locals[NCONDS * i + 0];
#ifdef elem_weight
          lw = final_locals[NCONDS * i + 1];
#endif

        } else
        {
          /* contribute only a part */
#ifdef elem_weight
          lc = 0;

          for (j = 0; j < nelements; ++j)
          {
            elem_assign_at(&areas[border_areas[i] * nelements + j], areas[border_areas[i] * nelements + j].size, &end);

            for (elem_assign(&areas[border_areas[i] * nelements + j], &xi); xi.keys < end.keys; elem_inc(&xi))
            {
              dw -= elem_weight(&xi, 0);
              ++lc;

              if (dw < 0.0 || lc >= final_locals[NCONDS * i + 0])
              {
                dw += elem_weight(&xi, 0);
                --lc;
                break;
              }
            }
          }

          lw = dw;
#else
          lc = dc;
#endif
        }

        /* check mc against min/max count borders */
        lc = xminmax(current_cmm[0] - final_globals[NCONDS * i + 0], lc, current_cmm[1] - final_globals[NCONDS * i + 0]);

        /* check agains 0 (don't step back!) and the local contribution */
        lc = xminmax(0, lc, final_locals[NCONDS * i + 0]);

        lcs = lc;
#ifdef elem_weight
        lws = lw;
#endif

#ifdef elem_weight
        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": next border: %" slint_fmt " <= %" slint_fmt " + %" slint_fmt " <= %" slint_fmt,
          i, borders[i], border_lo, i, direction, border_hi);
        if (border_lo <= i + direction && i + direction <= border_hi)
          SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": next border: %" slint_fmt " == %" slint_fmt " + %" slint_fmt,
            i, borders[i], borders[i + direction], borders[i], direction);

        /* FIXME: finalize geht auch rückwärts!!! */

        /* if the next open border is really the _next_ border */
        if (border_lo <= i + direction && i + direction <= border_hi && borders[i + direction] == borders[i] + direction)
        {
          /* determine the exact global counts/weights (damn, this is expensive) */
          mcw[0] = lcs;
          mcw[1] = lws;
          MPI_Allreduce(&mcw[0], &mcw[2], 2, MPI_DOUBLE, MPI_SUM, comm);

        } else
        {
          /* the exact global counts/weights are not required */
          mcw[2] = 0.0;
          mcw[3] = 0.0;
        }

        gc = 0;
        gcs = mcw[2];
        gw = 0.0;
        gws = mcw[3];
        
        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": gcs = %" slint_fmt ", gws = %f", i, borders[i], gcs, gws);
#else
        /* the global count is simply mc */
        gc = 0;
        gcs = mc;

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": gcs = %" slint_fmt, i, borders[i], gcs);
#endif

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": lcs = %" slint_fmt, i, borders[i], lcs);

        /* break the local contribution into contributions for the lists of elements */
        for (j = 0; j < nelements; ++j)
        {
          lcv[j] = 0;
          lcsv[j] = xmin(lcs, final_areas[i * nelements + j]);
          
          lcs -= lcsv[j];
        }

        SL_TRACE_ARRAY_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": lcsv = ", "%" slint_fmt, j, nelements, lcsv, i, borders[i]);

        rti_tstop(rti_tid_mpi_select_exact_radix_while_check_final);
      }

      SL_ASSERT(lcs == 0);
      
      /* accept local contributions */
      for (j = 0; j < nelements; ++j) sdispls[(borders[i] + 1) * nelements + j] += lcsv[j];

      rti_tstart(rti_tid_mpi_select_exact_radix_while_check_post);

      /* this is wrong, e.g., even if gc == 0 and gcs == 0 then crange[1] is set to crange[0]! */
/*      if (gc > 0 || gcs > 0
#ifdef elem_weight
       || gw != 0.0 || gws != 0.0
#endif
       )*/
      {
        border_infos[borders[i]].crange[0] += gcs;
        border_infos[borders[i]].crange[1] = border_infos[borders[i]].crange[0] + gc;

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": counts_range: %" slint_fmt "  %" slint_fmt "", i, borders[i], border_infos[borders[i]].crange[0], border_infos[borders[i]].crange[1]);

        border_infos[borders[i]].cmmlr[MIN_LE] = xminmax(border_infos[borders[i]].crange[0], border_infos[borders[i]].cmmlr[MIN_LE], border_infos[borders[i]].crange[1]);
        border_infos[borders[i]].cmmlr[MAX_LE] = xminmax(border_infos[borders[i]].crange[0], border_infos[borders[i]].cmmlr[MAX_LE], border_infos[borders[i]].crange[1]);
        border_infos[borders[i]].cmmlr[MIN_RI] = xminmax(border_infos[borders[i]].crange[0], border_infos[borders[i]].cmmlr[MIN_RI], border_infos[borders[i]].crange[1]);
        border_infos[borders[i]].cmmlr[MAX_RI] = xminmax(border_infos[borders[i]].crange[0], border_infos[borders[i]].cmmlr[MAX_RI], border_infos[borders[i]].crange[1]);

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": count[min/max-left/right]: %" slint_fmt " / %" slint_fmt " - %" slint_fmt " / %" slint_fmt "", i, borders[i],
          border_infos[borders[i]].cmmlr[MIN_LE], border_infos[borders[i]].cmmlr[MAX_LE], border_infos[borders[i]].cmmlr[MIN_RI], border_infos[borders[i]].cmmlr[MAX_RI]);

#ifdef elem_weight
        border_infos[borders[i]].wrange[0] += gws;
        border_infos[borders[i]].wrange[1] = border_infos[borders[i]].wrange[0] + gw;

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": weights_range: %f  %f", i, borders[i], border_infos[borders[i]].wrange[0], border_infos[borders[i]].wrange[1]);

        border_infos[borders[i]].wmmlr[MIN_LE] = xminmax(border_infos[borders[i]].wrange[0], border_infos[borders[i]].wmmlr[MIN_LE], border_infos[borders[i]].wrange[1]);
        border_infos[borders[i]].wmmlr[MAX_LE] = xminmax(border_infos[borders[i]].wrange[0], border_infos[borders[i]].wmmlr[MAX_LE], border_infos[borders[i]].wrange[1]);
        border_infos[borders[i]].wmmlr[MIN_RI] = xminmax(border_infos[borders[i]].wrange[0], border_infos[borders[i]].wmmlr[MIN_RI], border_infos[borders[i]].wrange[1]);
        border_infos[borders[i]].wmmlr[MAX_RI] = xminmax(border_infos[borders[i]].wrange[0], border_infos[borders[i]].wmmlr[MAX_RI], border_infos[borders[i]].wrange[1]);

        SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": weight[min/max-left/right]: %f / %f - %f / %f", i, borders[i],
          border_infos[borders[i]].wmmlr[MIN_LE], border_infos[borders[i]].wmmlr[MAX_LE], border_infos[borders[i]].wmmlr[MIN_RI], border_infos[borders[i]].wmmlr[MAX_RI]);
#endif
      }
      
      SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": range diff 0: %" slint_fmt "-%" slint_fmt " | %" slint_fmt "-%" slint_fmt, i, borders[i],
        border_infos[borders[i]].crange[0] - border_infos[borders[i] - 1].crange[1], border_infos[borders[i]].crange[0] - border_infos[borders[i] - 1].crange[0],
        border_infos[borders[i] + 1].crange[0] - border_infos[borders[i]].crange[0], border_infos[borders[i] + 1].crange[1] - border_infos[borders[i]].crange[0]);
      SL_TRACE_IF(DEBUG_OR_NOT, "%" slint_fmt ",%" slint_fmt ": range diff 1: %" slint_fmt "-%" slint_fmt " | %" slint_fmt "-%" slint_fmt, i, borders[i],
        border_infos[borders[i]].crange[1] - border_infos[borders[i] - 1].crange[1], border_infos[borders[i]].crange[1] - border_infos[borders[i] - 1].crange[0],
        border_infos[borders[i] + 1].crange[0] - border_infos[borders[i]].crange[1], border_infos[borders[i] + 1].crange[1] - border_infos[borders[i]].crange[1]);

      if (border_infos[borders[i]].cmmlr[MIN_LE] != border_info_old.cmmlr[MIN_LE]
       || border_infos[borders[i]].cmmlr[MAX_LE] != border_info_old.cmmlr[MAX_LE]
#ifdef elem_weight
       || border_infos[borders[i]].wmmlr[MIN_LE] != border_info_old.wmmlr[MIN_LE]
       || border_infos[borders[i]].wmmlr[MAX_LE] != border_info_old.wmmlr[MAX_LE]
#endif
       ) border_infos[borders[i] + 1].update = 1;

      if (border_infos[borders[i]].cmmlr[MIN_RI] != border_info_old.cmmlr[MIN_RI]
       || border_infos[borders[i]].cmmlr[MAX_RI] != border_info_old.cmmlr[MAX_RI]
#ifdef elem_weight
       || border_infos[borders[i]].wmmlr[MIN_RI] != border_info_old.wmmlr[MIN_RI]
       || border_infos[borders[i]].wmmlr[MAX_RI] != border_info_old.wmmlr[MAX_RI]
#endif
       ) border_infos[borders[i] - 1].update = 1;

      border_infos[borders[i]].update = 0;

      /* refine or remove */
      if (refine)
      {
        /* bits left for partitioning? */
        if (rhigh >= rlow)
        {
          if (last_new_area == border_areas[i] && last_new_class == k) border_areas[i] = nareas_new - 1;
          else
          {
            /* update last_new_... */
            last_new_area = border_areas[i];
            last_new_class = k;

            /* create new area */
            for (j = 0; j < nelements; ++j)
            {
              elem_assign_at(&areas[border_areas[i] * nelements + j], lcsv[j], &areas_new[nareas_new * nelements + j]);
              areas_new[nareas_new * nelements + j].size = lcv[j];
            }
            border_areas[i] = nareas_new;
            ++nareas_new;
          }

        } else
        {
          for (j = 0; j < nelements; ++j) final_areas[(i - nborders_removed * direction) * nelements + j] = lcv[j];

          /* save local count/weight for the later prefix calculations */
          final_locals[NCONDS * (i - nborders_removed * direction) + 0] = lc;
#ifdef elem_weight
          final_locals[NCONDS * (i - nborders_removed * direction) + 1] = lw;
#endif
        }

        borders[i - nborders_removed * direction] = borders[i];
        border_areas[i - nborders_removed * direction] = border_areas[i];

      } else ++nborders_removed;

      rti_tstop(rti_tid_mpi_select_exact_radix_while_check_post);

      i += direction;
    }

    /* restrict the parts */
    if (direction > 0) border_hi -= nborders_removed;
    else border_lo += nborders_removed;

    /* change direction */
    direction *= -1;

    rti_tstop(rti_tid_mpi_select_exact_radix_while_check);
    
    /* switch areas */
    nareas = nareas_new;
    if (areas == areas0)
    {
      areas = areas1;
      areas_new = areas0;
    } else
    {
      areas = areas0;
      areas_new = areas1;
    }
  }

  rti_tstop(rti_tid_mpi_select_exact_radix_while);

  sl_free(area_counts);
  sl_free(local_counts);
  sl_free(global_counts);

  rti_tstop(rti_tid_mpi_select_exact_radix);

#ifdef VERIFY
  v = mpi_post_check_partconds(s, nelements, nparts, pconds, sdispls, size, rank, comm);
  
  SL_ASSERT_IF(rank == 0, v < 0);
  
  SL_NOTICE_IF(rank == 0, "post_check_partconds: %s (%" slint_fmt ")", (v >= 0)?"FAILED":"SUCCESS", v);
#endif

#ifdef PRINT_SDISPLS
  printf("%d: sdispls:", rank);
  for (i = 0; i < nparts; ++i) printf(" %d ", sdispls[i]);
  printf("\n");
#endif

#ifdef PRINT_STATS
  mpi_select_stats(s, nparts, sdispls, size, rank, comm);
#endif

#if defined(PRINT_TIMINGS) && defined(SL_USE_RTI_TIM)
  if (rank == PRINT_TIMINGS)
  {
    printf("%d: mpi_select_exact_radix: %f\n", rank, rti_tlast(rti_tid_mpi_select_exact_radix));
    printf("%d: mpi_select_exact_radix: sync: %f\n", rank, rti_tlast(rti_tid_mpi_select_exact_radix_sync));
    printf("%d: mpi_select_exact_radix: while: %f\n", rank, rti_tlast(rti_tid_mpi_select_exact_radix_while));
    printf("%d: mpi_select_exact_radix:  count: %f\n", rank, rti_tcumu(rti_tid_mpi_select_exact_radix_while_count));
    printf("%d: mpi_select_exact_radix:  allreduce: %f\n", rank, rti_tcumu(rti_tid_mpi_select_exact_radix_while_allreduce));
    printf("%d: mpi_select_exact_radix:  round1: %f\n", rank, rti_tcumu(rti_tid_mpi_select_exact_radix_while_round1));
    printf("%d: mpi_select_exact_radix:   allgather: %f\n", rank, rti_tcumu(rti_tid_mpi_select_exact_radix_while_round1_allgather));
    printf("%d: mpi_select_exact_radix:  exscan: %f\n", rank, rti_tlast(rti_tid_mpi_select_exact_radix_while_exscan));
    printf("%d: mpi_select_exact_radix:  check: %f\n", rank, rti_tcumu(rti_tid_mpi_select_exact_radix_while_check));
    printf("%d: mpi_select_exact_radix:   pre: %f\n", rank, rti_tlast(rti_tid_mpi_select_exact_radix_while_check_pre));
    printf("%d: mpi_select_exact_radix:   classes: %f\n", rank, rti_tlast(rti_tid_mpi_select_exact_radix_while_check_classes));
    printf("%d: mpi_select_exact_radix:   final: %f\n", rank, rti_tlast(rti_tid_mpi_select_exact_radix_while_check_final));
    printf("%d: mpi_select_exact_radix:   post: %f\n", rank, rti_tlast(rti_tid_mpi_select_exact_radix_while_check_post));
    printf("%d: mpi_select_exact_radix: rounds: %" slint_fmt "\n", rank, round);
  }
#endif

  return 0;
}
Пример #19
0
 /// Builds a new box from this one by scaling the x extents by `width` and
 /// the y extents by `height`.
 ///
 /// The minimum corner is scaled as-is; the maximum corner is scaled and then
 /// reduced by 1 on each axis (presumably to yield an inclusive pixel index --
 /// TODO confirm against the callers).
 ///
 /// @param width   factor applied to xmin() and xmax()
 /// @param height  factor applied to ymin() and ymax()
 /// @return the scaled box
 nervana::boundingbox::box unnormalize(float width, float height)
 {
     const auto scaled_left   = xmin() * width;
     const auto scaled_top    = ymin() * height;
     const auto scaled_right  = xmax() * width - 1;
     const auto scaled_bottom = ymax() * height - 1;

     return nervana::boundingbox::box(scaled_left, scaled_top, scaled_right, scaled_bottom);
 }
Пример #20
0
// Converts the polygon outline in `points` into the tessellator's vertex
// tables and returns the bounding rectangle of the scanned points.
//
// For every input point that differs (in Q27Dot5 fixed-point) from the one
// before it, a Vertex is written to vertices.storage and a pointer to it is
// appended to vertices.sorted. Each vertex gets flags describing whether the
// edge arriving at it and the edge leaving it start, end, or run horizontally
// at that vertex, based on comparing the y coordinates of neighbouring points.
// Afterwards vertices.nPoints is reduced to the number of vertices kept and
// vertices.sorted is sorted with compareVertex.
//
// points          input outline; the loop reads points[i+1] for every
//                 i < vertices.nPoints, so the array must hold at least
//                 vertices.nPoints + 1 entries (presumably the last one
//                 repeats points[0] to close the outline -- confirm against
//                 the caller).
// maxActiveEdges  out parameter; reset to 0 and then increased by 2 for every
//                 vertex that has a "starts" flag and no "ends" flag.
QRectF QTessellatorPrivate::collectAndSortVertices(const QPointF *points, int *maxActiveEdges)
{
    *maxActiveEdges = 0;
    Vertex *v = vertices.storage;
    Vertex **vv = vertices.sorted;

    // running bounding box, seeded with the first point
    qreal xmin(points[0].x());
    qreal xmax(points[0].x());
    qreal ymin(points[0].y());
    qreal ymax(points[0].y());

    // collect vertex data
    // y_prev starts at the last counted point so the first vertex has a
    // "previous" neighbour; x_next/y_next always hold the point about to
    // become the current vertex.
    Q27Dot5 y_prev = FloatToQ27Dot5(points[vertices.nPoints-1].y());
    Q27Dot5 x_next = FloatToQ27Dot5(points[0].x());
    Q27Dot5 y_next = FloatToQ27Dot5(points[0].y());
    int j = 0; // number of vertices actually kept
    int i = 0; // index of the input point being processed
    while (i < vertices.nPoints) {
        Q27Dot5 y_curr = y_next;

        *vv = v;

        v->x = x_next;
        v->y = y_next;
        v->flags = 0;

    next_point:

        // extend the bounding box by the following point and convert it
        xmin = qMin(xmin, points[i+1].x());
        xmax = qMax(xmax, points[i+1].x());
        ymin = qMin(ymin, points[i+1].y());
        ymax = qMax(ymax, points[i+1].y());

        y_next = FloatToQ27Dot5(points[i+1].y());
        x_next = FloatToQ27Dot5(points[i+1].x());

        // skip vertices on top of each other
        if (v->x == x_next && v->y == y_next) {
            ++i;
            // more input left: compare the same vertex v against the point
            // after the duplicate
            if (i < vertices.nPoints)
                goto next_point;
            // Input exhausted while v still coincides with the following
            // point. v is not counted (the loop is left before ++j); instead
            // the "before" flags of the first stored vertex are recomputed
            // from y_prev and y_curr, and maxActiveEdges is bumped if that
            // vertex now has a "starts" flag and no "ends" flag.
            Vertex *v0 = vertices.storage;
            v0->flags &= ~(LineBeforeStarts|LineBeforeEnds|LineBeforeHorizontal);
            if (y_prev < y_curr)
                v0->flags |= LineBeforeEnds;
            else if (y_prev > y_curr)
                v0->flags |= LineBeforeStarts;
            else
                v0->flags |= LineBeforeHorizontal;
            if ((v0->flags & (LineBeforeStarts|LineAfterStarts))
                && !(v0->flags & (LineAfterEnds|LineBeforeEnds)))
                *maxActiveEdges += 2;
            break;
        }

        // classify the edge arriving from the previous vertex
        if (y_prev < y_curr)
            v->flags |= LineBeforeEnds;
        else if (y_prev > y_curr)
            v->flags |= LineBeforeStarts;
        else
            v->flags |= LineBeforeHorizontal;


        // classify the edge leaving towards the next point
        if (y_curr < y_next)
            v->flags |= LineAfterStarts;
        else if (y_curr > y_next)
            v->flags |= LineAfterEnds;
        else
            v->flags |= LineAfterHorizontal;
        // ### could probably get better limit by looping over sorted list and counting down on ending edges
        if ((v->flags & (LineBeforeStarts|LineAfterStarts))
            && !(v->flags & (LineAfterEnds|LineBeforeEnds)))
            *maxActiveEdges += 2;
        // commit the vertex and advance to the next input point
        y_prev = y_curr;
        ++v;
        ++vv;
        ++j;
        ++i;
    }
    // only the vertices that were kept remain part of the outline
    vertices.nPoints = j;

    QDEBUG() << "maxActiveEdges=" << *maxActiveEdges;
    vv = vertices.sorted;
    qSort(vv, vv + vertices.nPoints, compareVertex);

    return QRectF(xmin, ymin, xmax-xmin, ymax-ymin);
}
Пример #21
0
/*
 * Search the doubles around xi = pi * i / (4N) for a value whose cosine and
 * sine (evaluated through cos_fn / sin_fn in 100-bit mpfr precision) are both
 * unusually close to a double: for each of them, the exponent of the rounded
 * double minus the exponent of the rounding residual must be at least
 * 52 + min_leeway. The "leeway" of a candidate is the smaller of the two
 * surpluses over 52.
 *
 * Candidates are enumerated by offsetting the bit pattern of the starting
 * double: +0, -1, +1, -2, +2, ... The search stops as soon as a candidate
 * reaches perfect_leeway, or when the give-up test (evaluated each time the
 * sign flips back to positive) fires.
 *
 * On success one line "(xi, cos, sin), comment" with the bit patterns of the
 * best candidate is printed to stdout.
 *
 *   i               index of the table entry
 *   min_leeway      leeway a candidate must reach to be considered at all;
 *                   the best candidate must strictly exceed it to be printed
 *   perfect_leeway  leeway at which the search stops early
 *   sin_fn, cos_fn  mpfr routines used to evaluate sine and cosine
 *
 * Returns >= zero iff successful
 */
static int find_triple_64(int i, int min_leeway, int perfect_leeway, mpfr_fn
                          sin_fn, mpfr_fn cos_fn)
{
        /*
           Using mpfr is not entirely overkill for this; [Lut95]
           includes PASCAL fragments that use almost entirely integer
           arithmetic... but the error term in that only handles
           up to 13 extra bits of zeroes or so. We proudly boast
           at least 16 bits of extra zeroes in all cases.
         */
        mpfr_t xi;
        mpfr_t xip1;
        mpfr_t cos;
        mpfr_t sin;
        double xip1_d;
        double t;
        uint64_t sin_u;
        uint64_t cos_u;
        int e1;
        int e2;
        uint64_t xip1_u;
        double xi_initial;
        uint64_t xi_initial_u;
        double xi_current;
        uint64_t xi_current_u;
        long int r = 0;
        long int best_r = 0;
        int sgn = 1;
        int ml = min_leeway;
        int best_l = 0;

        /*
           Zero-initialised so that nothing indeterminate is ever printed,
           even when min_leeway is negative and no candidate was recorded.
         */
        uint64_t best_xi_u = 0;
        uint64_t best_sin_u = 0;
        uint64_t best_cos_u = 0;
        time_t start;
        time_t end;
        int ret = -1;

        start = time(0);
        mpfr_init2(xi, 100);
        mpfr_init2(xip1, 100);
        mpfr_init2(cos, 100);
        mpfr_init2(sin, 100);

        /* start out at xi = πi/(4N) */
        mpfr_const_pi(xi, MPFR_RNDN);
        mpfr_mul_si(xip1, xi, (long int) (i + 1), MPFR_RNDN);
        mpfr_mul_si(xi, xi, (long int) i, MPFR_RNDN);
        mpfr_div_si(xi, xi, (long int) 4 * N, MPFR_RNDN);
        mpfr_div_si(xip1, xip1, (long int) 4 * N, MPFR_RNDN);
        xip1_d = mpfr_get_d(xip1, MPFR_RNDN);
        xip1_u = FLT64_TO_UINT64(xip1_d);
        xi_initial = mpfr_get_d(xi, MPFR_RNDN);
        xi_initial_u = FLT64_TO_UINT64(xi_initial);

        while (1) {
                /* Candidate: starting bit pattern offset by +/- r */
                xi_current_u = xi_initial_u + (sgn * r);
                xi_current = UINT64_TO_FLT64(xi_current_u);
                mpfr_set_d(xi, xi_current, MPFR_RNDN);

                /* Test if cos(xi) has enough zeroes */
                cos_fn(cos, xi, MPFR_RNDN);
                t = mpfr_get_d(cos, MPFR_RNDN);
                cos_u = FLT64_TO_UINT64(t);
                e1 = EXP_OF_FLT64(t);

                /* cos now holds the residual left after rounding to double */
                mpfr_sub_d(cos, cos, t, MPFR_RNDN);
                t = mpfr_get_d(cos, MPFR_RNDN);
                e2 = EXP_OF_FLT64(t);

                if (e2 == -1024) {

                        /* Damn; this is too close to a subnormal. i = 0 or N? */
                        goto cleanup;
                }

                if (e1 - e2 < (52 + min_leeway)) {
                        goto inc;
                }

                ml = xmax(min_leeway, e1 - e2 - 52);

                /* Test if sin(xi) has enough zeroes */
                sin_fn(sin, xi, MPFR_RNDN);
                t = mpfr_get_d(sin, MPFR_RNDN);
                sin_u = FLT64_TO_UINT64(t);
                e1 = EXP_OF_FLT64(t);

                /* sin now holds the residual left after rounding to double */
                mpfr_sub_d(sin, sin, t, MPFR_RNDN);
                t = mpfr_get_d(sin, MPFR_RNDN);
                e2 = EXP_OF_FLT64(t);

                if (e2 == -1024) {

                        /* Damn; this is too close to a subnormal. i = 0 or N? */
                        goto cleanup;
                }

                if (e1 - e2 < (52 + min_leeway)) {
                        goto inc;
                }

                /* The candidate is only as good as the worse of cos and sin */
                ml = xmin(ml, e1 - e2 - 52);

                /* Hurrah, this is valid */
                if (ml > best_l) {
                        best_l = ml;
                        best_xi_u = xi_current_u;
                        best_cos_u = cos_u;
                        best_sin_u = sin_u;
                        best_r = sgn * r;

                        /* If this is super-good, don't bother finding more */
                        if (best_l >= perfect_leeway) {
                                break;
                        }
                }

inc:

                /* Increment */
                sgn *= -1;

                if (sgn < 0) {
                        r++;
                } else if (r > (1 << 29) ||
                           xi_current_u > xip1_u) {
                        /*
                           This is taking too long, give up looking
                           for perfection and take the best we've
                           got. A sweep of 1 << 28 finishes in ~60
                           hrs on my personal machine as I write
                           this.
                         */
                        break;
                }
        }

        end = time(0);

        if (best_l > min_leeway) {
                /*
                   uint64_t and time_t are not guaranteed to be long, so
                   convert explicitly to types that match the conversion
                   specifiers.
                 */
                printf(
                        "(%#018llx, %#018llx, %#018llx), /* i = %03d, l = %02d, r = %010ld, t = %ld */ \n",
                        (unsigned long long) best_xi_u,
                        (unsigned long long) best_cos_u,
                        (unsigned long long) best_sin_u, i, best_l, best_r,
                        (long) (end - start));

                ret = 0;
        }

cleanup:

        /* Single exit: release the mpfr objects on every path */
        mpfr_clear(xi);
        mpfr_clear(xip1);
        mpfr_clear(cos);
        mpfr_clear(sin);

        return ret;
}
Пример #22
0
CHAR_T *
VASNPRINTF (CHAR_T *resultbuf, size_t *lengthp, const CHAR_T *format, va_list args)
{
  DIRECTIVES d;
  arguments a;

  if (PRINTF_PARSE (format, &d, &a) < 0)
    {
      errno = EINVAL;
      return NULL;
    }

#define CLEANUP() \
  free (d.dir);								\
  if (a.arg)								\
    free (a.arg);

  if (printf_fetchargs (args, &a) < 0)
    {
      CLEANUP ();
      errno = EINVAL;
      return NULL;
    }

  {
    size_t buf_neededlength;
    CHAR_T *buf;
    CHAR_T *buf_malloced;
    const CHAR_T *cp;
    size_t i;
    DIRECTIVE *dp;
    
    CHAR_T *result;
    size_t allocated;
    size_t length;

    buf_neededlength =
      xsum4 (7, d.max_width_length, d.max_precision_length, 6);
#if HAVE_ALLOCA
    if (buf_neededlength < 4000 / sizeof (CHAR_T))
      {
	buf = (CHAR_T *) alloca (buf_neededlength * sizeof (CHAR_T));
	buf_malloced = NULL;
      }
    else
#endif
      {
	size_t buf_memsize = xtimes (buf_neededlength, sizeof (CHAR_T));
	if (size_overflow_p (buf_memsize))
	  goto out_of_memory_1;
	buf = (CHAR_T *) malloc (buf_memsize);
	if (buf == NULL)
	  goto out_of_memory_1;
	buf_malloced = buf;
      }

    if (resultbuf != NULL)
      {
	result = resultbuf;
	allocated = *lengthp;
      }
    else
      {
	result = NULL;
	allocated = 0;
      }
    length = 0;

#define ENSURE_ALLOCATION(needed) \
    if ((needed) > allocated)						     \
      {									     \
	size_t memory_size;						     \
	CHAR_T *memory;							     \
									     \
	allocated = (allocated > 0 ? xtimes (allocated, 2) : 12);	     \
	if ((needed) > allocated)					     \
	  allocated = (needed);						     \
	memory_size = xtimes (allocated, sizeof (CHAR_T));		     \
	if (size_overflow_p (memory_size))				     \
	  goto out_of_memory;						     \
	if (result == resultbuf || result == NULL)			     \
	  memory = (CHAR_T *) malloc (memory_size);			     \
	else								     \
	  memory = (CHAR_T *) realloc (result, memory_size);		     \
	if (memory == NULL)						     \
	  goto out_of_memory;						     \
	if (result == resultbuf && length > 0)				     \
	  memcpy (memory, result, length * sizeof (CHAR_T));		     \
	result = memory;						     \
      }

    for (cp = format, i = 0, dp = &d.dir[0]; ; cp = dp->dir_end, i++, dp++)
      {
	if (cp != dp->dir_start)
	  {
	    size_t n = dp->dir_start - cp;
	    size_t augmented_length = xsum (length, n);

	    ENSURE_ALLOCATION (augmented_length);
	    memcpy (result + length, cp, n * sizeof (CHAR_T));
	    length = augmented_length;
	  }
	if (i == d.count)
	  break;

	
	if (dp->conversion == '%')
	  {
	    size_t augmented_length;

	    if (!(dp->arg_index == ARG_NONE))
	      abort ();
	    augmented_length = xsum (length, 1);
	    ENSURE_ALLOCATION (augmented_length);
	    result[length] = '%';
	    length = augmented_length;
	  }
	else
	  {
	    if (!(dp->arg_index != ARG_NONE))
	      abort ();

	    if (dp->conversion == 'n')
	      {
		switch (a.arg[dp->arg_index].type)
		  {
		  case TYPE_COUNT_SCHAR_POINTER:
		    *a.arg[dp->arg_index].a.a_count_schar_pointer = length;
		    break;
		  case TYPE_COUNT_SHORT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_short_pointer = length;
		    break;
		  case TYPE_COUNT_INT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_int_pointer = length;
		    break;
		  case TYPE_COUNT_LONGINT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_longint_pointer = length;
		    break;
#ifdef HAVE_LONG_LONG
		  case TYPE_COUNT_LONGLONGINT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_longlongint_pointer = length;
		    break;
#endif
		  default:
		    abort ();
		  }
	      }
	    else
	      {
		arg_type type = a.arg[dp->arg_index].type;
		CHAR_T *p;
		unsigned int prefix_count;
		int prefixes[2];
#if !USE_SNPRINTF
		size_t tmp_length;
		CHAR_T tmpbuf[700];
		CHAR_T *tmp;

		{
		  size_t width;
		  size_t precision;

		  width = 0;
		  if (dp->width_start != dp->width_end)
		    {
		      if (dp->width_arg_index != ARG_NONE)
			{
			  int arg;

			  if (!(a.arg[dp->width_arg_index].type == TYPE_INT))
			    abort ();
			  arg = a.arg[dp->width_arg_index].a.a_int;
			  width = (arg < 0 ? (unsigned int) (-arg) : arg);
			}
		      else
			{
			  const CHAR_T *digitp = dp->width_start;

			  do
			    width = xsum (xtimes (width, 10), *digitp++ - '0');
			  while (digitp != dp->width_end);
			}
		    }

		  precision = 6;
		  if (dp->precision_start != dp->precision_end)
		    {
		      if (dp->precision_arg_index != ARG_NONE)
			{
			  int arg;

			  if (!(a.arg[dp->precision_arg_index].type == TYPE_INT))
			    abort ();
			  arg = a.arg[dp->precision_arg_index].a.a_int;
			  precision = (arg < 0 ? 0 : arg);
			}
		      else
			{
			  const CHAR_T *digitp = dp->precision_start + 1;

			  precision = 0;
			  do
			    precision = xsum (xtimes (precision, 10), *digitp++ - '0');
			  while (digitp != dp->precision_end);
			}
		    }

		  switch (dp->conversion)
		    {

		    case 'd': case 'i': case 'u':
# ifdef HAVE_LONG_LONG
		      if (type == TYPE_LONGLONGINT || type == TYPE_ULONGLONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long long) * CHAR_BIT
					  * 0.30103 
					  * 2 
					 )
			  + 1 
			  + 1; 
		      else
# endif
		      if (type == TYPE_LONGINT || type == TYPE_ULONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long) * CHAR_BIT
					  * 0.30103 
					  * 2 
					 )
			  + 1 
			  + 1; 
		      else
			tmp_length =
			  (unsigned int) (sizeof (unsigned int) * CHAR_BIT
					  * 0.30103 
					  * 2 
					 )
			  + 1 
			  + 1; 
		      break;

		    case 'o':
# ifdef HAVE_LONG_LONG
		      if (type == TYPE_LONGLONGINT || type == TYPE_ULONGLONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long long) * CHAR_BIT
					  * 0.333334 
					 )
			  + 1 
			  + 1; 
		      else
# endif
		      if (type == TYPE_LONGINT || type == TYPE_ULONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long) * CHAR_BIT
					  * 0.333334 
					 )
			  + 1 
			  + 1; 
		      else
			tmp_length =
			  (unsigned int) (sizeof (unsigned int) * CHAR_BIT
					  * 0.333334 
					 )
			  + 1 
			  + 1; 
		      break;

		    case 'x': case 'X':
# ifdef HAVE_LONG_LONG
		      if (type == TYPE_LONGLONGINT || type == TYPE_ULONGLONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long long) * CHAR_BIT
					  * 0.25 
					 )
			  + 1 
			  + 2; 
		      else
# endif
		      if (type == TYPE_LONGINT || type == TYPE_ULONGINT)
			tmp_length =
			  (unsigned int) (sizeof (unsigned long) * CHAR_BIT
					  * 0.25 
					 )
			  + 1 
			  + 2; 
		      else
			tmp_length =
			  (unsigned int) (sizeof (unsigned int) * CHAR_BIT
					  * 0.25 
					 )
			  + 1 
			  + 2; 
		      break;

		    case 'f': case 'F':
# ifdef HAVE_LONG_DOUBLE
		      if (type == TYPE_LONGDOUBLE)
			tmp_length =
			  (unsigned int) (LDBL_MAX_EXP
					  * 0.30103 
					  * 2 
					 )
			  + 1 
			  + 10; 
		      else
# endif
			tmp_length =
			  (unsigned int) (DBL_MAX_EXP
					  * 0.30103 
					  * 2 
					 )
			  + 1 
			  + 10; 
		      tmp_length = xsum (tmp_length, precision);
		      break;

		    case 'e': case 'E': case 'g': case 'G':
		    case 'a': case 'A':
		      tmp_length =
			12; 
		      tmp_length = xsum (tmp_length, precision);
		      break;

		    case 'c':
# if defined HAVE_WINT_T && !WIDE_CHAR_VERSION
		      if (type == TYPE_WIDE_CHAR)
			tmp_length = MB_CUR_MAX;
		      else
# endif
			tmp_length = 1;
		      break;

		    case 's':
# ifdef HAVE_WCHAR_T
		      if (type == TYPE_WIDE_STRING)
			{
			  tmp_length =
			    local_wcslen (a.arg[dp->arg_index].a.a_wide_string);

#  if !WIDE_CHAR_VERSION
			  tmp_length = xtimes (tmp_length, MB_CUR_MAX);
#  endif
			}
		      else
# endif
			tmp_length = strlen (a.arg[dp->arg_index].a.a_string);
		      break;

		    case 'p':
		      tmp_length =
			(unsigned int) (sizeof (void *) * CHAR_BIT
					* 0.25 
				       )
			  + 1 
			  + 2; 
		      break;

		    default:
		      abort ();
		    }

		  if (tmp_length < width)
		    tmp_length = width;

		  tmp_length = xsum (tmp_length, 1); 
		}

		if (tmp_length <= sizeof (tmpbuf) / sizeof (CHAR_T))
		  tmp = tmpbuf;
		else
		  {
		    size_t tmp_memsize = xtimes (tmp_length, sizeof (CHAR_T));

		    if (size_overflow_p (tmp_memsize))
		      
		      goto out_of_memory;
		    tmp = (CHAR_T *) malloc (tmp_memsize);
		    if (tmp == NULL)
		      
		      goto out_of_memory;
		  }
#endif

		p = buf;
		*p++ = '%';
		if (dp->flags & FLAG_GROUP)
		  *p++ = '\'';
		if (dp->flags & FLAG_LEFT)
		  *p++ = '-';
		if (dp->flags & FLAG_SHOWSIGN)
		  *p++ = '+';
		if (dp->flags & FLAG_SPACE)
		  *p++ = ' ';
		if (dp->flags & FLAG_ALT)
		  *p++ = '#';
		if (dp->flags & FLAG_ZERO)
		  *p++ = '0';
		if (dp->width_start != dp->width_end)
		  {
		    size_t n = dp->width_end - dp->width_start;
		    memcpy (p, dp->width_start, n * sizeof (CHAR_T));
		    p += n;
		  }
		if (dp->precision_start != dp->precision_end)
		  {
		    size_t n = dp->precision_end - dp->precision_start;
		    memcpy (p, dp->precision_start, n * sizeof (CHAR_T));
		    p += n;
		  }

		switch (type)
		  {
#ifdef HAVE_LONG_LONG
		  case TYPE_LONGLONGINT:
		  case TYPE_ULONGLONGINT:
		    *p++ = 'l';
		    
#endif
		  case TYPE_LONGINT:
		  case TYPE_ULONGINT:
#ifdef HAVE_WINT_T
		  case TYPE_WIDE_CHAR:
#endif
#ifdef HAVE_WCHAR_T
		  case TYPE_WIDE_STRING:
#endif
		    *p++ = 'l';
		    break;
#ifdef HAVE_LONG_DOUBLE
		  case TYPE_LONGDOUBLE:
		    *p++ = 'L';
		    break;
#endif
		  default:
		    break;
		  }
		*p = dp->conversion;
#if USE_SNPRINTF
		p[1] = '%';
		p[2] = 'n';
		p[3] = '\0';
#else
		p[1] = '\0';
#endif

		
		prefix_count = 0;
		if (dp->width_arg_index != ARG_NONE)
		  {
		    if (!(a.arg[dp->width_arg_index].type == TYPE_INT))
		      abort ();
		    prefixes[prefix_count++] = a.arg[dp->width_arg_index].a.a_int;
		  }
		if (dp->precision_arg_index != ARG_NONE)
		  {
		    if (!(a.arg[dp->precision_arg_index].type == TYPE_INT))
		      abort ();
		    prefixes[prefix_count++] = a.arg[dp->precision_arg_index].a.a_int;
		  }

#if USE_SNPRINTF
		ENSURE_ALLOCATION (xsum (length, 1));
		result[length] = '\0';
#endif

		for (;;)
		  {
		    size_t maxlen;
		    int count;
		    int retcount;

		    maxlen = allocated - length;
		    count = -1;
		    retcount = 0;

#if USE_SNPRINTF
# define SNPRINTF_BUF(arg) \
		    switch (prefix_count)				    \
		      {							    \
		      case 0:						    \
			retcount = SNPRINTF (result + length, maxlen, buf,  \
					     arg, &count);		    \
			break;						    \
		      case 1:						    \
			retcount = SNPRINTF (result + length, maxlen, buf,  \
					     prefixes[0], arg, &count);	    \
			break;						    \
		      case 2:						    \
			retcount = SNPRINTF (result + length, maxlen, buf,  \
					     prefixes[0], prefixes[1], arg, \
					     &count);			    \
			break;						    \
		      default:						    \
			abort ();					    \
		      }
#else
# define SNPRINTF_BUF(arg) \
		    switch (prefix_count)				    \
		      {							    \
		      case 0:						    \
			count = sprintf (tmp, buf, arg);		    \
			break;						    \
		      case 1:						    \
			count = sprintf (tmp, buf, prefixes[0], arg);	    \
			break;						    \
		      case 2:						    \
			count = sprintf (tmp, buf, prefixes[0], prefixes[1],\
					 arg);				    \
			break;						    \
		      default:						    \
			abort ();					    \
		      }
#endif

		    switch (type)
		      {
		      case TYPE_SCHAR:
			{
			  int arg = a.arg[dp->arg_index].a.a_schar;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_UCHAR:
			{
			  unsigned int arg = a.arg[dp->arg_index].a.a_uchar;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_SHORT:
			{
			  int arg = a.arg[dp->arg_index].a.a_short;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_USHORT:
			{
			  unsigned int arg = a.arg[dp->arg_index].a.a_ushort;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_INT:
			{
			  int arg = a.arg[dp->arg_index].a.a_int;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_UINT:
			{
			  unsigned int arg = a.arg[dp->arg_index].a.a_uint;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_LONGINT:
			{
			  long int arg = a.arg[dp->arg_index].a.a_longint;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_ULONGINT:
			{
			  unsigned long int arg = a.arg[dp->arg_index].a.a_ulongint;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_LONG_LONG
		      case TYPE_LONGLONGINT:
			{
			  long long int arg = a.arg[dp->arg_index].a.a_longlongint;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_ULONGLONGINT:
			{
			  unsigned long long int arg = a.arg[dp->arg_index].a.a_ulonglongint;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_DOUBLE:
			{
			  double arg = a.arg[dp->arg_index].a.a_double;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_LONG_DOUBLE
		      case TYPE_LONGDOUBLE:
			{
			  long double arg = a.arg[dp->arg_index].a.a_longdouble;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_CHAR:
			{
			  int arg = a.arg[dp->arg_index].a.a_char;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_WINT_T
		      case TYPE_WIDE_CHAR:
			{
			  wint_t arg = a.arg[dp->arg_index].a.a_wide_char;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_STRING:
			{
			  const char *arg = a.arg[dp->arg_index].a.a_string;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_WCHAR_T
		      case TYPE_WIDE_STRING:
			{
			  const wchar_t *arg = a.arg[dp->arg_index].a.a_wide_string;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_POINTER:
			{
			  void *arg = a.arg[dp->arg_index].a.a_pointer;
			  SNPRINTF_BUF (arg);
			}
			break;
		      default:
			abort ();
		      }

#if USE_SNPRINTF
		    if (count >= 0)
		      {
			if (count < maxlen && result[length + count] != '\0')
			  abort ();
			
			if (retcount > count)
			  count = retcount;
		      }
		    else
		      {
			if (p[1] != '\0')
			  {
			    p[1] = '\0';
			    continue;
			  }
			else
			  {
			    
			    if (retcount < 0)
			      {
				size_t bigger_need =
				  xsum (xtimes (allocated, 2), 12);
				ENSURE_ALLOCATION (bigger_need);
				continue;
			      }
			    else
			      count = retcount;
			  }
		      }
#endif

		    
		    if (count < 0)
		      {
			if (!(result == resultbuf || result == NULL))
			  free (result);
			free (buf_malloced);
			CLEANUP ();
			errno = EINVAL;
			return NULL;
		      }

#if !USE_SNPRINTF
		    if (count >= tmp_length)
		      abort ();
#endif

		    
		    if (count >= maxlen)
		      {
			size_t n =
			  xmax (xsum (length, count), xtimes (allocated, 2));

			ENSURE_ALLOCATION (n);
#if USE_SNPRINTF
			continue;
#endif
		      }

#if USE_SNPRINTF
		    
#else
		    
		    memcpy (result + length, tmp, count * sizeof (CHAR_T));
		    if (tmp != tmpbuf)
		      free (tmp);
#endif

		    length += count;
		    break;
		  }
	      }
	  }
      }

    
    ENSURE_ALLOCATION (xsum (length, 1));
    result[length] = '\0';

    if (result != resultbuf && length + 1 < allocated)
      {
	
	CHAR_T *memory;

	memory = (CHAR_T *) realloc (result, (length + 1) * sizeof (CHAR_T));
	if (memory != NULL)
	  result = memory;
      }

    free (buf_malloced);
    CLEANUP ();
    *lengthp = length;
    return result;

  out_of_memory:
    if (!(result == resultbuf || result == NULL))
      free (result);
    free (buf_malloced);
  out_of_memory_1:
    CLEANUP ();
    errno = ENOMEM;
    return NULL;
  }
}
Пример #23
0
/* Formats FORMAT with the arguments ARGS into a NUL-terminated CHAR_T
   string.

   RESULTBUF, if non-NULL, is a caller-supplied buffer with room for
   *LENGTHP elements; it is used as the initial output buffer.  If it is
   NULL, or turns out to be too small, the output is placed in
   malloc-allocated memory instead.

   On success, returns the output string (either RESULTBUF or freshly
   allocated memory that the caller must free) and stores its length, not
   counting the terminating NUL, in *LENGTHP.

   On failure, returns NULL with errno set:
     EINVAL     the format could not be parsed, the arguments could not
		be fetched, or the underlying snprintf() call failed;
     ENOMEM     an allocation failed or a size computation overflowed;
     EOVERFLOW  the result is longer than INT_MAX (*LENGTHP has already
		been updated at that point).  */
CHAR_T *
VASNPRINTF (CHAR_T *resultbuf, size_t *lengthp, const CHAR_T *format, va_list args)
{
  DIRECTIVES d;
  arguments a;

  if (PRINTF_PARSE (format, &d, &a) < 0)
    {
      errno = EINVAL;
      return NULL;
    }

/* Frees the directive array and the argument array.  Must be executed on
   every exit path from here on.  */
#define CLEANUP() \
  free (d.dir);								\
  if (a.arg)								\
    free (a.arg);

  if (printf_fetchargs (args, &a) < 0)
    {
      CLEANUP ();
      errno = EINVAL;
      return NULL;
    }

  {
    size_t buf_neededlength;
    /* Scratch buffer holding the single directive handed to snprintf.  */
    CHAR_T *buf;
    /* Same as buf if it was obtained from malloc, otherwise NULL.  */
    CHAR_T *buf_malloced;
    const CHAR_T *cp;
    size_t i;
    DIRECTIVE *dp;
    /* Output string accumulator.  */
    CHAR_T *result;
    size_t allocated;
    size_t length;

    /* Allocate a small buffer that will hold a directive passed to
       snprintf.  */
    buf_neededlength =
      xsum4 (7, d.max_width_length, d.max_precision_length, 6);
#if HAVE_ALLOCA
    /* Small directive buffers live on the stack; larger ones go through
       malloc below.  */
    if (buf_neededlength < 4000 / sizeof (CHAR_T))
      {
	buf = (CHAR_T *) alloca (buf_neededlength * sizeof (CHAR_T));
	buf_malloced = NULL;
      }
    else
#endif
      {
	size_t buf_memsize = xtimes (buf_neededlength, sizeof (CHAR_T));
	if (size_overflow_p (buf_memsize))
	  goto out_of_memory_1;
	buf = (CHAR_T *) malloc (buf_memsize);
	if (buf == NULL)
	  goto out_of_memory_1;
	buf_malloced = buf;
      }

    if (resultbuf != NULL)
      {
	result = resultbuf;
	allocated = *lengthp;
      }
    else
      {
	result = NULL;
	allocated = 0;
      }
    length = 0;
    /* Invariants:
       result is either == resultbuf or == NULL or malloc-allocated.
       If length > 0, then result != NULL.  */

    /* Ensures that allocated >= needed.  Aborts through a jump to
       out_of_memory if needed is SIZE_MAX or otherwise too big.
       Grows at least geometrically (doubling, starting at 12).  When the
       output still lives in resultbuf, the first growth switches to
       malloc'ed memory and copies the LENGTH elements produced so far.  */
#define ENSURE_ALLOCATION(needed) \
    if ((needed) > allocated)						     \
      {									     \
	size_t memory_size;						     \
	CHAR_T *memory;							     \
									     \
	allocated = (allocated > 0 ? xtimes (allocated, 2) : 12);	     \
	if ((needed) > allocated)					     \
	  allocated = (needed);						     \
	memory_size = xtimes (allocated, sizeof (CHAR_T));		     \
	if (size_overflow_p (memory_size))				     \
	  goto out_of_memory;						     \
	if (result == resultbuf || result == NULL)			     \
	  memory = (CHAR_T *) malloc (memory_size);			     \
	else								     \
	  memory = (CHAR_T *) realloc (result, memory_size);		     \
	if (memory == NULL)						     \
	  goto out_of_memory;						     \
	if (result == resultbuf && length > 0)				     \
	  memcpy (memory, result, length * sizeof (CHAR_T));		     \
	result = memory;						     \
      }

    /* Walk the format string: copy the literal text that precedes each
       directive, then execute the directive.  The loop ends after the
       literal text that follows the last directive has been copied.  */
    for (cp = format, i = 0, dp = &d.dir[0]; ; cp = dp->dir_end, i++, dp++)
      {
	if (cp != dp->dir_start)
	  {
	    /* Literal text between the previous directive and this one.  */
	    size_t n = dp->dir_start - cp;
	    size_t augmented_length = xsum (length, n);

	    ENSURE_ALLOCATION (augmented_length);
	    memcpy (result + length, cp, n * sizeof (CHAR_T));
	    length = augmented_length;
	  }
	if (i == d.count)
	  break;

	/* Execute a single directive.  */
	if (dp->conversion == '%')
	  {
	    /* "%%" produces a single '%' and consumes no argument.  */
	    size_t augmented_length;

	    if (!(dp->arg_index == ARG_NONE))
	      abort ();
	    augmented_length = xsum (length, 1);
	    ENSURE_ALLOCATION (augmented_length);
	    result[length] = '%';
	    length = augmented_length;
	  }
	else
	  {
	    if (!(dp->arg_index != ARG_NONE))
	      abort ();

	    if (dp->conversion == 'n')
	      {
		/* "%n" stores the number of elements produced so far
		   through the pointer argument, whose pointee type is
		   selected by the argument's recorded type.  */
		switch (a.arg[dp->arg_index].type)
		  {
		  case TYPE_COUNT_SCHAR_POINTER:
		    *a.arg[dp->arg_index].a.a_count_schar_pointer = length;
		    break;
		  case TYPE_COUNT_SHORT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_short_pointer = length;
		    break;
		  case TYPE_COUNT_INT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_int_pointer = length;
		    break;
		  case TYPE_COUNT_LONGINT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_longint_pointer = length;
		    break;
#ifdef HAVE_LONG_LONG
		  case TYPE_COUNT_LONGLONGINT_POINTER:
		    *a.arg[dp->arg_index].a.a_count_longlongint_pointer = length;
		    break;
#endif
		  default:
		    abort ();
		  }
	      }
	    else
	      {
		arg_type type = a.arg[dp->arg_index].type;
		CHAR_T *p;
		/* Number of leading int arguments ('*' width and/or '*'
		   precision) to pass before the converted value.  */
		unsigned int prefix_count;
		int prefixes[2];

		/* Construct the format string for calling snprintf.  */
		p = buf;
		*p++ = '%';
		if (dp->flags & FLAG_GROUP)
		  *p++ = '\'';
		if (dp->flags & FLAG_LEFT)
		  *p++ = '-';
		if (dp->flags & FLAG_SHOWSIGN)
		  *p++ = '+';
		if (dp->flags & FLAG_SPACE)
		  *p++ = ' ';
		if (dp->flags & FLAG_ALT)
		  *p++ = '#';
		if (dp->flags & FLAG_ZERO)
		  *p++ = '0';
		/* Width and precision are copied verbatim from the original
		   format string.  */
		if (dp->width_start != dp->width_end)
		  {
		    size_t n = dp->width_end - dp->width_start;
		    memcpy (p, dp->width_start, n * sizeof (CHAR_T));
		    p += n;
		  }
		if (dp->precision_start != dp->precision_end)
		  {
		    size_t n = dp->precision_end - dp->precision_start;
		    memcpy (p, dp->precision_start, n * sizeof (CHAR_T));
		    p += n;
		  }

		/* Emit the length modifier that matches the argument type:
		   "ll" (via the fallthrough), "l", "L", or none.  */
		switch (type)
		  {
#ifdef HAVE_LONG_LONG
		  case TYPE_LONGLONGINT:
		  case TYPE_ULONGLONGINT:
		    *p++ = 'l';
		    /*FALLTHROUGH*/
#endif
		  case TYPE_LONGINT:
		  case TYPE_ULONGINT:
#ifdef HAVE_WINT_T
		  case TYPE_WIDE_CHAR:
#endif
#ifdef HAVE_WCHAR_T
		  case TYPE_WIDE_STRING:
#endif
		    *p++ = 'l';
		    break;
#ifdef HAVE_LONG_DOUBLE
		  case TYPE_LONGDOUBLE:
		    *p++ = 'L';
		    break;
#endif
		  default:
		    break;
		  }
		*p = dp->conversion;
#if USE_SNPRINTF
		/* Append "%n" so that snprintf also reports the count
		   through the trailing &count argument.  */
		p[1] = '%';
		p[2] = 'n';
		p[3] = '\0';
#else
		p[1] = '\0';
#endif

		/* Construct the arguments for calling snprintf.  */
		prefix_count = 0;
		if (dp->width_arg_index != ARG_NONE)
		  {
		    if (!(a.arg[dp->width_arg_index].type == TYPE_INT))
		      abort ();
		    prefixes[prefix_count++] = a.arg[dp->width_arg_index].a.a_int;
		  }
		if (dp->precision_arg_index != ARG_NONE)
		  {
		    if (!(a.arg[dp->precision_arg_index].type == TYPE_INT))
		      abort ();
		    prefixes[prefix_count++] = a.arg[dp->precision_arg_index].a.a_int;
		  }

#if USE_SNPRINTF
		/* Prepare checking whether snprintf returns the count
		   via %n.  */
		ENSURE_ALLOCATION (xsum (length, 1));
		result[length] = '\0';
#endif

		/* Call snprintf, enlarging the output buffer and retrying
		   until the whole conversion result fits.  */
		for (;;)
		  {
		    /* Room left in result, in elements.  */
		    size_t maxlen;
		    /* Count reported through %n; stays -1 if snprintf did
		       not store anything.  */
		    int count;
		    /* Return value of snprintf.  */
		    int retcount;

		    maxlen = allocated - length;
		    count = -1;
		    retcount = 0;

#if USE_SNPRINTF
/* Calls SNPRINTF with 0, 1 or 2 leading int arguments (width and/or
   precision taken from '*'), then ARG, then &count for the "%n".  */
# define SNPRINTF_BUF(arg) \
		    switch (prefix_count)				    \
		      {							    \
		      case 0:						    \
			retcount = SNPRINTF (result + length, maxlen, buf,  \
					     arg, &count);		    \
			break;						    \
		      case 1:						    \
			retcount = SNPRINTF (result + length, maxlen, buf,  \
					     prefixes[0], arg, &count);	    \
			break;						    \
		      case 2:						    \
			retcount = SNPRINTF (result + length, maxlen, buf,  \
					     prefixes[0], prefixes[1], arg, \
					     &count);			    \
			break;						    \
		      default:						    \
			abort ();					    \
		      }
#endif

		    /* Fetch the argument with its exact C type (narrow
		       integer types are widened to int / unsigned int) and
		       hand it to snprintf.  */
		    switch (type)
		      {
		      case TYPE_SCHAR:
			{
			  int arg = a.arg[dp->arg_index].a.a_schar;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_UCHAR:
			{
			  unsigned int arg = a.arg[dp->arg_index].a.a_uchar;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_SHORT:
			{
			  int arg = a.arg[dp->arg_index].a.a_short;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_USHORT:
			{
			  unsigned int arg = a.arg[dp->arg_index].a.a_ushort;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_INT:
			{
			  int arg = a.arg[dp->arg_index].a.a_int;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_UINT:
			{
			  unsigned int arg = a.arg[dp->arg_index].a.a_uint;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_LONGINT:
			{
			  long int arg = a.arg[dp->arg_index].a.a_longint;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_ULONGINT:
			{
			  unsigned long int arg = a.arg[dp->arg_index].a.a_ulongint;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_LONG_LONG
		      case TYPE_LONGLONGINT:
			{
			  long long int arg = a.arg[dp->arg_index].a.a_longlongint;
			  SNPRINTF_BUF (arg);
			}
			break;
		      case TYPE_ULONGLONGINT:
			{
			  unsigned long long int arg = a.arg[dp->arg_index].a.a_ulonglongint;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_DOUBLE:
			{
			  double arg = a.arg[dp->arg_index].a.a_double;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_LONG_DOUBLE
		      case TYPE_LONGDOUBLE:
			{
			  long double arg = a.arg[dp->arg_index].a.a_longdouble;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_CHAR:
			{
			  int arg = a.arg[dp->arg_index].a.a_char;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_WINT_T
		      case TYPE_WIDE_CHAR:
			{
			  wint_t arg = a.arg[dp->arg_index].a.a_wide_char;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_STRING:
			{
			  const char *arg = a.arg[dp->arg_index].a.a_string;
			  SNPRINTF_BUF (arg);
			}
			break;
#ifdef HAVE_WCHAR_T
		      case TYPE_WIDE_STRING:
			{
			  const wchar_t *arg = a.arg[dp->arg_index].a.a_wide_string;
			  SNPRINTF_BUF (arg);
			}
			break;
#endif
		      case TYPE_POINTER:
			{
			  void *arg = a.arg[dp->arg_index].a.a_pointer;
			  SNPRINTF_BUF (arg);
			}
			break;
		      default:
			abort ();
		      }

#if USE_SNPRINTF
		    /* Portability: Not all implementations of snprintf()
		       are ISO C 99 compliant.  Determine the number of
		       bytes that snprintf() has produced or would have
		       produced.  */
		    if (count >= 0)
		      {
			/* Verify that snprintf() has NUL-terminated its
			   result.  */
			if (count < maxlen && result[length + count] != '\0')
			  abort ();
			/* Portability hack.  */
			if (retcount > count)
			  count = retcount;
		      }
		    else
		      {
			/* snprintf() doesn't understand the '%n'
			   directive.  */
			if (p[1] != '\0')
			  {
			    /* Don't use the '%n' directive; instead, look
			       at the snprintf() return value.  */
			    p[1] = '\0';
			    continue;
			  }
			else
			  {
			    /* Look at the snprintf() return value.  */
			    if (retcount < 0)
			      {
				/* HP-UX 10.20 snprintf() is doubly deficient:
				   It doesn't understand the '%n' directive,
				   *and* it returns -1 (rather than the length
				   that would have been required) when the
				   buffer is too small.  */
				size_t bigger_need =
				  xsum (xtimes (allocated, 2), 12);
				ENSURE_ALLOCATION (bigger_need);
				continue;
			      }
			    else
			      count = retcount;
			  }
		      }
#endif

		    /* Attempt to handle failure.  */
		    if (count < 0)
		      {
			if (!(result == resultbuf || result == NULL))
			  free (result);
			if (buf_malloced != NULL)
			  free (buf_malloced);
			CLEANUP ();
			errno = EINVAL;
			return NULL;
		      }

#if !USE_SNPRINTF
		    /* NOTE(review): tmp_length is not declared in this
		       function and SNPRINTF_BUF is only defined when
		       USE_SNPRINTF is set, so this branch looks like a
		       leftover from a sprintf-based variant - confirm
		       before building without USE_SNPRINTF.  */
		    if (count >= tmp_length)
		      /* tmp_length was incorrectly calculated - fix the
			 code above!  */
		      abort ();
#endif

		    /* Make room for the result.
		       If the output did not fit, a C99 compliant snprintf()
		       reports count >= maxlen.  A non-compliant snprintf()
		       that returns the number of elements actually stored
		       reports only count = maxlen - 1.  To cover both cases,
		       test whether count >= maxlen - 1; an exact fit merely
		       costs one extra round.  count is known to be >= 0
		       here, so the unsigned arithmetic cannot wrap.  */
		    if ((unsigned int) count + 1 >= maxlen)
		      {
			/* Need at least count + 1 elements (the +1 is for
			   the trailing NUL).  Ask for count + 2, so that in
			   the next round maxlen > count + 1 and we do not
			   get here again.  And allocate proportionally, to
			   avoid looping eternally if snprintf() reports a
			   too small count.  */
			size_t n =
			  xmax (xsum (length, (unsigned int) count + 2),
				xtimes (allocated, 2));

			ENSURE_ALLOCATION (n);
#if USE_SNPRINTF
			continue;
#endif
		      }

		    /* The conversion result is now in place in result.  */
		    length += count;
		    break;
		  }
	      }
	  }
      }

    /* Add the final NUL.  */
    ENSURE_ALLOCATION (xsum (length, 1));
    result[length] = '\0';

    if (result != resultbuf && length + 1 < allocated)
      {
	/* Shrink the allocated memory if possible.  */
	CHAR_T *memory;

	memory = (CHAR_T *) realloc (result, (length + 1) * sizeof (CHAR_T));
	if (memory != NULL)
	  result = memory;
      }

    if (buf_malloced != NULL)
      free (buf_malloced);
    CLEANUP ();
    *lengthp = length;
    if (length > INT_MAX)
      goto length_overflow;
    return result;

  length_overflow:
    /* We could produce such a big string, but its length doesn't fit into
       an 'int'.  POSIX says that snprintf() fails with errno = EOVERFLOW in
       this case.  The directive buffer and the parsed arrays have already
       been released above; only the result remains to be freed.  */
    if (result != resultbuf)
      free (result);
    errno = EOVERFLOW;
    return NULL;

  out_of_memory:
    /* Reached from ENSURE_ALLOCATION: release the partial result and the
       directive buffer, then fall through to the common cleanup.  */
    if (!(result == resultbuf || result == NULL))
      free (result);
    if (buf_malloced != NULL)
      free (buf_malloced);
  out_of_memory_1:
    /* Reached directly when the directive buffer itself could not be
       allocated.  */
    CLEANUP ();
    errno = ENOMEM;
    return NULL;
  }
}