Exemple #1
0
/*
================
SV_CheckVelocity
================
*/
void SV_CheckVelocity (edict_t *ent)
{
	int		axis;
	float	speed;	// length of the velocity vector (sv_maxvelocity fix - Eradicator)

	// Scrub NaN components: every NaN velocity or origin component is
	// reported on the console and then replaced by zero.
	for (axis = 0; axis < 3; axis++)
	{
		if (IS_NAN(ent->v.velocity[axis]))
		{
			Con_Printf ("Got a NaN velocity on %s\n", pr_strings + ent->v.classname);
			ent->v.velocity[axis] = 0;
		}
		if (IS_NAN(ent->v.origin[axis]))
		{
			Con_Printf ("Got a NaN origin on %s\n", pr_strings + ent->v.classname);
			ent->v.origin[axis] = 0;
		}
	}

	// sv_maxvelocity bounds the length of the whole vector instead of each
	// axis on its own (the old per-axis clamp), so an over-limit velocity
	// is scaled back by a single factor applied to all three components.
	speed = Length(ent->v.velocity);
	if (!(speed > sv_maxvelocity.value))
		return;

	VectorScale (ent->v.velocity, sv_maxvelocity.value/speed, ent->v.velocity);
}
Exemple #2
0
/*
================
SV_CheckVelocity
================
*/
void SV_CheckVelocity (edict_t *ent)
{
	int		axis;

	// Every axis goes through the same three steps, in this order:
	// NaN scrubbing, per-axis clamping, optional zeroing.
	axis = 0;
	while (axis < 3)
	{
		// NaN components are reported and reset to zero
		if (IS_NAN(ent->v.velocity[axis]))
		{
			Con_Printf ("Got a NaN velocity on %s\n", pr_strings + ent->v.classname);
			ent->v.velocity[axis] = 0;
		}
		if (IS_NAN(ent->v.origin[axis]))
		{
			Con_Printf ("Got a NaN origin on %s\n", pr_strings + ent->v.classname);
			ent->v.origin[axis] = 0;
		}

		// keep the component inside [-sv_maxvelocity, +sv_maxvelocity]
		if (ent->v.velocity[axis] > sv_maxvelocity.value)
		{
			ent->v.velocity[axis] = sv_maxvelocity.value;
		}
		else if (ent->v.velocity[axis] < -sv_maxvelocity.value)
		{
			ent->v.velocity[axis] = -sv_maxvelocity.value;
		}

		// Slot Zero 3.50-2  Zero velocity effect: the EF_ZERO_VELOCITY
		// bit in the entity's effects overrides whatever was computed above.
		if ((int)ent->v.effects & EF_ZERO_VELOCITY)
		{
			ent->v.velocity[axis] = 0;
		}

		axis++;
	}
}
bool CmpNodePos::operator() (const Node* u, const Node* v) const {
	/* Strict ordering of nodes by position, written so that NaN
	 * positions still yield a well-defined order.
	 *
	 * With IEEE floating point comparison a NaN is neither less than
	 * nor greater than any other number, so a naive comparison would
	 * treat NaN as equivalent to every value, including values that
	 * are not equivalent to each other.  Sorting needs equivalence to
	 * be transitive, so when exactly one of the two positions is NaN
	 * the NaN one is ordered first, and the remaining ties are broken
	 * by comparing the node pointers themselves.
	 *
	 * It is unclear how much NaN handling matters here (other code
	 * probably handles it badly anyway); the aim is merely to limit
	 * the damage to the other nodes.
	 */
	if (u->pos < v->pos || v->pos < u->pos) {
		/* The positions are ordered numerically. */
		return u->pos < v->pos;
	}
	if (IS_NAN(u->pos) != IS_NAN(v->pos)) {
		/* Exactly one position is NaN: that node sorts first. */
		return IS_NAN(u->pos);
	}
	/* Equal positions, or both NaN: fall back to the addresses. */
	return u < v;
}
Exemple #4
0
/*
================
SV_CheckVelocity
================
*/
void SV_CheckVelocity (edict_t *ent)
{
    int		k;
    float	speed;

    /* Pass 1: any NaN velocity or origin component is logged through
     * Com_DPrintf and reset to zero. */
    k = 0;
    do
    {
        if (IS_NAN(ent->v.velocity[k]))
        {
            Com_DPrintf ("Got a NaN velocity on %s\n", PR_GetString(ent->v.classname));
            ent->v.velocity[k] = 0;
        }
        if (IS_NAN(ent->v.origin[k]))
        {
            Com_DPrintf ("Got a NaN origin on %s\n", PR_GetString(ent->v.classname));
            ent->v.origin[k] = 0;
        }
    } while (++k < 3);

    /* Pass 2 (SV_MAXVELOCITY fix by Maddes): limit the length of the
     * velocity vector, not its individual components, by scaling the
     * whole vector down to sv_maxvelocity when it is longer than that. */
    speed = VectorLength(ent->v.velocity);
    if (!(speed > sv_maxvelocity.value))
        return;

    VectorScale (ent->v.velocity, sv_maxvelocity.value/speed, ent->v.velocity);
}
Exemple #5
0
/*
================
SV_CheckVelocity
================
*/
void SV_CheckVelocity (edict_t *ent)
{
	int		n;

	/*
	 * Sanitize the entity one axis at a time: NaN velocity/origin
	 * components are reported and zeroed, then the velocity component
	 * is limited to the range [-sv_maxvelocity, +sv_maxvelocity].
	 */
	for (n = 0; n != 3; ++n)
	{
		if (IS_NAN(ent->v.velocity[n]))
		{
			Con_Printf ("Got a NaN velocity on %s\n", PR_GetString(ent->v.classname));
			ent->v.velocity[n] = 0;
		}

		if (IS_NAN(ent->v.origin[n]))
		{
			Con_Printf ("Got a NaN origin on %s\n", PR_GetString(ent->v.classname));
			ent->v.origin[n] = 0;
		}

		/* The upper bound is tested first; the lower bound is only
		 * considered when the upper one did not apply. */
		if (ent->v.velocity[n] > sv_maxvelocity.value)
		{
			ent->v.velocity[n] = sv_maxvelocity.value;
			continue;
		}
		if (ent->v.velocity[n] < -sv_maxvelocity.value)
		{
			ent->v.velocity[n] = -sv_maxvelocity.value;
		}
	}
}
Exemple #6
0
/*
================
SV_CheckVelocity
================
*/
void SV_CheckVelocity( edict_t *ent )
{
	int	axis = 0;

	// Per axis: replace NaN velocity/origin components with zero, then
	// clamp the velocity component to +/- sv_maxvelocity. Every
	// correction is reported through MsgDev at D_INFO level.
	while( axis < 3 )
	{
		if( IS_NAN( ent->v.velocity[axis] ))
		{
			MsgDev( D_INFO, "Got a NaN velocity on %s\n", STRING( ent->v.classname ));
			ent->v.velocity[axis] = 0.0f;
		}

		if( IS_NAN( ent->v.origin[axis] ))
		{
			MsgDev( D_INFO, "Got a NaN origin on %s\n", STRING( ent->v.classname ));
			ent->v.origin[axis] = 0.0f;
		}

		if( ent->v.velocity[axis] > sv_maxvelocity->value )
		{
			// above the positive limit
			MsgDev( D_INFO, "Got a velocity too high on %s\n", STRING( ent->v.classname ));
			ent->v.velocity[axis] = sv_maxvelocity->value;
		}
		else
		{
			if( ent->v.velocity[axis] < -sv_maxvelocity->value )
			{
				// below the negative limit
				MsgDev( D_INFO, "Got a velocity too low on %s\n", STRING( ent->v.classname ));
				ent->v.velocity[axis] = -sv_maxvelocity->value;
			}
		}

		axis++;
	}
}
/* qsort-style comparator for an array of Event pointers.
 *
 * a and b each point at an element of the array, i.e. at an Event*.
 * Returns a negative value if *a sorts before *b, a positive value if it
 * sorts after, and 0 if the two events are equivalent.
 *
 * Ordering rules, in priority order:
 *  1. Two events whose v->r is the same rectangle: the Open event first.
 *  2. Otherwise by ascending pos.
 *  3. If neither pos is less than the other and exactly one of them is
 *     NaN, the NaN event sorts first.  NaN compares neither less than nor
 *     greater than any number, so without this rule a NaN would be
 *     "equal" to everything and the ordering would not be transitive.
 */
int compare_events(const void *a, const void *b) {
	Event *ea=*(Event**)a;
	Event *eb=*(Event**)b;
	if(ea->v->r==eb->v->r) {
		// when comparing opening and closing from the same rect
		// open must come first
		if(ea->type==Open) return -1;
		return 1;
	} else if(ea->pos > eb->pos) {
		return 1;
	} else if(ea->pos < eb->pos) {
		return -1;
	} else if(IS_NAN(ea->pos) != IS_NAN(eb->pos)) {
		/* Exactly one position is NaN.  The test must look at both
		 * events: comparing ea->pos with itself is always false and
		 * would silently disable this branch. */
		return ( IS_NAN(ea->pos)
			 ? -1
			 : 1 );
	}
	return 0;
}
Exemple #8
0
/*
================
SV_CheckVelocity
================
*/
void SV_CheckVelocity (edict_t *ent)
{
	int		idx;
	float	speed;	// 1999-10-18 SV_MAXVELOCITY fix by Maddes

	// Step 1: NaN components of velocity and origin are announced on the
	// console and forced to zero. No per-axis clamping is done here any
	// more (it was removed by the SV_MAXVELOCITY fix).
	for (idx = 0; idx <= 2; idx++)
	{
		if (IS_NAN(ent->v.velocity[idx]))
		{
			Con_Printf ("Got a NaN velocity on %s\n", pr_strings + ent->v.classname);
			ent->v.velocity[idx] = 0;
		}
		if (IS_NAN(ent->v.origin[idx]))
		{
			Con_Printf ("Got a NaN origin on %s\n", pr_strings + ent->v.classname);
			ent->v.origin[idx] = 0;
		}
	}

	// Step 2 (1999-10-18 SV_MAXVELOCITY fix by Maddes): limit the length
	// of the velocity vector as a whole; when it is longer than
	// sv_maxvelocity all three components are scaled by the same factor.
	speed = Length(ent->v.velocity);
	if (!(speed > sv_maxvelocity->value))
		return;

	VectorScale (ent->v.velocity, sv_maxvelocity->value/speed, ent->v.velocity);
}
void CASW_Drone_Movement::CheckVelocity( void )
{
	Vector origin = mv->GetAbsOrigin();
	bool bFixedOrigin = false;
	for (int i=0; i < 3; i++)
	{
		// See if it's bogus.
		if (IS_NAN(mv->m_vecVelocity[i]))
		{
			DevMsg( 1, "PM  Got a NaN velocity %s\n", DescribeAxis( i ) );
			mv->m_vecVelocity[i] = 0;
		}
		if (IS_NAN(origin[i]))
		{
			DevMsg( 1, "PM  Got a NaN origin on %s\n", DescribeAxis( i ) );
			origin[i] = 0;
			bFixedOrigin = true;
		}
	}
	if (bFixedOrigin)
	{
		mv->SetAbsOrigin( origin );
	}
}
/** Scales this vector to make it a unit vector (within rounding error).
 *
 *  The current version tries to handle infinite coordinates gracefully,
 *  but it's not clear that any callers need that:
 *   - exactly one infinite coordinate: the result is the unit vector along
 *     that axis, with the sign of the infinite coordinate;
 *   - two infinite coordinates: the result is (+/-sqrt(0.5), +/-sqrt(0.5));
 *   - no infinite coordinate but an overflowing length: the vector is
 *     scaled down by 4 before being normalized.
 *
 *  If the length is zero or NaN the g_return_if_fail() checks fail and the
 *  point is left unchanged.
 *
 *  \pre *this != Point(0, 0).
 *  \pre Neither coordinate is NaN.
 *  \post L2(*this) very near 1.0.
 */
void NR::Point::normalize() {
	double len = hypot(_pt[0], _pt[1]);
	g_return_if_fail(len != 0);
	g_return_if_fail(!IS_NAN(len));
	/* HUGE_VAL is positive infinity on IEEE platforms.  It replaces the
	 * literal 1e400, which is outside the range of double and therefore
	 * not a valid floating-point literal. */
	static double const inf = HUGE_VAL;
	if(len != inf) {
		*this /= len;
	} else {
		unsigned n_inf_coords = 0;
		/* Delay updating pt in case neither coord is infinite. */
		NR::Point tmp;
		for ( unsigned i = 0 ; i < 2 ; ++i ) {
			if ( _pt[i] == inf ) {
				++n_inf_coords;
				tmp[i] = 1.0;
			} else if ( _pt[i] == -inf ) {
				++n_inf_coords;
				tmp[i] = -1.0;
			} else {
				tmp[i] = 0.0;
			}
		}
		switch (n_inf_coords) {
		case 0:
			/* Can happen if both coords are near +/-DBL_MAX. */
			*this /= 4.0;
			len = hypot(_pt[0], _pt[1]);
			g_assert(len != inf);
			*this /= len;
			break;

		case 1:
			/* Only the infinite coordinate contributes a direction. */
			*this = tmp;
			break;

		case 2:
			/* Both coordinates are infinite: use the diagonal. */
			*this = sqrt(0.5) * tmp;
			break;
		}
	}
}
double CHOLMOD(dbound)	/* returns modified diagonal entry of D */
(
    /* ---- input ---- */
    double dj,		/* diagonal entry of D, for LDL' factorization */
    /* --------------- */
    cholmod_common *Common
)
{
    /* Keeps dj away from zero: a value strictly between -Common->dbound
     * and +Common->dbound is replaced by the bound that has the sign of dj
     * (the positive bound when dj is not negative).  A NaN is returned
     * unchanged.  Each replacement increments Common->ndbounds_hit and, if
     * the status is still CHOLMOD_OK, reports CHOLMOD_DSMALL. */
    double bound ;
    int hit ;
    RETURN_IF_NULL_COMMON (0) ;

    if (IS_NAN (dj))
    {
	/* nothing to do for NaN */
	return (dj) ;
    }

    bound = Common->dbound ;
    hit = 0 ;

    if (dj < 0)
    {
	/* negative entry: must not be above -bound */
	if (dj > -bound)
	{
	    dj = -bound ;
	    hit = 1 ;
	}
    }
    else if (dj < bound)
    {
	/* non-negative entry: must not be below +bound */
	dj = bound ;
	hit = 1 ;
    }

    if (hit)
    {
	Common->ndbounds_hit++ ;
	if (Common->status == CHOLMOD_OK)
	{
	    ERROR (CHOLMOD_DSMALL, "diagonal below threshold") ;
	}
    }

    return (dj) ;
}
/* Change the storage form of a simplicial numeric factor L.
 *
 * to_ll, to_packed and to_monotonic describe the requested form.  When the
 * columns have to be made monotonic, the size of the new index/value arrays
 * is computed first (optionally with extra space per column, controlled by
 * Common->grow0, grow1 and grow2) and the arrays are allocated.  The actual
 * conversion is then done by the r_/c_/z_ template routine selected by
 * L->xtype.
 *
 * The function returns without converting L if the new size overflows
 * (CHOLMOD_TOO_LARGE is reported) or if the allocation leaves
 * Common->status below CHOLMOD_OK.
 */
static void change_simplicial_numeric
(
    cholmod_factor *L,
    int to_ll,
    int to_packed,
    int to_monotonic,
    cholmod_common *Common
)
{
    double grow0, grow1, xlen, xlnz ;
    void *newLi, *newLx, *newLz ;
    double *Lx, *Lz ;
    Int *Lp, *Li, *Lnz ;
    Int make_monotonic, grow2, n, j, lnz, len, grow, ok, make_ll, make_ldl ;
    size_t nzmax0 ;

    PRINT1 (("\n===Change simplicial numeric: %d %d %d\n",
	    to_ll, to_packed, to_monotonic)) ;
    DEBUG (CHOLMOD(dump_factor) (L, "change simplicial numeric", Common)) ;
    ASSERT (L->xtype != CHOLMOD_PATTERN && !(L->is_super)) ;

    /* decide which conversions are actually needed, given the current L */
    make_monotonic = ((to_packed || to_monotonic) && !(L->is_monotonic)) ;
    make_ll  = (to_ll && !(L->is_ll)) ;
    make_ldl = (!to_ll && L->is_ll) ;

    n = L->n ;
    Lp = L->p ;
    Li = L->i ;
    Lx = L->x ;
    Lz = L->z ;
    Lnz = L->nz ;

    grow = FALSE ;
    grow0 = Common->grow0 ;
    grow1 = Common->grow1 ;
    grow2 = Common->grow2 ;
    /* a NaN growth factor is treated as 1 */
    grow0 = IS_NAN (grow0) ? 1 : grow0 ;
    grow1 = IS_NAN (grow1) ? 1 : grow1 ;
    ok = TRUE ;
    newLi = NULL ;
    newLx = NULL ; 
    newLz = NULL ; 
    lnz = 0 ;

    if (make_monotonic)
    {

	/* ------------------------------------------------------------------ */
	/* Columns out of order, but will be reordered and optionally packed. */
	/* ------------------------------------------------------------------ */

	PRINT1 (("L is non-monotonic\n")) ;

	/* compute new L->nzmax */
	if (!to_packed)
	{
	    /* extra space is added only if all three parameters ask for it; */
	    /* grow0 and grow1 cannot be NaN here, they were replaced above */
	    grow = (grow0 >= 1.0) && (grow1 >= 1.0) && (grow2 > 0) ;
	}
	/* sum the (possibly enlarged) column lengths; stop as soon as the */
	/* running total lnz becomes negative, which is taken as overflow */
	for (j = 0 ; ok && j < n ; j++)
	{
	    len = Lnz [j] ;
	    ASSERT (len >= 1 && len <= n-j) ;

	    /* compute len in double to avoid integer overflow */
	    if (grow)
	    {
		xlen = (double) len ;
		xlen = grow1 * xlen + grow2 ;
		/* the enlarged length is capped at n-j */
		xlen = MIN (xlen, n-j) ;
		len = (Int) xlen ;
	    }
	    ASSERT (len >= Lnz [j] && len <= n-j) ;

	    PRINT2 (("j: "ID" Lnz[j] "ID" len "ID" p "ID"\n",
			j, Lnz [j], len, lnz)) ;

	    lnz += len ;
	    ok = (lnz >= 0) ;
	}

	if (!ok)
	{
	    ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
	    return ;
	}

	if (grow)
	{
	    /* scale the total by grow0, in double, capped at Size_max and */
	    /* at n*(n+1)/2 */
	    xlnz = (double) lnz ;
	    xlnz *= grow0 ;
	    xlnz = MIN (xlnz, Size_max) ;
	    xlnz = MIN (xlnz, ((double) n * (double) n + (double) n) / 2) ;
	    lnz = (Int) xlnz ;
	}

	/* always allocate at least one entry */
	lnz = MAX (1, lnz) ;
	PRINT1 (("final lnz "ID"\n", lnz)) ;
	nzmax0 = 0 ;

	/* allocate the new arrays; L itself is not modified yet */
	CHOLMOD(realloc_multiple) (lnz, 1, L->xtype, &newLi, NULL,
		&newLx, &newLz, &nzmax0, Common) ;

	if (Common->status < CHOLMOD_OK)
	{
	    return ;	    /* out of memory */
	}
    }

    /* ============================================== commit the changes to L */

    /* ---------------------------------------------------------------------- */
    /* convert the simplicial L, using template routine */
    /* ---------------------------------------------------------------------- */

    /* no default case: any other xtype leaves L untouched */
    switch (L->xtype)
    {

	case CHOLMOD_REAL:
	    r_change_simplicial_numeric (L, to_ll, to_packed,
		    newLi, newLx, newLz, lnz, grow, grow1, grow2,
		    make_ll, make_monotonic, make_ldl, Common) ;
	    break ;

	case CHOLMOD_COMPLEX:
	    c_change_simplicial_numeric (L, to_ll, to_packed,
		    newLi, newLx, newLz, lnz, grow, grow1, grow2,
		    make_ll, make_monotonic, make_ldl, Common) ;
	    break ;

	case CHOLMOD_ZOMPLEX:
	    z_change_simplicial_numeric (L, to_ll, to_packed,
		    newLi, newLx, newLz, lnz, grow, grow1, grow2,
		    make_ll, make_monotonic, make_ldl, Common) ;
	    break ;
    }

    DEBUG (CHOLMOD(dump_factor) (L, "L simplicial changed", Common)) ;
}
Exemple #13
0
/* Plotting procedure.
 * Writes the PostScript path of the function described by 'p' to 'out'.
 * The x-axis is sampled in steps of 'delta', the distance between
 * consecutive x's (x_i to x_{i + 1}). Infinity and NaN checks are
 * performed and the corresponding action is taken to prevent a mess.
 * Values outside the plot box (including infinities) are handled by
 * computing the intersection of the segment with the plot box. NaNs
 * are not plotted at all.
 * Adaptive smoothing is performed at the end of each pass of the plot loop.
 *
 * The plot range (x_low, x_high, y_low, y_high), the scale factors and the
 * other upper-case constants used below are defined outside this function.
 */
static void plot(FILE * out, parsed_expr p)
{
    double delta = (x_high - x_low) / SMOOTHNESS;
    /* (x_1, y_1) is the current point, (x_2, y_2) the next one */
    double x_1 = x_low;
    double x_2 = x_low + delta;
    double y_1 = evaluate(p, x_1);
    double y_2 = evaluate(p, x_2);
    /* the point before (x_1, y_1); only used by SLOPE_JUMP */
    double old_x;
    double old_y;
    /* if y-value out of box, compute intersection of line with box */
    double x_intersect;
    /* state of the previous point: outside the box / NaN */
    int last_out = 1, last_nan = 0;
    int SMOOTHNESS_LVL = MAX_SMOOTHNESS_LVL;



/* Transformation of real number coordinates to plot box coordinates.
 * The plot is in landscape mode: real x-coordinates are mapped to the plot
 * y-coordinate and vice versa.
 * Plot y-coordinates are flipped vertically.
 */
#define COORD_X(x) (((x) - x_low) * scale_x + LLY + BLANK)
#define COORD_Y(y) (URX - BLANK - (((y) - y_low) * scale_y))

/* Redefine postscript lineto and moveto commands, such that there is 
 * no need to take care of coordinate transformations.
 * x and y coordinates are swapped here.
 */
#define LINETO(x, y) fprintf(out, "%.3f %.3f lineto\n", \
                             COORD_Y(y), COORD_X(x))
#define MOVETO(x, y) fprintf(out, "%.3f %.3f moveto\n", \
                             COORD_Y(y), COORD_X(x))

/* find intersection with y-boundary: linear interpolation between
 * (x_1, y_1) and (x_2, y_2).
 * NOTE: the expansion already ends with ';', so every use below yields an
 * extra empty statement, and the macro cannot be used inside a larger
 * expression.
 */
#define INTERSECT(boundary) (x_1 + ((boundary) - y_1) * \
                            (x_2 - x_1) / (y_2 - y_1));

    /* clamp the first point into the box; last_out stays 1 if it had to be
     * clamped. A NaN y_1 fails both comparisons and ends up with
     * last_out = 0. */
    if (y_1 < y_low)
        y_1 = y_low;
    else if (y_1 > y_high)
        y_1 = y_high;
    else
        last_out = 0;

    /* New path exclusively for function plot */
    fprintf(out, "newpath\n");
    if (!IS_NAN(y_1))
        MOVETO(x_1, y_1);
    else
        last_nan = 1;


    old_x = x_1;
    old_y = y_1;

    /* plotting loop */
    while (x_2 <= x_high) {
        if (IS_NAN(y_2))
            last_nan = 1;
        /* next point is in bounding box, thus can be plotted */
        else if (y_low <= y_2 && y_2 <= y_high) {
            /* handle case where last point was NaN */
            if (last_nan) {
                MOVETO(x_2, y_2);
                last_nan = 0;
                /* handle case where last point was out of box, 
                 * find intersections with box. Next line will start
                 * from this point 
                 */
            } else if (last_out) {
                /* point is too high or too low? */
                if (y_2 > y_1) {
                    /* rising: the curve enters through the lower edge */
                    x_intersect = INTERSECT(y_low);
                    /* a NaN intersection falls back to x_1 */
                    if (IS_NAN(x_intersect))
                        x_intersect = x_1;
                    MOVETO(x_intersect, y_low);
                } else {
                    /* not rising: the upper edge is used */
                    x_intersect = INTERSECT(y_high);
                    if (IS_NAN(x_intersect))
                        x_intersect = x_1;
                    MOVETO(x_intersect, y_high);
                }
            }
            /* Draw a valid line. This plots most of the lines. */
            LINETO(x_2, y_2);

            last_out = 0;
            /* case where next point is out of the box */
        } else {
            /* if last point was in the box, stroke a line to the intersection
             * with y-boundary */
            if (!last_out && !last_nan) {
                if (y_2 > y_1) {
                    x_intersect = INTERSECT(y_high);
                    if (IS_NAN(x_intersect))
                        x_intersect = x_1;
                    LINETO(x_intersect, y_high);
                } else {
                    x_intersect = INTERSECT(y_low);
                    if (IS_NAN(x_intersect))
                        x_intersect = x_1;
                    LINETO(x_intersect, y_low);
                }
            }

            last_out = 1;
        }

/** smoothing procedure **/
/* SLOPE_JUMP is a numerically evaluated second derivative, that is, the
 * difference of first derivatives. The expression is simplified
 * algebraically. It reads old_x/old_y, x_1/y_1 and x_2/y_2, so it must be
 * evaluated after these have been advanced.
 */
#define SLOPE_JUMP (y_2 - y_1 - ((y_1 - old_y)*(x_2 - x_1))/(x_1 - old_x))
#define TOO_SHARP() (fabs(SLOPE_JUMP) > THRESHOLD)
#define TOO_SMOOTH() (fabs(SLOPE_JUMP) < THRESHOLD / 4)

        /* advance by one step: current becomes old, next becomes current */
        old_x = x_1;
        old_y = y_1;
        x_1 = x_2;
        y_1 = y_2;
        x_2 = x_1 + delta;
        y_2 = evaluate(p, x_2);

        /* If the plot is too sharp or too smooth, 
         * find appropriate smoothness lvl 
         * by increasing/decreasing smoothness until right one is found.
         */
        if (TOO_SHARP()) {
            /* halve the step, at most up to MAX_SMOOTHNESS_LVL */
            while (SMOOTHNESS_LVL < MAX_SMOOTHNESS_LVL && TOO_SHARP()) {
                delta /= 2;
                x_2 = x_1 + delta;
                y_2 = evaluate(p, x_2);
                SMOOTHNESS_LVL++;
            }
        } else {
            /* double the step, at most down to level 0 */
            while (SMOOTHNESS_LVL > 0 && TOO_SMOOTH()) {
                delta *= 2;
                x_2 = x_1 + delta;
                y_2 = evaluate(p, x_2);
                SMOOTHNESS_LVL--;
            }
        }

        /* make sure the last point is on the right boundary */
        if (x_1 < x_high && x_1 + delta > x_high) {
            x_2 = x_high;
            y_2 = evaluate(p, x_2);
        }
    }

    /* stroke the path */
    fprintf(out, "%s setrgbcolor\n", PLOT_COLOR);
    fprintf(out, "0.5 setlinewidth\n");
    fprintf(out, "stroke\n\n\n");
    /* NOTE: the helper macros defined in this function are not #undef'd,
     * so they remain defined for the rest of the translation unit. */
}
Exemple #14
0
/* Divide the n entries of X by the pivot, in place.  The way the division is
 * done depends on the approximate magnitude of the pivot. */
GLOBAL void UMF_scale
(
    Int n,
    Entry pivot,
    Entry X [ ]
)
{
    Entry xk ;
    double pivot_mag ;
    Int k ;

    /* ---------------------------------------------------------------------- */
    /* approximate absolute value of the pivot decides which loop is used */
    /* ---------------------------------------------------------------------- */

    APPROX_ABS (pivot_mag, pivot) ;

    if (!(pivot_mag < RECIPROCAL_TOLERANCE || IS_NAN (pivot)))
    {

	/* ------------------------------------------------------------------ */
	/* normal case: pivot is neither tiny nor NaN */
	/* ------------------------------------------------------------------ */

	/* Every entry is divided, zero or not.  Dividing is slightly more
	 * accurate than multiplying by 1/pivot (but slightly slower),
	 * particularly if the pivot column consists of only IEEE
	 * subnormals. */

	for (k = 0 ; k < n ; k++)
	{
	    /* X [k] /= pivot ; */
	    xk = X [k] ;
	    DIV (X [k], xk, pivot) ;
	}
	return ;
    }

    /* ---------------------------------------------------------------------- */
    /* tiny, zero, or NaN pivot */
    /* ---------------------------------------------------------------------- */

    /* Zero entries of X are left alone, so that zero is never divided by the
     * pivot; 1/pivot is not used either.  When NO_DIVIDE_BY_ZERO is defined,
     * nothing is divided by a zero pivot at all. */

    for (k = 0 ; k < n ; k++)
    {
	/* X [k] /= pivot ; */
	xk = X [k] ;

#ifndef NO_DIVIDE_BY_ZERO
	if (IS_NONZERO (xk))
#else
	/* Do not divide by zero */
	if (IS_NONZERO (xk) && IS_NONZERO (pivot))
#endif
	{
	    DIV (X [k], xk, pivot) ;
	}
    }
}
Exemple #15
0
// Reduces `in` along its first dimension with the binary operation `op` and
// writes one partial result per block column to `out`.
//
// NOTE(review): Ti, To, op and DIMX are not declared in the visible code;
// presumably they are template parameters declared just above this kernel.
//
// Launch layout, as decoded by the index arithmetic below:
//   - blockIdx.x packs (zid, blockIdx_x): zid = blockIdx.x / blocks_x.
//   - blockIdx.y + blockIdx.z * gridDim.y packs (wid, blockIdx_y) using
//     blocks_y.
//   - threadIdx.y selects a row (dimension 1); threadIdx.x strides over
//     dimension 0.
//   - The shared-memory rows are laid out with a stride of DIMX, which
//     assumes blockDim.x == DIMX, and s_val holds THREADS_PER_BLOCK
//     elements -- TODO confirm both against the launcher.
//
// Each thread folds elements xid, xid + DIMX, ... of its row (bounded by
// xid + repeat * DIMX and in.dims[0]); the per-thread values are then
// combined within each shared-memory row.  If change_nan is set, NaN inputs
// are replaced by nanval before being reduced.
__global__ static void reduce_first_kernel(Param<To> out, CParam<Ti> in,
                                           uint blocks_x, uint blocks_y,
                                           uint repeat, bool change_nan,
                                           To nanval) {
    const uint tidx = threadIdx.x;
    const uint tidy = threadIdx.y;
    // flat index of the thread inside the block
    const uint tid  = tidy * blockDim.x + tidx;

    // split blockIdx.x into the index along dimension 2 and the block
    // column along dimension 0
    const uint zid        = blockIdx.x / blocks_x;
    const uint blockIdx_x = blockIdx.x - (blocks_x)*zid;
    // first element of dimension 0 handled by this thread
    const uint xid        = blockIdx_x * blockDim.x * repeat + tidx;

    Binary<To, op> reduce;
    Transform<Ti, To, op> transform;

    __shared__ To s_val[THREADS_PER_BLOCK];

    // split the combined y/z block index into the index along dimension 3
    // and the block row along dimension 1
    const uint wid = (blockIdx.y + blockIdx.z * gridDim.y) / blocks_y;
    const uint blockIdx_y =
        (blockIdx.y + blockIdx.z * gridDim.y) - (blocks_y)*wid;
    const uint yid = blockIdx_y * blockDim.y + tidy;

    // start of this thread's row; only dereferenced after the bounds check
    const Ti *const iptr = in.ptr + (wid * in.strides[3] + zid * in.strides[2] +
                                     yid * in.strides[1]);

    // Out-of-range rows leave before any barrier below.  The condition does
    // not depend on tidx, so threads with the same tidy exit together.
    // NOTE(review): exited threads never reach the __syncthreads() calls
    // below -- confirm this is acceptable on all targeted architectures.
    if (yid >= in.dims[1] || zid >= in.dims[2] || wid >= in.dims[3]) return;

    // one past the last element this thread may read
    int lim = min((int)(xid + repeat * DIMX), in.dims[0]);

    // sequential per-thread accumulation, starting from the identity of op
    To out_val = Binary<To, op>::init();
    for (int id = xid; id < lim; id += DIMX) {
        To in_val = transform(iptr[id]);
        if (change_nan) in_val = !IS_NAN(in_val) ? in_val : nanval;
        out_val = reduce(in_val, out_val);
    }

    s_val[tid] = out_val;

    // make every thread's partial result visible before the tree reduction
    __syncthreads();
    // this thread's row inside the shared array
    To *s_ptr = s_val + tidy * DIMX;

    // Tree reduction of each row down to its first 32 entries.  The barriers
    // sit outside the `tidx < ...` tests, so all remaining threads reach
    // them.
    if (DIMX == 256) {
        if (tidx < 128) s_ptr[tidx] = reduce(s_ptr[tidx], s_ptr[tidx + 128]);
        __syncthreads();
    }

    if (DIMX >= 128) {
        if (tidx < 64) s_ptr[tidx] = reduce(s_ptr[tidx], s_ptr[tidx + 64]);
        __syncthreads();
    }

    if (DIMX >= 64) {
        if (tidx < 32) s_ptr[tidx] = reduce(s_ptr[tidx], s_ptr[tidx + 32]);
        __syncthreads();
    }

    // Final step with cub::WarpReduce.
    // NOTE(review): a single TempStorage instance is shared by every thread
    // of the block that calls Reduce -- confirm that this is safe for the
    // CUB version and the types in use.
    typedef cub::WarpReduce<To> WarpReduce;
    __shared__ typename WarpReduce::TempStorage temp_storage;

    To warp_val = s_ptr[tidx];
    out_val     = WarpReduce(temp_storage).Reduce(warp_val, reduce);

    To *const optr = out.ptr + (wid * out.strides[3] + zid * out.strides[2] +
                                yid * out.strides[1]);
    // only the first thread of each row stores the row's result
    if (tidx == 0) optr[blockIdx_x] = out_val;
}
// Per-frame update of the smoke volume.
//
// fTimeDelta is the time step used to ramp the density and to advance the
// trade clocks.
//
// The function
//  1. refreshes the particle effect's bounding box when the origin or the
//     angles changed by more than 0.1 (or on the first update),
//  2. moves m_CurrentDensity towards m_Density at m_DensityRampSpeed,
//  3. and, unless the current density is exactly zero, runs the "trades":
//     pairs of adjacent particles that swap grid positions over a random
//     duration while their position, color and fade alpha are interpolated.
void C_FuncSmokeVolume::Update( float fTimeDelta )
{
	// Update our world space bbox if we've moved at all.
	// We do this manually because sometimes people make HUGE bboxes, and if they're constantly changing because their
	// particles wander outside the current bounds sometimes, it'll be linking them into all the leaves repeatedly.
	const Vector &curOrigin = GetAbsOrigin();
	const QAngle &curAngles = GetAbsAngles();
	if ( !VectorsAreEqual( curOrigin, m_vLastOrigin, 0.1 ) || 
		fabs( curAngles.x - m_vLastAngles.x ) > 0.1 || 
		fabs( curAngles.y - m_vLastAngles.y ) > 0.1 || 
		fabs( curAngles.z - m_vLastAngles.z ) > 0.1 ||
		m_bFirstUpdate )
	{
		m_bFirstUpdate = false;
		m_vLastAngles = curAngles;
		m_vLastOrigin = curOrigin;

		// Grow the collision bounds by the particle radius on every side.
		Vector vWorldMins, vWorldMaxs;
		CollisionProp()->WorldSpaceAABB( &vWorldMins, &vWorldMaxs );
		vWorldMins -= Vector( m_ParticleRadius, m_ParticleRadius, m_ParticleRadius );
		vWorldMaxs += Vector( m_ParticleRadius, m_ParticleRadius, m_ParticleRadius );

		m_ParticleEffect.SetBBox( vWorldMins, vWorldMaxs );
	}
		
	// lerp m_CurrentDensity towards m_Density at a rate of m_DensityRampSpeed
	// (never overshooting the target)
	if( m_CurrentDensity < m_Density )
	{
		m_CurrentDensity += m_DensityRampSpeed * fTimeDelta;
		if( m_CurrentDensity > m_Density )
		{
			m_CurrentDensity = m_Density;
		}
	}
	else if( m_CurrentDensity > m_Density )
	{
		m_CurrentDensity -= m_DensityRampSpeed * fTimeDelta;
		if( m_CurrentDensity < m_Density )
		{
			m_CurrentDensity = m_Density;
		}
	}

	// Nothing to animate while the density is exactly zero.
	if( m_CurrentDensity == 0.0f )
	{
		return;
	}
	
	// This is used to randomize the direction it chooses to move a particle in.

	int offsetLookup[3] = {-1,0,1};

	// The 0.1f keeps the divisor away from zero when the movement speed is zero.
	float tradeDurationMax = m_ParticleSpacingDistance / ( m_MovementSpeed + 0.1f );
	float tradeDurationMin = tradeDurationMax * 0.5f;

	if ( IS_NAN( tradeDurationMax ) || IS_NAN( tradeDurationMin ) )
		return;

//	Warning( "tradeDuration: [%f,%f]\n", tradeDurationMin, tradeDurationMax );
	
	// Update all the moving traders and establish new ones.
	int nTotal = m_xCount * m_yCount * m_zCount;
	for( int i=0; i < nTotal; i++ )
	{
		SmokeParticleInfo *pInfo = &m_pSmokeParticleInfos[i];

		if(!pInfo->m_pParticle)
			continue;
	
		if(pInfo->m_TradeIndex == -1)
		{
			// Not trading: show the slot's own color and alpha.
			pInfo->m_pParticle->m_FadeAlpha = pInfo->m_FadeAlpha;
			pInfo->m_pParticle->m_Color[0] = pInfo->m_Color[0];
			pInfo->m_pParticle->m_Color[1] = pInfo->m_Color[1];
			pInfo->m_pParticle->m_Color[2] = pInfo->m_Color[2];

			// Is there an adjacent one that's not trading?
			int x, y, z;
			GetParticleInfoXYZ(i, x, y, z);

			// Random starting point for the scan of each axis. The value is
			// reduced to [0,2] right away: adding the loop counter to a raw
			// rand() result can overflow int where RAND_MAX == INT_MAX,
			// which would produce a negative index into offsetLookup.
			int xCountOffset = rand() % 3;
			int yCountOffset = rand() % 3;
			int zCountOffset = rand() % 3;

			bool bFound = false;
			for(int xCount=0; xCount < 3 && !bFound; xCount++)
			{
				for(int yCount=0; yCount < 3 && !bFound; yCount++)
				{
					for(int zCount=0; zCount < 3; zCount++)
					{
						int testX = x + offsetLookup[(xCount+xCountOffset) % 3];
						int testY = y + offsetLookup[(yCount+yCountOffset) % 3];
						int testZ = z + offsetLookup[(zCount+zCountOffset) % 3];

						// Skip the particle's own cell.
						if(testX == x && testY == y && testZ == z)
							continue;

						if(IsValidXYZCoords(testX, testY, testZ))
						{
							SmokeParticleInfo *pOther = GetSmokeParticleInfo(testX, testY, testZ);
							if(pOther->m_pParticle && pOther->m_TradeIndex == -1)
							{
								// Ok, this one is looking to trade also.
								pInfo->m_TradeIndex = GetSmokeParticleIndex(testX, testY, testZ);
								pOther->m_TradeIndex = i;
								pInfo->m_TradeClock = pOther->m_TradeClock = 0;
								pOther->m_TradeDuration = pInfo->m_TradeDuration = FRand( tradeDurationMin, tradeDurationMax );
								
								// The outer two loops stop through their !bFound test.
								bFound = true;
								break;
							}
						}
					}
				}
			}
		}
		else
		{
			SmokeParticleInfo *pOther = &m_pSmokeParticleInfos[pInfo->m_TradeIndex];
			assert(pOther->m_TradeIndex == i);
			
			// This makes sure the trade only gets updated once per frame.
			if(pInfo < pOther)
			{
				// Increment the trade clock..
				pInfo->m_TradeClock = (pOther->m_TradeClock += fTimeDelta);
				int x, y, z;
				GetParticleInfoXYZ(i, x, y, z);
				Vector myPos = GetSmokeParticlePos(x, y, z);
				
				int otherX, otherY, otherZ;
				GetParticleInfoXYZ(pInfo->m_TradeIndex, otherX, otherY, otherZ);
				Vector otherPos = GetSmokeParticlePos(otherX, otherY, otherZ);

				// Is the trade finished?
				if(pInfo->m_TradeClock >= pInfo->m_TradeDuration)
				{
					pInfo->m_TradeIndex = pOther->m_TradeIndex = -1;
					
					// Snap both particles to their new cells and swap the
					// particle pointers of the two slots.
					pInfo->m_pParticle->m_Pos = otherPos;
					pOther->m_pParticle->m_Pos = myPos;

					SmokeGrenadeParticle *temp = pInfo->m_pParticle;
					pInfo->m_pParticle = pOther->m_pParticle;
					pOther->m_pParticle = temp;
				}
				else
				{			
					// Ok, move them closer.
					// percent follows half a cosine period: it goes from 1 at
					// the start of the trade to 0 at its end.
					float percent = (float)cos(pInfo->m_TradeClock * 2 * 1.57079632f / pInfo->m_TradeDuration);
					percent = percent * 0.5 + 0.5;
					
					pInfo->m_pParticle->m_FadeAlpha  = pInfo->m_FadeAlpha + (pOther->m_FadeAlpha - pInfo->m_FadeAlpha) * (1 - percent);
					pOther->m_pParticle->m_FadeAlpha = pInfo->m_FadeAlpha + (pOther->m_FadeAlpha - pInfo->m_FadeAlpha) * percent;

					InterpColor(pInfo->m_pParticle->m_Color,  pInfo->m_Color, pOther->m_Color, 1-percent);
					InterpColor(pOther->m_pParticle->m_Color, pInfo->m_Color, pOther->m_Color, percent);

					pInfo->m_pParticle->m_Pos  = myPos + (otherPos - myPos) * (1 - percent);
					pOther->m_pParticle->m_Pos = myPos + (otherPos - myPos) * percent;
				}
			}
		}
	}
}
Exemple #17
0
SP_spectrum _SP_addSpectra(SP_spectrum* spectra,size_t nbsp,double width,double clip,double prc,int error_type,double rebin_pixfactor,double rebin_pixshift,int spline,int tclip,int keepnbsp,char* file,size_t line)
{

  VC_vector   group,factors;
  double*     work,fct;
  double*     tmp;
  double      *dw,*df,*ds;
  double      fact1,fact2;
  double      *wmin,*wmax;
  VC_vector    wlim,wave;
  SP_spectrum rslt;
  VC_vector   eflux,esigm,emask;
  VC_vector   flx1,flx2,wav1,pds1,pds2;
  long        *npix;
  double      y1,y2;
  double       tmin, tmax,tcen,wpas,old_wpas;
  double       w1min,w1max,w2min,w2max;
  long         k1min,k1max,k2min,k2max;
  long         imin,imax,jmin,jmax;
  long        *idmin,*idmax; 
  long        *kmin,*kmax; 
  long        nbgroup,igroup,nbcol;
  long        i,j;
  size_t      si,sj,k,ngood;
  size_t      npix_max,npix_tot;
  int         pourc,old_pourc;
  double      fsum,wsum,weight,sigm_srianand,mean_srianand;
  long        nbg_srianand;
  long        nbg_bastien;
  double      a1,b1;
  SP_spectrum* spline_spectra;

  if(nbsp<2) return NULL;

  /****************************/
  /*  Sort and group by setup */
  /****************************/
  SP_sortSpectrum(spectra,nbsp); 
  OS_message(0,OS_STD,"[SP_addSpectra] new order for spectra :\n");
  for(i=0;i<nbsp;i++)
    {
      OS_message(0,OS_STD,"[SP_addSpectra] %4d : %s\n",i+1,spectra[i]->name);
      
    }
  group=SP_groupSpectrum(spectra,nbsp,prc);
  if(!group)
    {
      OS_message(0,OS_ERR,"not enough memory !!\n");
      exit(1);
    }
  nbgroup=1+group->ldata[nbsp-1];
  OS_message(0,OS_STD,"[SP_addSpectra] found %d group%s\n",nbgroup,(nbgroup>1)?"s":" ");
  
  
  npix_max=0; /*will contain the number of pixel of the biggest spectrum*/
  for(si=0;si<nbsp;si++) 
    npix_max=(spectra[si]->npix>npix_max)?spectra[si]->npix:npix_max;
   
  MMV_malloc(tmp,npix_max,double); /*working buffer*/
  MMV_malloc(idmin,nbsp,long);
  MMV_malloc(idmax,nbsp,long);
  MMV_malloc( kmin,nbsp,long);
  MMV_malloc( kmax,nbsp,long);
  MMV_malloc( wmin,nbsp,double);
  MMV_malloc( wmax,nbsp,double);
  for(i=0;i<nbsp;i++) 
    {
      /**********************************************************/
      /*Find first and last pixel that not have null flux values*/
      /*sort the corresponding wavelength in wmin and wmax and  */
      /*the pixel index in idmin and idmax.                     */
      /**********************************************************/
      VC_getIdxEdge(spectra[i]->data[spectra[i]->colF],idmin+i,idmax+i);
      wmin[i]=spectra[i]->wave[idmin[i]];
      wmax[i]=spectra[i]->wave[idmax[i]-1];
      kmin[i]=kmax[i]=-1;
    }
  for(i=0;i<nbgroup;i++)
    {
      k=0;
      for(j=0;j<nbsp;j++) if(group->ldata[j]==i) k++;
      OS_message(0,OS_STD,"[SP_addSpectra] group #%-2d : %d spectr%s\n",i+1,k,(k>1)?"a":"um");
    }

  /************************************************/
  /*Compute scale factors by group and apply them */
  /************************************************/
  
  imax=imin=igroup=0;
  while(imax<nbsp)
    {
      imax=imin;
      while((imax<nbsp)&&(group->ldata[imax]==igroup)) ++imax;
      //      printf("igroup: %2ld imin: %2ld  imax: %2ld\n",igroup,imin,imax);
      factors=addiScaleFactors(spectra+imin,imax-imin,width,NULL);
      for(i=0,si=imin;si<imax;i++,si++)
	for(sj=0;sj<spectra[si]->npix;sj++)
	  {
	    spectra[si]->flux[sj]*=factors->ddata[i];
	    spectra[si]->sigm[sj]*=factors->ddata[i];
	  }
      VC_free(factors);
      imin=imax;
      igroup++;
    }

  /*************/
  /* Find edge */
  /*************/


  /****************************************/
  /* Compute merge factors and apply them */
  /****************************************/
  /* Two steps for the computation.             */
  /* First find scale factor for the edges      */
  /* Then rebin each edges and compute their    */
  /* ratio. Fit the ratio by a line and take    */
  /* max of 1 and the line value for correction */
  
  if(nbgroup>1)
    {    
      OS_message(0,OS_STD,"[SP_addSpectra] Compute factor for the merging\n");
      MMV_malloc(work,nbsp,double);
      imin=igroup=0;
      while(igroup<nbgroup-1)
	{
	  imax=imin;
	  while((imax<nbsp)&&(group->ldata[imax]==igroup)) imax++;
	  //	  printf("igroup: %2ld imin: %2ld  imax: %2ld\n",igroup,imin,imax);
	  ++igroup;
	  if(imax==nbsp)
	    {
	      OS_message(0,OS_ERR,"[SP_addSpectra] BUG at line %d of file %s\n",__LINE__,__FILE__);
	      exit(1);
	    }
	  jmin=imax;
	  jmax=jmin;
	  while((jmax<nbsp)&&(group->ldata[jmax]==igroup)) jmax++;
	  //printf("igroup: %2ld jmin: %2ld  jmax: %2ld\n",igroup,jmin,jmax);

	  w1min=wmin[imin];
	  w1max=wmax[imin];
	  for(si=imin+1;si<imax;si++)
	    {
	      w1min=(w1min<wmin[si])?w1min:wmin[si];
	      w1max=(w1max>wmax[si])?w1max:wmax[si];
	    }

	  w2min=wmin[jmin];
	  w2max=wmax[jmin];
	  for(sj=jmin+1;sj<jmax;sj++)
	    {
	      w2min=(w2min<wmin[sj])?w2min:wmin[sj];
	      w2max=(w2max>wmax[sj])?w2max:wmax[sj];
	    }

	  tmin=(w1min>w2min)?w1min:w2min;
	  tmax=(w1max<w2max)?w1max:w2max;
	  if(tmin>=tmax) /* no overlap*/
	    {
	      w1min=w1max-width/10.;
	      w2max=w2min+width/10.;
	    }
	  else
	    {
	      w1min=w2min=tmin;
	      w1max=w2max=tmax;
	    }

	  for(si=imin;si<imax;si++)
	    {
	      k1min=findNear(spectra[si]->data[spectra[si]->colW],w1min);
	      k1max=findNear(spectra[si]->data[spectra[si]->colW],w1max)+1;
	      for(i=0,j=k1min;j<k1max;j++,i++) 
		tmp[i]=spectra[si]->flux[j];
	      work[si]=median(tmp,k1max-k1min);
	    }

	  for(sj=jmin;sj<jmax;sj++)
	    {
	      k2min=findNear(spectra[sj]->data[spectra[sj]->colW],w2min);
	      k2max=findNear(spectra[sj]->data[spectra[sj]->colW],w2max)+1;
	      for(i=0,j=k2min;j<k2max;j++,i++) 
		tmp[i]=spectra[sj]->flux[j];
	      work[sj]=median(tmp,k2max-k2min);
	    }

	  fact1=median(work+imin,imax-imin);
	  fact2=median(work+jmin,jmax-jmin);
	  if(fact1<=0) fact1=1;
	  if(fact2<=0) fact2=1;


	  /* Factors are computed. Now rebin the edges */
	  /* imin->imax index of spectra in group 1 */
	  /* jmin->jmax index of spectra in group 2 */
	  /* Compute flx1 and flx2 */
	  
	  if(tmin>=tmax)
	    {
	      fact1/=fact2;
	      OS_message(0,OS_STD,"[SP_addSpectra] Factor for merging for group #%d : %f\n",1+igroup,fact1);
	      for(sj=jmin;sj<jmax;sj++)
		for(j=0;j<spectra[sj]->npix;j++)
		  {
		    spectra[sj]->flux[j]*=fact1;
		    spectra[sj]->sigm[j]*=fact1;
		  }
	  
	    }
	  else
	    {
	      /*First crude addition of the edge 2*/
	      k1min=findNear(spectra[imin]->data[spectra[imin]->colW],w1min);
	      k1max=findNear(spectra[imin]->data[spectra[imin]->colW],w1max)+1;
	      //	      printf("%d  %d\n",k1min,k1max);
	      VCV_allocate(flx1,k1max-k1min,VCTDOUBLE);
	      VCV_allocate(wav1,k1max-k1min,VCTDOUBLE);
	      VCV_allocate(flx2,k1max-k1min,VCTDOUBLE);
	      VCV_allocate(pds1,k1max-k1min,VCTDOUBLE);
	      VCV_allocate(pds2,k1max-k1min,VCTDOUBLE);

	      //	      printf("flx1 %x \n",flx1);
	      //	      printf("flx2 %x \n",flx2);
	      //	      printf("wav1 %x \n",wav1);

	      for(i=k1min,j=0;i<k1max;i++,j++)
		{
		  wav1->ddata[j]=spectra[imin]->wave[i];
		  flx1->ddata[j]=0.;
		  pds1->ddata[j]=0.;
		  flx2->ddata[j]=0.;
		  pds2->ddata[j]=0.;
		}

	      for(si=imin;si<imax;si++)
		{
		  k2min=k2max=-1;
		  dw=spectra[si]->wave+idmin[si];
		  df=spectra[si]->flux+idmin[si];
		  ds=spectra[si]->sigm+idmin[si];

		  for(i=k1min,j=0;i<k1max;i++,j++)
		    {
		      if(j==0) tmin=0.5*(3*wav1->ddata[0]-wav1->ddata[1]);
		      else     tmin=0.5*(wav1->ddata[j-1]+wav1->ddata[j]);
		      
		      if(j==(wav1->size-1)) tmax=0.5*(3*wav1->ddata[j]-wav1->ddata[j-1]);
		      else                  tmax=0.5*(wav1->ddata[j+1]+wav1->ddata[j]);
		      
		      rebinLocal(df,ds,dw,idmax[si]-idmin[si],
				 tmin,tmax,&k2min,&k2max,
				 &fsum,&wsum);
		      if(wsum<=0) continue;
		      weight=fsum/wsum;
		      weight*=weight;
		      pds1->ddata[j]+=weight;
		      flx1->ddata[j]+=weight*fsum;
		    }
		}
	      
	      for(sj=jmin;sj<jmax;sj++)
		{
		  k2min=k2max=-1;
		  dw=spectra[sj]->wave+idmin[sj];
		  df=spectra[sj]->flux+idmin[sj];
		  ds=spectra[sj]->sigm+idmin[sj];

		  for(i=k1min,j=0;i<k1max;i++,j++)
		    {
		      if(j==0) tmin=0.5*(3*wav1->ddata[0]-wav1->ddata[1]);
		      else     tmin=0.5*(wav1->ddata[j-1]+wav1->ddata[j]);
		      
		      if(j==(wav1->size-1)) tmax=0.5*(3*wav1->ddata[j]-wav1->ddata[j-1]);
		      else                  tmax=0.5*(wav1->ddata[j+1]+wav1->ddata[j]);
		      
		      rebinLocal(df,ds,dw,idmax[sj]-idmin[sj],
				 tmin,tmax,&k2min,&k2max,
				 &fsum,&wsum);
		      if(wsum<=0) continue;
		      weight=fsum/wsum;
		      weight*=weight;
		      pds2->ddata[j]+=weight;
		      flx2->ddata[j]+=weight*fsum;
		    }
		}

	      ngood=0;
	      for(j=0;j<wav1->size;j++)
		{
		  if((!pds1->ddata[j])||(!pds2->ddata[j])) continue;
		  y1=flx1->ddata[j]/(fact1*pds1->ddata[j]);
		  y2=flx2->ddata[j]/(fact2*pds2->ddata[j]);
		  
		  if((y1<0.2)||(y2<0.2)) continue;
		  flx1->ddata[ngood]=y1/y2;
		  wav1->ddata[ngood]=wav1->ddata[j];
		  ngood++;
		}
	      if(ngood)
		fitLineIter(flx1->ddata,wav1->ddata,ngood,&a1,&b1);
	      else
		{
		  a1=0;
		  b1=1;
		}
	      wpas=0.1/(wav1->ddata[1]-wav1->ddata[0]);
	      fact1/=fact2;
	      /* apply correction */
	      for(sj=jmin;sj<jmax;sj++)
		for(j=0;j<spectra[sj]->npix;j++)
		  {
		    wsum=spectra[sj]->wave[j];
		    weight=(fact1*(a1*wsum+b1)*expFilter(wsum,w1max,-wpas))+
		      (fact1*(a1*w1max+b1)*expFilter(wsum,w1max,wpas));
		      
		    spectra[sj]->flux[j]*=weight;
		    spectra[sj]->sigm[j]*=weight;
		  }
	      VC_free(pds2);
	      VC_free(pds1);
	      VC_free(flx1);
	      VC_free(flx2);
	      VC_free(wav1);

	    }
	  imin=jmin;
	}
      MM_free(work);
    }


  /**************************/
  /* Compute the wave range */
  /**************************/

  OS_message(0,OS_STD,"[SP_addSpectra] Compute wavelength range\n");

  /*****************************************/
  /* merge all the wave limits and compute */
  /* average pixel size for each group     */
  /*****************************************/
  VCV_allocate(wlim,2*nbgroup,VCTDOUBLE);
  imin=igroup=0;
  while(imin<nbsp)
    {
      imax=imin;
      tmin=wmin[imin];
      tmax=wmax[imin];
      while((imax<nbsp)&&(group->ldata[imax]==igroup))
	{
	  tmin=(tmin<wmin[imax])?tmin:wmin[imax];
	  tmax=(tmax>wmax[imax])?tmax:wmax[imax];
	  imax++;
	}
      wlim->ddata[  2*igroup]=tmin;
      wlim->ddata[1+2*igroup]=tmax;
      imin=imax;
      igroup++;
    }
  VC_hpsort(wlim);
  /* Compute pixel size */
  
  MMV_malloc(npix,(2*nbgroup-1),long);
  
  wpas=0;
  for(i=0;i<2*nbgroup-1;i++)
    {
      k=0;
      npix[i]=0;
      tcen=0.5*(wlim->ddata[i]+wlim->ddata[i+1]);
      old_wpas=wpas;
      wpas=0;
      for(j=0;j<nbsp;j++)
	{
	  if((tcen>=wmax[j])||(tcen<=wmin[j])) continue;
	  k++;
	  wpas+=(wmax[j]-wmin[j])/(float)(idmax[j]-idmin[j]);
	}
      //      printf("chunck %2ld  k: %2ld   swpas: %f   rap:%f\n",i,k,wpas,(k>0)?(wpas/(double)k):0);
      if(k)
	wpas/=(double)k;
      else
	if(!i)
	  {
	    OS_message(0,OS_ERR,"Bug at %s line %d !!!\n",__FILE__,__LINE__);
	    exit(1);
	  }
	else
	  wpas=old_wpas;
      wpas*=rebin_pixfactor;
      npix[i]=(long)((wlim->ddata[i+1]-wlim->ddata[i])/wpas+0.5);
      if(!npix[i])
	{
	  OS_message(0,OS_ERR,"Bug at %s line %d !!!\n",__FILE__,__LINE__);
	  exit(1);
	}
    }
  npix_tot=0;
  for(i=0;i<2*nbgroup-1;i++) npix_tot+=npix[i]-1;
  npix_tot++;

  /* Fill the final wave array */
  VCV_allocate(wave,npix_tot,VCTDOUBLE);
  k=0;
  for(i=0;i<2*nbgroup-1;i++)
    {
      wpas=(wlim->ddata[i+1]-wlim->ddata[i])/(double)(npix[i]);
      for(j=0;j<npix[i]-1;j++)
	wave->ddata[k++]=wlim->ddata[i]+((float)j+rebin_pixshift)*wpas;
    }
  wave->ddata[k++]=wlim->ddata[2*nbgroup-1]+rebin_pixshift*wpas;

  /* display some information */
  OS_message(0,OS_STD,"\n[SP_mergeSpectra] New wavelength range (%d chunks) \n",2*nbgroup-1);
  for(i=0;i<2*nbgroup-1;i++)
    OS_message(0,OS_STD,"[SP_mergeSpectra]  chunks #%-2d : %10.2f %10.2f %9.4f\n",
	       i+1,wlim->ddata[i],wlim->ddata[i+1],
	       (wlim->ddata[i+1]-wlim->ddata[i])/(double)npix[i]);


  //  for(i=0;i<nbsp;i++) printf("i:%2d   wmin:%f   wmax:%f\n",(int)i,wmin[i],wmax[i]);
  MM_free( npix);
  VC_free( wlim);
  MM_free(wmin);
  MM_free(wmax);


  /**********************************/
  /* if spline is set rebin spectra */
  /* with cubic spline              */
  /**********************************/
  if(spline)
    {
      OS_message(0,OS_STD,"[SP_mergeSpectra]  Doing spline rebinning : \n");
      MMV_malloc(spline_spectra,nbsp,SP_spectrum);
      if(!spline_spectra)
	{
	  OS_message(0,OS_ERR,"not enough memory !!\n");
	  MM_free( kmax);
	  MM_free( kmin);
	  MM_free(idmax);
	  MM_free(idmin);
	  MM_free(  tmp);
	  return NULL;
	}
      for(i=0;i<nbsp;i++)
	{
	  tmin=spectra[i]->wave[idmin[i]];
	  tmax=spectra[i]->wave[idmax[i]-1];
	  if(wave->ddata[0]>tmin)
	    imin=0;
	  else
	    while((imin<wave->size)&&(wave->ddata[imin]<tmin)) ++imin;

	  imax=imin;
	  while((imax<wave->size)&&(wave->ddata[imax]<tmax)) ++imax;

	  spline_spectra[i]=SP_allocate(imax-imin,3);
	  if(!spline_spectra[i])
	    {
	      OS_message(0,OS_ERR,"not enough memory !!\n");
	      MM_free(spline_spectra);
	      MM_free( kmax);
	      MM_free( kmin);
	      MM_free(idmax);
	      MM_free(idmin);
	      MM_free(  tmp);
	      return NULL;
	    }
	  /*Copy Wavelength array*/
	  for(k=0,j=imin;j<imax;j++,k++) 
	    spline_spectra[i]->wave[k]=wave->ddata[j];
	  /*********************************/
	  /* do the cubic spline estimation*/
	  /* for the flux                  */
	  NRspline(spectra[i]->wave,spectra[i]->flux,spectra[i]->npix,1e30,1e30,tmp);
	  NRasplint(spectra[i]->wave,spectra[i]->flux,tmp,spectra[i]->npix,
		    spline_spectra[i]->wave,spline_spectra[i]->flux,spline_spectra[i]->npix);
	  /* for the error                 */
	  /* (strange, maybe to fix later)*/
	  NRspline(spectra[i]->wave,spectra[i]->sigm,spectra[i]->npix,1e30,1e30,tmp);
	  NRasplint(spectra[i]->wave,spectra[i]->sigm,tmp,spectra[i]->npix,
		    spline_spectra[i]->wave,spline_spectra[i]->sigm,spline_spectra[i]->npix);
	  /* Rebin Done                    */
	  /* Put index instead of wavelength */
	  for(k=0,j=imin;j<imax;j++,k++) 
	    spline_spectra[i]->wave[k]=j;
	  
	  /*********************************/
	  /*Old spectra[i] useless, set it to spline_spectra[i] */
	  SP_free(spectra[i]);
	  spectra[i]=spline_spectra[i];
	}
    }

  /***************************************/
  /* Allocate memory for output spectrum */
  /***************************************/
  nbcol=3;
  if(keepnbsp) ++nbcol;
  if(error_type==_BOTH_ERRORS_) ++nbcol;

  rslt=_SP_allocate(wave->size,nbcol,file,line);

  if(!rslt) 
    {
      OS_message(0,OS_ERR,"not enough memory !!\n");
      MM_free( kmax);
      MM_free( kmin);
      MM_free(idmax);
      MM_free(idmin);
      MM_free(  tmp);
      return NULL;
    }
  
  VC_free(rslt->data[rslt->colW]);
  rslt->data[rslt->colW]=wave;

  /*******************/
  /* Do the addition */
  /*******************/
  VCV_allocate(eflux,nbsp,VCTDOUBLE);
  VCV_allocate(esigm,nbsp,VCTDOUBLE);
  VCV_allocate(emask,nbsp,VCTDOUBLE);

  OS_message(0,OS_STD,"\n[SP_addSpectra] Do the addition :   0%%");
  old_pourc=pourc=0;


  for(j=0;j<rslt->npix;j++)
    {
      pourc=(int)(j*100/(rslt->npix-1));
      if(pourc>old_pourc)
	{
	  old_pourc=pourc;
	  OS_message(0,OS_STD,"\b\b\b\b%3ld%%",pourc);
	  OS_flush();
	}
      
      if(spline)
	{
	  ngood=0;
	  for(i=0;i<nbsp;i++)
	    {
	      eflux->ddata[ngood]=esigm->ddata[ngood]=0.;
	      dw=spectra[i]->wave;
	      df=spectra[i]->flux;
	      ds=spectra[i]->sigm;
	      if(getSplineInter(df,ds,dw,spectra[i]->npix,j,kmin+i,
				eflux->ddata+ngood,esigm->ddata+ngood)) continue;
	      emask->ddata[ngood]=1.;
	      ++ngood;
	    }
	}
      else
	{
	  if(j==0)  tmin=0.5*(3*wave->ddata[0]-wave->ddata[1]);
	  else      tmin=0.5*(wave->ddata[j-1]+wave->ddata[j]);
	  
	  if(j==(rslt->npix-1)) tmax=0.5*(3*wave->ddata[rslt->npix-1]-wave->ddata[rslt->npix-2]);
	  else                  tmax=0.5*(  wave->ddata[j+1]+         wave->ddata[j]);
	  
	  ngood=0;
	  for(i=0;i<nbsp;i++)
	    {
	      eflux->ddata[ngood]=esigm->ddata[ngood]=0.;
	      dw=spectra[i]->wave+idmin[i];
	      df=spectra[i]->flux+idmin[i];
	      ds=spectra[i]->sigm+idmin[i];
	      
	      if(rebinLocal(df,ds,dw,idmax[i]-idmin[i],
			    tmin,tmax,kmin+i,kmax+i,
			    eflux->ddata+ngood,esigm->ddata+ngood)) continue;
	      emask->ddata[ngood]=1.;
	      ++ngood;
	    }
	}
/*       if((tmin>5156)&&(tmax<5158)) */
/* 	printf("tmin:%f  tmax:%f   ngood: %d\n",tmin,tmax,ngood); */

      mean_srianand=sigm_srianand=fsum=wsum=0.0;
      nbg_srianand=nbg_bastien=0;
      if(ngood)
	{
	  eflux->size=ngood;
	  esigm->size=ngood;
	  emask->size=ngood;
	  
	  if(tclip==_DATACLIPPING_)
	    dataClipping(eflux,esigm,&emask,clip);
	  else if (tclip!=_NOCLIPPING_)
	    sigmaClipping(eflux,&emask,clip,0);

	  for(i=0;i<ngood;i++)
	    {
	      if(emask->ddata[i])
		{
		  weight=esigm->ddata[i];
		  mean_srianand+=eflux->ddata[i];
		  sigm_srianand+=eflux->ddata[i]*eflux->ddata[i];
		  ++nbg_srianand;
		  if(weight<=0) continue;
		  ++nbg_bastien;
		  weight=1./(weight*weight);
		  wsum+=weight;
		  fsum+=eflux->ddata[i]*weight;
		}
	    }
	}
      if(nbg_srianand>0)
	{
/* 	  mean_srianand*=mean_srianand; */
/* 	  mean_srianand/=(double)(nbg_srianand); /\* n.<X>^2 *\/ */
/* 	  sigm_srianand-=mean_srianand;  // n<X^2>-n<X>^2 */
/* 	  sigm_srianand/=(double)(nbg_srianand);  //(n/n-1)(<X^2>-<X>^2) */
	  mean_srianand/=(double)(nbg_srianand);
	  mean_srianand*=mean_srianand;
	  
	  sigm_srianand/=(double)(nbg_srianand);
	  sigm_srianand=(sigm_srianand-mean_srianand)/(double)(nbg_srianand);
	}
      else
	sigm_srianand=0.;
      if(wsum>0)
	{
	  wsum=1./wsum;
	  fsum*=wsum;
	  switch (error_type)
	    {
	    case _BOTH_ERRORS_ :
	    case _BASTIEN_     : 
	      {
		wsum=sqrt(wsum);
		sigm_srianand=sqrt(sigm_srianand);
	      }
	      break;
	    case _SRIANAND_                 : wsum=sqrt(sigm_srianand);
	      break;
	    case _BASTIEN_AND_SRIANAND_     : wsum=sqrt(wsum+sigm_srianand);
	      break;
	    case _MAX_BASTIEN_AND_SRIANAND_ : wsum=sqrt((sigm_srianand>wsum)?sigm_srianand:wsum);
	      break;
	    case _MAX_BASTIEN_AND_MAX_SRIANAND_ : 
	      if(nbg_srianand>1)
		fct=(1+sqrt(2./(nbg_srianand-1)));
	      else
		fct=1.;
	      wsum=sqrt((sigm_srianand>wsum)?sigm_srianand*fct:wsum);
	      break;
	    default : wsum=sqrt((sigm_srianand>wsum)?sigm_srianand:wsum);
	    }
	}
      else
	fsum=wsum=0;

      if((!IS_NAN(fsum))&&(!IS_NAN(wsum)))
	{
	  rslt->flux[j]=fsum;
	  rslt->sigm[j]=wsum;
	}
      if((error_type==_BOTH_ERRORS_)&&(!IS_NAN(sigm_srianand)))
	rslt->cont[j]=sigm_srianand;
      if(keepnbsp)
	rslt->data[rslt->ndata-1]->ddata[j]=nbg_bastien;

      eflux->size=nbsp;
      esigm->size=nbsp;
      emask->size=nbsp;
    }
  OS_message(0,OS_STD,"\b\b\b\b%3ld%%\n",100);
  VC_free(emask);
  VC_free(esigm);
  VC_free(eflux);
  MM_free( kmin);
  MM_free( kmax);
  MM_free(idmin);
  MM_free(idmax);
  MM_free(tmp);
  return rslt;
}
int CHOLMOD(super_symbolic)
(
    /* ---- input ---- */
    cholmod_sparse *A,	/* matrix to analyze */
    cholmod_sparse *F,	/* F = A' or A(:,f)' */
    Int *Parent,	/* elimination tree */
    /* ---- in/out --- */
    cholmod_factor *L,	/* simplicial symbolic on input,
			 * supernodal symbolic on output */
    /* --------------- */
    cholmod_common *Common
)
{
    double zrelax0, zrelax1, zrelax2, xxsize ;
    Int *Wi, *Wj, *Super, *Snz, *Ap, *Ai, *Flag, *Head, *Ls, *Lpi, *Lpx, *Fnz,
	*Sparent, *Anz, *SuperMap, *Merged, *Nscol, *Zeros, *Fp, *Fj,
	*ColCount, *Lpi2, *Lsuper, *Iwork ;
    Int nsuper, d, n, j, k, s, mark, parent, p, pend, k1, k2, packed, nscol,
	nsrow, ndrow1, ndrow2, stype, ssize, xsize, sparent, plast, slast,
	csize, maxcsize, ss, nscol0, nscol1, ns, nfsuper, newzeros, totzeros,
	merge, snext, esize, maxesize, nrelax0, nrelax1, nrelax2 ;
    size_t w ;
    int ok = TRUE ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (FALSE) ;
    RETURN_IF_NULL (A, FALSE) ;
    RETURN_IF_NULL (L, FALSE) ;
    RETURN_IF_NULL (Parent, FALSE) ;
    RETURN_IF_XTYPE_INVALID (A, CHOLMOD_PATTERN, CHOLMOD_ZOMPLEX, FALSE) ;
    RETURN_IF_XTYPE_INVALID (L, CHOLMOD_PATTERN, CHOLMOD_PATTERN, FALSE) ;
    stype = A->stype ;
    if (stype < 0)
    {
	/* invalid symmetry; symmetric lower form not supported */
	ERROR (CHOLMOD_INVALID, "symmetric lower not supported") ;
	return (FALSE) ;
    }
    if (stype == 0)
    {
	/* F must be present in the unsymmetric case */
	RETURN_IF_NULL (F, FALSE) ;
	ASSERT (CHOLMOD(dump_sparse) (F, "Fsup", Common) >= 0) ;
    }
    if (L->is_super)
    {
	/* L must be a simplicial symbolic factor */
	ERROR (CHOLMOD_INVALID, "L must be symbolic on input") ;
	return (FALSE) ;
    }
    Common->status = CHOLMOD_OK ;

    ASSERT (CHOLMOD(dump_sparse) (A, "Asup", Common) >= 0) ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace */
    /* ---------------------------------------------------------------------- */

    n = A->nrow ;

    /* w = 5*n */
    w = CHOLMOD(mult_size_t) (n, 5, &ok) ;
    if (!ok)
    {
	ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
	return (FALSE) ;
    }

    CHOLMOD(allocate_work) (n, w, 0, Common) ;
    if (Common->status < CHOLMOD_OK)
    {
	/* out of memory */
	return (FALSE) ;
    }
    ASSERT (CHOLMOD(dump_work) (TRUE, TRUE, 0, Common)) ;

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    /* A is now either A or triu(A(p,p)) for the symmetric case.  It is either
     * A or A(p,f) for the unsymmetric case (both in column form).  It can be
     * either packed or unpacked, and either sorted or unsorted.  Entries in
     * the lower triangular part may be present if A is symmetric, but these
     * are ignored. */

    Ap = A->p ;
    Ai = A->i ;
    Anz = A->nz ;

    if (stype != 0)
    {
	/* F not accessed */
	Fp = NULL ;
	Fj = NULL ;
	Fnz = NULL ;
	packed = TRUE ;
    }
    else
    {
	/* F = A(:,f) or A(p,f) in packed row form, either sorted or unsorted */
	Fp = F->p ;
	Fj = F->i ;
	Fnz = F->nz ;
	packed = F->packed ;
    }

    ColCount = L->ColCount ;

    nrelax0 = Common->nrelax [0] ;
    nrelax1 = Common->nrelax [1] ;
    nrelax2 = Common->nrelax [2] ;

    zrelax0 = Common->zrelax [0] ;
    zrelax1 = Common->zrelax [1] ;
    zrelax2 = Common->zrelax [2] ;

    zrelax0 = IS_NAN (zrelax0) ? 0 : zrelax0 ;
    zrelax1 = IS_NAN (zrelax1) ? 0 : zrelax1 ;
    zrelax2 = IS_NAN (zrelax2) ? 0 : zrelax2 ;

    ASSERT (CHOLMOD(dump_parent) (Parent, n, "Parent", Common)) ;

    /* ---------------------------------------------------------------------- */
    /* get workspace */
    /* ---------------------------------------------------------------------- */

    /* Sparent, Snz, and Merged could be allocated later, of size nfsuper */

    Iwork = Common->Iwork ;
    Wi      = Iwork ;	    /* size n (i/l/l).  Lpi2 is i/l/l */
    Wj      = Iwork + n ;   /* size n (i/l/l).  Zeros is i/l/l */
    Sparent = Iwork + 2*((size_t) n) ; /* size nfsuper <= n [ */
    Snz     = Iwork + 3*((size_t) n) ; /* size nfsuper <= n [ */
    Merged  = Iwork + 4*((size_t) n) ; /* size nfsuper <= n [ */

    Flag = Common->Flag ;   /* size n */
    Head = Common->Head ;   /* size n+1 */

    /* ---------------------------------------------------------------------- */
    /* find the fundamental supernodes */
    /* ---------------------------------------------------------------------- */

    /* count the number of children of each node, using Wi [ */
    for (j = 0 ; j < n ; j++)
    {
	Wi [j] = 0 ;
    }
    for (j = 0 ; j < n ; j++)
    {
	parent = Parent [j] ;
	if (parent != EMPTY)
	{
	    Wi [parent]++ ;
	}
    }

    Super = Head ;  /* use Head [0..nfsuper] as workspace for Super list ( */

    /* column 0 always starts a new supernode */
    nfsuper = (n == 0) ? 0 : 1 ;	/* number of fundamental supernodes */
    Super [0] = 0 ;

    for (j = 1 ; j < n ; j++)
    {
	/* check if j starts new supernode, or in the same supernode as j-1 */
	if (Parent [j-1] != j	    /* parent of j-1 is not j */
	    || (ColCount [j-1] != ColCount [j] + 1) /* j-1 not subset of j*/
	    || Wi [j] > 1)	    /* j has more than one child */
	{
	    /* j is the leading node of a supernode */
	    Super [nfsuper++] = j ;
	}
    }
    Super [nfsuper] = n ;

    /* contents of Wi no longer needed for child count ] */

    Nscol = Wi ; /* use Wi as size-nfsuper workspace for Nscol [ */

    /* ---------------------------------------------------------------------- */
    /* find the mapping of fundamental nodes to supernodes */
    /* ---------------------------------------------------------------------- */

    SuperMap = Wj ;	/* use Wj as workspace for SuperMap [ */

    /* SuperMap [k] = s if column k is contained in supernode s */
    for (s = 0 ; s < nfsuper ; s++)
    {
	for (k = Super [s] ; k < Super [s+1] ; k++)
	{
	    SuperMap [k] = s ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* construct the fundamental supernodal etree */
    /* ---------------------------------------------------------------------- */

    for (s = 0 ; s < nfsuper ; s++)
    {
	j = Super [s+1] - 1 ;	/* last node in supernode s */
	parent = Parent [j] ;	/* parent of last node */
	Sparent [s] = (parent == EMPTY) ? EMPTY : SuperMap [parent] ;
	PRINT1 (("Sparent ["ID"] = "ID"\n", s, Sparent [s])) ;
    }

    /* contents of Wj no longer needed as workspace for SuperMap ]
     * SuperMap will be recomputed below, for the relaxed supernodes. */

    Zeros = Wj ;   /* use Wj for Zeros, workspace of size nfsuper [ */

    /* ---------------------------------------------------------------------- */
    /* relaxed amalgamation */
    /* ---------------------------------------------------------------------- */

    for (s = 0 ; s < nfsuper ; s++)
    {
	Merged [s] = EMPTY ;			/* s not merged into another */
	Nscol [s] = Super [s+1] - Super [s] ;	/* # of columns in s */
	Zeros [s] = 0 ;				/* # of zero entries in s */
	ASSERT (s <= Super [s]) ;
	Snz [s] = ColCount [Super [s]] ;  /* # of entries in leading col of s */
	PRINT2 (("lnz ["ID"] "ID"\n", s, Snz [s])) ;
    }

    for (s = nfsuper-2 ; s >= 0 ; s--)
    {
	/* should supernodes s and s+1 merge into a new node s? */
	PRINT1 (("\n========= Check relax of s "ID" and s+1 "ID"\n", s, s+1)) ;

	ss = Sparent [s] ;
	if (ss == EMPTY)
	{
	    PRINT1 (("s "ID" is a root, no merge with s+1 = "ID"\n", s, s+1)) ;
	    continue ;
	}

	/* find the current parent of s (perform path compression as needed) */
	for (ss = Sparent [s] ; Merged [ss] != EMPTY ; ss = Merged [ss]) ;
	sparent = ss ;
	PRINT2 (("Current sparent of s "ID" is "ID"\n", s, sparent)) ;

	/* ss is the current parent of s */
	for (ss = Sparent [s] ; Merged [ss] != EMPTY ; ss = snext)
	{
	    snext = Merged [ss] ;
	    PRINT2 (("ss "ID" is dead, merged into snext "ID"\n", ss, snext)) ;
	    Merged [ss] = sparent ;
	}

	/* if s+1 is not the current parent of s, do not merge */
	if (sparent != s+1)
	{
	    continue ;
	}

	nscol0 = Nscol [s] ;	/* # of columns in s */
	nscol1 = Nscol [s+1] ;	/* # of columns in s+1 */
	ns = nscol0 + nscol1 ;
	PRINT2 (("ns "ID" nscol0 "ID" nscol1 "ID"\n", ns, nscol0, nscol1)) ;

	totzeros = Zeros [s+1] ;	/* current # of zeros in s+1 */

	/* determine if supernodes s and s+1 should merge */
	if (ns <= nrelax0)
	{
	    PRINT2 (("ns is tiny ("ID"), so go ahead and merge\n", ns)) ;
	    merge = TRUE ;
	}
	else
	{
	    /* use double to avoid integer overflow */
	    double lnz0 = Snz [s] ;	/* # entries in leading column of s */
	    double lnz1 = Snz [s+1] ;	/* # entries in leading column of s+1 */
	    double xnewzeros = nscol0 * (lnz1 + nscol0 - lnz0) ;

	    /* use Int for the final update of Zeros [s] below */
	    newzeros = nscol0 * (Snz [s+1] + nscol0 - Snz [s]) ;
	    ASSERT (newzeros == xnewzeros) ;

	    PRINT2 (("lnz0 %g lnz1 %g xnewzeros %g\n", lnz0, lnz1, xnewzeros)) ;
	    if (xnewzeros == 0)
	    {
		/* no new zeros, so go ahead and merge */
		PRINT2 (("no new fillin, so go ahead and merge\n")) ;
		merge = TRUE ;
	    }
	    else
	    {
		/* # of zeros if merged */
		double xtotzeros = ((double) totzeros) + xnewzeros ;

		/* xtotsize: total size of merged supernode, if merged: */
		double xns = (double) ns ;
		double xtotsize  = (xns * (xns+1) / 2) + xns * (lnz1 - nscol1) ;
		double z = xtotzeros / xtotsize ;

		Int totsize ;
		totsize  = (ns * (ns+1) / 2) + ns * (Snz [s+1] - nscol1) ;

		PRINT2 (("oldzeros "ID" newzeros "ID" xtotsize %g z %g\n",
			    Zeros [s+1], newzeros, xtotsize, z)) ;

		/* use Int for the final update of Zeros [s] below */
		totzeros += newzeros ;

		/* do not merge if supernode would become too big
		 * (Int overflow).  Continue computing; not (yet) an error. */
		/* fl.pt. compare, but no NaN's can occur here */
		merge = ((ns <= nrelax1 && z < zrelax0) ||
			 (ns <= nrelax2 && z < zrelax1) ||
					  (z < zrelax2)) &&
			(xtotsize < Int_max / sizeof (double)) ;

	    }
	}

	if (merge)
	{
	    PRINT1 (("Merge node s ("ID") and s+1 ("ID")\n", s, s+1)) ;
	    Zeros [s] = totzeros ;
	    Merged [s+1] = s ;
	    Snz [s] = nscol0 + Snz [s+1] ;
	    Nscol [s] += Nscol [s+1] ;
	}
    }

    /* contents of Wj no longer needed for Zeros ] */
    /* contents of Wi no longer needed for Nscol ] */
    /* contents of Sparent no longer needed (recomputed below) */

    /* ---------------------------------------------------------------------- */
    /* construct the relaxed supernode list */
    /* ---------------------------------------------------------------------- */

    nsuper = 0 ;
    for (s = 0 ; s < nfsuper ; s++)
    {
	if (Merged [s] == EMPTY)
	{
	    PRINT1 (("live supernode: "ID" snz "ID"\n", s, Snz [s])) ;
	    Super [nsuper] = Super [s] ;
	    Snz [nsuper] = Snz [s] ;
	    nsuper++ ;
	}
    }
    Super [nsuper] = n ;
    PRINT1 (("Fundamental supernodes: "ID"  relaxed "ID"\n", nfsuper, nsuper)) ;

    /* Merged no longer needed ] */

    /* ---------------------------------------------------------------------- */
    /* find the mapping of relaxed nodes to supernodes */
    /* ---------------------------------------------------------------------- */

    /* use Wj as workspace for SuperMap { */

    /* SuperMap [k] = s if column k is contained in supernode s */
    for (s = 0 ; s < nsuper ; s++)
    {
	for (k = Super [s] ; k < Super [s+1] ; k++)
	{
	    SuperMap [k] = s ;
	}
    }

    /* ---------------------------------------------------------------------- */
    /* construct the relaxed supernodal etree */
    /* ---------------------------------------------------------------------- */

    for (s = 0 ; s < nsuper ; s++)
    {
	j = Super [s+1] - 1 ;	/* last node in supernode s */
	parent = Parent [j] ;	/* parent of last node */
	Sparent [s] = (parent == EMPTY) ? EMPTY : SuperMap [parent] ;
	PRINT1 (("new Sparent ["ID"] = "ID"\n", s, Sparent [s])) ;
    }

    /* ---------------------------------------------------------------------- */
    /* determine the size of L->s and L->x */
    /* ---------------------------------------------------------------------- */

    ssize = 0 ;
    xsize = 0 ;
    xxsize = 0 ;
    for (s = 0 ; s < nsuper ; s++)
    {
	nscol = Super [s+1] - Super [s] ;
	nsrow = Snz [s] ;
	ASSERT (nscol > 0) ;
	ssize += nsrow ;
	xsize += nscol * nsrow ;
	/* also compute xsize in double to guard against Int overflow */
	xxsize += ((double) nscol) * ((double) nsrow) ;
	if (xxsize > Int_max)
	{
	    /* Int overflow, clear workspace and return */
	    ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
	    FREE_WORKSPACE ;
	    return (FALSE) ;
	}
	ASSERT (ssize > 0 && xsize > 0) ;
    }
    xsize = MAX (1, xsize) ;
    ssize = MAX (1, ssize) ;
    PRINT1 (("ix sizes: "ID" "ID" nsuper "ID"\n", ssize, xsize, nsuper)) ;

    /* ---------------------------------------------------------------------- */
    /* allocate L (all except real part L->x) */
    /* ---------------------------------------------------------------------- */

    L->ssize = ssize ;
    L->xsize = xsize ;
    L->nsuper = nsuper ;

    CHOLMOD(change_factor) (CHOLMOD_PATTERN, TRUE, TRUE, TRUE, TRUE, L, Common);

    if (Common->status < CHOLMOD_OK)
    {
	/* out of memory; L is still a valid simplicial symbolic factor */
	FREE_WORKSPACE ;
	return (FALSE) ;
    }

    DEBUG (CHOLMOD(dump_factor) (L, "L to symbolic super", Common)) ;
    ASSERT (L->is_ll && L->xtype == CHOLMOD_PATTERN && L->is_super) ;

    Lpi = L->pi ;
    Lpx = L->px ;
    Ls = L->s ;
    Ls [0] = 0 ;    /* flag for cholmod_check_factor; supernodes are defined */
    Lsuper = L->super ;

    /* copy the list of relaxed supernodes into the final list in L */
    for (s = 0 ; s <= nsuper ; s++)
    {
	Lsuper [s] = Super [s] ;
    }

    /* Head no longer needed as workspace for fundamental Super list ) */

    Super = Lsuper ;	    /* Super is now the list of relaxed supernodes */

    /* ---------------------------------------------------------------------- */
    /* construct column pointers of relaxed supernodal pattern (L->pi) */
    /* ---------------------------------------------------------------------- */

    p = 0 ;
    for (s = 0 ; s < nsuper ; s++)
    {
	Lpi [s] = p ;
	p += Snz [s] ;
	PRINT1 (("Snz ["ID"] = "ID", Super ["ID"] = "ID"\n",
		    s, Snz [s], s, Super[s])) ;
    }
    Lpi [nsuper] = p ;
    ASSERT ((Int) (L->ssize) == MAX (1,p)) ;

    /* ---------------------------------------------------------------------- */
    /* construct pointers for supernodal values (L->px) */
    /* ---------------------------------------------------------------------- */

    p = 0 ;
    for (s = 0 ; s < nsuper ; s++)
    {
	nscol = Super [s+1] - Super [s] ;   /* number of columns in s */
	nsrow = Snz [s] ;		    /* # of rows, incl triangular part*/
	Lpx [s] = p ;			    /* pointer to numerical part of s */
	p += nscol * nsrow ;
    }
    Lpx [s] = p ;
    ASSERT ((Int) (L->xsize) == MAX (1,p)) ;

    /* Snz no longer needed ] */

    /* ---------------------------------------------------------------------- */
    /* symbolic analysis to construct the relaxed supernodal pattern (L->s) */
    /* ---------------------------------------------------------------------- */

    Lpi2 = Wi ;	    /* copy Lpi into Lpi2, using Wi as workspace for Lpi2 [ */
    for (s = 0 ; s < nsuper ; s++)
    {
	Lpi2 [s] = Lpi [s] ;
    }

    for (s = 0 ; s < nsuper ; s++)
    {
	/* sth supernode is in columns k1 to k2-1.
	 * compute nonzero pattern of L (k1:k2-1,:). */

	/* place rows k1 to k2-1 in leading column of supernode s */
	k1 = Super [s] ;
	k2 = Super [s+1] ;
	PRINT1 (("=========>>> Supernode "ID" k1 "ID" k2-1 "ID"\n",
		    s, k1, k2-1)) ;
	for (k = k1 ; k < k2 ; k++)
	{
	    Ls [Lpi2 [s]++] = k ;
	}

	/* compute nonzero pattern each row k1 to k2-1 */
	for (k = k1 ; k < k2 ; k++)
	{
	    /* compute row k of L.  In the symmetric case, the pattern of L(k,:)
	     * is the set of nodes reachable in the supernodal etree from any
	     * row i in the nonzero pattern of A(0:k,k).  In the unsymmetric
	     * case, the pattern of the kth column of A*A' is the set union
	     * of all columns A(0:k,j) for each nonzero F(j,k). */

	    /* clear the Flag array and mark the current supernode */
	    mark = CHOLMOD(clear_flag) (Common) ;
	    Flag [s] = mark ;
	    ASSERT (s == SuperMap [k]) ;

	    /* traverse the row subtree for each nonzero in A or AA' */
	    if (stype != 0)
	    {
		subtree (k, k, Ap, Ai, Anz, SuperMap, Sparent, mark,
			Flag, Ls, Lpi2) ;
	    }
	    else
	    {
		/* for each nonzero in F (k,:) do */
		p = Fp [k] ;
		pend = (packed) ? (Fp [k+1]) : (p + Fnz [k]) ;
		for ( ; p < pend ; p++)
		{
		    subtree (Fj [p], k, Ap, Ai, Anz, SuperMap, Sparent, mark,
			    Flag, Ls, Lpi2) ;
		}
	    }
	}
    }

#ifndef NDEBUG
    for (s = 0 ; s < nsuper ; s++)
    {
	PRINT1 (("Lpi2[s] "ID" Lpi[s+1] "ID"\n", Lpi2 [s], Lpi [s+1])) ;
	ASSERT (Lpi2 [s] == Lpi [s+1]) ;
	CHOLMOD(dump_super) (s, Super, Lpi, Ls, NULL, NULL, 0, Common) ;
    }
#endif

    /* contents of Wi no longer needed for Lpi2 ] */
    /* Sparent no longer needed ] */

    /* ---------------------------------------------------------------------- */
    /* determine the largest update matrix (L->maxcsize) */
    /* ---------------------------------------------------------------------- */

    /* maxcsize could be determined before L->s is allocated and defined, which
     * would mean that all memory requirements for both the symbolic and numeric
     * factorizations could be computed using O(nnz(A)+O(n)) space.  However, it
     * would require a lot of extra work.  The analysis phase, above, would need
     * to be duplicated, but with Ls not kept; instead, the algorithm would keep
     * track of the current s and slast for each supernode d, and update them
     * when a new row index appears in supernode d.  An alternative would be to
     * do this computation only if the allocation of L->s failed, in which case
     * the following code would be skipped.
     *
     * The csize for a supernode is the size of its largest contribution to
     * a subsequent ancestor supernode.  For example, suppose the rows of #'s
     * in the figure below correspond to the columns of a subsequent supernode,
     * and the dots are the entries in that ancestore.
     *
     *	    c
     *	    c c
     *	    c c c
     *	    x x x
     *	    x x x
     *	    # # #   .
     *	    # # #   . .
     *	    * * *   . .
     *	    * * *   . .
     *	    * * *   . .
     *	            . .
     *
     * Then for this update, the csize is 3-by-2, or 6, because there are 3
     * rows of *'s which is the number of rows in the update, and there are
     * 2 rows of #'s, which is the number columns in the update.  The csize
     * of a supernode is the largest such contribution for any ancestor
     * supernode.  maxcsize, for the whole matrix, has a rough upper bound of
     * the maximum size of any supernode.  This bound is loose, because the
     * the contribution must be less than the size of the ancestor supernodal
     * that it's updating.  maxcsize of a completely dense matrix, with one
     * supernode, is zero.
     *
     * maxesize is the column dimension for the workspace E needed for the
     * solve.  E is of size nrhs-by-maxesize, where the nrhs is the number of
     * columns in the right-hand-side.  The maxesize is the largest esize of
     * any supernode.  The esize of a supernode is the number of row indices
     * it contains, excluding the column indices of the supernode itself.
     * For the following example, esize is 4:
     *
     *	    c
     *	    c c
     *	    c c c
     *	    x x x
     *	    x x x
     *	    x x x
     *	    x x x
     *
     * maxesize can be no bigger than n.
     */

    maxcsize = 1 ;
    maxesize = 1 ;

    /* do not need to guard csize against Int overflow if xsize is OK */

    for (d = 0 ; d < nsuper ; d++)
    {
	nscol = Super [d+1] - Super [d] ;
	p = Lpi [d] + nscol ;
	plast = p ;
	pend = Lpi [d+1] ;
	esize = pend - p ;
	maxesize = MAX (maxesize, esize) ;
	slast = (p == pend) ? (EMPTY) : (SuperMap [Ls [p]]) ;
	for ( ; p <= pend ; p++)
	{
	    s = (p == pend) ? (EMPTY) : (SuperMap [Ls [p]]) ;
	    if (s != slast)
	    {
		/* row i is the start of a new supernode */
		ndrow1 = p - plast ;
		ndrow2 = pend - plast ;
		csize = ndrow2 * ndrow1 ;
		PRINT1 (("Supernode "ID" ancestor "ID" C: "ID"-by-"ID"  csize "
			""ID"\n", d, slast, ndrow1, ndrow2, csize)) ;
		maxcsize = MAX (maxcsize, csize) ;
		plast = p ;
		slast = s ;
	    }
	}
    }
    PRINT1 (("max csize "ID"\n", maxcsize)) ;

    /* Wj no longer needed for SuperMap } */

    L->maxcsize = maxcsize ;
    L->maxesize = maxesize ;
    L->is_super = TRUE ;
    ASSERT (L->xtype == CHOLMOD_PATTERN && L->is_ll) ;

    /* ---------------------------------------------------------------------- */
    /* supernodal symbolic factorization is complete */
    /* ---------------------------------------------------------------------- */

    FREE_WORKSPACE ;
    return (TRUE) ;
}
Exemple #19
0
/* Drop entries from a sparse matrix A, in place.
 *
 * When A has numerical values, an entry is kept only if fabs (aij) > tol or
 * aij is NaN; for a symmetric A (stype != 0) entries in the ignored triangle
 * are removed as well.  The surviving entries are compacted towards the front
 * of A->i and A->x, A->p is rewritten to match, and the arrays are then
 * shrunk with cholmod_reallocate_sparse.  When A is pattern-only, tol is
 * ignored and only the ignored triangle of a symmetric A is removed, via
 * cholmod_band_inplace.
 *
 * If A is unpacked, the per-column counts in A->nz are refreshed as each
 * column is compacted; otherwise they would still describe the columns as
 * they were before any entry was removed.
 *
 * Returns FALSE if Common or A is NULL or the xtype of A is not pattern or
 * real (through the RETURN_IF_* macros), and TRUE otherwise.
 */
int CHOLMOD(drop)
(
    /* ---- input ---- */
    double tol,		/* keep entries with absolute value > tol */
    /* ---- in/out --- */
    cholmod_sparse *A,	/* matrix to drop entries from */
    /* --------------- */
    cholmod_common *Common
)
{
    double aij ;
    double *Ax ;
    Int *Ap, *Ai, *Anz ;
    Int packed, i, j, nrow, ncol, p, pend, nz, values ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (FALSE) ;
    RETURN_IF_NULL (A, FALSE) ;
    RETURN_IF_XTYPE_INVALID (A, CHOLMOD_PATTERN, CHOLMOD_REAL, FALSE) ;
    Common->status = CHOLMOD_OK ;
    ASSERT (CHOLMOD(dump_sparse) (A, "A predrop", Common) >= 0) ;

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    Ap = A->p ;
    Ai = A->i ;
    Ax = A->x ;
    Anz = A->nz ;
    packed = A->packed ;
    ncol = A->ncol ;
    nrow = A->nrow ;
    values = (A->xtype != CHOLMOD_PATTERN) ;
    nz = 0 ;	    /* number of entries kept so far; also the write cursor */

    if (values)
    {

	/* ------------------------------------------------------------------ */
	/* drop small numerical entries from A, and entries in ignored part */
	/* ------------------------------------------------------------------ */

	/* In each loop below, the old extent of column j (p to pend-1) is
	 * read before Ap [j] and Anz [j] are overwritten.  The write cursor
	 * nz never passes the read cursor p, so compaction is safe in place. */

	if (A->stype > 0)
	{

	    /* -------------------------------------------------------------- */
	    /* A is symmetric, with just upper triangular part stored */
	    /* -------------------------------------------------------------- */

	    for (j = 0 ; j < ncol ; j++)
	    {
		p = Ap [j] ;
		pend = (packed) ? (Ap [j+1]) : (p + Anz [j]) ;
		Ap [j] = nz ;
		for ( ; p < pend ; p++)
		{
		    i = Ai [p] ;
		    aij = Ax [p] ;
		    if (i <= j && (fabs (aij) > tol || IS_NAN (aij)))
		    {
			Ai [nz] = i ;
			Ax [nz] = aij ;
			nz++ ;
		    }
		}
		if (!packed)
		{
		    /* keep the column count in step with the compacted column */
		    Anz [j] = nz - Ap [j] ;
		}
	    }

	}
	else if (A->stype < 0)
	{

	    /* -------------------------------------------------------------- */
	    /* A is symmetric, with just lower triangular part stored */
	    /* -------------------------------------------------------------- */

	    for (j = 0 ; j < ncol ; j++)
	    {
		p = Ap [j] ;
		pend = (packed) ? (Ap [j+1]) : (p + Anz [j]) ;
		Ap [j] = nz ;
		for ( ; p < pend ; p++)
		{
		    i = Ai [p] ;
		    aij = Ax [p] ;
		    if (i >= j && (fabs (aij) > tol || IS_NAN (aij)))
		    {
			Ai [nz] = i ;
			Ax [nz] = aij ;
			nz++ ;
		    }
		}
		if (!packed)
		{
		    /* keep the column count in step with the compacted column */
		    Anz [j] = nz - Ap [j] ;
		}
	    }
	}
	else
	{

	    /* -------------------------------------------------------------- */
	    /* both parts of A present, just drop small entries */
	    /* -------------------------------------------------------------- */

	    for (j = 0 ; j < ncol ; j++)
	    {
		p = Ap [j] ;
		pend = (packed) ? (Ap [j+1]) : (p + Anz [j]) ;
		Ap [j] = nz ;
		for ( ; p < pend ; p++)
		{
		    i = Ai [p] ;
		    aij = Ax [p] ;
		    if (fabs (aij) > tol || IS_NAN (aij))
		    {
			Ai [nz] = i ;
			Ax [nz] = aij ;
			nz++ ;
		    }
		}
		if (!packed)
		{
		    /* keep the column count in step with the compacted column */
		    Anz [j] = nz - Ap [j] ;
		}
	    }
	}
	Ap [ncol] = nz ;

	/* reduce A->i and A->x in size */
	ASSERT (MAX (1,nz) <= A->nzmax) ;
	CHOLMOD(reallocate_sparse) (nz, A, Common) ;
	ASSERT (Common->status >= CHOLMOD_OK) ;

    }
    else
    {

	/* ------------------------------------------------------------------ */
	/* consider only the pattern of A */
	/* ------------------------------------------------------------------ */

	/* Note that cholmod_band_inplace calls cholmod_reallocate_sparse */
	if (A->stype > 0)
	{
	    /* keep the diagonal and everything above it */
	    CHOLMOD(band_inplace) (0, ncol, 0, A, Common) ;
	}
	else if (A->stype < 0)
	{
	    /* keep the diagonal and everything below it */
	    CHOLMOD(band_inplace) (-nrow, 0, 0, A, Common) ;
	}
	/* an unsymmetric pattern-only matrix has nothing to drop */
    }

    ASSERT (CHOLMOD(dump_sparse) (A, "A dropped", Common) >= 0) ;
    return (TRUE) ;
}
Exemple #20
0
/* Compute a norm of the dense matrix X.
 *
 * norm selects the norm: 0 for the infinity-norm, 1 for the 1-norm, 2 for the
 * 2-norm.  The 2-norm is accepted only when X has at most one column; any
 * other combination, or a norm outside 0..2, is reported as CHOLMOD_INVALID
 * and EMPTY is returned.  EMPTY is also the value handed to the RETURN_IF_*
 * input checks.
 *
 * NOTE(review): this listing stops partway through the function (right after
 * the workspace-based infinity-norm branch), so the remaining branches and
 * the return statement are not visible here.
 */
double CHOLMOD(norm_dense)
(
    /* ---- input ---- */
    cholmod_dense *X,	/* matrix to compute the norm of */
    int norm,		/* type of norm: 0: inf. norm, 1: 1-norm, 2: 2-norm */
    /* --------------- */
    cholmod_common *Common
)
{
    double xnorm, s, x, z ;
    double *Xx, *Xz, *W ;
    Int nrow, ncol, d, i, j, use_workspace, xtype ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    RETURN_IF_NULL_COMMON (EMPTY) ;
    RETURN_IF_NULL (X, EMPTY) ;
    RETURN_IF_XTYPE_INVALID (X, CHOLMOD_REAL, CHOLMOD_ZOMPLEX, EMPTY) ;
    Common->status = CHOLMOD_OK ;
    ncol = X->ncol ;
    /* the 2-norm is only accepted for a matrix with at most one column */
    if (norm < 0 || norm > 2 || (norm == 2 && ncol > 1))
    {
	ERROR (CHOLMOD_INVALID, "invalid norm") ;
	return (EMPTY) ;
    }

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    nrow = X->nrow ;
    d = X->d ;		/* used below as the offset between columns: i+j*d */
    Xx = X->x ;
    Xz = X->z ;
    xtype = X->xtype ;

    /* ---------------------------------------------------------------------- */
    /* allocate workspace, if needed */
    /* ---------------------------------------------------------------------- */

    /* the workspace is only requested for the infinity-norm of a matrix with
     * more than 4 columns */
    W = NULL ;
    use_workspace = (norm == 0 && ncol > 4) ;
    if (use_workspace)
    {
	CHOLMOD(allocate_work) (0, 0, nrow, Common) ;
	W = Common->Xwork ;
	if (Common->status < CHOLMOD_OK)
	{
	    /* oops, no workspace */
	    /* the allocation failure is not fatal: the branches that do not
	     * need W are used instead */
	    use_workspace = FALSE ;
	}
    }


    /* ---------------------------------------------------------------------- */
    /* compute the norm */
    /* ---------------------------------------------------------------------- */

    xnorm = 0 ;

    if (use_workspace)
    {

	/* ------------------------------------------------------------------ */
	/* infinity-norm = max row sum, using stride-1 access of X */
	/* ------------------------------------------------------------------ */

	/* W is expected to be all zero on entry (checked in debug builds) */
	DEBUG (for (i = 0 ; i < nrow ; i++) ASSERT (W [i] == 0)) ;

	/* this is faster than stride-d, but requires O(nrow) workspace */
	/* accumulate, one column at a time, abs_value of each entry into the
	 * sum for its row */
	for (j = 0 ; j < ncol ; j++)
	{
	    for (i = 0 ; i < nrow ; i++)
	    {
		W [i] += abs_value (xtype, Xx, Xz, i+j*d, Common) ;
	    }
	}
	/* take the largest row sum.  A NaN row sum replaces xnorm, and once
	 * xnorm is NaN it is never replaced again, so a NaN propagates to the
	 * result.  W is cleared again as it is consumed. */
	for (i = 0 ; i < nrow ; i++)
	{
	    s = W [i] ;
	    if ((IS_NAN (s) || s > xnorm) && !IS_NAN (xnorm))
	    {
		xnorm = s ;
	    }
	    W [i] = 0 ;
	}

    }
    else if (norm == 0)
Exemple #21
0
// Reduces `in` along dimension `dim` with the binary operation `op`, writing
// one value per (thread column, block) into `out`.
//
// Launch layout as read from the indexing below:
//   - blockIdx.x packs (x-block, index along dimension 2); `blocks_x` is the
//     number of x-blocks.
//   - blockIdx.y and blockIdx.z are linearised and pack (y-block, index along
//     dimension 3); `blocks_y` is the number of y-blocks.
//   - threadIdx.y selects which of the blockDim.y input elements along `dim`
//     a thread starts from; the DIMY partial results of a thread column are
//     then combined through shared memory.
// `offset_dim * blockDim.y` is the step, in elements along `dim`, between two
// successive reads of one thread. When `change_nan` is set, NaN inputs are
// replaced by `nanval` before being reduced.
//
// NOTE(review): Ti, To, op, dim, DIMY and THREADS_X are declared outside this
// excerpt (presumably template parameters / compile-time constants). The
// shared-memory tree below only covers DIMY in {1, 2, 4, 8} - confirm against
// the launcher. Dimension 0 is addressed without a stride, i.e. it is treated
// as contiguous.
__global__ static void reduce_dim_kernel(Param<To> out, CParam<Ti> in,
                                         uint blocks_x, uint blocks_y,
                                         uint offset_dim, bool change_nan,
                                         To nanval) {
    // One partial result per thread of the block.
    __shared__ To partials[THREADS_X * DIMY];

    const uint tx   = threadIdx.x;
    const uint ty   = threadIdx.y;
    const uint slot = ty * THREADS_X + tx;

    // Unpack blockIdx.x into (index along dim 2, x-block).
    const uint z_idx   = blockIdx.x / blocks_x;
    const uint x_block = blockIdx.x - (blocks_x)*z_idx;
    const uint x_idx   = x_block * blockDim.x + tx;

    // Unpack the linearised (blockIdx.y, blockIdx.z) into
    // (index along dim 3, y-block).
    const uint yz_linear = blockIdx.y + blockIdx.z * gridDim.y;
    const uint w_idx     = yz_linear / blocks_y;
    const uint y_idx     = yz_linear - (blocks_y)*w_idx;

    // Output coordinates first; coord[dim] is rewritten to the input
    // coordinate once the output pointer has been formed.
    uint coord[4] = {x_idx, y_idx, z_idx, w_idx};

    To *const dst = out.ptr + coord[3] * out.strides[3] +
                    coord[2] * out.strides[2] + coord[1] * out.strides[1] +
                    coord[0];

    // Each block owns one output element along `dim` but blockDim.y input
    // elements, so the input coordinate is scaled and offset by threadIdx.y.
    const uint out_pos_dim = coord[dim];
    coord[dim]             = coord[dim] * blockDim.y + ty;

    const Ti *src = in.ptr + coord[3] * in.strides[3] +
                    coord[2] * in.strides[2] + coord[1] * in.strides[1] +
                    coord[0];

    const uint first_in_dim = coord[dim];
    const uint stride_dim   = in.strides[dim];
    const uint hop          = offset_dim * blockDim.y;

    const bool in_bounds = (coord[0] < in.dims[0]) &&
                           (coord[1] < in.dims[1]) &&
                           (coord[2] < in.dims[2]) && (coord[3] < in.dims[3]);

    Transform<Ti, To, op> transform;
    Binary<To, op> reduce;
    To acc = Binary<To, op>::init();

    // Out-of-range threads skip the reads but still take part in the
    // barriers below, contributing the identity value.
    if (in_bounds) {
        for (int pos = first_in_dim; pos < in.dims[dim]; pos += hop) {
            To v = transform(*src);
            if (change_nan && IS_NAN(v)) v = nanval;
            acc = reduce(v, acc);
            src += hop * stride_dim;
        }
    }

    partials[slot] = acc;
    To *const mine = partials + slot;
    __syncthreads();

    // Tree reduction across threadIdx.y. The conditions depend only on the
    // compile-time DIMY, so every thread of the block reaches each barrier.
    if (DIMY == 8) {
        if (ty < 4) *mine = reduce(*mine, mine[THREADS_X * 4]);
        __syncthreads();
    }

    if (DIMY >= 4) {
        if (ty < 2) *mine = reduce(*mine, mine[THREADS_X * 2]);
        __syncthreads();
    }

    if (DIMY >= 2) {
        if (ty < 1) *mine = reduce(*mine, mine[THREADS_X * 1]);
        __syncthreads();
    }

    // Row 0 of the block holds the final value for its column.
    const bool writes_output =
        (ty == 0) && in_bounds && (out_pos_dim < out.dims[dim]);
    if (writes_output) { *dst = *mine; }
}
Exemple #22
0
bool CheckTime (bool IsStartup, CFAbsoluteTime & NextTime, CFAbsoluteTime &DeltaTime, CFAbsoluteTime & LastRunAlign)
{
	LastRunAlign = NAN;
	CFAbsoluteTime LastExecTime = NAN;
	enumSLFreq CheckFreq = kSLFreqDisabled;
	
	SyslistPrefs::Sync();

	SyslistPrefs::getFrequency(CheckFreq);
	SyslistPrefs::getAlignTime(LastRunAlign);
	SyslistPrefs::getLastTime(LastExecTime);
	
	CFAbsoluteTime DeltaCompare = NAN;
	
	switch (CheckFreq) {
	
	case kSLFreqDisabled:
		DeltaTime = 0;
		return false;
		
	case kSLFreqStartup:
		DeltaTime = 0;
		return IsStartup;
		
	case kSLFreqDay:
		DeltaCompare = OneDay;
		break;

	case kSLFreqWeek:
		DeltaCompare = OneWeek;
		break;

	case kSLFreqMonth:
		DeltaCompare = OneMonth;
		break;

	case kSLFreqDebug:
		DeltaCompare = DebugTime;
		break;

	default:
		DeltaTime = NAN;
		return FALSE;
	}
		
	CFAbsoluteTime CurrTime = CFAbsoluteTimeGetCurrent();

	if (!IS_NAN(LastRunAlign) )
		NextTime = LastRunAlign + DeltaCompare;
	else if (!IS_NAN(LastExecTime))
		NextTime = LastExecTime + DeltaCompare;
	else
		NextTime = DeltaCompare;

	CurrTime += BufferTime;
	
	if (CurrTime >= NextTime) {
	
		//if (!__isnand	(LastExecTime)) {
		if (!IS_NAN(LastExecTime)) {
			
			CFAbsoluteTime TimeDelta = CurrTime - NextTime;
			CFAbsoluteTime QuantTimeDelta;
			modf((TimeDelta / DeltaCompare), &QuantTimeDelta); // throw away fractional
			QuantTimeDelta *= DeltaCompare;
			
			LastRunAlign = NextTime + QuantTimeDelta;
		}
		
		return true;
	}
	
	return false;
}
Exemple #23
0
/*
 * test_bson_type --
 *
 *    Walk a corpus scenario document and run its test cases.
 *
 *    For every element of the "valid" array the fields "bson" (hex string),
 *    "extjson" and "canonical_extjson" (UTF-8 strings) and the lossy flag are
 *    collected and handed to @valid. The decoded BSON bytes are passed twice
 *    (as both byte arguments of the callback). When "canonical_extjson" is
 *    absent, the "extjson" value is passed in its place.
 *
 *    For every element of the "parseErrors" array, the "string" field must
 *    fail to parse as a decimal128 and leave a NaN behind.
 *
 *    Fields that are missing from a test case are passed as NULL / 0 rather
 *    than left uninitialised.
 */
static void
test_bson_type (bson_t *scenario, test_bson_type_valid_cb valid)
{
   bson_iter_t iter;
   bson_iter_t inner_iter;
   BSON_ASSERT (scenario);

   if (bson_iter_init_find (&iter, scenario, "valid")) {
      bson_iter_recurse (&iter, &inner_iter);
      while (bson_iter_next (&inner_iter)) {
         bson_iter_t test;
         uint8_t       *bson_str = NULL;
         uint32_t       bson_str_len = 0;
         const uint8_t *extjson_str = NULL;
         uint32_t       extjson_str_len = 0;
         const uint8_t *canonical_extjson_str = NULL;
         uint32_t       canonical_extjson_str_len = 0;
         bool           lossy = false;
         bool           have_extjson = false;
         bool           have_canonical_extjson = false;

         bson_iter_recurse (&inner_iter, &test);
         _test_bson_type_print_description (&test);
         while (bson_iter_next (&test)) {
            const char *key = bson_iter_key (&test);

            if (!strcmp (key, "bson") && BSON_ITER_HOLDS_UTF8 (&test)) {
               const char *input = NULL;
               unsigned int byte;
               uint32_t tmp;
               uint32_t x = 0;

               input = bson_iter_utf8 (&test, &tmp);

               /* a repeated "bson" key must not leak the earlier buffer */
               bson_free (bson_str);
               bson_str_len = tmp / 2;
               bson_str = bson_malloc (bson_str_len);

               /* Two hex digits per byte. Stop at the end of the buffer so
                * that an odd-length string cannot write past it. */
               while (x < bson_str_len &&
                      SSCANF (&input[2 * x], "%2x", &byte) == 1) {
                  bson_str[x++] = (uint8_t) byte;
               }

               /* only hand over the bytes that were actually decoded */
               bson_str_len = x;
            }

            if (!strcmp (key, "extjson") && BSON_ITER_HOLDS_UTF8 (&test)) {
               extjson_str = (const uint8_t *)bson_iter_utf8 (&test, &extjson_str_len);
               have_extjson = true;
            }

            if (!strcmp (key, "canonical_extjson") && BSON_ITER_HOLDS_UTF8 (&test)) {
               canonical_extjson_str = (const uint8_t *)bson_iter_utf8 (&test, &canonical_extjson_str_len);
               have_canonical_extjson = true;
            }

            /* The corpus format names this flag "lossy". A boolean stored
             * under "canonical_extjson" was the only spelling recognised
             * before and is still accepted. */
            if ((!strcmp (key, "lossy") ||
                 !strcmp (key, "canonical_extjson")) &&
                BSON_ITER_HOLDS_BOOL (&test)) {
               lossy = bson_iter_bool (&test);
            }
         }

         valid (bson_str,
                bson_str_len,
                bson_str,
                bson_str_len,
                have_extjson ? extjson_str : NULL,
                have_extjson ?  extjson_str_len : 0,
                have_canonical_extjson ? canonical_extjson_str : extjson_str,
                have_canonical_extjson ? canonical_extjson_str_len : extjson_str_len,
                lossy);
         bson_free (bson_str);
      }
   }

   if (bson_iter_init_find (&iter, scenario, "parseErrors")) {
      bson_iter_recurse (&iter, &inner_iter);
      while (bson_iter_next (&inner_iter)) {
         bson_iter_t test;

         bson_iter_recurse (&inner_iter, &test);
         _test_bson_type_print_description (&test);

         if (bson_iter_find (&test, "string") && BSON_ITER_HOLDS_UTF8 (&test)) {
            bson_decimal128_t d;
            uint32_t tmp;
            const char *input = bson_iter_utf8 (&test, &tmp);

            /* parsing must fail and must leave NaN in the output */
            ASSERT (!bson_decimal128_from_string (input, &d));
            ASSERT (IS_NAN (d));
         }
      }
   }
}
/* Convert a simplicial symbolic factor L (xtype CHOLMOD_PATTERN, not
 * supernodal) into a simplicial numeric factor of type to_xtype.
 *
 * The column pointers, per-column counts and the i/x/z arrays are allocated
 * here.  The value of packed selects how much space is reserved:
 *
 *   packed < 0:  allocate exactly the L->nzmax that L had on entry and leave
 *		  the contents of L->p and L->nz unset (used when copying a
 *		  factor).
 *   packed > 0:  reserve the sum of the column counts, each clamped to the
 *		  range 1 to n-j.
 *   packed == 0: as above, but each column and the total may be enlarged by
 *		  the grow parameters in Common.
 *
 * For packed >= 0 the result is the identity matrix: each column holds just
 * its diagonal entry, with value 1.
 *
 * Nothing is returned.  If memory runs out or the size overflows, the arrays
 * allocated so far are freed and the function returns before L->xtype is
 * changed, so L is still marked as a pattern-only factor.
 */
static void simplicial_symbolic_to_simplicial_numeric
(
    cholmod_factor *L,
    int to_ll,
    int packed,
    int to_xtype,
    cholmod_common *Common
)
{
    double grow0, grow1, xlen, xlnz ;
    double *Lx, *Lz ;
    Int *Li, *Lp, *Lnz, *ColCount ;
    Int n, grow, grow2, p, j, lnz, len, ok, e ;

    ASSERT (L->xtype == CHOLMOD_PATTERN && !(L->is_super)) ;
    if (!allocate_simplicial_numeric (L, Common))
    {
	PRINT1 (("out of memory, allocate simplicial numeric\n")) ;
	return ;	/* out of memory */
    }
    ASSERT (L->ColCount != NULL && L->nz != NULL && L->p != NULL) ;
    ASSERT (L->x == NULL && L->z == NULL && L->i == NULL) ;

    ColCount = L->ColCount ;
    Lnz = L->nz ;
    Lp = L->p ;
    ok = TRUE ;	    /* becomes FALSE if the running total lnz goes negative */
    n = L->n ;

    if (packed < 0)
    {

	/* ------------------------------------------------------------------ */
	/* used by cholmod_copy_factor to allocate a copy of a factor object */
	/* ------------------------------------------------------------------ */

	/* request the size L had on entry; nzmax is zeroed because the
	 * allocation below starts from empty arrays */
	lnz = L->nzmax ;
	L->nzmax = 0 ;

    }
    else if (packed)
    {

	/* ------------------------------------------------------------------ */
	/* LDL' or LL' packed */
	/* ------------------------------------------------------------------ */

	PRINT1 (("convert to packed LL' or LDL'\n")) ;
	lnz = 0 ;
	for (j = 0 ; ok && j < n ; j++)
	{
	    /* ensure len is in the range 1 to n-j */
	    len = ColCount [j] ;
	    len = MAX (1, len) ;
	    len = MIN (len, n-j) ;
	    lnz += len ;
	    /* NOTE(review): presumably detects Int overflow of the total by
	     * its sign */
	    ok = (lnz >= 0) ;
	}
	/* the factor starts as the identity: column j holds one entry, at
	 * position j.  lnz is only the amount of space to allocate. */
	for (j = 0 ; j <= n ; j++)
	{
	    Lp [j] = j ;
	}
	for (j = 0 ; j < n ; j++)
	{
	    Lnz [j] = 1 ;
	}

    }
    else
    {

	/* ------------------------------------------------------------------ */
	/* LDL' unpacked */
	/* ------------------------------------------------------------------ */

	PRINT1 (("convert to unpacked\n")) ;
	/* compute new lnzmax */
	/* a NaN grow0 or grow1 is replaced by 1, so the comparisons below
	 * never see a NaN */
	grow0 = Common->grow0 ;
	grow1 = Common->grow1 ;
	grow2 = Common->grow2 ;
	grow0 = IS_NAN (grow0) ? 1 : grow0 ;
	grow1 = IS_NAN (grow1) ? 1 : grow1 ;
	/* fl.pt. compare, but no NaN's: */
	grow = (grow0 >= 1.0) && (grow1 >= 1.0) && (grow2 > 0) ;
	PRINT1 (("init, grow1 %g grow2 "ID"\n", grow1, grow2)) ;
	/* initialize Lp and Lnz for each column */
	lnz = 0 ;
	for (j = 0 ; ok && j < n ; j++)
	{
	    /* column j starts where the space of the previous columns ends,
	     * and initially holds only its diagonal entry */
	    Lp [j] = lnz ;
	    Lnz [j] = 1 ;

	    /* ensure len is in the range 1 to n-j */
	    len = ColCount [j] ;
	    len = MAX (1, len) ;
	    len = MIN (len, n-j) ;

	    /* compute len in double to avoid integer overflow */
	    PRINT1 (("ColCount ["ID"] = "ID"\n", j, len)) ;
	    if (grow)
	    {
		/* enlarge the column to grow1*len + grow2, but never beyond
		 * n-j */
		xlen = (double) len ;
		xlen = grow1 * xlen + grow2 ;
		xlen = MIN (xlen, n-j) ;
		len = (Int) xlen ;
	    }
	    ASSERT (len >= 1 && len <= n-j) ;
	    lnz += len ;
	    ok = (lnz >= 0) ;
	}
	if (ok)
	{
	    Lp [n] = lnz ;
	    if (grow)
	    {
		/* add extra space */
		/* scale the total by grow0, capped at Size_max and at
		 * (n*n+n)/2 */
		xlnz = (double) lnz ;
		xlnz *= grow0 ;
		xlnz = MIN (xlnz, Size_max) ;
		xlnz = MIN (xlnz, ((double) n * (double) n + (double) n) / 2) ;
		lnz = (Int) xlnz ;
	    }
	}
    }

    /* always request at least one entry */
    lnz = MAX (1, lnz) ;

    if (!ok)
    {
	ERROR (CHOLMOD_TOO_LARGE, "problem too large") ;
    }

    /* allocate L->i, L->x, and L->z */
    PRINT1 (("resizing from zero size to lnz "ID"\n", lnz)) ;
    ASSERT (L->nzmax == 0) ;
    /* e is the number of doubles per entry of L->x, used when freeing it */
    e = (to_xtype == CHOLMOD_COMPLEX ? 2 : 1) ;
    if (!ok || !CHOLMOD(realloc_multiple) (lnz, 1, to_xtype, &(L->i), NULL,
		&(L->x), &(L->z), &(L->nzmax), Common))
    {
	/* undo: release the arrays of L handled here, so that L is left
	 * without numeric storage */
	L->p    = CHOLMOD(free) (n+1, sizeof (Int),      L->p, Common) ;
	L->nz   = CHOLMOD(free) (n,   sizeof (Int),      L->nz, Common) ;
	L->prev = CHOLMOD(free) (n+2, sizeof (Int),      L->prev, Common) ;
	L->next = CHOLMOD(free) (n+2, sizeof (Int),      L->next, Common) ;
	L->i    = CHOLMOD(free) (lnz, sizeof (Int),      L->i, Common) ;
	L->x    = CHOLMOD(free) (lnz, e*sizeof (double), L->x, Common) ;
	L->z    = CHOLMOD(free) (lnz, sizeof (double),   L->z, Common) ;
	PRINT1 (("cannot realloc simplicial numeric\n")) ;
	return ;	/* out of memory */
    }

    /* ============================================== commit the changes to L */

    /* initialize L to be the identity matrix */
    L->xtype = to_xtype ;
    L->dtype = CHOLMOD_DOUBLE ;
    L->minor = n ;

    Li = L->i ;
    Lx = L->x ;
    Lz = L->z ;

#if 0
    if (lnz == 1)
    {
	/* the user won't expect to access this entry, but some CHOLMOD
	 * routines may.  Set it to zero so that valgrind doesn't complain. */
	switch (to_xtype)
	{
	    case CHOLMOD_REAL:
		Lx [0] = 0 ;
		break ;

	    case CHOLMOD_COMPLEX:
		Lx [0] = 0 ;
		Lx [1] = 0 ;
		break ;

	    case CHOLMOD_ZOMPLEX:
		Lx [0] = 0 ;
		Lz [0] = 0 ;
		break ;
	}
    }
#endif

    /* for packed < 0, Lp was never set above, so no entries are written */
    if (packed >= 0)
    {
	/* create the unit diagonal for either the LL' or LDL' case */

	switch (L->xtype)
	{
	    case CHOLMOD_REAL:
		for (j = 0 ; j < n ; j++)
		{
		    ASSERT (Lp [j] < Lp [j+1]) ;
		    p = Lp [j] ;
		    Li [p] = j ;
		    Lx [p] = 1 ;
		}
		break ;

	    case CHOLMOD_COMPLEX:
		/* real and imaginary parts are interleaved in Lx */
		for (j = 0 ; j < n ; j++)
		{
		    ASSERT (Lp [j] < Lp [j+1]) ;
		    p = Lp [j] ;
		    Li [p] = j ;
		    Lx [2*p  ] = 1 ;
		    Lx [2*p+1] = 0 ;
		}
		break ;

	    case CHOLMOD_ZOMPLEX:
		/* real parts in Lx, imaginary parts in Lz */
		for (j = 0 ; j < n ; j++)
		{
		    ASSERT (Lp [j] < Lp [j+1]) ;
		    p = Lp [j] ;
		    Li [p] = j ;
		    Lx [p] = 1 ;
		    Lz [p] = 0 ;
		}
		break ;
	}
    }

    /* record whether the factor is to be treated as LL' or LDL' */
    L->is_ll = to_ll ;

    PRINT1 (("done convert simplicial symbolic to numeric\n")) ;
}
Exemple #25
0
// Reduces every element of `in` to a single value with the binary operation
// `op` and returns it on the host.
//
//   in         - input array descriptor (taken by value; its dims/strides are
//                rewritten locally when the data is contiguous).
//   change_nan - when true, NaN inputs are replaced by `nanval` before being
//                reduced.
//   nanval     - replacement value for NaNs.
//
// Inputs that are non-contiguous or hold more than 4096 elements are first
// reduced along dimension 0 on the device into per-block partial results,
// which are copied back and folded on the host. Small contiguous inputs are
// copied back whole and reduced on the host. Both paths synchronise on the
// active stream before the host reads the data.
//
// NOTE(review): Ti, To and op are declared outside this excerpt (presumably
// template parameters).
To reduce_all(CParam<Ti> in, bool change_nan, double nanval) {
    // Keep the element count in the native width of the dimension fields.
    // Narrowing it to int made the count negative for inputs of 2^31 elements
    // or more, which sent them down the small-input host path with a
    // nonsensical buffer size.
    const auto in_elements =
        in.dims[0] * in.dims[1] * in.dims[2] * in.dims[3];

    // Contiguous means unit stride in dim 0 and each further stride equal to
    // the extent of the dimensions before it.
    bool is_linear = (in.strides[0] == 1);
    for (int k = 1; k < 4; k++) {
        is_linear &= (in.strides[k] == (in.strides[k - 1] * in.dims[k - 1]));
    }

    // FIXME: Use better heuristics to get to the optimum number
    if (in_elements > 4096 || !is_linear) {
        if (is_linear) {
            // Treat contiguous data as one long row so that a single pass
            // over dimension 0 covers everything.
            in.dims[0] = in_elements;
            for (int k = 1; k < 4; k++) {
                in.dims[k]    = 1;
                in.strides[k] = in_elements;
            }
        }

        // Clamp before converting to uint: only values up to
        // THREADS_PER_BLOCK influence the result, and an unclamped cast
        // wraps once dims[0] no longer fits in 32 bits.
        // (presumably nextpow2 rounds up to a power of two - the clamped
        // value then gives the same thread count as before for every input
        // that did not wrap.)
        const uint dim0_for_threads =
            (in.dims[0] > THREADS_PER_BLOCK) ? THREADS_PER_BLOCK
                                             : (uint)in.dims[0];

        uint threads_x = nextpow2(std::max(32u, dim0_for_threads));
        threads_x      = std::min(threads_x, THREADS_PER_BLOCK);
        uint threads_y = THREADS_PER_BLOCK / threads_x;

        Param<To> tmp;

        uint blocks_x = divup(in.dims[0], threads_x * REPEAT);
        uint blocks_y = divup(in.dims[1], threads_y);

        // One partial result per x-block; the other dimensions are kept.
        tmp.dims[0]    = blocks_x;
        tmp.strides[0] = 1;

        for (int k = 1; k < 4; k++) {
            tmp.dims[k]    = in.dims[k];
            tmp.strides[k] = tmp.dims[k - 1] * tmp.strides[k - 1];
        }

        int tmp_elements = tmp.strides[3] * tmp.dims[3];

        auto tmp_alloc = memAlloc<To>(tmp_elements);
        tmp.ptr        = tmp_alloc.get();
        reduce_first_launcher<Ti, To, op>(tmp, in, blocks_x, blocks_y,
                                          threads_x, change_nan, nanval);

        // Bring the partial results back and wait for the copy before the
        // host touches them.
        std::vector<To> h_data(tmp_elements);
        CUDA_CHECK(
            cudaMemcpyAsync(h_data.data(), tmp.ptr, tmp_elements * sizeof(To),
                            cudaMemcpyDeviceToHost, cuda::getActiveStream()));
        CUDA_CHECK(cudaStreamSynchronize(cuda::getActiveStream()));

        // The partials are already transformed (and NaN-replaced) values of
        // type To, so they are only combined here.
        Binary<To, op> reduce;
        To out = Binary<To, op>::init();
        for (int i = 0; i < tmp_elements; i++) { out = reduce(out, h_data[i]); }

        return out;
    } else {
        // Small contiguous input (at most 4096 elements): reduce on the host.
        std::vector<Ti> h_data(in_elements);
        CUDA_CHECK(
            cudaMemcpyAsync(h_data.data(), in.ptr, in_elements * sizeof(Ti),
                            cudaMemcpyDeviceToHost, cuda::getActiveStream()));
        CUDA_CHECK(cudaStreamSynchronize(cuda::getActiveStream()));

        Transform<Ti, To, op> transform;
        Binary<To, op> reduce;
        To out       = Binary<To, op>::init();
        To nanval_to = scalar<To>(nanval);

        for (int i = 0; i < in_elements; i++) {
            To in_val = transform(h_data[i]);
            if (change_nan) in_val = !IS_NAN(in_val) ? in_val : nanval_to;
            out = reduce(out, in_val);
        }

        return out;
    }
}