コード例 #1
0
ファイル: dlpack.c プロジェクト: BrechtBa/casadi
static int DenseSymPSDNormF2(void* AA, int n, double *dddot){
  dtpumat* A=(dtpumat*) AA;
  ffinteger ione=1,nn=A->n*(A->n+1)/2;
  double dd,tt=sqrt(0.5),*val=A->val;
  int info;
  info=DTPUMatScaleDiagonal(AA,tt);
  dd=dnrm2(&nn,val,&ione);
  info=DTPUMatScaleDiagonal(AA,1.0/tt);
  *dddot=dd*dd*2;
  return 0;
}
コード例 #2
0
double compute_nvector_scale(double *residuals, int n, int nv,
				double *nrms)
{ 
	int j;
	MW_scalar_statistics stats;
	double scale;

	for(j=0;j<nv;++j) nrms[j] = dnrm2(n,residuals+j*n,1);
	stats = MW_calc_statistics_double(nrms,nv);
	scale = stats.q3_4 - stats.q1_4;
	scale /= SIGMA_IQ;
	return(scale);
}
コード例 #3
0
/*---------------------------------------------------------
  fem_vec_norm - to compute a norm of global vector (in parallel)
---------------------------------------------------------*/
double fem_vec_norm( /* returns: L2 norm of global Vector */
    int Solver_id,        /* in: solver data structure to be used */
    int Level_id,         /* in: level number */
    int Nrdof,            /* in: number of vector components */
    double* Vector        /* in: local part of global Vector */
)
{

    const int IONE=1;

    /*++++++++++++++++ executable statements ++++++++++++++++*/

    //return(ddr_sol_vec_norm(Solver_id, Level_id, Nrdof, Vector));

    return(dnrm2(&Nrdof, Vector, &IONE));

}
コード例 #4
0
/* This function uses m-estimators to estimate a three vector from 
a cloud of points in n-space.  The approach is comparable to 
computing the center of mass of cloud of equal mass particles.
We use a component median for the initial estimate of center, then compute
the m-estimator based on a Rayleigh distribution keyed on the 
l2 norm amplitude of the vector residual.  This is comparable to the 
estimation of phase in robust estimates of transfer functions
described by Chave and Thomson as we use a penalty function based on
a Rayleigh distribution.  It is not clear to the author if this is
valid for other than 2 vectors, but it surely is not a bad approximation
for 3-vectors.  Higher order spaces should use this with care. 

Arguments:

v -  n by nv matrix containing ensemble of data n-vectors.  These are assumed
	stored as in the blas in a pseudofortran sense as a continuous
	vector of floats with columns sequential (i.e. first column
	of v is elements v[0], v[1], ... , v[n-1] and second column
	starts at v[n].
n - length of vectors (number of rows in v)
nv - number of vectors (number of columns in v)
mode - switch (see below)
minscale - minimum error scale allowed.  Use depends on setting of 
	the mode variable with which it is associated.  Scale factor
	is determined here from interquartiles.  mode is as defined
	in M_estimator_float above using symbols SCALE_IQ_RELATIVE
	and SCALE_IQ_ABSOLUTE.  The former uses a relative scale derived
	from the scale factor determined on the first pass.  That is, 
	the minimum scale alllowed is vmag0*minscale (e.g. 0.01 would
	limit the minimum scale to 1% of the magnitude of the total n
	vector initial estimate.  Absolute scaling
	uses the limit straight up.    
mean - vector of length n to hold result.  Blindly assumed to be already
	allocated of length n.
weight - vector of length to hold final weights used in robust
	estimation.

Function returns a 3 vector it estimates from v.  This is an array
alloced in this function that needs to be managed externally.  

Author:  G Pavlis
Written:  January 2000
*/
#define EPSILON 0.01 /* convergence parameter*/
#define HUBER_LIMIT 2 /* Number of Huber weight iterations */
#define THOMPSON_LIMIT 25 /* Limit on number of iterations using Thompson formula */
#define MIN_DGF 10  /* I've seen the Thompson formula work badly with 
		small degrees of freedom.  When dgf are less than 
		this the Thomson section is skipped */
void M_estimator_double_n_vector(double *v, 
	int n, 
	int nv,
	int mode, 
	double minscale,
	double *mean,
	double *weight)
{
	double *col, *row;  /* work spaces for columns and rows respectively*/
	double sum_weights;
	double *residuals;
	double *delta_mean;  
	int i, j, iteration;
	double vmag,dvmag; 
	double scale, fminsc;
	int ndgf;
	MW_scalar_statistics stats;
	double beta;

	allot(double *,col,n);
	allot(double *,row,nv);
	allot(double *,delta_mean, n);
	allot(double *,residuals,n*nv);

	/* We compute component medians to obtain initial estimate 
	of vector */
	for(i=0;i<n;++i)
	{
		dcopy(nv,v+i,n,row,1);
		stats=MW_calc_statistics_double(row,nv);
		mean[i] = stats.median;
	}
	/* We first do a few passes with the huber formula
	which is less aggressive on outliers, but helps 
	establish a solid value for the scale factor.*/
	iteration =0;
	vmag = dnrm2(n,mean,1);
	for(j=0;j<nv;++j) weight[j] = 1.0; /* done to make sure scale
					is computed correctly on first pass*/
	if(mode==IQ_SCALE_RELATIVE)
	    if(vmag<FLT_EPSILON)
		fminsc= minscale;
	    else
		fminsc = vmag*minscale;
	else
		fminsc = minscale;
	do
	{
		compute_nvector_residuals(v,n,nv,residuals,mean);
		/* This produces weighted residuals -- requires
		weighting formula to use weights 0<w<1 */
		for(j=0;j<nv;++j) dscal(n,weight[i],residuals+j,1);
		scale = compute_nvector_scale(residuals,n,nv,row);
		if(scale < fminsc) scale = fminsc;
		for(i=0;i<n;++i) delta_mean[i] = 0.0;
		for(j=0,sum_weights=0.0;j<nv;++j)
		{
			dcopy(n,residuals+j*n,1,col,1);
			dvmag = dnrm2(n,col,1);
			weight[j] = dhuber(dvmag/scale);
			daxpy(n,weight[j],col,1,delta_mean,1);
			sum_weights += weight[j];
		}
		dscal(n,1.0/sum_weights,delta_mean,1);
		dvmag = dnrm2(n,delta_mean,1);
		for(i=0;i<n;++i) mean[i] += delta_mean[i];
		++iteration;
	}while( ((dvmag/scale) > EPSILON)
		&& (iteration < HUBER_LIMIT) );
	
	/* Now we use Thomson's redescending formula which is the 
	opposite of the huber formula being extremely aggressive 
	on outliers and works only if the scale factor is not 
	too out of line.  It also works badly with low degrees
	of freedom.  Consequently, we return immediately when
	degrees of freedom are below a frozen threshold*/

	ndgf = nv - n;
	if(ndgf<MIN_DGF)
	{
		free(col);
		free(row);
		free(delta_mean);
                free(residuals);
		return;
	}
	/* This is the value of beta recommended by chave and thomson, 1987,
	based on the nvth quantile of the Rayleigh distribution.  I use
	number of degrees of freedom here instead to perhaps more properly
	work with higher order spaces, but use a minimum on ndgf to 
	avoid making the formula unstable.  The thomson formula becomes
	exponential-like with low degrees of freedom, which we need to 
	avoid. */
	beta = sqrt(2.0*log(2.0*((double)ndgf)));
	iteration = 0;
	do
	{
		compute_nvector_residuals(v,n,nv,residuals,mean);
		for(i=0;i<n;++i) delta_mean[i] = 0.0;
		for(j=0;j<nv;++j)
		{
			dcopy(n,residuals+j*n,1,col,1);
			dvmag = dnrm2(n,col,1);
			weight[j] = dthomson(dvmag/scale,beta);
			daxpy(n,weight[j],col,1,delta_mean,1);
			sum_weights += weight[j];
		}
		dscal(n,1.0/sum_weights,delta_mean,1);
		dvmag = dnrm2(n,delta_mean,1);
		for(i=0;i<n;++i) mean[i] += delta_mean[i];
		++iteration;
	}while( ((dvmag/scale) > EPSILON)
		&& (iteration < THOMPSON_LIMIT) );

	free(col);
	free(row);
	free(delta_mean);
	free(residuals);
}
コード例 #5
0
ファイル: blasAcmlExample.c プロジェクト: sductor/DimaX
double  KNITRO_EXPORT  KTR_dnrm2 (const int             n,
                                  const double * const  x,
                                  const int             incx)
{
    return( dnrm2 (n, x, incx) );
}
コード例 #6
0
ファイル: npcheck.c プロジェクト: rolk/ug
INT NS_DIM_PREFIX CheckNP (MULTIGRID *theMG, INT argc, char **argv)
{
  MATDATA_DESC *A;
  VECDATA_DESC *x,*y;
  INT i,level,nerr;
  char value[VALUELEN];
  VEC_SCALAR damp;
  DOUBLE nrm,diff;

  if (ReadArgvChar("A",value,argc,argv) == 0) {
    A = GetMatDataDescByName(theMG,value);
    if (A == NULL) {
      UserWriteF("ERROR: no matrix %s in npckeck\n",value);
      return(1);
    }
    if (ReadArgvOption("S",argc,argv)) {
      for (level=theMG->bottomLevel; level<=TOPLEVEL(theMG); level++)
        if (CheckSymmetryOfMatrix(GRID_ON_LEVEL(theMG,level),A))
          UserWriteF("matrix %s not symmetric on level %d\n",
                     ENVITEM_NAME(A),level);
      return(0);
    }
    if (ReadArgvOption("G",argc,argv)) {
      if (ReadArgvChar("x",value,argc,argv)) {
        UserWriteF("ERROR: no vector in npckeck\n");
        return(1);
      }
      x = GetVecDataDescByName(theMG,value);
      if (x == NULL) {
        UserWriteF("ERROR: no vector %s in npckeck\n",value);
        return(1);
      }
      level = CURRENTLEVEL(theMG);
      if (level == BOTTOMLEVEL(theMG)) {
        UserWriteF("ERROR: no GalerkinCheck,"
                   "level %d is bottomlevel\n",level);
        return(1);
      }
      if (AllocVDFromVD(theMG,level-1,level,x,&y))
        return(1);
      dmatset(theMG,level-1,level-1,ALL_VECTORS,A,0.0);
      dset(theMG,level,level,ALL_VECTORS,x,0.0);
      dset(theMG,level-1,level,ALL_VECTORS,y,0.0);
      AssembleGalerkinByMatrix(GRID_ON_LEVEL(theMG,level),A,0);
      for (i=0; i<VD_NCOMP(x); i++) damp[i] = 1.0;
      InterpolateCorrectionByMatrix(GRID_ON_LEVEL(theMG,level),x,x,damp);
      if (dmatmul(theMG,level,level,ALL_VECTORS,y,A,x) != NUM_OK)
        return(1);
      RestrictByMatrix(GRID_ON_LEVEL(theMG,level),y,y,damp);
      IFDEBUG(np,1)
      UserWriteF("x %d\n",level-1);
      PrintVector(GRID_ON_LEVEL(theMG,level-1),x,3,3);
      UserWriteF("x %d\n",level);
      PrintVector(GRID_ON_LEVEL(theMG,level),x,3,3);
      UserWriteF("y %d\n",level);
      PrintVector(GRID_ON_LEVEL(theMG,level),y,3,3);
      UserWriteF("y %d\n",level-1);
      PrintVector(GRID_ON_LEVEL(theMG,level-1),y,3,3);
      ENDDEBUG
      if (dmatmul_minus(theMG,level-1,level-1,ALL_VECTORS,y,A,x)!=NUM_OK)
        return(1);
      IFDEBUG(np,1)
      UserWriteF("y %d\n",level-1);
      PrintVector(GRID_ON_LEVEL(theMG,level-1),y,3,3);
      ENDDEBUG
      dnrm2(theMG,level-1,level-1,ALL_VECTORS,x,&nrm);
      dnrm2(theMG,level-1,level-1,ALL_VECTORS,y,&diff);
      UserWriteF("Galerkin test: nrm(x) = %f nrm(Ax-RAPx) = %f\n",
                 nrm,diff);
      return(0);
    }
  }
コード例 #7
0
ファイル: fcmer.c プロジェクト: xhub/fclib
/* calculate merit function for a local problem */
double fclib_merit_local (struct fclib_local *problem, enum fclib_merit merit, struct fclib_solution *solution)
{

  struct fclib_matrix * W =  problem->W;
  struct fclib_matrix * V =  problem->V;
  struct fclib_matrix * R =  problem->R;
  
  double *mu = problem->mu;
  double *q = problem->q;
  double *s = problem->s;
  int d = problem->spacedim;          
  if (d !=3 )
  {
    printf("fclib_merit_local for space dimension = %i not yet implemented\n",d);
    return 0;
  }

  double *v = solution->v;
  double *r = solution->r;
  double *u = solution->u;
  double *l = solution->l;

  double error_l, error;
  double * tmp;

  error=0.0;
  error_l=0.0;
  int i, ic, ic3;
  if (merit == MERIT_1)
  {
    
    /* cs M_cs;  */
    /* fclib_matrix_to_cssparse(W, &M_cs); */
    /* cs V_cs;  */
    /* fclib_matrix_to_cssparse(V, &V_cs); */
    /* cs R_cs;  */
    /* fclib_matrix_to_cssparse(R, &R_cs); */
    int n_e =0;
    if (R) n_e = R->n;
    /* compute V^T {r} + R \lambda + s */
    if (n_e >0)
    {
      cs * VT = cs_transpose((cs *)V, 0) ;
      tmp = (double *)malloc(n_e*sizeof(double));
      for (i =0; i <n_e; i++) tmp[i] = s[i] ;
      cs_gaxpy(VT, r, tmp);
      cs_gaxpy((cs *)R, l, tmp);
      error_l += dnrm2(tmp,n_e)/(1.0 +  dnrm2(s,n_e) );
      free(tmp);
    }
    /* compute  \hat u = W {r}    + V\lambda  + q  */
    
    tmp = (double *)malloc(W->n*sizeof(double));
    for (i =0; i <W->n; i++) tmp[i] = q[i] ;
    cs_gaxpy((cs*)V, l, tmp);
    cs_gaxpy((cs*)W, r, tmp);

    /* Compute natural map */
    int nc = W->n/3;
    for (ic = 0, ic3 = 0 ; ic < nc ; ic++, ic3 += 3)
    {
      FrictionContact3D_unitary_compute_and_add_error(r + ic3, tmp + ic3, mu[ic], &error);
    }
          
    free(tmp);
    error = sqrt(error)/(1.0 +  sqrt(dnrm2(q,W->n)) )+error_l;  

    /* printf("error_l = %12.8e", error_l); */
    /* printf("norm of u  = %12.8e\n",  dnrm2(u,W->n)); */
    /* printf("norm of r  = %12.8e\n",  dnrm2(r,W->n)); */
    /* printf("error = %12.8e\n", error); */
  
    return error;
  }

  return 0; /* TODO */
}
コード例 #8
0
/* This function takes complex numbers x, y, and z defined by an 
eigenvector for a multiwavelet (would work for Fourier transforms
too, however) and returns a pointer to a structure that defines
the major and minor axes of the particle motion vectors defined
by those three complex numbers.  The up vector (assumed to be
three element vector) defines the direction used to resolve
the sign ambiguity inherent in defining an ellipse.  That is,
both the major and minor component directions are required
to have a positive projection in the up direction.  If they 
aren't the sign is flipped before returning.  Normally up 
would point [0,0,1] or in the up radial direction for P waves.
For S, it becomes more ambiguous and should be sorted out 
by a more complicated method.

The polarization information (defined by the Particle_Motion_Ellipse 
structure) is allocated within this routine. 

Author:  G. L. Pavlis
Written:  October 1999
*/
Particle_Motion_Ellipse compute_particle_motion(complex x, 
						complex y, 
						complex z,
						double *up)
{
	double rx,ry,rz,thetax,thetay,thetaz;  /* polar forms of x,y,z*/
	double a,b;
	double phi1,phi2;
	double x1[3],x2[3];
	double nrmx1,nrmx2;
	Particle_Motion_Ellipse e;


	rx = hypot((double)x.r,(double)x.i);
	ry = hypot((double)y.r,(double)y.i);
	rz = hypot((double)z.r,(double)z.i);
	thetax = atan2((double)x.i,(double)x.r);
	thetay = atan2((double)y.i,(double)y.r);
	thetaz = atan2((double)z.i,(double)z.r);

	a = rx*rx*cos(2.0*thetax) 
		+ ry*ry*cos(2.0*thetay) 
		+ rz*rz*cos(2.0*thetaz);
	b = rx*rx*sin(2.0*thetax) 
		+ ry*ry*sin(2.0*thetay) 
		+ rz*rz*sin(2.0*thetaz);

	phi1 = atan2(-b,a)/2.0;
	phi2 = phi1 + M_PI_2;

	x1[0] = rx*cos(phi1+thetax);
	x1[1] = ry*cos(phi1+thetay);
	x1[2] = rz*cos(phi1+thetaz);
	x2[0] = rx*cos(phi2+thetax);
	x2[1] = ry*cos(phi2+thetay);
	x2[2] = rz*cos(phi2+thetaz);

	nrmx1 = dnrm2(3,x1,1);
	nrmx2 = dnrm2(3,x2,1);
	/* normalize to unit vectors */
	dscal(3,1.0/nrmx1,x1,1);
	dscal(3,1.0/nrmx2,x2,1);

	if(nrmx1>nrmx2)
	{
		dcopy(3,x1,1,e.major,1);
		dcopy(3,x2,1,e.minor,1);
		e.rectilinearity = (1.0 - nrmx2/nrmx1);
	}
	else
	{
		dcopy(3,x2,1,e.major,1);
		dcopy(3,x1,1,e.minor,1);
		e.rectilinearity = (1.0 - nrmx1/nrmx2);
	}
	/* Choose the positive sign direction */
	if(ddot(3,up,1,e.major,1) < 0.0)
		dscal(3,-1.0,e.major,1);
	if(ddot(3,up,1,e.minor,1) < 0.0)
		dscal(3,-1.0,e.minor,1);
	return(e);
}
コード例 #9
0
/* this is a blas like function analogous to scopy, dcopy, etc
for a matrix of pointers to Particle_Motion_Vector objects.  I could
have made this a general matrix function, I suppose, but I decided that
would be a bit opaque, and would promote on of C's most evil features.
The pointers are blindly copied and it assumed the output vector 
bounds are not violated.

Arguments:
	n - number of elements in input and output vectors]
	x - input vector of pointers
	incx - storage increment of x ala blas
	y - output vector
	incy - storage increment of y ala blas.
Written:  February 2000
Author:  G Pavlis
*/
void pmvector_copy(int n, Particle_Motion_Ellipse *x, int incx,
			Particle_Motion_Ellipse *y, int incy)
{
	int i,ix,iy;

	for(i=0,ix=0,iy=0;i<n;++i,ix+=incx,iy+=incy)
	{
		y[iy] = x[ix];
	}
}
#define PM_MINSCALE_MAJOR 0.2  /* This needs to be pretty large compared to 
				good data because if the errors get much
				larger than this the results are trash anyway */
#define PM_MINSCALE_MINOR 1.0 /* Minor axis can easily be totally random.  
				Nearly always happens for pure linear pm.
				This essentially turns off robust weighting */
void pmvector_average(Particle_Motion_Ellipse *pmv, int n,
	Particle_Motion_Ellipse *pmavg, Particle_Motion_Error *pmerr)
{
	int i,j,ii;
	double *v;  /* work space used to store coordinates passed
			to m-estimator routine */
	double avg[3];
	double *weight;
	double nrm_major, nrm_minor;  
	Spherical_Coordinate scoor;
	double U[9];   /* transformation matrix*/
	double work[3];
	double *workn;
	double dotprod;
	double sumsq,sumwt;
	int ndgf;
	MW_scalar_statistics stats;
	double nrmtest;

	allot(double *,v,3*n);
	allot(double *,weight,n);
	allot(double *,workn,n);
	for(i=0,ii=0;i<n;++i,ii+=3)
	{
		/* This could be done with the blas, but it would
		be more obscure and no faster */
		v[ii] = pmv[i].major[0];
		v[ii+1] = pmv[i].major[1];
		v[ii+2] = pmv[i].major[2];
	}
	/* We use relative scaling here because the pm vectors are 
	not normalized.  We could use absolute scaling if we normalized
	them above.  This is a modification that might actually give
	better results.  */
	M_estimator_double_n_vector(v,3,n,
		IQ_SCALE_RELATIVE,PM_MINSCALE_MAJOR,avg,weight);
	nrm_major = dnrm2(3,avg,1);
	for(i=0;i<3;++i) 
	{
	    /* Needed to avoid random NaN */
	    if(nrm_major<FLT_EPSILON)
	    	pmavg->major[i] = avg[i];
	    else
		pmavg->major[i] = avg[i]/nrm_major;
	}

	/* Error estimates are computed completely differently here from
	that described in Bear and Pavlis (1999).  Rather than use a 
	jackknife on individual angles, here I've chosen to use a simple
	standard deviation measure using weighted residuals.  The residuals,
	however, are computed from total angular separation computed using
	a dot product.  This allows us to avoid wraparound errors that 
	are inevitable with angles.  

	First step is to compute a vector of angle residuals.  */
	for(i=0,ii=0;i<n;++i,ii+=3)
	{
		dotprod = ddot(3,v+ii,1,pmavg->major,1);
		dotprod /= dnrm2(3,v+ii,1);
		workn[i] = acos(dotprod);
	}
	/* weighted mean formula for error */
	for(i=0,sumwt=0.0,sumsq=0.0;i<n;++i)
	{
		sumsq += workn[i]*workn[i]*weight[i]*weight[i];
		sumwt += weight[i];
	}
	ndgf = nint(sumwt) - 3;
	if(ndgf<1)
	{
		elog_notify(0,"pmvector_average:  sum of weights = %lf in major axis average implies degrees of freedom less than 1\nUsing 1 degree of freedom\n",sumwt);
		ndgf = 1;
	}
	pmerr->ndgf_major = ndgf;
	pmerr->dtheta_major =  sqrt(sumsq/((double)ndgf));
	/* We scale the azimuthal error by 1/sin(theta) to get a stable
	error estimate that correctly goes to infinitity when theta -> 0*/
	scoor = unit_vector_to_spherical(pmavg->major);
	pmerr->dphi_major = (pmerr->dtheta_major)/sin(scoor.theta);

	/* We first project the minor axis vectors onto the plane
	perpendicular to the average major axis.  This reduces the
	degrees of freedom in a way that I consider reasonable and
	is in line with with Lorie Bear did */
	for(i=0,ii=0;i<n;++i,ii+=3)
	{
		double minor_scale;
		/* Intentionally ignore error return of null project because
		the only error condition in current code cannot happen 
		with this call.  null_project writes result in the
		last argument, so this step is functionally like the 
		v[ii]=pmv[i].major, etc. loop above, but combines
		the projection operation . */
		null_project(pmavg->major,3,1,pmv[i].minor,v+ii);
		/* We also want to scale the vector by a factor
		that is determinable from rectilinearity to keep
		the axis length consistent to allow a refined
		rectilinearity average below */
		minor_scale = 1.0 - pmv[i].rectilinearity;
		dscal(3,minor_scale,v+ii,1);
	}
	/* This constructs a rotational tranformation to a coordinate
	system where x1 and x2 are in the desired projection plane.
	Actually, the null projection above is redundant, but for now
	the extra work is largely irrelevant and is a good cross check
	for debugging. */
	ray_coordinate_trans(scoor,U);
	for(i=0;i<n;++i)
	{
		for(j=0;j<3;++j) 
		{
			work[j] = ddot(3,v+j+3*i,1,U+j,3);
		}
		dcopy(3,work,1,v+3*i,1);
	}
	/* Note the change from above to a 2-d space now.  The above 
	transformations zero the x3 direction after the transformation */
	M_estimator_double_n_vector(v,2,n,
		IQ_SCALE_RELATIVE,PM_MINSCALE_MINOR,avg,weight);
	avg[2] = 0.0;
	nrm_minor = hypot(avg[0],avg[1]);
	/* This is the inverse tranformation -- u is orthogonal */
	for(j=0;j<3;++j)
		work[j] = ddot(3,U+j*3,1,avg,1);

	/* This is similar to above, but, perhaps incorrectly, the
	degrees of freedom are larger by one because we reduce the
	space to 2d */
	for(i=0,ii=0;i<n;++i,ii+=3)
	{
		dotprod = ddot(2,v+ii,1,avg,1);
		nrmtest = dnrm2(2,v+ii,1);
		if(nrmtest<=0.0)
		{
			elog_notify(0,"pmvector_average:  minor axis estimate %d of %d estimates has 0 projection perpendicular to major\nArtificially set to average\n",
				i,n);
			workn[0] = 0.0;
		}
		else
		{
			dotprod/= nrmtest;
		/* because avg wasn't normalized we have divide by norm */
			dotprod /= nrm_minor;
			workn[i] = acos(dotprod);
		}
	}
	/* We want the final result normalized to a unit vector length */
	for(i=0;i<3;++i) pmavg->minor[i] = work[i]/nrm_minor;

	/* weighted mean formula again */
	for(i=0,sumwt=0.0,sumsq=0.0;i<n;++i)
	{
		sumsq += workn[i]*workn[i]*weight[i]*weight[i];
		sumwt += weight[i];
	}
	ndgf = nint(sumwt) - 2;
	if(ndgf<1)
	{
		elog_notify(0,"pmvector_average:  sum of weights = %lf in minor axis average implies degrees of freedom less than 1\nUsing 1 degree of freedom\n",sumwt);
		ndgf = 1;
	}
	pmerr->ndgf_minor = ndgf;
	pmerr->dtheta_minor =  sqrt(sumsq/((double)ndgf));
	/* We cast the minor axis in spherical coordinates like
	the major axis.  This differ's from Lorie's skew measures, but
	it is simpler to deal with in a database output as it treats
	the two vector in a common way */
	scoor = unit_vector_to_spherical(pmavg->minor);
	pmerr->dphi_minor = (pmerr->dtheta_major)/sin(scoor.theta);

	/* Finally, we deal with rectilinearity.
	We use the contents of v which are the projected
	minor axis values rather than the raw minor
	axes.  This estimator will tend to give slightly
	better rectilinearity using the raw vectors
	because a projection is always <= original */
	for(i=0;i<n;++i)
	{
		double minor_nrm;
		minor_nrm = dnrm2(3,v+i*3,1);
		/* Not needed because the major axis vector
		was previously normalized to unit length 
		major_nrm = dnrm2(3,pmv[i].major,1);
		*/

		workn[i] = 1.0 - minor_nrm;
	}
	stats = MW_calc_statistics_double(workn,n);
	pmavg->rectilinearity = stats.median;
	pmerr->ndgf_rect = n - 1;
	/* Assume a simple normal distribution to convert interquartiles
	to standard deviation */
	pmerr->delta_rect = NORMAL_IQSCALE*((stats.q3_4)-(stats.q1_4));

	free(weight);
	free(workn);
	free(v);
}
コード例 #10
0
ファイル: autlib4.cpp プロジェクト: fbergmann/auto-sbml
/* Subroutine */ int 
flowkm(integer ndim, doublereal **c0, doublereal **c1, integer iid, doublecomplex *ev)
{
    

  /* System generated locals */
  integer rwork_dim1;

  /* Local variables */
  doublereal beta, *svde, *svds, svdu[1], *svdv;


  integer i, j;

  doublereal *v, *x;

  logical infev;

  doublereal const__;

  integer ndimm1;
  doublereal nrmc0x, nrmc1x, *qzalfi, *qzbeta;
  integer svdinf;
  doublereal *qzalfr;
  integer qzierr;
  doublereal *svdwrk, qzz[1], *rwork;

  rwork = (doublereal *)malloc(sizeof(doublereal)*ndim*ndim);
  svde = (doublereal *)malloc(sizeof(doublereal)*ndim);
  svds = (doublereal *)malloc(sizeof(doublereal)*(ndim+1));
  svdv = (doublereal *)malloc(sizeof(doublereal)*ndim*ndim);
  v = (doublereal *)malloc(sizeof(doublereal)*ndim);
  x = (doublereal *)malloc(sizeof(doublereal)*ndim);
  qzalfi = (doublereal *)malloc(sizeof(doublereal)*ndim);
  qzbeta = (doublereal *)malloc(sizeof(doublereal)*ndim);
  qzalfr = (doublereal *)malloc(sizeof(doublereal)*ndim);
  svdwrk = (doublereal *)malloc(sizeof(doublereal)*ndim);

  /*  Subroutine to compute Floquet multipliers via the "deflated circuit */
  /*  pencil" method. This routine is called by the AUTO routine FNSPBV */

  /*  storage for SVD computations */

  /*  compute right singular vectors only */

  /*  storage for generalized eigenvalue computations */

  /*      LOGICAL           QZMATZ */
  /*  don't want to accumulate the transforms --- vectors not needed */

  /*  BLAS routines */


  /*  routines from EISPACK */


  /*  own routines */


  /*  Jim Demmel's svd routine  ([email protected]) */


  /*  builtin F77 functions */

  /* xx   DOUBLE COMPLEX    DCMPLX */

  /*  Make sure that you have enough local storage. */

  /* Parameter adjustments */
  /*--ev;*/
  rwork_dim1 = ndim;

  /* Change sign of P1 so that we get the sign of the multipliers right. */

  for (j = 0; j < ndim; ++j) {
    for (i = 0; i < ndim; ++i) {
      c1[j][i] = -c1[j][i];
    }
  }

  /*  Print the undeflated circuit pencil (C0, C1). */

  if (iid > 4) {
    fprintf(fp9," Undeflated circuit pencil (C0, C1) \n");	

    fprintf(fp9,"   C0 : \n");	

    for (i = 0; i < ndim; ++i) {
      for (j = 0; j < ndim; ++j) {
	fprintf(fp9," %23.16f",c0[j][i]);	
      }
      fprintf(fp9,"\n");	

    }
    fprintf(fp9,"   C1 : \n");	

    for (i = 0; i < ndim; ++i) {
      for (j = 0; j < ndim; ++j) {
	fprintf(fp9," %23.16f",c1[j][i]);
      }
      fprintf(fp9,"\n");	

    }
  }

  /*  PART I: */
  /*  ======= */

  /*  Deflate the Floquet multiplier at +1.0 so that the deflated */
  /*  circuit pencil is not defective at periodic branch turning points. */

  /* The matrix (C0 - C1) should be (nearly) singular.  Find an approximatio
     n*/
  /*  to the right null vector (call it X).  This will be our approximation 
   */
  /*  to the eigenvector corresponding to the fixed multiplier at +1.0. */

  /*  There are many ways to get this approximation.  We could use */
  /*    1) p'(0) = f(p(0)) */
  /*    2) AUTO'86 routine NLVC applied to C0-C1 */
  /*    3) the right singular vector corresponding to the smallest */
  /*       singular value of C0-C1 */

  /*  I've chosen option 3) because it should introduce as little roundoff 
   */
  /* error as possible.  Although it is more expensive, this is insignifican
     t*/
  /* relative to the rest of the AUTO computations. Also, the SVD does give 
     a*/
  /*  version of the Householder matrix which we would have to compute */
  /* anyways.  But note that it gives V = ( X perp | X ) and not (X | Xperp)
     ,*/
  /* which the Householder routine would give.  This will permute the deflat
     ed*/
  /*  circuit pencil, so that the part to be deflated is in the last column,
   */
  /*  not it the first column, as was shown in the paper. */

  for (j = 0; j < ndim; ++j) {
    for (i = 0; i < ndim; ++i) {
      ARRAY2D(rwork, i, j) = c0[j][i] - c1[j][i];
    }
  }
  {
    /* This is here since I don't want to change the calling sequence of the
       BLAS routines. */
    integer tmp = 1;
    doublereal tmp_tol = 1.0E-16;
    ezsvd(rwork, &ndim, &ndim, &ndim, svds, svde, svdu, &tmp, 
	  svdv, &ndim, svdwrk, &tmp, &svdinf, &tmp_tol);
  }
  if (svdinf != 0) {
    fprintf(fp9," NOTE : Warning from subroutine FLOWKM SVD routine returned SVDINF = %4ld        Floquet multiplier calculations may be wrong\n",svdinf);	

  }

  /*  Apply a Householder matrix (call it H1) based on the null vector */
  /*  to (C0, C1) from the right.  H1 = SVDV = ( Xperp | X ), where X */
  /*  is the null vector. */

  {          
    /* This is here since I don't want to change the calling sequence of the
       BLAS routines. */
    doublereal tmp1 = 1.0;
    doublereal tmp0 = 0.0;
    logical tmp_false = FALSE_;

    dgemm("n", "n", &ndim, &ndim, &ndim, &tmp1, *c0, &ndim, svdv, 
	  &ndim, &tmp0, rwork, &ndim, 1L, 1L);
    dgemc(&ndim, &ndim, rwork, &ndim, *c0, &ndim, &tmp_false);
    dgemm("n", "n", &ndim, &ndim, &ndim, &tmp1, *c1, &ndim, svdv, 
	  &ndim, &tmp0, rwork, &ndim, 1L, 1L);
    dgemc(&ndim, &ndim, rwork, &ndim, *c1, &ndim, &tmp_false);
  }
  /*  Apply a Householder matrix (call it H2) based on */
  /*  (C0*X/||C0*X|| + C1*X/||C1*X||) / 2 */
  /*  to (C0*H1, C1*H1) from the left. */

  {
    /* This is here since I don't want to change the calling sequence of the
       BLAS routines. */
    integer tmp = 1;
    nrmc0x = dnrm2(&ndim, &c0[ndim - 1][0], &tmp);
    nrmc1x = dnrm2(&ndim, &c1[ndim - 1][0], &tmp);
  }
  for (i = 0; i < ndim; ++i) {
    x[i] = (c0[ndim - 1][i] / nrmc0x + c1[ndim - 1][i] / nrmc1x) / 2.;
  }
  dhhpr(1, ndim, ndim, x, 1, &beta, v);
  dhhap(1, ndim, ndim, ndim, &beta, v, LEFT, c0, ndim);
  dhhap(1, ndim, ndim, ndim, &beta, v, LEFT, c1, ndim);

  /* Rescale so that (H2^T)*C0*(H1)(1,NDIM) ~= (H2^T)*C1*(H1)(1,NDIM) ~= 1.0
   */

  /* Computing MAX */
  const__ = max(fabs(c0[ndim - 1][0]),fabs(c1[ndim - 1][0]));
  for (j = 0; j < ndim; ++j) {
    for (i = 0; i < ndim; ++i) {
      c0[j][i] /= const__;
      c1[j][i] /= const__;
    }
  }

  /*  Finished the deflation process! Print the deflated circuit pencil. */

  if (iid > 4) {
    fprintf(fp9," Deflated cicuit pencil (H2^T)*(C0, C1)*(H1) \n");	

    fprintf(fp9,"   (H2^T)*C0*(H1) : \n");	

    for (i = 0; i < ndim; ++i) {
      for (j = 0; j < ndim; ++j) {
	fprintf(fp9," %23.16f",c0[j][i]);
      }
      fprintf(fp9,"\n");	
    }
    fprintf(fp9,"   (H2^T)*C1*(H1) : \n");	

    for (i = 0; i < ndim; ++i) {
      for (j = 0; j < ndim; ++j) {
	fprintf(fp9," %23.16f",c1[j][i]);
      }
      fprintf(fp9,"\n");	

    }
  }

  /*  At this point we have */

  /*     (C0Bar, C1Bar) */
  /* ::= (H2^T)*(C0, C1)*(H1). */

  /*     (( B0^T     | Beta0  )  ( B1^T     | Beta1  ))  1 */
  /*   = (( ----------------- ), ( ----------------- )) */
  /*     (( C0BarDef | Delta0 )  ( C1BarDef | Delta1 )) NDIM-1 */

  /*         NDIM-1      1          NDIM-1      1 */

  /*  and approximations to the Floquet multipliers are */
  /*  (Beta0/Beta1) union the eigenvalues of the deflated pencil */
  /*  (C0BarDef, C1BarDef). */

  /*  PART II: */
  /*  ======== */

  /*  Compute the eigenvalues of the deflated circuit pencil */
  /*  (C0BarDef, C1BarDef) */
  /*  by using the QZ routines from EISPACK. */

  ndimm1 = ndim - 1;

  /*  reduce the generalized eigenvalue problem to a simpler form */
  /*   (C0BarDef,C1BarDef) = (upper hessenberg, upper triangular) */


  qzhes(ndim, ndimm1, &c0[0][1], &c1[0][1], FALSE_ , qzz);

  /*  now reduce to an even simpler form */
  /*   (C0BarDef,C1BarDef) = (quasi-upper triangular, upper triangular) */

  qzit(ndim, ndimm1, &c0[0][1], &c1[0][1], QZEPS1, FALSE_ , qzz, &qzierr);
  if (qzierr != 0) {
    fprintf(fp9," NOTE : Warning from subroutine FLOWKM : QZ routine returned QZIERR = %4ld        Floquet multiplier calculations may be wrong \n",qzierr);	

  }

  /*  compute the generalized eigenvalues */

  qzval(ndim, ndimm1, &c0[0][1], &c1[0][1], qzalfr, qzalfi, 
	qzbeta, FALSE_, qzz);

  /*  Pack the eigenvalues into complex form. */
  ev[0].r = c0[ndim - 1][0] / c1[ndim - 1][0];
  ev[0].i = 0.;
  infev = FALSE_;
  for (j = 0; j < ndimm1; ++j) {
    if (qzbeta[j] != 0.) {
      ev[j + 1].r = qzalfr[j] / qzbeta[j];
      ev[j + 1].i = qzalfi[j] / qzbeta[j];
    } else {
      ev[j + 1].r = 1e30, ev[j + 1].i = 1e30;
      infev = TRUE_;
    }
  }
  if (infev) {
    fprintf(fp9," NOTE : Warning from subroutine FLOWKM : Infinite Floquet multiplier represented by CMPLX( 1.0D+30, 1.0D+30 )\n");	

  }

  free(svde); 
  free(svds); 
  free(svdv); 
  free(v); 
  free(x); 
  free(qzalfi); 
  free(qzbeta); 
  free(qzalfr); 
  free(svdwrk);
  free(rwork);

  return 0;

} /* flowkm_ */
コード例 #11
0
ファイル: autlib4.cpp プロジェクト: fbergmann/auto-sbml
/* Subroutine */ int 
dhhpr(integer k, integer j, integer n, doublereal *x, integer incx, doublereal *beta, doublereal *v)
{

  /* Local variables */
  integer iend, jmkp1;

  integer i, l;
  doublereal m, alpha;

  integer istart;

    


  /*     IMPLICIT UNDEFINED (A-Z,a-z) */
  /*     .. Scalar Arguments .. */
  /*     .. Array Arguments .. */
  /*     .. */

  /*  Purpose */
  /*  ======= */

  /*  DHHPR  computes a Householder Plane Rotation (G&vL Alg. 3.3-1) */
  /*  defined by v and beta. */
  /*  (I - beta v vt) * x is such that x_i = 0 for i=k+1 to j. */

  /*  Parameters */
  /*  ========== */

  /*  K      - INTEGER. */
  /*           On entry, K specifies that the K+1st entry of X */
  /*           be the first to be zeroed. */
  /*           K must be at least one. */
  /*           Unchanged on exit. */

  /*  J      - INTEGER. */
  /*           On entry, J specifies the last entry of X to be zeroed. */
  /*           J must be >= K and <= N. */
  /*           Unchanged on exit. */

  /*  N      - INTEGER. */
  /*           On entry, N specifies the (logical) length of X. */
  /*           Unchanged on exit. */

  /*  X      - DOUBLE PRECISION array of DIMENSION at least */
  /*           ( 1 + ( N - 1 )*abs( INCX ) ). */
  /*           On entry, X specifies the vector to be (partially) zeroed. */
  /*           Unchanged on exit. */

  /*  INCX   - INTEGER. */
  /*           On entry, INCX specifies the increment for the elements of */
  /*           X. INCX must be > zero. If X represents part of a matrix, */
  /*           then use INCX = 1 if a column vector is being zeroed and */
  /*           INCX = NDIM if a row vector is being zeroed. */
  /*           Unchanged on exit. */

  /*  BETA   - DOUBLE PRECISION. */
  /*           BETA specifies the scalar beta. (see pg. 40 of G and v.L.) */

  /*  V      - DOUBLE PRECISION array of DIMENSION at least n. */
  /*           Is updated to be the appropriate Householder vector for */
  /*           the given problem. (Note: space for the implicit zeroes is */
  /*          assumed to be present. Will save on time for index translation
	      .)*/

  /*  -- Written by Tom Fairgrieve, */
  /*                Department of Computer Science, */
  /*                University of Toronto, */
  /*                Toronto, Ontario CANADA  M5S 1A4 */

  /*     .. Local Scalars .. */
  /*     .. External Functions from BLAS .. */
  /*     .. External Subroutines from BLAS .. */
  /*     .. Intrinsic Functions .. */

  /*     .. Executable Statements .. */

  /*  Test the input parameters. */

  /* Parameter adjustments */
  /*--v;*/
  /*--x;*/

    
  if (k < 1 || k > j) {
    fprintf(fp9,"Domain error for K in DHHPR\n");	
	throw "Domain error for K in DHHPR\n";
    //exit(0);
  }
  if (j > n) {
    fprintf(fp9,"Domain error for J in DHHPR\n");	
	throw "Domain error for J in DHHPR\n";
    //exit(0);
  }
  if (incx < 1) {
    fprintf(fp9,"Domain error for INCX in DHHPR\n");	
	throw "Domain error for INCX in DHHPR\n";
    //exit(0);
  }

  /*  Number of potential non-zero elements in V. */

  jmkp1 = j - k + 1;

  /*  Find M := max{ |x_k|, ... , |x_j| } */

  m = fabs(x[-1 + idamax(&jmkp1, &x[-1 + k], &incx)]);

  /*  alpha := 0 */
  /*  For i = k to j */
  /*      v_i = x_i / m */
  /*      alpha := alpha + v_i^2    (i.e. alpha = vtv) */
  /*  End For */
  /*  alpha :=  sqrt( alpha ) */

  /*  Copy X(K)/M, ... , X(J)/M to V(K), ... , V(J) */

  if (incx == 1) {
    for (i = k - 1; i < j; ++i) {
      v[i] = x[i] / m;
    }
  } else {
    iend = jmkp1 * incx;
    istart = (k - 1) * incx + 1;
    l = k;
    for (i = istart; incx < 0 ? i >= iend : i <= iend; i += incx) 
      {
	v[-1 + l] = x[-1 + i] / m;
	++l;
      }
  }

  /*  Compute alpha */
  {
    /* This is here since I don't want to change the calling sequence of the
       BLAS routines. */
    integer tmp = 1;
    alpha = dnrm2(&jmkp1, &v[-1 + k], &tmp);
  }
  /*  beta := 1/(alpha(alpha + |V_k|)) */

  *beta = 1. / (alpha * (alpha + fabs(v[-1 + k])));

  /*  v_k := v_k + sign(v_k)*alpha */

  v[-1 + k] += d_sign(1.0, v[-1 + k]) * alpha;

  /*  Done ! */

  return 0;

  /*     End of DHHPR. */

} /* dhhpr_ */
コード例 #12
0
int cg_mkl_double(MKL_INT n, 
                  double a[], 
                  MKL_INT ia[],
                  MKL_INT ja[],
                  double solution[],
                  double rhs[],
                  MKL_INT max_iter,
                  double r_tol,
                  double a_tol)
{
	MKL_INT rci_request, itercount, i;

    // parameter arrays for solver
	MKL_INT ipar[128];
    double  dpar[128];

	double euclidean_norm;
    
    // for SpMV
    char tr = 'n';

    double * tmp;
    double * residual;

    tmp      = (double *) malloc(4 * n * sizeof(double));	
    residual = (double *) malloc(n * sizeof(double));

	// initialize the solver
	dcg_init(&n,solution,rhs,&rci_request,ipar,dpar,tmp);

	if (rci_request!=0) goto failure;
    
	ipar[1]=6;                       // output all warnings and errors 
	ipar[4]=max_iter;                // maximum number of iterations
	ipar[7]=1;                       // stop iteration at maximum iterations
	ipar[8]=1;                       // residual stopping test
	ipar[9]=0;                       // request for the user defined stopping test
	dpar[0]=r_tol * r_tol;           // relative residual tolerance
	dpar[1]=a_tol * a_tol;           // absolute residual tolerance

	/*---------------------------------------------------------------------------*/
	/* Check the correctness and consistency of the newly set parameters         */
	/*---------------------------------------------------------------------------*/
	dcg_check(&n,solution,rhs,&rci_request,ipar,dpar,tmp);
	if (rci_request!=0) goto failure;

	/*---------------------------------------------------------------------------*/
	/* Compute the solution by RCI (P)CG solver without preconditioning          */
	/* Reverse Communications starts here                                        */
	/*---------------------------------------------------------------------------*/
rci: dcg(&n,solution,rhs,&rci_request,ipar,dpar,tmp);
    //printf("Residual norm is %e\n", sqrt(dpar[4]));
	/*---------------------------------------------------------------------------*/
	/* If rci_request=0, then the solution was found with the required precision */
	/*---------------------------------------------------------------------------*/
	if (rci_request==0) goto getsln;
	/*---------------------------------------------------------------------------*/
	/* If rci_request=1, then compute the vector A*tmp[0]                        */
	/* and put the result in vector tmp[n]                                       */
	/*---------------------------------------------------------------------------*/
	if (rci_request==1)
	{
        mkl_cspblas_dcsrgemv(&tr, &n, a, ia, ja, tmp, &tmp[n]);
		goto rci;
	}
	/*---------------------------------------------------------------------------*/
	/* If rci_request=anything else, then dcg subroutine failed                  */
	/* to compute the solution vector: solution[n]                               */
	/*---------------------------------------------------------------------------*/
	goto failure;
	/*---------------------------------------------------------------------------*/
	/* Reverse Communication ends here                                           */
	/* Get the current iteration number into itercount                           */
	/*---------------------------------------------------------------------------*/
getsln: dcg_get(&n,solution,rhs,&rci_request,ipar,dpar,tmp,&itercount);

    mkl_cspblas_dcsrgemv(&tr, &n, a, ia, ja, solution, residual);
	for(i=0;i<n;i++) residual[i] -= rhs[i];
    i=1; euclidean_norm=dnrm2(&n,residual,&i);
	
    printf("\nMKL CG reached %e residual in %d iterations\n",euclidean_norm, itercount);

    // release memory
	MKL_FreeBuffers();
    free(tmp);
    free(residual);

    if (itercount <= max_iter && (euclidean_norm * euclidean_norm) < (dpar[0] * dpar[4] + dpar[5]))
    {
//        printf("This example has successfully PASSED through all steps of computation!");
//        printf("\n");
//        printf("(Residual norm is %e)\n", euclidean_norm);
        return 0;
    }
    else
    {
//        printf("This example may have FAILED as either the number of iterations exceeds");
//        printf("\nthe maximum number of iterations %d, or the ", max_iter);
//        printf("computed solution\ndiffers has not sufficiently converged.");
//        printf("(Residual norm is %e), or both.\n", euclidean_norm);
        return 1;
    }
	/*-------------------------------------------------------------------------*/
	/* Release internal MKL memory that might be used for computations         */
	/* NOTE: It is important to call the routine below to avoid memory leaks   */
	/* unless you disable MKL Memory Manager                                   */
	/*-------------------------------------------------------------------------*/
failure: printf("This example FAILED as the solver has returned the ERROR ");
				 printf("code %d", rci_request);
         MKL_FreeBuffers();
         return 1;
}
コード例 #13
0
ファイル: cg_mrhs_c.c プロジェクト: whguan/Hydrodynamics
/*---------------------------------------------------------------------------*/
int CGFMMmrhs(MKL_INT *solution, double *ShellSphs, double *rhs, double *nRhs, MKL_INT n)
{
  /*---------------------------------------------------------------------------*/
  /* Define arrays for the upper triangle of the coefficient matrix and rhs vector */
  /* Compressed sparse row storage is used for sparse representation           */
  /*---------------------------------------------------------------------------*/
  MKL_INT rci_request, expected_itercount = 20, i, j;
  MKL_INT itercount[nRhs];
  /* Fill all arrays containing matrix data. */
  /*---------------------------------------------------------------------------*/
  /* Allocate storage for the solver ?par and temporary storage tmp            */
  /*---------------------------------------------------------------------------*/
  MKL_INT length = 128, method = 1;
  /*---------------------------------------------------------------------------*/
  /* Some additional variables to use with the RCI (P)CG solver                */
  /*---------------------------------------------------------------------------*/
  MKL_INT ipar[128 + 2 * nRhs];
  double euclidean_norm, dpar[128 + 2 * nRhs]; 
  double *tmp;
  tmp = (double*)calloc(n * (3 + nRhs),sizeof(double));
  double eone = -1.E0;
  MKL_INT ione = 1;

  /*---------------------------------------------------------------------------*/
  /* Initialize the initial guess                                              */
  /*---------------------------------------------------------------------------*/
  for (i = 0; i < n*nRhs; i++)
    solution[i] = 1.E0;
  /*---------------------------------------------------------------------------*/
  /* Initialize the solver                                                     */
  /*---------------------------------------------------------------------------*/
  for (i = 0; i < (length + 2 * nRhs); i++)
    ipar[i] = 0;
  for (i = 0; i < (length + 2 * nRhs); i++)
    dpar[i] = 0.E0;

  dcgmrhs_init (&n, solution, &nRhs, rhs, &method, &rci_request, ipar, dpar, tmp);
  if (rci_request != 0)
    goto failure;
  /*---------------------------------------------------------------------------*/
  /* Set the desired parameters:                                               */
  /* LOGICAL parameters:                                                       */
  /* do residual stopping test                                                 */
  /* do not request for the user defined stopping test                         */
  /* DOUBLE parameters                                                         */
  /* set the relative tolerance to 1.0D-5 instead of default value 1.0D-6      */
  /*---------------------------------------------------------------------------*/
  ipar[8] = 1;
  ipar[9] = 0;
  dpar[0] = 1.E-5;
  /*---------------------------------------------------------------------------*/
  /* Compute the solution by RCI (P)CG solver without preconditioning          */
  /* Reverse Communications starts here                                        */
  /*---------------------------------------------------------------------------*/

rci:dcgmrhs (&n, solution, &nRhs, rhs, &rci_request, ipar, dpar, tmp);
  /*---------------------------------------------------------------------------*/
  /* If rci_request=0, then the solution was found with the required precision */
  /*---------------------------------------------------------------------------*/
  if (rci_request == 0)
    goto getsln;
  /*---------------------------------------------------------------------------*/
  /* If rci_request=1, then compute the vector A*tmp[0]                        */
  /* and put the result in vector tmp[n]                                       */
  /*---------------------------------------------------------------------------*/
  if (rci_request == 1)
    {
      //mkl_dcsrsymv (&tr, &n, a, ia, ja, tmp, &tmp[n]);
	    //debug
       RPY(n, ShellSphs,tmp); // SPMV by 4 calls of  FMM
       goto rci;
    }
  /*---------------------------------------------------------------------------*/
  /* If rci_request=anything else, then dcg subroutine failed                  */
  /* to compute the solution vector: solution[n]                               */
  /*---------------------------------------------------------------------------*/
  goto failure;
  /*---------------------------------------------------------------------------*/
  /* Reverse Communication ends here                                           */
  /* Get the current iteration number into itercount                           */
  /*---------------------------------------------------------------------------*/

getsln:dcgmrhs_get (&n, solution, &nRhs, rhs, &rci_request, ipar, dpar, tmp, itercount);

  /*---------------------------------------------------------------------------*/
  /* Print solution vector: solution[n] and number of iterations: itercount    */
  /*---------------------------------------------------------------------------*/
  printf ("The system has been solved\n");
  printf ("The following solution obtained\n");
  for (i = 0; i < n / 2; i++)
    printf ("%6.3f  ", solution[i]);
  printf ("\n");
  for (i = n / 2; i < n; i++)
    printf ("%6.3f  ", solution[i]);
  printf ("\n");
  for (i = 0; i < n / 2; i++)
    printf ("%6.3f  ", solution[n + i]);
  printf ("\n");
  for (i = n / 2; i < n; i++)
    printf ("%6.3f  ", solution[n + i]);

  printf ("\nExpected solution is\n");
  for (i = 0; i < n / 2; i++)
    {
      printf ("%6.3f  ", expected_sol[i]);
      expected_sol[i] -= solution[i];
    }
  printf ("\n");
  for (i = n / 2; i < n; i++)
    {
      printf ("%6.3f  ", expected_sol[i]);
      expected_sol[i] -= solution[i];
    }
  printf ("\n");
  for (i = 0; i < n / 2; i++)
    {
      printf ("%6.3f  ", expected_sol[n + i]);
      expected_sol[n + i] -= solution[n + i];
    }
  printf ("\n");
  for (i = n / 2; i < n; i++)
    {
      printf ("%6.3f  ", expected_sol[n + i]);
      expected_sol[n + i] -= solution[n + i];
    }
  printf ("\n");

  i = 1;
  j = n * nRhs;
  euclidean_norm = dnrm2 (&j, expected_sol, &i);

  /*-------------------------------------------------------------------------*/
  /* Release internal MKL memory that might be used for computations         */
  /* NOTE: It is important to call the routine below to avoid memory leaks   */
  /* unless you disable MKL Memory Manager                                   */
  /*-------------------------------------------------------------------------*/
  MKL_Free_Buffers ();

  if (euclidean_norm < 1.0e-12)
    {
      printf ("This example has successfully PASSED through all steps of computation!\n");
      return 0;
    }
  else
    {
      printf ("This example may have FAILED as the computed solution differs\n");
      printf ("much from the expected solution (Euclidean norm is %e).\n", euclidean_norm);
      return 1;
    }
  /*-------------------------------------------------------------------------*/
  /* Release internal MKL memory that might be used for computations         */
  /* NOTE: It is important to call the routine below to avoid memory leaks   */
  /* unless you disable MKL Memory Manager                                   */
  /*-------------------------------------------------------------------------*/
failure:printf ("This example FAILED as the solver has returned the ERROR code %d", rci_request);
  MKL_Free_Buffers ();
  return 1;
}
コード例 #14
0
NLuint nlSolve_GMRES() {

    NLdouble* b        = nlCurrentContext->b ;
    NLdouble* x        = nlCurrentContext->x ;
    NLdouble  eps      = nlCurrentContext->threshold ;
    NLint    max_iter  = nlCurrentContext->max_iterations ;
    NLint    n         = nlCurrentContext->n ;
    NLint    m         = nlCurrentContext->inner_iterations ;

    typedef NLdouble *NLdoubleP;
    NLdouble *V   = NL_NEW_ARRAY(NLdouble, n*(m+1)   ) ;
    NLdouble *U   = NL_NEW_ARRAY(NLdouble, m*(m+1)/2 ) ;
    NLdouble *r   = NL_NEW_ARRAY(NLdouble, n         ) ;
    NLdouble *y   = NL_NEW_ARRAY(NLdouble, m+1       ) ;
    NLdouble *c   = NL_NEW_ARRAY(NLdouble, m         ) ;
    NLdouble *s   = NL_NEW_ARRAY(NLdouble, m         ) ;
    NLdouble **v  = NL_NEW_ARRAY(NLdoubleP, m+1      ) ;
    NLdouble * Ax = NL_NEW_ARRAY(NLdouble,nlCurrentContext->n);
    NLdouble accu =0.0;
    NLint i, j, io, uij, u0j ; 
    NLint its = -1 ;
    NLdouble beta, h, rd, dd, nrm2b ;

    for ( i=0; i<=m; ++i ){
        v[i]=V+i*n ;
    }
    
    nrm2b=dnrm2(n,b,1);
    io=0;
    do  { /* outer loop */
        ++io;
        nlCurrentContext->matrix_vector_prod(x,r);
        daxpy(n,-1.,b,1,r,1);
        beta=dnrm2(n,r,1);
        dcopy(n,r,1,v[0],1);
        dscal(n,1./beta,v[0],1);
        
        y[0]=beta;
        j=0;
        uij=0;
        do { /* inner loop: j=0,...,m-1 */
            u0j=uij;
            nlCurrentContext->matrix_vector_prod(v[j],v[j+1]);
            dgemv(
                Transpose,n,j+1,1.,V,n,v[j+1],1,0.,U+u0j,1
            );
            dgemv(
                NoTranspose,n,j+1,-1.,V,n,U+u0j,1,1.,v[j+1],1
            );
            h=dnrm2(n,v[j+1],1);
            dscal(n,1./h,v[j+1],1);
            for (i=0; i<j; ++i ) { /* rotiere neue Spalte */
                double tmp = c[i]*U[uij]-s[i]*U[uij+1];
                U[uij+1]   = s[i]*U[uij]+c[i]*U[uij+1];
                U[uij]     = tmp;
                ++uij;
            }
            { /* berechne neue Rotation */
                rd     = U[uij];
                dd     = sqrt(rd*rd+h*h);
                c[j]   = rd/dd;
                s[j]   = -h/dd;
                U[uij] = dd;
                ++uij;
            }
            { /* rotiere rechte Seite y (vorher: y[j+1]=0) */
                y[j+1] = s[j]*y[j];
                y[j]   = c[j]*y[j];
            }
            ++j;
        } while ( 
            j<m && fabs(y[j])>=eps*nrm2b 
        ) ;
        { /* minimiere bzgl Y */
            dtpsv(
                UpperTriangle,
                NoTranspose,
                NotUnitTriangular,
                j,U,y,1
            );
            /* correct X */
            dgemv(NoTranspose,n,j,-1.,V,n,y,1,1.,x,1);
        }
    } while ( fabs(y[j])>=eps*nrm2b && (m*(io-1)+j) < max_iter);
    
    /* Count the inner iterations */
    its = m*(io-1)+j;

    nlCurrentContext->matrix_vector_prod(x,Ax);
    for(i = 0 ; i < n ; ++i)
        accu+=(Ax[i]-b[i])*(Ax[i]-b[i]);
    printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/nrm2b);
    NL_DELETE_ARRAY(Ax);
    NL_DELETE_ARRAY(V) ;
    NL_DELETE_ARRAY(U) ;
    NL_DELETE_ARRAY(r) ;
    NL_DELETE_ARRAY(y) ;
    NL_DELETE_ARRAY(c) ;
    NL_DELETE_ARRAY(s) ;
    NL_DELETE_ARRAY(v) ;
    
    return its;
}
コード例 #15
0
ファイル: lbfgs.c プロジェクト: OpenCMISS-Dependencies/optpp
void lbfgs
   (int          n,              /* I  num unknowns = 3 * num atoms */
    real         stop_tol,       /* I  tol for ||g||/sqrt(n)        */
    int          itmax,          /* I  max num iterations allowed   */
    int          itmax_line,
    int*         iter,           /* IO iters required to find min   */
    real*        fret,           /*  O minimum value                */
    int          iprint,
    int          last_call)      /*    not used                     */
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*   Called by exec_minimization in "options.c".
*   This routine is modelled on mm_nlcg in "nlcg.c", by JC Meza.
*   It finds the minimum of the unconstrained molecular potential
*   energy using trust region methods and a limited memory BFGS
*   approximation of the Hessian.  Each trust region subproblem is
*   solved by Powell's dogleg method.
*
*   The L-BFGS structures are currently FORTRAN subroutines that
*   require memory allocation of CMPS and CMPY for work space.
*   These variables must be passed to the FORTRAN subroutines and
*   not altered anywhere else.
*
*   The outline of the algorithm is as follows:
*      Allocate memory, initialize parameters
*      Compute the gradient g_vec
*      LOOP
*        IF ||g_vec|| < tol THEN RETURN
*        Update L-BFGS matrices B and H
*        Compute the dogleg step d_vec
*        Compute the predicted reduction of the quadratic model
*        Compute the actual potential energy reduction
*        IF ared/pred > eta
*          THEN x_vec = x_vec + d_vec
*               TR_size >= TR_size
*               Compute the gradient g_vec at the new point
*          ELSE TR_size < ||d_vec||
*      CONTINUE
*********************************************************************/
{
  real         *x_vec, *new_x_vec, *g_vec, *d_vec, *y_vec;
  int          i;
  int          iter_num, NUPDT;
  real         obj_val, new_obj_val;
  real         dd1, dd2;
  real         eta, pred, ared;
  real         TR_size;
  real         gnorm, xnorm, dnorm;
  real         *CMPS, *CMPY, *tmp_vec;


/*-- Open the status file for saving output. */
  bfgs_fp = fopen (status_file, "w");
  if (bfgs_fp == NULL) {
    fprintf (stderr, "lbfgs: cannot open %s\n", status_file);
    printf ("*** lbfgs: cannot open %s\n", status_file);
    exit (1);
  }

/*--------------------------------------------------------------------
 *   Allocate memory and set up files.
 *-------------------------------------------------------------------*/

  fprintf (stderr, "Allocate space in lbfgs n=%d\n",n);
  x_vec     = (real *) calloc ((n+1) , sizeof(real));
  new_x_vec = (real *) calloc ((n+1) , sizeof(real));
  g_vec     = (real *) calloc ((n+1) , sizeof(real));
  d_vec     = (real *) calloc ((n+1) , sizeof(real));
  y_vec     = (real *) calloc ((n+1) , sizeof(real));
  CMPS      = (real *) calloc ((n*T_LBFGS) , sizeof(real));
  CMPY      = (real *) calloc ((n*T_LBFGS) , sizeof(real));
  tmp_vec   = (real *) calloc ((n+1) , sizeof(real));

/*--------------------------------------------------------------------
 *   Evaluate the objective and its gradient at the start point.
 *-------------------------------------------------------------------*/

/*  obj_val = potential (str, coor, force);
  force_to_grad0 (str, force, g_vec);
 */

/*--------------------------------------------------------------------
 *   Initialize the trust region algorithm parameters.
 *-------------------------------------------------------------------*/

  eta = 0.3;                   /*-- step acceptance threshold */

  TR_size = 1.0;

  iter_num = 0;                /*-- number inner iterations   */
  NUPDT    = 1;                /*-- number L-BFGS B updates   */

  gnorm = dnrm2 (n, g_vec, 1);

  fprintf(bfgs_fp, "\n\t\t Steepest descent with Trust Regions\n");
  fprintf(bfgs_fp, "     Iter     f(x)       ||grad||/n      Delta      ||step||       ared         pred\n");
  fprintf(bfgs_fp,"    %5d %12.4e %12.4e %12.4e\n",
          iter_num, obj_val, gnorm/sqrt(n), TR_size);

/*--------------------------------------------------------------------
 *   Begin the main loop.
 *-------------------------------------------------------------------*/

  while (iter_num < itmax) {

    iter_num++;

    dd1 = g_vec[0];
    for (i=1; i<n; i++)
      if (g_vec[i] > dd1)  dd1 = g_vec[i];
    if (dd1 <= stop_tol) {
/*-- Exit main loop IF ||g||_2 sufficiently small. */
/*    if (gnorm <= (stop_tol * sqrt(n))) {*/
      xnorm = dnrm2 (n, x_vec, 1);
      fprintf (stderr,
               " step_size: %5d   energy: %12.4f  |Grad|: %10.4f |X|: %10.4f\n",
               iter_num, obj_val, gnorm/sqrt((real)(n)),
               xnorm/sqrt((real)(n)));
      break;
    }

/*-- Solve the dogleg subproblem. */
    Dogleg (n, g_vec, gnorm, TR_size, NUPDT, T_LBFGS, CMPS, CMPY,
            tmp_vec, d_vec, &dnorm);

/*-- Compute pred = - g'd - 1/2 d'Bd. */

    dd1 = ddot (n, g_vec, 1, d_vec, 1);
    i = T_LBFGS;
    multbv_ (&n, &i, d_vec, tmp_vec, &NUPDT, CMPS, CMPY);
    dd2 = ddot (n, d_vec, 1, tmp_vec, 1);

    pred = -dd1 - (0.5 * dd2);

    if (pred < sqrt(MMIN)) {
      printf ("*** Predicted reduction not positive.  <lbfgs>\n");
      printf ("    pred = %15.6e\n", pred);
      exit (1);
    }

/*-- Compute ared by evaluating the objective at the trial point. */

    dcopy (n, x_vec, 1, new_x_vec, 1);
    daxpy (n, 1.0, d_vec, 1, new_x_vec, 1);

/*  call evalf
    p_to_coor0 (str, new_x_vec, coor);
    new_obj_val = potential (str, coor, force);
*/
    ared = obj_val - new_obj_val;

/*-- Decide whether to take the step.
 *-- If yes, then increase TR_size, compute the gradient, and
 *-- update the L-BFGS approximations.
 *-- If no, then decrease TR_size to a fraction of ||d||_2. */

    if ((ared / pred) >= eta) {

/*-- Increase the trust region size. */
      if (ared / pred >= 0.9) {
        dd1 = 10.0 * dnorm;
        if (dd1 > TR_size) TR_size = dd1;
      } else {
        dd1 = 2.0 * dnorm;
        if (dd1 > TR_size) TR_size = dd1;
      }
      if (TR_size > 1.0e3)  TR_size = 1.0e3;

/*-- Get the gradient from the previously calculated force vector.
 *-- Set y_vec = new gradient - old gradient. */
      dcopy (n, g_vec, 1, y_vec, 1);
      dscal (n, -1.0, y_vec, 1);
/* get grad
      force_to_grad0 (str, force, g_vec);
 */
      daxpy (n, 1.0, g_vec, 1, y_vec, 1);

/*-- Update the L-BFGS and inverse L-BFGS approximations. */
      Update_lbfgs (n, T_LBFGS, d_vec, y_vec, tmp_vec, &NUPDT, CMPS, CMPY);

      dcopy (n, new_x_vec, 1, x_vec, 1);

      gnorm = dnrm2 (n, g_vec, 1);
      obj_val = new_obj_val;

/*      p_to_coor0 (str, x_vec, coor); */
      fprintf(bfgs_fp,"    %5d %12.4e %12.4e %12.4e %12.4e %12.3e %12.3e\n",
              iter_num, obj_val, gnorm/sqrt(n), TR_size, dnorm, ared, pred);

    } else {

/*-- Decrease the trust region size by linear interpolation. */
      dd1 = (1.0 - eta) / (1.0 - (ared / pred));
      if (dd1 < 0.1)  dd1 = 0.1;
      if (dd1 > 0.5)  dd1 = 0.5;
      TR_size = dd1 * dnorm;

      fprintf(bfgs_fp,"rej %5d %12.4e %12.4e %12.4e %12.4e %12.3e %12.3e\n",
              iter_num, obj_val, gnorm/sqrt(n), TR_size, dnorm, ared, pred);

      if (TR_size < (100.0 * MCHEPS)) {
        printf ("*** Trust region too small to continue.  <lbfgs>\n");
        fprintf (bfgs_fp, "*** Trust region too small to continue.\n");
        fflush (bfgs_fp);
        exit (1);
      }
    }

  }  /***  end of while(iter_num < itmax)  ***/

/*--------------------------------------------------------------------
 *   Clean up and exit.
 *-------------------------------------------------------------------*/

  *iter = iter_num;
  *fret = obj_val;

  free (x_vec);
  free (new_x_vec);
  free (g_vec);
  free (d_vec);

  return;
}
コード例 #16
0
void dqrdc(double a[], int lda, int n, int p, double qraux[], int jpvt[],
           double work[], int job)

/******************************************************************************/
/*
  Purpose:

    DQRDC computes the QR factorization of a real rectangular matrix.

  Discussion:

    DQRDC uses Householder transformations.

    Column pivoting based on the 2-norms of the reduced columns may be
    performed at the user's option.

  Licensing:

    This code is distributed under the GNU LGPL license.

  Modified:

    07 June 2005

  Author:

    C version by John Burkardt.

  Reference:

    Jack Dongarra, Cleve Moler, Jim Bunch and Pete Stewart,
    LINPACK User's Guide,
    SIAM, (Society for Industrial and Applied Mathematics),
    3600 University City Science Center,
    Philadelphia, PA, 19104-2688.
    ISBN 0-89871-172-X

  Parameters:

    Input/output, double A(LDA,P).  On input, the N by P matrix
    whose decomposition is to be computed.  On output, A contains in
    its upper triangle the upper triangular matrix R of the QR
    factorization.  Below its diagonal A contains information from
    which the orthogonal part of the decomposition can be recovered.
    Note that if pivoting has been requested, the decomposition is not that
    of the original matrix A but that of A with its columns permuted
    as described by JPVT.

    Input, int LDA, the leading dimension of the array A.  LDA must
    be at least N.

    Input, int N, the number of rows of the matrix A.

    Input, int P, the number of columns of the matrix A.

    Output, double QRAUX[P], contains further information required
    to recover the orthogonal part of the decomposition.

    Input/output, integer JPVT[P].  On input, JPVT contains integers that
    control the selection of the pivot columns.  The K-th column A(*,K) of A
    is placed in one of three classes according to the value of JPVT(K).
      > 0, then A(K) is an initial column.
      = 0, then A(K) is a free column.
      < 0, then A(K) is a final column.
    Before the decomposition is computed, initial columns are moved to
    the beginning of the array A and final columns to the end.  Both
    initial and final columns are frozen in place during the computation
    and only free columns are moved.  At the K-th stage of the
    reduction, if A(*,K) is occupied by a free column it is interchanged
    with the free column of largest reduced norm.  JPVT is not referenced
    if JOB == 0.  On output, JPVT(K) contains the index of the column of the
    original matrix that has been interchanged into the K-th column, if
    pivoting was requested.

    Workspace, double WORK[P].  WORK is not referenced if JOB == 0.

    Input, int JOB, initiates column pivoting.
    0, no pivoting is done.
    nonzero, pivoting is done.
*/
{
  int jp;
  int j;
  int lup;
  int maxj;
  double maxnrm, nrmxl, t, tt;

  int pl = 1, pu = 0;
  /*
    If pivoting is requested, rearrange the columns.
  */
  if (job != 0) {
    for (j = 1; j <= p; j++) {
      int swapj = (0 < jpvt[j - 1]);
      jpvt[j - 1] = (jpvt[j - 1] < 0) ? -j : j;
      if (swapj) {
        if (j != pl)
          dswap(n, a + 0 + (pl - 1)*lda, 1, a + 0 + (j - 1), 1);
        jpvt[j - 1] = jpvt[pl - 1];
        jpvt[pl - 1] = j;
        pl++;
      }
    }
    pu = p;
    for (j = p; 1 <= j; j--) {
      if (jpvt[j - 1] < 0) {
        jpvt[j - 1] = -jpvt[j - 1];
        if (j != pu) {
          dswap(n, a + 0 + (pu - 1)*lda, 1, a + 0 + (j - 1)*lda, 1);
          jp = jpvt[pu - 1];
          jpvt[pu - 1] = jpvt[j - 1];
          jpvt[j - 1] = jp;
        }
        pu = pu - 1;
      }
    }
  }
  /*
    Compute the norms of the free columns.
  */
  for (j = pl; j <= pu; j++)
    qraux[j - 1] = dnrm2(n, a + 0 + (j - 1) * lda, 1);
  for (j = pl; j <= pu; j++)
    work[j - 1] = qraux[j - 1];
  /*
    Perform the Householder reduction of A.
  */
  lup = i4_min(n, p);
  for (int l = 1; l <= lup; l++) {
    /*
      Bring the column of largest norm into the pivot position.
    */
    if (pl <= l && l < pu) {
      maxnrm = 0.0;
      maxj = l;
      for (j = l; j <= pu; j++) {
        if (maxnrm < qraux[j - 1]) {
          maxnrm = qraux[j - 1];
          maxj = j;
        }
      }
      if (maxj != l) {
        dswap(n, a + 0 + (l - 1)*lda, 1, a + 0 + (maxj - 1)*lda, 1);
        qraux[maxj - 1] = qraux[l - 1];
        work[maxj - 1] = work[l - 1];
        jp = jpvt[maxj - 1];
        jpvt[maxj - 1] = jpvt[l - 1];
        jpvt[l - 1] = jp;
      }
    }
    /*
      Compute the Householder transformation for column L.
    */
    qraux[l - 1] = 0.0;
    if (l != n) {
      nrmxl = dnrm2(n - l + 1, a + l - 1 + (l - 1) * lda, 1);
      if (nrmxl != 0.0) {
        if (a[l - 1 + (l - 1)*lda] != 0.0)
          nrmxl = nrmxl * r8_sign(a[l - 1 + (l - 1) * lda]);
        dscal(n - l + 1, 1.0 / nrmxl, a + l - 1 + (l - 1)*lda, 1);
        a[l - 1 + (l - 1)*lda] = 1.0 + a[l - 1 + (l - 1) * lda];
        /*
          Apply the transformation to the remaining columns, updating the norms.
        */
        for (j = l + 1; j <= p; j++) {
          t = -ddot(n - l + 1, a + l - 1 + (l - 1) * lda, 1, a + l - 1 + (j - 1) * lda, 1)
              / a[l - 1 + (l - 1) * lda];
          daxpy(n - l + 1, t, a + l - 1 + (l - 1)*lda, 1, a + l - 1 + (j - 1)*lda, 1);
          if (pl <= j && j <= pu) {
            if (qraux[j - 1] != 0.0) {
              tt = 1.0 - pow(r8_abs(a[l - 1 + (j - 1) * lda]) / qraux[j - 1], 2);
              tt = r8_max(tt, 0.0);
              t = tt;
              tt = 1.0 + 0.05 * tt * pow(qraux[j - 1] / work[j - 1], 2);
              if (tt != 1.0)
                qraux[j - 1] = qraux[j - 1] * sqrt(t);
              else {
                qraux[j - 1] = dnrm2(n - l, a + l + (j - 1) * lda, 1);
                work[j - 1] = qraux[j - 1];
              }
            }
          }
        }
        /*
          Save the transformation.
        */
        qraux[l - 1] = a[l - 1 + (l - 1) * lda];
        a[l - 1 + (l - 1)*lda] = -nrmxl;
      }
    }
  }
}
コード例 #17
0
ファイル: norm.c プロジェクト: kingdwd/Summer2014
/* Function Definitions */
real_T b_norm(const emxArray_real_T *x)
{
    real_T y;
    int32_T n;
    ptrdiff_t n_t;
    ptrdiff_t incx_t;
    double * xix0_t;
    if ((x->size[0] == 1) || (x->size[1] == 1)) {
        emlrtPushRtStackR2012b(&ei_emlrtRSI, emlrtRootTLSGlobal);
        emlrtPushRtStackR2012b(&gi_emlrtRSI, emlrtRootTLSGlobal);
        n = x->size[0] * x->size[1];
        emlrtPushRtStackR2012b(&gc_emlrtRSI, emlrtRootTLSGlobal);
        if (1 > n) {
            y = 0.0;
        } else {
            emlrtPushRtStackR2012b(&ic_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPushRtStackR2012b(&kc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPushRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal);
            emlrt_checkEscapedGlobals();
            n_t = (ptrdiff_t)(n);
            emlrtPopRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPopRtStackR2012b(&kc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPushRtStackR2012b(&lc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPushRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal);
            emlrt_checkEscapedGlobals();
            incx_t = (ptrdiff_t)(1);
            emlrtPopRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPopRtStackR2012b(&lc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPushRtStackR2012b(&mc_emlrtRSI, emlrtRootTLSGlobal);
            emlrt_checkEscapedGlobals();
            xix0_t = (double *)(&x->data[0]);
            emlrtPopRtStackR2012b(&mc_emlrtRSI, emlrtRootTLSGlobal);
            emlrt_checkEscapedGlobals();
            y = dnrm2(&n_t, xix0_t, &incx_t);
            emlrtPopRtStackR2012b(&ic_emlrtRSI, emlrtRootTLSGlobal);
        }

        emlrtPopRtStackR2012b(&gc_emlrtRSI, emlrtRootTLSGlobal);
        emlrtPopRtStackR2012b(&gi_emlrtRSI, emlrtRootTLSGlobal);
        emlrtPopRtStackR2012b(&ei_emlrtRSI, emlrtRootTLSGlobal);
    } else {
        emlrtPushRtStackR2012b(&fi_emlrtRSI, emlrtRootTLSGlobal);
        n = x->size[0] * x->size[1];
        emlrtPushRtStackR2012b(&gc_emlrtRSI, emlrtRootTLSGlobal);
        if (1 > n) {
            y = 0.0;
        } else {
            emlrtPushRtStackR2012b(&ic_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPushRtStackR2012b(&kc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPushRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal);
            emlrt_checkEscapedGlobals();
            n_t = (ptrdiff_t)(n);
            emlrtPopRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPopRtStackR2012b(&kc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPushRtStackR2012b(&lc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPushRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal);
            emlrt_checkEscapedGlobals();
            incx_t = (ptrdiff_t)(1);
            emlrtPopRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPopRtStackR2012b(&lc_emlrtRSI, emlrtRootTLSGlobal);
            emlrtPushRtStackR2012b(&mc_emlrtRSI, emlrtRootTLSGlobal);
            emlrt_checkEscapedGlobals();
            xix0_t = (double *)(&x->data[0]);
            emlrtPopRtStackR2012b(&mc_emlrtRSI, emlrtRootTLSGlobal);
            emlrt_checkEscapedGlobals();
            y = dnrm2(&n_t, xix0_t, &incx_t);
            emlrtPopRtStackR2012b(&ic_emlrtRSI, emlrtRootTLSGlobal);
        }

        emlrtPopRtStackR2012b(&gc_emlrtRSI, emlrtRootTLSGlobal);
        emlrtPopRtStackR2012b(&fi_emlrtRSI, emlrtRootTLSGlobal);
    }

    return y;
}
コード例 #18
0
ファイル: mldivide.c プロジェクト: ofirENS/TestFiles
static void c_eml_qrsolve(const emlrtStack *sp, const emxArray_real_T *A,
  emxArray_real_T *B, emxArray_real_T *Y)
{
  emxArray_real_T *b_A;
  emxArray_real_T *work;
  int32_T mn;
  int32_T i51;
  int32_T ix;
  emxArray_real_T *tau;
  emxArray_int32_T *jpvt;
  int32_T m;
  int32_T n;
  int32_T b_mn;
  emxArray_real_T *vn1;
  emxArray_real_T *vn2;
  int32_T k;
  boolean_T overflow;
  boolean_T b12;
  int32_T i;
  int32_T i_i;
  int32_T nmi;
  int32_T mmi;
  int32_T pvt;
  int32_T iy;
  boolean_T b13;
  real_T xnorm;
  int32_T i52;
  real_T atmp;
  real_T d16;
  boolean_T b14;
  boolean_T b_i;
  ptrdiff_t n_t;
  ptrdiff_t incx_t;
  double * xix0_t;
  boolean_T exitg1;
  const mxArray *y;
  static const int32_T iv78[2] = { 1, 8 };

  const mxArray *m14;
  char_T cv76[8];
  static const char_T cv77[8] = { '%', '%', '%', 'd', '.', '%', 'd', 'e' };

  char_T cv78[14];
  uint32_T unnamed_idx_0;
  emlrtStack st;
  emlrtStack b_st;
  emlrtStack c_st;
  emlrtStack d_st;
  emlrtStack e_st;
  emlrtStack f_st;
  emlrtStack g_st;
  emlrtStack h_st;
  st.prev = sp;
  st.tls = sp->tls;
  b_st.prev = &st;
  b_st.tls = st.tls;
  c_st.prev = &b_st;
  c_st.tls = b_st.tls;
  d_st.prev = &c_st;
  d_st.tls = c_st.tls;
  e_st.prev = &d_st;
  e_st.tls = d_st.tls;
  f_st.prev = &e_st;
  f_st.tls = e_st.tls;
  g_st.prev = &f_st;
  g_st.tls = f_st.tls;
  h_st.prev = &g_st;
  h_st.tls = g_st.tls;
  emlrtHeapReferenceStackEnterFcnR2012b(sp);
  emxInit_real_T(sp, &b_A, 2, &m_emlrtRTEI, true);
  b_emxInit_real_T(sp, &work, 1, &rb_emlrtRTEI, true);
  mn = (int32_T)muDoubleScalarMin(A->size[0], A->size[1]);
  st.site = &mc_emlrtRSI;
  b_st.site = &nc_emlrtRSI;
  c_st.site = &oc_emlrtRSI;
  i51 = b_A->size[0] * b_A->size[1];
  b_A->size[0] = A->size[0];
  b_A->size[1] = A->size[1];
  emxEnsureCapacity(&c_st, (emxArray__common *)b_A, i51, (int32_T)sizeof(real_T),
                    &m_emlrtRTEI);
  ix = A->size[0] * A->size[1];
  for (i51 = 0; i51 < ix; i51++) {
    b_A->data[i51] = A->data[i51];
  }

  b_emxInit_real_T(&c_st, &tau, 1, &m_emlrtRTEI, true);
  b_emxInit_int32_T(&c_st, &jpvt, 2, &m_emlrtRTEI, true);
  m = b_A->size[0];
  n = b_A->size[1];
  b_mn = muIntScalarMin_sint32(b_A->size[0], b_A->size[1]);
  i51 = tau->size[0];
  tau->size[0] = b_mn;
  emxEnsureCapacity(&c_st, (emxArray__common *)tau, i51, (int32_T)sizeof(real_T),
                    &n_emlrtRTEI);
  d_st.site = &mf_emlrtRSI;
  e_st.site = &rb_emlrtRSI;
  f_st.site = &sb_emlrtRSI;
  g_st.site = &tb_emlrtRSI;
  eml_signed_integer_colon(&g_st, b_A->size[1], jpvt);
  if ((b_A->size[0] == 0) || (b_A->size[1] == 0)) {
  } else {
    ix = b_A->size[1];
    i51 = work->size[0];
    work->size[0] = ix;
    emxEnsureCapacity(&c_st, (emxArray__common *)work, i51, (int32_T)sizeof
                      (real_T), &m_emlrtRTEI);
    for (i51 = 0; i51 < ix; i51++) {
      work->data[i51] = 0.0;
    }

    b_emxInit_real_T(&c_st, &vn1, 1, &pb_emlrtRTEI, true);
    b_emxInit_real_T(&c_st, &vn2, 1, &qb_emlrtRTEI, true);
    d_st.site = &tc_emlrtRSI;
    ix = b_A->size[1];
    i51 = vn1->size[0];
    vn1->size[0] = ix;
    emxEnsureCapacity(&c_st, (emxArray__common *)vn1, i51, (int32_T)sizeof
                      (real_T), &pb_emlrtRTEI);
    i51 = vn2->size[0];
    vn2->size[0] = ix;
    emxEnsureCapacity(&c_st, (emxArray__common *)vn2, i51, (int32_T)sizeof
                      (real_T), &qb_emlrtRTEI);
    k = 1;
    d_st.site = &nf_emlrtRSI;
    overflow = (b_A->size[1] > 2147483646);
    if (overflow) {
      e_st.site = &db_emlrtRSI;
      check_forloop_overflow_error(&e_st);
    }

    for (ix = 0; ix + 1 <= b_A->size[1]; ix++) {
      d_st.site = &sc_emlrtRSI;
      vn1->data[ix] = b_eml_xnrm2(&d_st, b_A->size[0], b_A, k);
      vn2->data[ix] = vn1->data[ix];
      k += b_A->size[0];
    }

    d_st.site = &rc_emlrtRSI;
    if (1 > b_mn) {
      b12 = false;
    } else {
      b12 = (b_mn > 2147483646);
    }

    if (b12) {
      e_st.site = &db_emlrtRSI;
      check_forloop_overflow_error(&e_st);
    }

    for (i = 1; i <= b_mn; i++) {
      i_i = (i + (i - 1) * m) - 1;
      nmi = n - i;
      mmi = m - i;
      d_st.site = &of_emlrtRSI;
      ix = eml_ixamax(&d_st, 1 + nmi, vn1, i);
      pvt = (i + ix) - 2;
      if (pvt + 1 != i) {
        d_st.site = &pf_emlrtRSI;
        e_st.site = &bc_emlrtRSI;
        f_st.site = &cc_emlrtRSI;
        ix = 1 + m * pvt;
        iy = 1 + m * (i - 1);
        g_st.site = &dc_emlrtRSI;
        if (1 > m) {
          b13 = false;
        } else {
          b13 = (m > 2147483646);
        }

        if (b13) {
          h_st.site = &db_emlrtRSI;
          check_forloop_overflow_error(&h_st);
        }

        for (k = 1; k <= m; k++) {
          i51 = b_A->size[0] * b_A->size[1];
          xnorm = b_A->data[emlrtDynamicBoundsCheckFastR2012b(ix, 1, i51,
            &le_emlrtBCI, &f_st) - 1];
          i51 = b_A->size[0] * b_A->size[1];
          i52 = b_A->size[0] * b_A->size[1];
          b_A->data[emlrtDynamicBoundsCheckFastR2012b(ix, 1, i51, &le_emlrtBCI,
            &f_st) - 1] = b_A->data[emlrtDynamicBoundsCheckFastR2012b(iy, 1, i52,
            &le_emlrtBCI, &f_st) - 1];
          i51 = b_A->size[0] * b_A->size[1];
          b_A->data[emlrtDynamicBoundsCheckFastR2012b(iy, 1, i51, &le_emlrtBCI,
            &f_st) - 1] = xnorm;
          ix++;
          iy++;
        }

        ix = jpvt->data[pvt];
        jpvt->data[pvt] = jpvt->data[i - 1];
        jpvt->data[i - 1] = ix;
        vn1->data[pvt] = vn1->data[i - 1];
        vn2->data[pvt] = vn2->data[i - 1];
      }

      if (i < m) {
        d_st.site = &qc_emlrtRSI;
        atmp = b_A->data[i_i];
        d16 = 0.0;
        if (1 + mmi <= 0) {
        } else {
          e_st.site = &wc_emlrtRSI;
          xnorm = b_eml_xnrm2(&e_st, mmi, b_A, i_i + 2);
          if (xnorm != 0.0) {
            xnorm = muDoubleScalarHypot(b_A->data[i_i], xnorm);
            if (b_A->data[i_i] >= 0.0) {
              xnorm = -xnorm;
            }

            if (muDoubleScalarAbs(xnorm) < 1.0020841800044864E-292) {
              ix = 0;
              do {
                ix++;
                e_st.site = &xc_emlrtRSI;
                b_eml_xscal(&e_st, mmi, 9.9792015476736E+291, b_A, i_i + 2);
                xnorm *= 9.9792015476736E+291;
                atmp *= 9.9792015476736E+291;
              } while (!(muDoubleScalarAbs(xnorm) >= 1.0020841800044864E-292));

              e_st.site = &yc_emlrtRSI;
              xnorm = b_eml_xnrm2(&e_st, mmi, b_A, i_i + 2);
              xnorm = muDoubleScalarHypot(atmp, xnorm);
              if (atmp >= 0.0) {
                xnorm = -xnorm;
              }

              d16 = (xnorm - atmp) / xnorm;
              e_st.site = &ad_emlrtRSI;
              b_eml_xscal(&e_st, mmi, 1.0 / (atmp - xnorm), b_A, i_i + 2);
              e_st.site = &bd_emlrtRSI;
              if (1 > ix) {
                b14 = false;
              } else {
                b14 = (ix > 2147483646);
              }

              if (b14) {
                f_st.site = &db_emlrtRSI;
                check_forloop_overflow_error(&f_st);
              }

              for (k = 1; k <= ix; k++) {
                xnorm *= 1.0020841800044864E-292;
              }

              atmp = xnorm;
            } else {
              d16 = (xnorm - b_A->data[i_i]) / xnorm;
              atmp = 1.0 / (b_A->data[i_i] - xnorm);
              e_st.site = &cd_emlrtRSI;
              b_eml_xscal(&e_st, mmi, atmp, b_A, i_i + 2);
              atmp = xnorm;
            }
          }
        }

        tau->data[i - 1] = d16;
      } else {
        atmp = b_A->data[i_i];
        d_st.site = &pc_emlrtRSI;
        tau->data[i - 1] = eml_matlab_zlarfg();
      }

      b_A->data[i_i] = atmp;
      if (i < n) {
        atmp = b_A->data[i_i];
        b_A->data[i_i] = 1.0;
        d_st.site = &qf_emlrtRSI;
        eml_matlab_zlarf(&d_st, mmi + 1, nmi, i_i + 1, tau->data[i - 1], b_A, i
                         + i * m, m, work);
        b_A->data[i_i] = atmp;
      }

      d_st.site = &rf_emlrtRSI;
      if (i + 1 > n) {
        b_i = false;
      } else {
        b_i = (n > 2147483646);
      }

      if (b_i) {
        e_st.site = &db_emlrtRSI;
        check_forloop_overflow_error(&e_st);
      }

      for (ix = i; ix + 1 <= n; ix++) {
        if (vn1->data[ix] != 0.0) {
          xnorm = muDoubleScalarAbs(b_A->data[(i + b_A->size[0] * ix) - 1]) /
            vn1->data[ix];
          xnorm = 1.0 - xnorm * xnorm;
          if (xnorm < 0.0) {
            xnorm = 0.0;
          }

          atmp = vn1->data[ix] / vn2->data[ix];
          atmp = xnorm * (atmp * atmp);
          if (atmp <= 1.4901161193847656E-8) {
            if (i < m) {
              d_st.site = &sf_emlrtRSI;
              e_st.site = &uc_emlrtRSI;
              if (mmi < 1) {
                xnorm = 0.0;
              } else {
                f_st.site = &vc_emlrtRSI;
                g_st.site = &vc_emlrtRSI;
                n_t = (ptrdiff_t)(mmi);
                g_st.site = &vc_emlrtRSI;
                incx_t = (ptrdiff_t)(1);
                i51 = b_A->size[0] * b_A->size[1];
                i52 = (i + m * ix) + 1;
                xix0_t = (double *)(&b_A->data[emlrtDynamicBoundsCheckFastR2012b
                                    (i52, 1, i51, &vb_emlrtBCI, &f_st) - 1]);
                xnorm = dnrm2(&n_t, xix0_t, &incx_t);
              }

              vn1->data[ix] = xnorm;
              vn2->data[ix] = vn1->data[ix];
            } else {
              vn1->data[ix] = 0.0;
              vn2->data[ix] = 0.0;
            }
          } else {
            d_st.site = &tf_emlrtRSI;
            vn1->data[ix] *= muDoubleScalarSqrt(xnorm);
          }
        }
      }
    }

    emxFree_real_T(&vn2);
    emxFree_real_T(&vn1);
  }

  atmp = 0.0;
  if (mn > 0) {
    xnorm = muDoubleScalarMax(A->size[0], A->size[1]) * muDoubleScalarAbs
      (b_A->data[0]) * 2.2204460492503131E-16;
    k = 0;
    exitg1 = false;
    while ((!exitg1) && (k <= mn - 1)) {
      if (muDoubleScalarAbs(b_A->data[k + b_A->size[0] * k]) <= xnorm) {
        st.site = &lc_emlrtRSI;
        y = NULL;
        m14 = emlrtCreateCharArray(2, iv78);
        for (i = 0; i < 8; i++) {
          cv76[i] = cv77[i];
        }

        emlrtInitCharArrayR2013a(&st, 8, m14, cv76);
        emlrtAssign(&y, m14);
        b_st.site = &tg_emlrtRSI;
        emlrt_marshallIn(&b_st, c_sprintf(&b_st, b_sprintf(&b_st, y,
          emlrt_marshallOut(14.0), emlrt_marshallOut(6.0), &o_emlrtMCI),
          emlrt_marshallOut(xnorm), &p_emlrtMCI), "sprintf", cv78);
        st.site = &kc_emlrtRSI;
        b_eml_warning(&st, atmp, cv78);
        exitg1 = true;
      } else {
        atmp++;
        k++;
      }
    }
  }

  unnamed_idx_0 = (uint32_T)A->size[1];
  i51 = Y->size[0];
  Y->size[0] = (int32_T)unnamed_idx_0;
  emxEnsureCapacity(sp, (emxArray__common *)Y, i51, (int32_T)sizeof(real_T),
                    &m_emlrtRTEI);
  ix = (int32_T)unnamed_idx_0;
  for (i51 = 0; i51 < ix; i51++) {
    Y->data[i51] = 0.0;
  }

  for (ix = 0; ix < mn; ix++) {
    if (tau->data[ix] != 0.0) {
      xnorm = B->data[ix];
      i51 = A->size[0] + (int32_T)(1.0 - ((1.0 + (real_T)ix) + 1.0));
      emlrtForLoopVectorCheckR2012b((1.0 + (real_T)ix) + 1.0, 1.0, A->size[0],
        mxDOUBLE_CLASS, i51, &ac_emlrtRTEI, sp);
      for (i = 0; i < i51; i++) {
        unnamed_idx_0 = ((uint32_T)ix + i) + 2U;
        xnorm += b_A->data[((int32_T)unnamed_idx_0 + b_A->size[0] * ix) - 1] *
          B->data[(int32_T)unnamed_idx_0 - 1];
      }

      xnorm *= tau->data[ix];
      if (xnorm != 0.0) {
        B->data[ix] -= xnorm;
        i51 = A->size[0] + (int32_T)(1.0 - ((1.0 + (real_T)ix) + 1.0));
        emlrtForLoopVectorCheckR2012b((1.0 + (real_T)ix) + 1.0, 1.0, A->size[0],
          mxDOUBLE_CLASS, i51, &yb_emlrtRTEI, sp);
        for (i = 0; i < i51; i++) {
          unnamed_idx_0 = ((uint32_T)ix + i) + 2U;
          B->data[(int32_T)unnamed_idx_0 - 1] -= b_A->data[((int32_T)
            unnamed_idx_0 + b_A->size[0] * ix) - 1] * xnorm;
        }
      }
    }
  }

  emxFree_real_T(&tau);
  emlrtForLoopVectorCheckR2012b(1.0, 1.0, atmp, mxDOUBLE_CLASS, (int32_T)atmp,
    &xb_emlrtRTEI, sp);
  for (i = 0; i < (int32_T)atmp; i++) {
    Y->data[jpvt->data[i] - 1] = B->data[i];
  }

  emlrtForLoopVectorCheckR2012b(atmp, -1.0, 1.0, mxDOUBLE_CLASS, (int32_T)-(1.0
    + (-1.0 - atmp)), &wb_emlrtRTEI, sp);
  for (ix = 0; ix < (int32_T)-(1.0 + (-1.0 - atmp)); ix++) {
    xnorm = atmp + -(real_T)ix;
    Y->data[jpvt->data[(int32_T)xnorm - 1] - 1] = eml_div(Y->data[jpvt->data
      [(int32_T)xnorm - 1] - 1], b_A->data[((int32_T)xnorm + b_A->size[0] *
      ((int32_T)xnorm - 1)) - 1]);
    for (i = 0; i < (int32_T)(xnorm - 1.0); i++) {
      Y->data[jpvt->data[i] - 1] -= Y->data[jpvt->data[(int32_T)xnorm - 1] - 1] *
        b_A->data[i + b_A->size[0] * ((int32_T)xnorm - 1)];
    }
  }

  emxFree_int32_T(&jpvt);
  emxFree_real_T(&work);
  emxFree_real_T(&b_A);
  emlrtHeapReferenceStackLeaveFcnR2012b(sp);
}
コード例 #19
0
ファイル: pdslen.c プロジェクト: OpenCMISS-Dependencies/optpp
double pdslen(int ndim, int type, double *s, double scale,
	      double *work)
{
  /*******************************************************************
   *
   * THIS IS A SERVICE FUNCTION USED TO DETERMINE THE LENGTH OF THE
   * LONGEST EDGE IN THE INITIAL SIMPLEX.  NOTE THAT THE JOB IS MADE
   * CONSIDERABLY EASIER IF THE USER SPECIFIED EITHER A RIGHT-ANGLED
   * OR REGULAR SIMPLEX.  WRITTEN BY VIRGINIA TORCZON.  LAST
   * MODIFICATION: MAY 18, 1992.
   *
   * INPUT 
   *    N             DIMENSION OF THE PROBLEM TO BE SOLVED
   *
   *    TYPE          TYPE OF SIMPLEX SPECIFIED BY THE USER CONTAINS
   *                  THE N+1 VERTICES IN THE INITIAL SIMPLEX (STORED
   *                  BY COLUMN)
   *
   *    SCALE         (RELATIVE) LENGTH OF EDGES IN THE INITIAL
   *                  SIMPLEX IF THE USER REQUESTED THAT THE SIMPLEX
   *                  BE CONSTRUCTED (RATHER THAN ENTERED DIRECTLY)
   *
   *    WORK          ONE-DIMENSIONAL WORK ARRAY OF LENGTH N USED TO
   *                  COMPUTE THE DIFFERENCE BETWEEN VERTICES 
   *
   *******************************************************************/

  /* System generated locals */

  double ret_val;

  /* Local variables */

  static double temp;
  static int i, j, k;
  int incx = 1;

  if (type == 1) {

    /* THE USER CHOSE A SIMPLE RIGHT-ANGLED SIMPLEX SO THE MAXIMUM
     * LENGTH IS EASY TO COMPUTE. */

    ret_val = fabs(scale) * sqrt(2.);
  } 
  else if (type == 2) {

    /* THE USER CHOSE A REGULAR SIMPLEX; I.E., THE LENGTH OF EVERY
     * EDGE IN THE SIMPLEX IS SPECIFIED BY THE CHOICE OF SCALE. */

    ret_val = fabs(scale);
  } 
  else {

    /* THE USER EITHER ENTERED THE SIMPLEX OR CHOSE A SCALED
     * RIGHT-ANGLED SIMPLEX.  COMPUTE THE LENGTHS OF ALL THE EDGES. */
    
    ret_val = 0.;

    for (i = 0; i <= ndim; i++) {

      for (j = i + 1; j <= ndim ; j++) {

	for (k = 0; k < ndim; k++) {
	  work[k] = s[k + j * ndim] - s[k + i * ndim];
	}

	temp = dnrm2(&ndim, work, &incx);

	if (temp > ret_val) {
	  ret_val = temp;
	}
      }
    }
  }

  return ret_val;
}
コード例 #20
-1
ファイル: cg_ssor_precon_c.c プロジェクト: sd-omkar/ece999
/*---------------------------------------------------------------------------*/
int
main (void)
{
	/*---------------------------------------------------------------------------*/
  /* Define arrays for the upper triangle of the coefficient matrix and        */
  /* preconditioner as well as an array for rhs vector                         */
  /* Compressed sparse row storage is used for sparse representation           */
	/*---------------------------------------------------------------------------*/
  MKL_INT n = 100, rci_request, itercount, lexpected_itercount = 15,
    uexpected_itercount = 19, i;
  double rhs[100];
  MKL_INT ia[100 + 1];
  MKL_INT ja[100 - 1];
  double a[100 - 1], a1[100 - 1];
	/*---------------------------------------------------------------------------*/
  /* Allocate storage for the solver ?par and temporary storage tmp            */
	/*---------------------------------------------------------------------------*/
  MKL_INT length = 128;
  MKL_INT ipar[128];
  double dpar[128], tmp[4 * 100];
	/*---------------------------------------------------------------------------*/
  /* Some additional variables to use with the RCI (P)CG solver                */
  /* OMEGA is the relaxation parameter, NITER_SSOR is the maximum number of    */
  /* iterations for the SSOR preconditioner                                    */
	/*---------------------------------------------------------------------------*/
  double solution[100];
  double expected_sol[100];
  double omega = 0.5E0, one = 1.E0, zero = 0.E0, om = 1.E0 - omega;
  double euclidean_norm, temp[100];
  MKL_INT niter_ssor = 20;
  char matdes[6];
  char tr = 'n';
  double eone = -1.E0;
  MKL_INT ione = 1;

	/*---------------------------------------------------------------------------*/
  /* Initialize the coefficient matrix and expected solution                     */
	/*---------------------------------------------------------------------------*/
  for (i = 0; i < n; i++)
    expected_sol[i] = 1.E0;

  for (i = 0; i < n - 1; i++)
    {
      ja[i] = i + 2;
      ia[i] = i + 1;
      a[i] = 0.5E0;
      a1[i] = omega * a[i];
    }
  ia[n - 1] = n;
  ia[n] = ia[n - 1];
  matdes[0] = 's';
  matdes[1] = 'u';
  matdes[2] = 'u';
  matdes[3] = 'f';

	/*---------------------------------------------------------------------------*/
  /* Initialize vectors rhs, temp, and tmp[n:2*n-1] with zeros as mkl_dcsrmv   */
  /* routine does not set NAN to zero. Thus, if any of the values in the       */
  /* vectors above accidentally happens to be NAN, the example will fail       */
  /* to complete.                                                              */
  /* Initialize the right hand side through matrix-vector product              */
	/*---------------------------------------------------------------------------*/
  for (i = 0; i < n; i++)
    {
      rhs[i] = zero;
      temp[i] = zero;
      tmp[n + i] = zero;
    }
  mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], expected_sol,
	      &zero, rhs);
	/*---------------------------------------------------------------------------*/
  /* Initialize the initial guess                                              */
	/*---------------------------------------------------------------------------*/
  for (i = 0; i < n; i++)
    solution[i] = zero;
	/*---------------------------------------------------------------------------*/
  /* Initialize the solver                                                     */
	/*---------------------------------------------------------------------------*/
  dcg_init (&n, solution, rhs, &rci_request, ipar, dpar, tmp);
  if (rci_request != 0)
    goto failure;
	/*---------------------------------------------------------------------------*/
  /* Set the desired parameters:                                               */
  /* INTEGER parameters:                                                       */
  /* set the maximal number of iterations to 100                               */
  /* LOGICAL parameters:                                                       */
  /* run the Preconditioned version of RCI (P)CG with preconditioner C_inverse */
  /* DOUBLE parameters                                                         */
  /* -                                                                         */
	/*---------------------------------------------------------------------------*/
  ipar[4] = 100;
  ipar[10] = 1;
	/*---------------------------------------------------------------------------*/
  /* Check the correctness and consistency of the newly set parameters         */
	/*---------------------------------------------------------------------------*/
  dcg_check (&n, solution, rhs, &rci_request, ipar, dpar, tmp);
  if (rci_request != 0)
    goto failure;
	/*---------------------------------------------------------------------------*/
  /* Compute the solution by RCI (P)CG solver                                  */
  /* Reverse Communications starts here                                        */
	/*---------------------------------------------------------------------------*/
rci:dcg (&n, solution, rhs, &rci_request, ipar, dpar, tmp);
	/*---------------------------------------------------------------------------*/
  /* If rci_request=0, then the solution was found according to the requested  */
  /* stopping tests. In this case, this means that it was found after 100      */
  /* iterations.                                                               */
	/*---------------------------------------------------------------------------*/
  if (rci_request == 0)
    goto getsln;
	/*---------------------------------------------------------------------------*/
  /* If rci_request=1, then compute the vector A*tmp[0]                        */
  /* and put the result in vector tmp[n]                                       */
	/*---------------------------------------------------------------------------*/
  if (rci_request == 1)
    {
      matdes[0] = 's';
      mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], tmp, &zero,
		  &tmp[n]);
      goto rci;
    }
	/*---------------------------------------------------------------------------*/
  /* If rci_request=2, then do the user-defined stopping test: compute the     */
  /* Euclidean norm of the actual residual using MKL routines and check if     */
  /* it is less than 1.E-8                                                     */
	/*---------------------------------------------------------------------------*/
  if (rci_request == 2)
    {
      matdes[0] = 's';
      mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], solution,
		  &zero, temp);
      daxpy (&n, &eone, rhs, &ione, temp, &ione);
      euclidean_norm = dnrm2 (&n, temp, &ione);
		/*---------------------------------------------------------------------------*/
      /* The solution has not been found yet according to the user-defined stopping */
      /* test. Continue RCI (P)CG iterations.                                      */
		/*---------------------------------------------------------------------------*/
      if (euclidean_norm > 1.E-6)
	goto rci;
		/*---------------------------------------------------------------------------*/
      /* The solution has been found according to the user-defined stopping test   */
		/*---------------------------------------------------------------------------*/
      else
	goto getsln;
    }
	/*---------------------------------------------------------------------------*/
  /* If rci_request=3, then  apply the simplest SSOR preconditioning           */
  /* on vector tmp[2*n] and put the result in vector tmp[3*n]                  */
	/*---------------------------------------------------------------------------*/
  if (rci_request == 3)
    {
      dcopy (&n, &tmp[2 * n], &ione, &tmp[3 * n], &ione);
      matdes[0] = 't';
      for (i = 1; i <= niter_ssor; i++)
	{
	  dcopy (&n, &tmp[2 * n], &ione, temp, &ione);
	  matdes[2] = 'n';
	  tr = 'n';
	  mkl_dcsrmv (&tr, &n, &n, &eone, matdes, a1, ja, ia, &ia[1],
		      &tmp[3 * n], &omega, temp);
	  daxpy (&n, &om, &tmp[3 * n], &ione, temp, &ione);
	  matdes[2] = 'u';
	  tr = 't';
	  mkl_dcsrsv (&tr, &n, &one, matdes, a1, ja, ia, &ia[1], temp,
		      &tmp[3 * n]);
	}
      goto rci;
    }
	/*---------------------------------------------------------------------------*/
  /* If rci_request=anything else, then dcg subroutine failed                  */
  /* to compute the solution vector: solution[n]                               */
	/*---------------------------------------------------------------------------*/
  goto failure;
	/*---------------------------------------------------------------------------*/
  /* Reverse Communication ends here                                           */
  /* Get the current iteration number into itercount                           */
	/*---------------------------------------------------------------------------*/
getsln:dcg_get (&n, solution, rhs, &rci_request, ipar, dpar, tmp,
	   &itercount);
	/*---------------------------------------------------------------------------*/
  /* Print solution vector: solution[n] and number of iterations: itercount    */
	/*---------------------------------------------------------------------------*/
  printf ("The system has been solved\n");
  printf ("The following solution obtained\n");
  for (i = 0; i < n / 4; i++)
    {
      printf ("%6.3f  %6.3f  %6.3f  %6.3f", solution[4 * i],
	      solution[4 * i + 1], solution[4 * i + 2], solution[4 * i + 3]);
      printf ("\n");
    }
  printf ("\nExpected solution is\n");
  for (i = 0; i < n / 4; i++)
    {
      printf ("%6.3f  %6.3f  %6.3f  %6.3f", expected_sol[4 * i],
	      expected_sol[4 * i + 1], expected_sol[4 * i + 2],
	      expected_sol[4 * i + 3]);
      expected_sol[4 * i] -= solution[4 * i];
      printf ("\n");
    }

  printf ("\nNumber of iterations: %d\n", itercount);
  i = 4;
  n /= 4;
  euclidean_norm = dnrm2 (&n, expected_sol, &i);

	/*-------------------------------------------------------------------------*/
  /* Release internal MKL memory that might be used for computations         */
  /* NOTE: It is important to call the routine below to avoid memory leaks   */
  /* unless you disable MKL Memory Manager                                   */
	/*-------------------------------------------------------------------------*/
  MKL_Free_Buffers ();

  if (lexpected_itercount <= itercount <= uexpected_itercount
      && euclidean_norm < 1.0e-4)
    {
      printf
	("This example has successfully PASSED through all steps of computation!");
      printf ("\n");
      return 0;
    }
  else
    {
      printf
	("This example may have FAILED as either the number of iterations differs");
      printf ("\nfrom the expected number of iterations %d-",
	      lexpected_itercount);
      printf ("-%d, or the computed solution\ndiffers much from ",
	      uexpected_itercount);
      printf ("the expected solution (Euclidean norm is %e), or both.\n",
	      euclidean_norm);
      return 1;
    }
	/*-------------------------------------------------------------------------*/
  /* Release internal MKL memory that might be used for computations         */
  /* NOTE: It is important to call the routine below to avoid memory leaks   */
  /* unless you disable MKL Memory Manager                                   */
	/*-------------------------------------------------------------------------*/
failure:printf
    ("This example FAILED as the solver has returned the ERROR ");
  printf ("code %d", rci_request);
  MKL_Free_Buffers ();
  return 1;
}