static int DenseSymPSDNormF2(void* AA, int n, double *dddot){ dtpumat* A=(dtpumat*) AA; ffinteger ione=1,nn=A->n*(A->n+1)/2; double dd,tt=sqrt(0.5),*val=A->val; int info; info=DTPUMatScaleDiagonal(AA,tt); dd=dnrm2(&nn,val,&ione); info=DTPUMatScaleDiagonal(AA,1.0/tt); *dddot=dd*dd*2; return 0; }
double compute_nvector_scale(double *residuals, int n, int nv, double *nrms) { int j; MW_scalar_statistics stats; double scale; for(j=0;j<nv;++j) nrms[j] = dnrm2(n,residuals+j*n,1); stats = MW_calc_statistics_double(nrms,nv); scale = stats.q3_4 - stats.q1_4; scale /= SIGMA_IQ; return(scale); }
/*--------------------------------------------------------- fem_vec_norm - to compute a norm of global vector (in parallel) ---------------------------------------------------------*/ double fem_vec_norm( /* returns: L2 norm of global Vector */ int Solver_id, /* in: solver data structure to be used */ int Level_id, /* in: level number */ int Nrdof, /* in: number of vector components */ double* Vector /* in: local part of global Vector */ ) { const int IONE=1; /*++++++++++++++++ executable statements ++++++++++++++++*/ //return(ddr_sol_vec_norm(Solver_id, Level_id, Nrdof, Vector)); return(dnrm2(&Nrdof, Vector, &IONE)); }
/* This function uses m-estimators to estimate a three vector from a cloud of points in n-space. The approach is comparable to computing the center of mass of cloud of equal mass particles. We use a component median for the initial estimate of center, then compute the m-estimator based on a Rayleigh distribution keyed on the l2 norm amplitude of the vector residual. This is comparable to the estimation of phase in robust estimates of transfer functions described by Chave and Thomson as we use a penalty function based on a Rayleigh distribution. It is not clear to the author if this is valid for other than 2 vectors, but it surely is not a bad approximation for 3-vectors. Higher order spaces should use this with care. Arguments: v - n by nv matrix containing ensemble of data n-vectors. These are assumed stored as in the blas in a pseudofortran sense as a continuous vector of floats with columns sequential (i.e. first column of v is elements v[0], v[1], ... , v[n-1] and second column starts at v[n]. n - length of vectors (number of rows in v) nv - number of vectors (number of columns in v) mode - switch (see below) minscale - minimum error scale allowed. Use depends on setting of the mode variable with which it is associated. Scale factor is determined here from interquartiles. mode is as defined in M_estimator_float above using symbols SCALE_IQ_RELATIVE and SCALE_IQ_ABSOLUTE. The former uses a relative scale derived from the scale factor determined on the first pass. That is, the minimum scale alllowed is vmag0*minscale (e.g. 0.01 would limit the minimum scale to 1% of the magnitude of the total n vector initial estimate. Absolute scaling uses the limit straight up. mean - vector of length n to hold result. Blindly assumed to be already allocated of length n. weight - vector of length to hold final weights used in robust estimation. Function returns a 3 vector it estimates from v. This is an array alloced in this function that needs to be managed externally. Author: G Pavlis Written: January 2000 */ #define EPSILON 0.01 /* convergence parameter*/ #define HUBER_LIMIT 2 /* Number of Huber weight iterations */ #define THOMPSON_LIMIT 25 /* Limit on number of iterations using Thompson formula */ #define MIN_DGF 10 /* I've seen the Thompson formula work badly with small degrees of freedom. When dgf are less than this the Thomson section is skipped */ void M_estimator_double_n_vector(double *v, int n, int nv, int mode, double minscale, double *mean, double *weight) { double *col, *row; /* work spaces for columns and rows respectively*/ double sum_weights; double *residuals; double *delta_mean; int i, j, iteration; double vmag,dvmag; double scale, fminsc; int ndgf; MW_scalar_statistics stats; double beta; allot(double *,col,n); allot(double *,row,nv); allot(double *,delta_mean, n); allot(double *,residuals,n*nv); /* We compute component medians to obtain initial estimate of vector */ for(i=0;i<n;++i) { dcopy(nv,v+i,n,row,1); stats=MW_calc_statistics_double(row,nv); mean[i] = stats.median; } /* We first do a few passes with the huber formula which is less aggressive on outliers, but helps establish a solid value for the scale factor.*/ iteration =0; vmag = dnrm2(n,mean,1); for(j=0;j<nv;++j) weight[j] = 1.0; /* done to make sure scale is computed correctly on first pass*/ if(mode==IQ_SCALE_RELATIVE) if(vmag<FLT_EPSILON) fminsc= minscale; else fminsc = vmag*minscale; else fminsc = minscale; do { compute_nvector_residuals(v,n,nv,residuals,mean); /* This produces weighted residuals -- requires weighting formula to use weights 0<w<1 */ for(j=0;j<nv;++j) dscal(n,weight[i],residuals+j,1); scale = compute_nvector_scale(residuals,n,nv,row); if(scale < fminsc) scale = fminsc; for(i=0;i<n;++i) delta_mean[i] = 0.0; for(j=0,sum_weights=0.0;j<nv;++j) { dcopy(n,residuals+j*n,1,col,1); dvmag = dnrm2(n,col,1); weight[j] = dhuber(dvmag/scale); daxpy(n,weight[j],col,1,delta_mean,1); sum_weights += weight[j]; } dscal(n,1.0/sum_weights,delta_mean,1); dvmag = dnrm2(n,delta_mean,1); for(i=0;i<n;++i) mean[i] += delta_mean[i]; ++iteration; }while( ((dvmag/scale) > EPSILON) && (iteration < HUBER_LIMIT) ); /* Now we use Thomson's redescending formula which is the opposite of the huber formula being extremely aggressive on outliers and works only if the scale factor is not too out of line. It also works badly with low degrees of freedom. Consequently, we return immediately when degrees of freedom are below a frozen threshold*/ ndgf = nv - n; if(ndgf<MIN_DGF) { free(col); free(row); free(delta_mean); free(residuals); return; } /* This is the value of beta recommended by chave and thomson, 1987, based on the nvth quantile of the Rayleigh distribution. I use number of degrees of freedom here instead to perhaps more properly work with higher order spaces, but use a minimum on ndgf to avoid making the formula unstable. The thomson formula becomes exponential-like with low degrees of freedom, which we need to avoid. */ beta = sqrt(2.0*log(2.0*((double)ndgf))); iteration = 0; do { compute_nvector_residuals(v,n,nv,residuals,mean); for(i=0;i<n;++i) delta_mean[i] = 0.0; for(j=0;j<nv;++j) { dcopy(n,residuals+j*n,1,col,1); dvmag = dnrm2(n,col,1); weight[j] = dthomson(dvmag/scale,beta); daxpy(n,weight[j],col,1,delta_mean,1); sum_weights += weight[j]; } dscal(n,1.0/sum_weights,delta_mean,1); dvmag = dnrm2(n,delta_mean,1); for(i=0;i<n;++i) mean[i] += delta_mean[i]; ++iteration; }while( ((dvmag/scale) > EPSILON) && (iteration < THOMPSON_LIMIT) ); free(col); free(row); free(delta_mean); free(residuals); }
double KNITRO_EXPORT KTR_dnrm2 (const int n, const double * const x, const int incx) { return( dnrm2 (n, x, incx) ); }
INT NS_DIM_PREFIX CheckNP (MULTIGRID *theMG, INT argc, char **argv) { MATDATA_DESC *A; VECDATA_DESC *x,*y; INT i,level,nerr; char value[VALUELEN]; VEC_SCALAR damp; DOUBLE nrm,diff; if (ReadArgvChar("A",value,argc,argv) == 0) { A = GetMatDataDescByName(theMG,value); if (A == NULL) { UserWriteF("ERROR: no matrix %s in npckeck\n",value); return(1); } if (ReadArgvOption("S",argc,argv)) { for (level=theMG->bottomLevel; level<=TOPLEVEL(theMG); level++) if (CheckSymmetryOfMatrix(GRID_ON_LEVEL(theMG,level),A)) UserWriteF("matrix %s not symmetric on level %d\n", ENVITEM_NAME(A),level); return(0); } if (ReadArgvOption("G",argc,argv)) { if (ReadArgvChar("x",value,argc,argv)) { UserWriteF("ERROR: no vector in npckeck\n"); return(1); } x = GetVecDataDescByName(theMG,value); if (x == NULL) { UserWriteF("ERROR: no vector %s in npckeck\n",value); return(1); } level = CURRENTLEVEL(theMG); if (level == BOTTOMLEVEL(theMG)) { UserWriteF("ERROR: no GalerkinCheck," "level %d is bottomlevel\n",level); return(1); } if (AllocVDFromVD(theMG,level-1,level,x,&y)) return(1); dmatset(theMG,level-1,level-1,ALL_VECTORS,A,0.0); dset(theMG,level,level,ALL_VECTORS,x,0.0); dset(theMG,level-1,level,ALL_VECTORS,y,0.0); AssembleGalerkinByMatrix(GRID_ON_LEVEL(theMG,level),A,0); for (i=0; i<VD_NCOMP(x); i++) damp[i] = 1.0; InterpolateCorrectionByMatrix(GRID_ON_LEVEL(theMG,level),x,x,damp); if (dmatmul(theMG,level,level,ALL_VECTORS,y,A,x) != NUM_OK) return(1); RestrictByMatrix(GRID_ON_LEVEL(theMG,level),y,y,damp); IFDEBUG(np,1) UserWriteF("x %d\n",level-1); PrintVector(GRID_ON_LEVEL(theMG,level-1),x,3,3); UserWriteF("x %d\n",level); PrintVector(GRID_ON_LEVEL(theMG,level),x,3,3); UserWriteF("y %d\n",level); PrintVector(GRID_ON_LEVEL(theMG,level),y,3,3); UserWriteF("y %d\n",level-1); PrintVector(GRID_ON_LEVEL(theMG,level-1),y,3,3); ENDDEBUG if (dmatmul_minus(theMG,level-1,level-1,ALL_VECTORS,y,A,x)!=NUM_OK) return(1); IFDEBUG(np,1) UserWriteF("y %d\n",level-1); PrintVector(GRID_ON_LEVEL(theMG,level-1),y,3,3); ENDDEBUG dnrm2(theMG,level-1,level-1,ALL_VECTORS,x,&nrm); dnrm2(theMG,level-1,level-1,ALL_VECTORS,y,&diff); UserWriteF("Galerkin test: nrm(x) = %f nrm(Ax-RAPx) = %f\n", nrm,diff); return(0); } }
/* calculate merit function for a local problem */ double fclib_merit_local (struct fclib_local *problem, enum fclib_merit merit, struct fclib_solution *solution) { struct fclib_matrix * W = problem->W; struct fclib_matrix * V = problem->V; struct fclib_matrix * R = problem->R; double *mu = problem->mu; double *q = problem->q; double *s = problem->s; int d = problem->spacedim; if (d !=3 ) { printf("fclib_merit_local for space dimension = %i not yet implemented\n",d); return 0; } double *v = solution->v; double *r = solution->r; double *u = solution->u; double *l = solution->l; double error_l, error; double * tmp; error=0.0; error_l=0.0; int i, ic, ic3; if (merit == MERIT_1) { /* cs M_cs; */ /* fclib_matrix_to_cssparse(W, &M_cs); */ /* cs V_cs; */ /* fclib_matrix_to_cssparse(V, &V_cs); */ /* cs R_cs; */ /* fclib_matrix_to_cssparse(R, &R_cs); */ int n_e =0; if (R) n_e = R->n; /* compute V^T {r} + R \lambda + s */ if (n_e >0) { cs * VT = cs_transpose((cs *)V, 0) ; tmp = (double *)malloc(n_e*sizeof(double)); for (i =0; i <n_e; i++) tmp[i] = s[i] ; cs_gaxpy(VT, r, tmp); cs_gaxpy((cs *)R, l, tmp); error_l += dnrm2(tmp,n_e)/(1.0 + dnrm2(s,n_e) ); free(tmp); } /* compute \hat u = W {r} + V\lambda + q */ tmp = (double *)malloc(W->n*sizeof(double)); for (i =0; i <W->n; i++) tmp[i] = q[i] ; cs_gaxpy((cs*)V, l, tmp); cs_gaxpy((cs*)W, r, tmp); /* Compute natural map */ int nc = W->n/3; for (ic = 0, ic3 = 0 ; ic < nc ; ic++, ic3 += 3) { FrictionContact3D_unitary_compute_and_add_error(r + ic3, tmp + ic3, mu[ic], &error); } free(tmp); error = sqrt(error)/(1.0 + sqrt(dnrm2(q,W->n)) )+error_l; /* printf("error_l = %12.8e", error_l); */ /* printf("norm of u = %12.8e\n", dnrm2(u,W->n)); */ /* printf("norm of r = %12.8e\n", dnrm2(r,W->n)); */ /* printf("error = %12.8e\n", error); */ return error; } return 0; /* TODO */ }
/* This function takes complex numbers x, y, and z defined by an eigenvector for a multiwavelet (would work for Fourier transforms too, however) and returns a pointer to a structure that defines the major and minor axes of the particle motion vectors defined by those three complex numbers. The up vector (assumed to be three element vector) defines the direction used to resolve the sign ambiguity inherent in defining an ellipse. That is, both the major and minor component directions are required to have a positive projection in the up direction. If they aren't the sign is flipped before returning. Normally up would point [0,0,1] or in the up radial direction for P waves. For S, it becomes more ambiguous and should be sorted out by a more complicated method. The polarization information (defined by the Particle_Motion_Ellipse structure) is allocated within this routine. Author: G. L. Pavlis Written: October 1999 */ Particle_Motion_Ellipse compute_particle_motion(complex x, complex y, complex z, double *up) { double rx,ry,rz,thetax,thetay,thetaz; /* polar forms of x,y,z*/ double a,b; double phi1,phi2; double x1[3],x2[3]; double nrmx1,nrmx2; Particle_Motion_Ellipse e; rx = hypot((double)x.r,(double)x.i); ry = hypot((double)y.r,(double)y.i); rz = hypot((double)z.r,(double)z.i); thetax = atan2((double)x.i,(double)x.r); thetay = atan2((double)y.i,(double)y.r); thetaz = atan2((double)z.i,(double)z.r); a = rx*rx*cos(2.0*thetax) + ry*ry*cos(2.0*thetay) + rz*rz*cos(2.0*thetaz); b = rx*rx*sin(2.0*thetax) + ry*ry*sin(2.0*thetay) + rz*rz*sin(2.0*thetaz); phi1 = atan2(-b,a)/2.0; phi2 = phi1 + M_PI_2; x1[0] = rx*cos(phi1+thetax); x1[1] = ry*cos(phi1+thetay); x1[2] = rz*cos(phi1+thetaz); x2[0] = rx*cos(phi2+thetax); x2[1] = ry*cos(phi2+thetay); x2[2] = rz*cos(phi2+thetaz); nrmx1 = dnrm2(3,x1,1); nrmx2 = dnrm2(3,x2,1); /* normalize to unit vectors */ dscal(3,1.0/nrmx1,x1,1); dscal(3,1.0/nrmx2,x2,1); if(nrmx1>nrmx2) { dcopy(3,x1,1,e.major,1); dcopy(3,x2,1,e.minor,1); e.rectilinearity = (1.0 - nrmx2/nrmx1); } else { dcopy(3,x2,1,e.major,1); dcopy(3,x1,1,e.minor,1); e.rectilinearity = (1.0 - nrmx1/nrmx2); } /* Choose the positive sign direction */ if(ddot(3,up,1,e.major,1) < 0.0) dscal(3,-1.0,e.major,1); if(ddot(3,up,1,e.minor,1) < 0.0) dscal(3,-1.0,e.minor,1); return(e); }
/* this is a blas like function analogous to scopy, dcopy, etc for a matrix of pointers to Particle_Motion_Vector objects. I could have made this a general matrix function, I suppose, but I decided that would be a bit opaque, and would promote on of C's most evil features. The pointers are blindly copied and it assumed the output vector bounds are not violated. Arguments: n - number of elements in input and output vectors] x - input vector of pointers incx - storage increment of x ala blas y - output vector incy - storage increment of y ala blas. Written: February 2000 Author: G Pavlis */ void pmvector_copy(int n, Particle_Motion_Ellipse *x, int incx, Particle_Motion_Ellipse *y, int incy) { int i,ix,iy; for(i=0,ix=0,iy=0;i<n;++i,ix+=incx,iy+=incy) { y[iy] = x[ix]; } } #define PM_MINSCALE_MAJOR 0.2 /* This needs to be pretty large compared to good data because if the errors get much larger than this the results are trash anyway */ #define PM_MINSCALE_MINOR 1.0 /* Minor axis can easily be totally random. Nearly always happens for pure linear pm. This essentially turns off robust weighting */ void pmvector_average(Particle_Motion_Ellipse *pmv, int n, Particle_Motion_Ellipse *pmavg, Particle_Motion_Error *pmerr) { int i,j,ii; double *v; /* work space used to store coordinates passed to m-estimator routine */ double avg[3]; double *weight; double nrm_major, nrm_minor; Spherical_Coordinate scoor; double U[9]; /* transformation matrix*/ double work[3]; double *workn; double dotprod; double sumsq,sumwt; int ndgf; MW_scalar_statistics stats; double nrmtest; allot(double *,v,3*n); allot(double *,weight,n); allot(double *,workn,n); for(i=0,ii=0;i<n;++i,ii+=3) { /* This could be done with the blas, but it would be more obscure and no faster */ v[ii] = pmv[i].major[0]; v[ii+1] = pmv[i].major[1]; v[ii+2] = pmv[i].major[2]; } /* We use relative scaling here because the pm vectors are not normalized. We could use absolute scaling if we normalized them above. This is a modification that might actually give better results. */ M_estimator_double_n_vector(v,3,n, IQ_SCALE_RELATIVE,PM_MINSCALE_MAJOR,avg,weight); nrm_major = dnrm2(3,avg,1); for(i=0;i<3;++i) { /* Needed to avoid random NaN */ if(nrm_major<FLT_EPSILON) pmavg->major[i] = avg[i]; else pmavg->major[i] = avg[i]/nrm_major; } /* Error estimates are computed completely differently here from that described in Bear and Pavlis (1999). Rather than use a jackknife on individual angles, here I've chosen to use a simple standard deviation measure using weighted residuals. The residuals, however, are computed from total angular separation computed using a dot product. This allows us to avoid wraparound errors that are inevitable with angles. First step is to compute a vector of angle residuals. */ for(i=0,ii=0;i<n;++i,ii+=3) { dotprod = ddot(3,v+ii,1,pmavg->major,1); dotprod /= dnrm2(3,v+ii,1); workn[i] = acos(dotprod); } /* weighted mean formula for error */ for(i=0,sumwt=0.0,sumsq=0.0;i<n;++i) { sumsq += workn[i]*workn[i]*weight[i]*weight[i]; sumwt += weight[i]; } ndgf = nint(sumwt) - 3; if(ndgf<1) { elog_notify(0,"pmvector_average: sum of weights = %lf in major axis average implies degrees of freedom less than 1\nUsing 1 degree of freedom\n",sumwt); ndgf = 1; } pmerr->ndgf_major = ndgf; pmerr->dtheta_major = sqrt(sumsq/((double)ndgf)); /* We scale the azimuthal error by 1/sin(theta) to get a stable error estimate that correctly goes to infinitity when theta -> 0*/ scoor = unit_vector_to_spherical(pmavg->major); pmerr->dphi_major = (pmerr->dtheta_major)/sin(scoor.theta); /* We first project the minor axis vectors onto the plane perpendicular to the average major axis. This reduces the degrees of freedom in a way that I consider reasonable and is in line with with Lorie Bear did */ for(i=0,ii=0;i<n;++i,ii+=3) { double minor_scale; /* Intentionally ignore error return of null project because the only error condition in current code cannot happen with this call. null_project writes result in the last argument, so this step is functionally like the v[ii]=pmv[i].major, etc. loop above, but combines the projection operation . */ null_project(pmavg->major,3,1,pmv[i].minor,v+ii); /* We also want to scale the vector by a factor that is determinable from rectilinearity to keep the axis length consistent to allow a refined rectilinearity average below */ minor_scale = 1.0 - pmv[i].rectilinearity; dscal(3,minor_scale,v+ii,1); } /* This constructs a rotational tranformation to a coordinate system where x1 and x2 are in the desired projection plane. Actually, the null projection above is redundant, but for now the extra work is largely irrelevant and is a good cross check for debugging. */ ray_coordinate_trans(scoor,U); for(i=0;i<n;++i) { for(j=0;j<3;++j) { work[j] = ddot(3,v+j+3*i,1,U+j,3); } dcopy(3,work,1,v+3*i,1); } /* Note the change from above to a 2-d space now. The above transformations zero the x3 direction after the transformation */ M_estimator_double_n_vector(v,2,n, IQ_SCALE_RELATIVE,PM_MINSCALE_MINOR,avg,weight); avg[2] = 0.0; nrm_minor = hypot(avg[0],avg[1]); /* This is the inverse tranformation -- u is orthogonal */ for(j=0;j<3;++j) work[j] = ddot(3,U+j*3,1,avg,1); /* This is similar to above, but, perhaps incorrectly, the degrees of freedom are larger by one because we reduce the space to 2d */ for(i=0,ii=0;i<n;++i,ii+=3) { dotprod = ddot(2,v+ii,1,avg,1); nrmtest = dnrm2(2,v+ii,1); if(nrmtest<=0.0) { elog_notify(0,"pmvector_average: minor axis estimate %d of %d estimates has 0 projection perpendicular to major\nArtificially set to average\n", i,n); workn[0] = 0.0; } else { dotprod/= nrmtest; /* because avg wasn't normalized we have divide by norm */ dotprod /= nrm_minor; workn[i] = acos(dotprod); } } /* We want the final result normalized to a unit vector length */ for(i=0;i<3;++i) pmavg->minor[i] = work[i]/nrm_minor; /* weighted mean formula again */ for(i=0,sumwt=0.0,sumsq=0.0;i<n;++i) { sumsq += workn[i]*workn[i]*weight[i]*weight[i]; sumwt += weight[i]; } ndgf = nint(sumwt) - 2; if(ndgf<1) { elog_notify(0,"pmvector_average: sum of weights = %lf in minor axis average implies degrees of freedom less than 1\nUsing 1 degree of freedom\n",sumwt); ndgf = 1; } pmerr->ndgf_minor = ndgf; pmerr->dtheta_minor = sqrt(sumsq/((double)ndgf)); /* We cast the minor axis in spherical coordinates like the major axis. This differ's from Lorie's skew measures, but it is simpler to deal with in a database output as it treats the two vector in a common way */ scoor = unit_vector_to_spherical(pmavg->minor); pmerr->dphi_minor = (pmerr->dtheta_major)/sin(scoor.theta); /* Finally, we deal with rectilinearity. We use the contents of v which are the projected minor axis values rather than the raw minor axes. This estimator will tend to give slightly better rectilinearity using the raw vectors because a projection is always <= original */ for(i=0;i<n;++i) { double minor_nrm; minor_nrm = dnrm2(3,v+i*3,1); /* Not needed because the major axis vector was previously normalized to unit length major_nrm = dnrm2(3,pmv[i].major,1); */ workn[i] = 1.0 - minor_nrm; } stats = MW_calc_statistics_double(workn,n); pmavg->rectilinearity = stats.median; pmerr->ndgf_rect = n - 1; /* Assume a simple normal distribution to convert interquartiles to standard deviation */ pmerr->delta_rect = NORMAL_IQSCALE*((stats.q3_4)-(stats.q1_4)); free(weight); free(workn); free(v); }
/* Subroutine */ int flowkm(integer ndim, doublereal **c0, doublereal **c1, integer iid, doublecomplex *ev) { /* System generated locals */ integer rwork_dim1; /* Local variables */ doublereal beta, *svde, *svds, svdu[1], *svdv; integer i, j; doublereal *v, *x; logical infev; doublereal const__; integer ndimm1; doublereal nrmc0x, nrmc1x, *qzalfi, *qzbeta; integer svdinf; doublereal *qzalfr; integer qzierr; doublereal *svdwrk, qzz[1], *rwork; rwork = (doublereal *)malloc(sizeof(doublereal)*ndim*ndim); svde = (doublereal *)malloc(sizeof(doublereal)*ndim); svds = (doublereal *)malloc(sizeof(doublereal)*(ndim+1)); svdv = (doublereal *)malloc(sizeof(doublereal)*ndim*ndim); v = (doublereal *)malloc(sizeof(doublereal)*ndim); x = (doublereal *)malloc(sizeof(doublereal)*ndim); qzalfi = (doublereal *)malloc(sizeof(doublereal)*ndim); qzbeta = (doublereal *)malloc(sizeof(doublereal)*ndim); qzalfr = (doublereal *)malloc(sizeof(doublereal)*ndim); svdwrk = (doublereal *)malloc(sizeof(doublereal)*ndim); /* Subroutine to compute Floquet multipliers via the "deflated circuit */ /* pencil" method. This routine is called by the AUTO routine FNSPBV */ /* storage for SVD computations */ /* compute right singular vectors only */ /* storage for generalized eigenvalue computations */ /* LOGICAL QZMATZ */ /* don't want to accumulate the transforms --- vectors not needed */ /* BLAS routines */ /* routines from EISPACK */ /* own routines */ /* Jim Demmel's svd routine ([email protected]) */ /* builtin F77 functions */ /* xx DOUBLE COMPLEX DCMPLX */ /* Make sure that you have enough local storage. */ /* Parameter adjustments */ /*--ev;*/ rwork_dim1 = ndim; /* Change sign of P1 so that we get the sign of the multipliers right. */ for (j = 0; j < ndim; ++j) { for (i = 0; i < ndim; ++i) { c1[j][i] = -c1[j][i]; } } /* Print the undeflated circuit pencil (C0, C1). */ if (iid > 4) { fprintf(fp9," Undeflated circuit pencil (C0, C1) \n"); fprintf(fp9," C0 : \n"); for (i = 0; i < ndim; ++i) { for (j = 0; j < ndim; ++j) { fprintf(fp9," %23.16f",c0[j][i]); } fprintf(fp9,"\n"); } fprintf(fp9," C1 : \n"); for (i = 0; i < ndim; ++i) { for (j = 0; j < ndim; ++j) { fprintf(fp9," %23.16f",c1[j][i]); } fprintf(fp9,"\n"); } } /* PART I: */ /* ======= */ /* Deflate the Floquet multiplier at +1.0 so that the deflated */ /* circuit pencil is not defective at periodic branch turning points. */ /* The matrix (C0 - C1) should be (nearly) singular. Find an approximatio n*/ /* to the right null vector (call it X). This will be our approximation */ /* to the eigenvector corresponding to the fixed multiplier at +1.0. */ /* There are many ways to get this approximation. We could use */ /* 1) p'(0) = f(p(0)) */ /* 2) AUTO'86 routine NLVC applied to C0-C1 */ /* 3) the right singular vector corresponding to the smallest */ /* singular value of C0-C1 */ /* I've chosen option 3) because it should introduce as little roundoff */ /* error as possible. Although it is more expensive, this is insignifican t*/ /* relative to the rest of the AUTO computations. Also, the SVD does give a*/ /* version of the Householder matrix which we would have to compute */ /* anyways. But note that it gives V = ( X perp | X ) and not (X | Xperp) ,*/ /* which the Householder routine would give. This will permute the deflat ed*/ /* circuit pencil, so that the part to be deflated is in the last column, */ /* not it the first column, as was shown in the paper. */ for (j = 0; j < ndim; ++j) { for (i = 0; i < ndim; ++i) { ARRAY2D(rwork, i, j) = c0[j][i] - c1[j][i]; } } { /* This is here since I don't want to change the calling sequence of the BLAS routines. */ integer tmp = 1; doublereal tmp_tol = 1.0E-16; ezsvd(rwork, &ndim, &ndim, &ndim, svds, svde, svdu, &tmp, svdv, &ndim, svdwrk, &tmp, &svdinf, &tmp_tol); } if (svdinf != 0) { fprintf(fp9," NOTE : Warning from subroutine FLOWKM SVD routine returned SVDINF = %4ld Floquet multiplier calculations may be wrong\n",svdinf); } /* Apply a Householder matrix (call it H1) based on the null vector */ /* to (C0, C1) from the right. H1 = SVDV = ( Xperp | X ), where X */ /* is the null vector. */ { /* This is here since I don't want to change the calling sequence of the BLAS routines. */ doublereal tmp1 = 1.0; doublereal tmp0 = 0.0; logical tmp_false = FALSE_; dgemm("n", "n", &ndim, &ndim, &ndim, &tmp1, *c0, &ndim, svdv, &ndim, &tmp0, rwork, &ndim, 1L, 1L); dgemc(&ndim, &ndim, rwork, &ndim, *c0, &ndim, &tmp_false); dgemm("n", "n", &ndim, &ndim, &ndim, &tmp1, *c1, &ndim, svdv, &ndim, &tmp0, rwork, &ndim, 1L, 1L); dgemc(&ndim, &ndim, rwork, &ndim, *c1, &ndim, &tmp_false); } /* Apply a Householder matrix (call it H2) based on */ /* (C0*X/||C0*X|| + C1*X/||C1*X||) / 2 */ /* to (C0*H1, C1*H1) from the left. */ { /* This is here since I don't want to change the calling sequence of the BLAS routines. */ integer tmp = 1; nrmc0x = dnrm2(&ndim, &c0[ndim - 1][0], &tmp); nrmc1x = dnrm2(&ndim, &c1[ndim - 1][0], &tmp); } for (i = 0; i < ndim; ++i) { x[i] = (c0[ndim - 1][i] / nrmc0x + c1[ndim - 1][i] / nrmc1x) / 2.; } dhhpr(1, ndim, ndim, x, 1, &beta, v); dhhap(1, ndim, ndim, ndim, &beta, v, LEFT, c0, ndim); dhhap(1, ndim, ndim, ndim, &beta, v, LEFT, c1, ndim); /* Rescale so that (H2^T)*C0*(H1)(1,NDIM) ~= (H2^T)*C1*(H1)(1,NDIM) ~= 1.0 */ /* Computing MAX */ const__ = max(fabs(c0[ndim - 1][0]),fabs(c1[ndim - 1][0])); for (j = 0; j < ndim; ++j) { for (i = 0; i < ndim; ++i) { c0[j][i] /= const__; c1[j][i] /= const__; } } /* Finished the deflation process! Print the deflated circuit pencil. */ if (iid > 4) { fprintf(fp9," Deflated cicuit pencil (H2^T)*(C0, C1)*(H1) \n"); fprintf(fp9," (H2^T)*C0*(H1) : \n"); for (i = 0; i < ndim; ++i) { for (j = 0; j < ndim; ++j) { fprintf(fp9," %23.16f",c0[j][i]); } fprintf(fp9,"\n"); } fprintf(fp9," (H2^T)*C1*(H1) : \n"); for (i = 0; i < ndim; ++i) { for (j = 0; j < ndim; ++j) { fprintf(fp9," %23.16f",c1[j][i]); } fprintf(fp9,"\n"); } } /* At this point we have */ /* (C0Bar, C1Bar) */ /* ::= (H2^T)*(C0, C1)*(H1). */ /* (( B0^T | Beta0 ) ( B1^T | Beta1 )) 1 */ /* = (( ----------------- ), ( ----------------- )) */ /* (( C0BarDef | Delta0 ) ( C1BarDef | Delta1 )) NDIM-1 */ /* NDIM-1 1 NDIM-1 1 */ /* and approximations to the Floquet multipliers are */ /* (Beta0/Beta1) union the eigenvalues of the deflated pencil */ /* (C0BarDef, C1BarDef). */ /* PART II: */ /* ======== */ /* Compute the eigenvalues of the deflated circuit pencil */ /* (C0BarDef, C1BarDef) */ /* by using the QZ routines from EISPACK. */ ndimm1 = ndim - 1; /* reduce the generalized eigenvalue problem to a simpler form */ /* (C0BarDef,C1BarDef) = (upper hessenberg, upper triangular) */ qzhes(ndim, ndimm1, &c0[0][1], &c1[0][1], FALSE_ , qzz); /* now reduce to an even simpler form */ /* (C0BarDef,C1BarDef) = (quasi-upper triangular, upper triangular) */ qzit(ndim, ndimm1, &c0[0][1], &c1[0][1], QZEPS1, FALSE_ , qzz, &qzierr); if (qzierr != 0) { fprintf(fp9," NOTE : Warning from subroutine FLOWKM : QZ routine returned QZIERR = %4ld Floquet multiplier calculations may be wrong \n",qzierr); } /* compute the generalized eigenvalues */ qzval(ndim, ndimm1, &c0[0][1], &c1[0][1], qzalfr, qzalfi, qzbeta, FALSE_, qzz); /* Pack the eigenvalues into complex form. */ ev[0].r = c0[ndim - 1][0] / c1[ndim - 1][0]; ev[0].i = 0.; infev = FALSE_; for (j = 0; j < ndimm1; ++j) { if (qzbeta[j] != 0.) { ev[j + 1].r = qzalfr[j] / qzbeta[j]; ev[j + 1].i = qzalfi[j] / qzbeta[j]; } else { ev[j + 1].r = 1e30, ev[j + 1].i = 1e30; infev = TRUE_; } } if (infev) { fprintf(fp9," NOTE : Warning from subroutine FLOWKM : Infinite Floquet multiplier represented by CMPLX( 1.0D+30, 1.0D+30 )\n"); } free(svde); free(svds); free(svdv); free(v); free(x); free(qzalfi); free(qzbeta); free(qzalfr); free(svdwrk); free(rwork); return 0; } /* flowkm_ */
/* Subroutine */ int dhhpr(integer k, integer j, integer n, doublereal *x, integer incx, doublereal *beta, doublereal *v) { /* Local variables */ integer iend, jmkp1; integer i, l; doublereal m, alpha; integer istart; /* IMPLICIT UNDEFINED (A-Z,a-z) */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DHHPR computes a Householder Plane Rotation (G&vL Alg. 3.3-1) */ /* defined by v and beta. */ /* (I - beta v vt) * x is such that x_i = 0 for i=k+1 to j. */ /* Parameters */ /* ========== */ /* K - INTEGER. */ /* On entry, K specifies that the K+1st entry of X */ /* be the first to be zeroed. */ /* K must be at least one. */ /* Unchanged on exit. */ /* J - INTEGER. */ /* On entry, J specifies the last entry of X to be zeroed. */ /* J must be >= K and <= N. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the (logical) length of X. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( N - 1 )*abs( INCX ) ). */ /* On entry, X specifies the vector to be (partially) zeroed. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must be > zero. If X represents part of a matrix, */ /* then use INCX = 1 if a column vector is being zeroed and */ /* INCX = NDIM if a row vector is being zeroed. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* BETA specifies the scalar beta. (see pg. 40 of G and v.L.) */ /* V - DOUBLE PRECISION array of DIMENSION at least n. */ /* Is updated to be the appropriate Householder vector for */ /* the given problem. (Note: space for the implicit zeroes is */ /* assumed to be present. Will save on time for index translation .)*/ /* -- Written by Tom Fairgrieve, */ /* Department of Computer Science, */ /* University of Toronto, */ /* Toronto, Ontario CANADA M5S 1A4 */ /* .. Local Scalars .. */ /* .. External Functions from BLAS .. */ /* .. External Subroutines from BLAS .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ /*--v;*/ /*--x;*/ if (k < 1 || k > j) { fprintf(fp9,"Domain error for K in DHHPR\n"); throw "Domain error for K in DHHPR\n"; //exit(0); } if (j > n) { fprintf(fp9,"Domain error for J in DHHPR\n"); throw "Domain error for J in DHHPR\n"; //exit(0); } if (incx < 1) { fprintf(fp9,"Domain error for INCX in DHHPR\n"); throw "Domain error for INCX in DHHPR\n"; //exit(0); } /* Number of potential non-zero elements in V. */ jmkp1 = j - k + 1; /* Find M := max{ |x_k|, ... , |x_j| } */ m = fabs(x[-1 + idamax(&jmkp1, &x[-1 + k], &incx)]); /* alpha := 0 */ /* For i = k to j */ /* v_i = x_i / m */ /* alpha := alpha + v_i^2 (i.e. alpha = vtv) */ /* End For */ /* alpha := sqrt( alpha ) */ /* Copy X(K)/M, ... , X(J)/M to V(K), ... , V(J) */ if (incx == 1) { for (i = k - 1; i < j; ++i) { v[i] = x[i] / m; } } else { iend = jmkp1 * incx; istart = (k - 1) * incx + 1; l = k; for (i = istart; incx < 0 ? i >= iend : i <= iend; i += incx) { v[-1 + l] = x[-1 + i] / m; ++l; } } /* Compute alpha */ { /* This is here since I don't want to change the calling sequence of the BLAS routines. */ integer tmp = 1; alpha = dnrm2(&jmkp1, &v[-1 + k], &tmp); } /* beta := 1/(alpha(alpha + |V_k|)) */ *beta = 1. / (alpha * (alpha + fabs(v[-1 + k]))); /* v_k := v_k + sign(v_k)*alpha */ v[-1 + k] += d_sign(1.0, v[-1 + k]) * alpha; /* Done ! */ return 0; /* End of DHHPR. */ } /* dhhpr_ */
int cg_mkl_double(MKL_INT n, double a[], MKL_INT ia[], MKL_INT ja[], double solution[], double rhs[], MKL_INT max_iter, double r_tol, double a_tol) { MKL_INT rci_request, itercount, i; // parameter arrays for solver MKL_INT ipar[128]; double dpar[128]; double euclidean_norm; // for SpMV char tr = 'n'; double * tmp; double * residual; tmp = (double *) malloc(4 * n * sizeof(double)); residual = (double *) malloc(n * sizeof(double)); // initialize the solver dcg_init(&n,solution,rhs,&rci_request,ipar,dpar,tmp); if (rci_request!=0) goto failure; ipar[1]=6; // output all warnings and errors ipar[4]=max_iter; // maximum number of iterations ipar[7]=1; // stop iteration at maximum iterations ipar[8]=1; // residual stopping test ipar[9]=0; // request for the user defined stopping test dpar[0]=r_tol * r_tol; // relative residual tolerance dpar[1]=a_tol * a_tol; // absolute residual tolerance /*---------------------------------------------------------------------------*/ /* Check the correctness and consistency of the newly set parameters */ /*---------------------------------------------------------------------------*/ dcg_check(&n,solution,rhs,&rci_request,ipar,dpar,tmp); if (rci_request!=0) goto failure; /*---------------------------------------------------------------------------*/ /* Compute the solution by RCI (P)CG solver without preconditioning */ /* Reverse Communications starts here */ /*---------------------------------------------------------------------------*/ rci: dcg(&n,solution,rhs,&rci_request,ipar,dpar,tmp); //printf("Residual norm is %e\n", sqrt(dpar[4])); /*---------------------------------------------------------------------------*/ /* If rci_request=0, then the solution was found with the required precision */ /*---------------------------------------------------------------------------*/ if (rci_request==0) goto getsln; /*---------------------------------------------------------------------------*/ /* If rci_request=1, then compute the vector A*tmp[0] */ /* and put the result in vector tmp[n] */ /*---------------------------------------------------------------------------*/ if (rci_request==1) { mkl_cspblas_dcsrgemv(&tr, &n, a, ia, ja, tmp, &tmp[n]); goto rci; } /*---------------------------------------------------------------------------*/ /* If rci_request=anything else, then dcg subroutine failed */ /* to compute the solution vector: solution[n] */ /*---------------------------------------------------------------------------*/ goto failure; /*---------------------------------------------------------------------------*/ /* Reverse Communication ends here */ /* Get the current iteration number into itercount */ /*---------------------------------------------------------------------------*/ getsln: dcg_get(&n,solution,rhs,&rci_request,ipar,dpar,tmp,&itercount); mkl_cspblas_dcsrgemv(&tr, &n, a, ia, ja, solution, residual); for(i=0;i<n;i++) residual[i] -= rhs[i]; i=1; euclidean_norm=dnrm2(&n,residual,&i); printf("\nMKL CG reached %e residual in %d iterations\n",euclidean_norm, itercount); // release memory MKL_FreeBuffers(); free(tmp); free(residual); if (itercount <= max_iter && (euclidean_norm * euclidean_norm) < (dpar[0] * dpar[4] + dpar[5])) { // printf("This example has successfully PASSED through all steps of computation!"); // printf("\n"); // printf("(Residual norm is %e)\n", euclidean_norm); return 0; } else { // printf("This example may have FAILED as either the number of iterations exceeds"); // printf("\nthe maximum number of iterations %d, or the ", max_iter); // printf("computed solution\ndiffers has not sufficiently converged."); // printf("(Residual norm is %e), or both.\n", euclidean_norm); return 1; } /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ failure: printf("This example FAILED as the solver has returned the ERROR "); printf("code %d", rci_request); MKL_FreeBuffers(); return 1; }
/*---------------------------------------------------------------------------*/ int CGFMMmrhs(MKL_INT *solution, double *ShellSphs, double *rhs, double *nRhs, MKL_INT n) { /*---------------------------------------------------------------------------*/ /* Define arrays for the upper triangle of the coefficient matrix and rhs vector */ /* Compressed sparse row storage is used for sparse representation */ /*---------------------------------------------------------------------------*/ MKL_INT rci_request, expected_itercount = 20, i, j; MKL_INT itercount[nRhs]; /* Fill all arrays containing matrix data. */ /*---------------------------------------------------------------------------*/ /* Allocate storage for the solver ?par and temporary storage tmp */ /*---------------------------------------------------------------------------*/ MKL_INT length = 128, method = 1; /*---------------------------------------------------------------------------*/ /* Some additional variables to use with the RCI (P)CG solver */ /*---------------------------------------------------------------------------*/ MKL_INT ipar[128 + 2 * nRhs]; double euclidean_norm, dpar[128 + 2 * nRhs]; double *tmp; tmp = (double*)calloc(n * (3 + nRhs),sizeof(double)); double eone = -1.E0; MKL_INT ione = 1; /*---------------------------------------------------------------------------*/ /* Initialize the initial guess */ /*---------------------------------------------------------------------------*/ for (i = 0; i < n*nRhs; i++) solution[i] = 1.E0; /*---------------------------------------------------------------------------*/ /* Initialize the solver */ /*---------------------------------------------------------------------------*/ for (i = 0; i < (length + 2 * nRhs); i++) ipar[i] = 0; for (i = 0; i < (length + 2 * nRhs); i++) dpar[i] = 0.E0; dcgmrhs_init (&n, solution, &nRhs, rhs, &method, &rci_request, ipar, dpar, tmp); if (rci_request != 0) goto failure; /*---------------------------------------------------------------------------*/ /* Set the desired parameters: */ /* LOGICAL parameters: */ /* do residual stopping test */ /* do not request for the user defined stopping test */ /* DOUBLE parameters */ /* set the relative tolerance to 1.0D-5 instead of default value 1.0D-6 */ /*---------------------------------------------------------------------------*/ ipar[8] = 1; ipar[9] = 0; dpar[0] = 1.E-5; /*---------------------------------------------------------------------------*/ /* Compute the solution by RCI (P)CG solver without preconditioning */ /* Reverse Communications starts here */ /*---------------------------------------------------------------------------*/ rci:dcgmrhs (&n, solution, &nRhs, rhs, &rci_request, ipar, dpar, tmp); /*---------------------------------------------------------------------------*/ /* If rci_request=0, then the solution was found with the required precision */ /*---------------------------------------------------------------------------*/ if (rci_request == 0) goto getsln; /*---------------------------------------------------------------------------*/ /* If rci_request=1, then compute the vector A*tmp[0] */ /* and put the result in vector tmp[n] */ /*---------------------------------------------------------------------------*/ if (rci_request == 1) { //mkl_dcsrsymv (&tr, &n, a, ia, ja, tmp, &tmp[n]); //debug RPY(n, ShellSphs,tmp); // SPMV by 4 calls of FMM goto rci; } /*---------------------------------------------------------------------------*/ /* If rci_request=anything else, then dcg subroutine failed */ /* to compute the solution vector: solution[n] */ /*---------------------------------------------------------------------------*/ goto failure; /*---------------------------------------------------------------------------*/ /* Reverse Communication ends here */ /* Get the current iteration number into itercount */ /*---------------------------------------------------------------------------*/ getsln:dcgmrhs_get (&n, solution, &nRhs, rhs, &rci_request, ipar, dpar, tmp, itercount); /*---------------------------------------------------------------------------*/ /* Print solution vector: solution[n] and number of iterations: itercount */ /*---------------------------------------------------------------------------*/ printf ("The system has been solved\n"); printf ("The following solution obtained\n"); for (i = 0; i < n / 2; i++) printf ("%6.3f ", solution[i]); printf ("\n"); for (i = n / 2; i < n; i++) printf ("%6.3f ", solution[i]); printf ("\n"); for (i = 0; i < n / 2; i++) printf ("%6.3f ", solution[n + i]); printf ("\n"); for (i = n / 2; i < n; i++) printf ("%6.3f ", solution[n + i]); printf ("\nExpected solution is\n"); for (i = 0; i < n / 2; i++) { printf ("%6.3f ", expected_sol[i]); expected_sol[i] -= solution[i]; } printf ("\n"); for (i = n / 2; i < n; i++) { printf ("%6.3f ", expected_sol[i]); expected_sol[i] -= solution[i]; } printf ("\n"); for (i = 0; i < n / 2; i++) { printf ("%6.3f ", expected_sol[n + i]); expected_sol[n + i] -= solution[n + i]; } printf ("\n"); for (i = n / 2; i < n; i++) { printf ("%6.3f ", expected_sol[n + i]); expected_sol[n + i] -= solution[n + i]; } printf ("\n"); i = 1; j = n * nRhs; euclidean_norm = dnrm2 (&j, expected_sol, &i); /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ MKL_Free_Buffers (); if (euclidean_norm < 1.0e-12) { printf ("This example has successfully PASSED through all steps of computation!\n"); return 0; } else { printf ("This example may have FAILED as the computed solution differs\n"); printf ("much from the expected solution (Euclidean norm is %e).\n", euclidean_norm); return 1; } /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ failure:printf ("This example FAILED as the solver has returned the ERROR code %d", rci_request); MKL_Free_Buffers (); return 1; }
NLuint nlSolve_GMRES() { NLdouble* b = nlCurrentContext->b ; NLdouble* x = nlCurrentContext->x ; NLdouble eps = nlCurrentContext->threshold ; NLint max_iter = nlCurrentContext->max_iterations ; NLint n = nlCurrentContext->n ; NLint m = nlCurrentContext->inner_iterations ; typedef NLdouble *NLdoubleP; NLdouble *V = NL_NEW_ARRAY(NLdouble, n*(m+1) ) ; NLdouble *U = NL_NEW_ARRAY(NLdouble, m*(m+1)/2 ) ; NLdouble *r = NL_NEW_ARRAY(NLdouble, n ) ; NLdouble *y = NL_NEW_ARRAY(NLdouble, m+1 ) ; NLdouble *c = NL_NEW_ARRAY(NLdouble, m ) ; NLdouble *s = NL_NEW_ARRAY(NLdouble, m ) ; NLdouble **v = NL_NEW_ARRAY(NLdoubleP, m+1 ) ; NLdouble * Ax = NL_NEW_ARRAY(NLdouble,nlCurrentContext->n); NLdouble accu =0.0; NLint i, j, io, uij, u0j ; NLint its = -1 ; NLdouble beta, h, rd, dd, nrm2b ; for ( i=0; i<=m; ++i ){ v[i]=V+i*n ; } nrm2b=dnrm2(n,b,1); io=0; do { /* outer loop */ ++io; nlCurrentContext->matrix_vector_prod(x,r); daxpy(n,-1.,b,1,r,1); beta=dnrm2(n,r,1); dcopy(n,r,1,v[0],1); dscal(n,1./beta,v[0],1); y[0]=beta; j=0; uij=0; do { /* inner loop: j=0,...,m-1 */ u0j=uij; nlCurrentContext->matrix_vector_prod(v[j],v[j+1]); dgemv( Transpose,n,j+1,1.,V,n,v[j+1],1,0.,U+u0j,1 ); dgemv( NoTranspose,n,j+1,-1.,V,n,U+u0j,1,1.,v[j+1],1 ); h=dnrm2(n,v[j+1],1); dscal(n,1./h,v[j+1],1); for (i=0; i<j; ++i ) { /* rotiere neue Spalte */ double tmp = c[i]*U[uij]-s[i]*U[uij+1]; U[uij+1] = s[i]*U[uij]+c[i]*U[uij+1]; U[uij] = tmp; ++uij; } { /* berechne neue Rotation */ rd = U[uij]; dd = sqrt(rd*rd+h*h); c[j] = rd/dd; s[j] = -h/dd; U[uij] = dd; ++uij; } { /* rotiere rechte Seite y (vorher: y[j+1]=0) */ y[j+1] = s[j]*y[j]; y[j] = c[j]*y[j]; } ++j; } while ( j<m && fabs(y[j])>=eps*nrm2b ) ; { /* minimiere bzgl Y */ dtpsv( UpperTriangle, NoTranspose, NotUnitTriangular, j,U,y,1 ); /* correct X */ dgemv(NoTranspose,n,j,-1.,V,n,y,1,1.,x,1); } } while ( fabs(y[j])>=eps*nrm2b && (m*(io-1)+j) < max_iter); /* Count the inner iterations */ its = m*(io-1)+j; nlCurrentContext->matrix_vector_prod(x,Ax); for(i = 0 ; i < n ; ++i) accu+=(Ax[i]-b[i])*(Ax[i]-b[i]); printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/nrm2b); NL_DELETE_ARRAY(Ax); NL_DELETE_ARRAY(V) ; NL_DELETE_ARRAY(U) ; NL_DELETE_ARRAY(r) ; NL_DELETE_ARRAY(y) ; NL_DELETE_ARRAY(c) ; NL_DELETE_ARRAY(s) ; NL_DELETE_ARRAY(v) ; return its; }
void lbfgs (int n, /* I num unknowns = 3 * num atoms */ real stop_tol, /* I tol for ||g||/sqrt(n) */ int itmax, /* I max num iterations allowed */ int itmax_line, int* iter, /* IO iters required to find min */ real* fret, /* O minimum value */ int iprint, int last_call) /* not used */ /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Called by exec_minimization in "options.c". * This routine is modelled on mm_nlcg in "nlcg.c", by JC Meza. * It finds the minimum of the unconstrained molecular potential * energy using trust region methods and a limited memory BFGS * approximation of the Hessian. Each trust region subproblem is * solved by Powell's dogleg method. * * The L-BFGS structures are currently FORTRAN subroutines that * require memory allocation of CMPS and CMPY for work space. * These variables must be passed to the FORTRAN subroutines and * not altered anywhere else. * * The outline of the algorithm is as follows: * Allocate memory, initialize parameters * Compute the gradient g_vec * LOOP * IF ||g_vec|| < tol THEN RETURN * Update L-BFGS matrices B and H * Compute the dogleg step d_vec * Compute the predicted reduction of the quadratic model * Compute the actual potential energy reduction * IF ared/pred > eta * THEN x_vec = x_vec + d_vec * TR_size >= TR_size * Compute the gradient g_vec at the new point * ELSE TR_size < ||d_vec|| * CONTINUE *********************************************************************/ { real *x_vec, *new_x_vec, *g_vec, *d_vec, *y_vec; int i; int iter_num, NUPDT; real obj_val, new_obj_val; real dd1, dd2; real eta, pred, ared; real TR_size; real gnorm, xnorm, dnorm; real *CMPS, *CMPY, *tmp_vec; /*-- Open the status file for saving output. */ bfgs_fp = fopen (status_file, "w"); if (bfgs_fp == NULL) { fprintf (stderr, "lbfgs: cannot open %s\n", status_file); printf ("*** lbfgs: cannot open %s\n", status_file); exit (1); } /*-------------------------------------------------------------------- * Allocate memory and set up files. *-------------------------------------------------------------------*/ fprintf (stderr, "Allocate space in lbfgs n=%d\n",n); x_vec = (real *) calloc ((n+1) , sizeof(real)); new_x_vec = (real *) calloc ((n+1) , sizeof(real)); g_vec = (real *) calloc ((n+1) , sizeof(real)); d_vec = (real *) calloc ((n+1) , sizeof(real)); y_vec = (real *) calloc ((n+1) , sizeof(real)); CMPS = (real *) calloc ((n*T_LBFGS) , sizeof(real)); CMPY = (real *) calloc ((n*T_LBFGS) , sizeof(real)); tmp_vec = (real *) calloc ((n+1) , sizeof(real)); /*-------------------------------------------------------------------- * Evaluate the objective and its gradient at the start point. *-------------------------------------------------------------------*/ /* obj_val = potential (str, coor, force); force_to_grad0 (str, force, g_vec); */ /*-------------------------------------------------------------------- * Initialize the trust region algorithm parameters. *-------------------------------------------------------------------*/ eta = 0.3; /*-- step acceptance threshold */ TR_size = 1.0; iter_num = 0; /*-- number inner iterations */ NUPDT = 1; /*-- number L-BFGS B updates */ gnorm = dnrm2 (n, g_vec, 1); fprintf(bfgs_fp, "\n\t\t Steepest descent with Trust Regions\n"); fprintf(bfgs_fp, " Iter f(x) ||grad||/n Delta ||step|| ared pred\n"); fprintf(bfgs_fp," %5d %12.4e %12.4e %12.4e\n", iter_num, obj_val, gnorm/sqrt(n), TR_size); /*-------------------------------------------------------------------- * Begin the main loop. *-------------------------------------------------------------------*/ while (iter_num < itmax) { iter_num++; dd1 = g_vec[0]; for (i=1; i<n; i++) if (g_vec[i] > dd1) dd1 = g_vec[i]; if (dd1 <= stop_tol) { /*-- Exit main loop IF ||g||_2 sufficiently small. */ /* if (gnorm <= (stop_tol * sqrt(n))) {*/ xnorm = dnrm2 (n, x_vec, 1); fprintf (stderr, " step_size: %5d energy: %12.4f |Grad|: %10.4f |X|: %10.4f\n", iter_num, obj_val, gnorm/sqrt((real)(n)), xnorm/sqrt((real)(n))); break; } /*-- Solve the dogleg subproblem. */ Dogleg (n, g_vec, gnorm, TR_size, NUPDT, T_LBFGS, CMPS, CMPY, tmp_vec, d_vec, &dnorm); /*-- Compute pred = - g'd - 1/2 d'Bd. */ dd1 = ddot (n, g_vec, 1, d_vec, 1); i = T_LBFGS; multbv_ (&n, &i, d_vec, tmp_vec, &NUPDT, CMPS, CMPY); dd2 = ddot (n, d_vec, 1, tmp_vec, 1); pred = -dd1 - (0.5 * dd2); if (pred < sqrt(MMIN)) { printf ("*** Predicted reduction not positive. <lbfgs>\n"); printf (" pred = %15.6e\n", pred); exit (1); } /*-- Compute ared by evaluating the objective at the trial point. */ dcopy (n, x_vec, 1, new_x_vec, 1); daxpy (n, 1.0, d_vec, 1, new_x_vec, 1); /* call evalf p_to_coor0 (str, new_x_vec, coor); new_obj_val = potential (str, coor, force); */ ared = obj_val - new_obj_val; /*-- Decide whether to take the step. *-- If yes, then increase TR_size, compute the gradient, and *-- update the L-BFGS approximations. *-- If no, then decrease TR_size to a fraction of ||d||_2. */ if ((ared / pred) >= eta) { /*-- Increase the trust region size. */ if (ared / pred >= 0.9) { dd1 = 10.0 * dnorm; if (dd1 > TR_size) TR_size = dd1; } else { dd1 = 2.0 * dnorm; if (dd1 > TR_size) TR_size = dd1; } if (TR_size > 1.0e3) TR_size = 1.0e3; /*-- Get the gradient from the previously calculated force vector. *-- Set y_vec = new gradient - old gradient. */ dcopy (n, g_vec, 1, y_vec, 1); dscal (n, -1.0, y_vec, 1); /* get grad force_to_grad0 (str, force, g_vec); */ daxpy (n, 1.0, g_vec, 1, y_vec, 1); /*-- Update the L-BFGS and inverse L-BFGS approximations. */ Update_lbfgs (n, T_LBFGS, d_vec, y_vec, tmp_vec, &NUPDT, CMPS, CMPY); dcopy (n, new_x_vec, 1, x_vec, 1); gnorm = dnrm2 (n, g_vec, 1); obj_val = new_obj_val; /* p_to_coor0 (str, x_vec, coor); */ fprintf(bfgs_fp," %5d %12.4e %12.4e %12.4e %12.4e %12.3e %12.3e\n", iter_num, obj_val, gnorm/sqrt(n), TR_size, dnorm, ared, pred); } else { /*-- Decrease the trust region size by linear interpolation. */ dd1 = (1.0 - eta) / (1.0 - (ared / pred)); if (dd1 < 0.1) dd1 = 0.1; if (dd1 > 0.5) dd1 = 0.5; TR_size = dd1 * dnorm; fprintf(bfgs_fp,"rej %5d %12.4e %12.4e %12.4e %12.4e %12.3e %12.3e\n", iter_num, obj_val, gnorm/sqrt(n), TR_size, dnorm, ared, pred); if (TR_size < (100.0 * MCHEPS)) { printf ("*** Trust region too small to continue. <lbfgs>\n"); fprintf (bfgs_fp, "*** Trust region too small to continue.\n"); fflush (bfgs_fp); exit (1); } } } /*** end of while(iter_num < itmax) ***/ /*-------------------------------------------------------------------- * Clean up and exit. *-------------------------------------------------------------------*/ *iter = iter_num; *fret = obj_val; free (x_vec); free (new_x_vec); free (g_vec); free (d_vec); return; }
void dqrdc(double a[], int lda, int n, int p, double qraux[], int jpvt[], double work[], int job) /******************************************************************************/ /* Purpose: DQRDC computes the QR factorization of a real rectangular matrix. Discussion: DQRDC uses Householder transformations. Column pivoting based on the 2-norms of the reduced columns may be performed at the user's option. Licensing: This code is distributed under the GNU LGPL license. Modified: 07 June 2005 Author: C version by John Burkardt. Reference: Jack Dongarra, Cleve Moler, Jim Bunch and Pete Stewart, LINPACK User's Guide, SIAM, (Society for Industrial and Applied Mathematics), 3600 University City Science Center, Philadelphia, PA, 19104-2688. ISBN 0-89871-172-X Parameters: Input/output, double A(LDA,P). On input, the N by P matrix whose decomposition is to be computed. On output, A contains in its upper triangle the upper triangular matrix R of the QR factorization. Below its diagonal A contains information from which the orthogonal part of the decomposition can be recovered. Note that if pivoting has been requested, the decomposition is not that of the original matrix A but that of A with its columns permuted as described by JPVT. Input, int LDA, the leading dimension of the array A. LDA must be at least N. Input, int N, the number of rows of the matrix A. Input, int P, the number of columns of the matrix A. Output, double QRAUX[P], contains further information required to recover the orthogonal part of the decomposition. Input/output, integer JPVT[P]. On input, JPVT contains integers that control the selection of the pivot columns. The K-th column A(*,K) of A is placed in one of three classes according to the value of JPVT(K). > 0, then A(K) is an initial column. = 0, then A(K) is a free column. < 0, then A(K) is a final column. Before the decomposition is computed, initial columns are moved to the beginning of the array A and final columns to the end. Both initial and final columns are frozen in place during the computation and only free columns are moved. At the K-th stage of the reduction, if A(*,K) is occupied by a free column it is interchanged with the free column of largest reduced norm. JPVT is not referenced if JOB == 0. On output, JPVT(K) contains the index of the column of the original matrix that has been interchanged into the K-th column, if pivoting was requested. Workspace, double WORK[P]. WORK is not referenced if JOB == 0. Input, int JOB, initiates column pivoting. 0, no pivoting is done. nonzero, pivoting is done. */ { int jp; int j; int lup; int maxj; double maxnrm, nrmxl, t, tt; int pl = 1, pu = 0; /* If pivoting is requested, rearrange the columns. */ if (job != 0) { for (j = 1; j <= p; j++) { int swapj = (0 < jpvt[j - 1]); jpvt[j - 1] = (jpvt[j - 1] < 0) ? -j : j; if (swapj) { if (j != pl) dswap(n, a + 0 + (pl - 1)*lda, 1, a + 0 + (j - 1), 1); jpvt[j - 1] = jpvt[pl - 1]; jpvt[pl - 1] = j; pl++; } } pu = p; for (j = p; 1 <= j; j--) { if (jpvt[j - 1] < 0) { jpvt[j - 1] = -jpvt[j - 1]; if (j != pu) { dswap(n, a + 0 + (pu - 1)*lda, 1, a + 0 + (j - 1)*lda, 1); jp = jpvt[pu - 1]; jpvt[pu - 1] = jpvt[j - 1]; jpvt[j - 1] = jp; } pu = pu - 1; } } } /* Compute the norms of the free columns. */ for (j = pl; j <= pu; j++) qraux[j - 1] = dnrm2(n, a + 0 + (j - 1) * lda, 1); for (j = pl; j <= pu; j++) work[j - 1] = qraux[j - 1]; /* Perform the Householder reduction of A. */ lup = i4_min(n, p); for (int l = 1; l <= lup; l++) { /* Bring the column of largest norm into the pivot position. */ if (pl <= l && l < pu) { maxnrm = 0.0; maxj = l; for (j = l; j <= pu; j++) { if (maxnrm < qraux[j - 1]) { maxnrm = qraux[j - 1]; maxj = j; } } if (maxj != l) { dswap(n, a + 0 + (l - 1)*lda, 1, a + 0 + (maxj - 1)*lda, 1); qraux[maxj - 1] = qraux[l - 1]; work[maxj - 1] = work[l - 1]; jp = jpvt[maxj - 1]; jpvt[maxj - 1] = jpvt[l - 1]; jpvt[l - 1] = jp; } } /* Compute the Householder transformation for column L. */ qraux[l - 1] = 0.0; if (l != n) { nrmxl = dnrm2(n - l + 1, a + l - 1 + (l - 1) * lda, 1); if (nrmxl != 0.0) { if (a[l - 1 + (l - 1)*lda] != 0.0) nrmxl = nrmxl * r8_sign(a[l - 1 + (l - 1) * lda]); dscal(n - l + 1, 1.0 / nrmxl, a + l - 1 + (l - 1)*lda, 1); a[l - 1 + (l - 1)*lda] = 1.0 + a[l - 1 + (l - 1) * lda]; /* Apply the transformation to the remaining columns, updating the norms. */ for (j = l + 1; j <= p; j++) { t = -ddot(n - l + 1, a + l - 1 + (l - 1) * lda, 1, a + l - 1 + (j - 1) * lda, 1) / a[l - 1 + (l - 1) * lda]; daxpy(n - l + 1, t, a + l - 1 + (l - 1)*lda, 1, a + l - 1 + (j - 1)*lda, 1); if (pl <= j && j <= pu) { if (qraux[j - 1] != 0.0) { tt = 1.0 - pow(r8_abs(a[l - 1 + (j - 1) * lda]) / qraux[j - 1], 2); tt = r8_max(tt, 0.0); t = tt; tt = 1.0 + 0.05 * tt * pow(qraux[j - 1] / work[j - 1], 2); if (tt != 1.0) qraux[j - 1] = qraux[j - 1] * sqrt(t); else { qraux[j - 1] = dnrm2(n - l, a + l + (j - 1) * lda, 1); work[j - 1] = qraux[j - 1]; } } } } /* Save the transformation. */ qraux[l - 1] = a[l - 1 + (l - 1) * lda]; a[l - 1 + (l - 1)*lda] = -nrmxl; } } } }
/* Function Definitions */ real_T b_norm(const emxArray_real_T *x) { real_T y; int32_T n; ptrdiff_t n_t; ptrdiff_t incx_t; double * xix0_t; if ((x->size[0] == 1) || (x->size[1] == 1)) { emlrtPushRtStackR2012b(&ei_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&gi_emlrtRSI, emlrtRootTLSGlobal); n = x->size[0] * x->size[1]; emlrtPushRtStackR2012b(&gc_emlrtRSI, emlrtRootTLSGlobal); if (1 > n) { y = 0.0; } else { emlrtPushRtStackR2012b(&ic_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&kc_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal); emlrt_checkEscapedGlobals(); n_t = (ptrdiff_t)(n); emlrtPopRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&kc_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&lc_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal); emlrt_checkEscapedGlobals(); incx_t = (ptrdiff_t)(1); emlrtPopRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&lc_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&mc_emlrtRSI, emlrtRootTLSGlobal); emlrt_checkEscapedGlobals(); xix0_t = (double *)(&x->data[0]); emlrtPopRtStackR2012b(&mc_emlrtRSI, emlrtRootTLSGlobal); emlrt_checkEscapedGlobals(); y = dnrm2(&n_t, xix0_t, &incx_t); emlrtPopRtStackR2012b(&ic_emlrtRSI, emlrtRootTLSGlobal); } emlrtPopRtStackR2012b(&gc_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&gi_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&ei_emlrtRSI, emlrtRootTLSGlobal); } else { emlrtPushRtStackR2012b(&fi_emlrtRSI, emlrtRootTLSGlobal); n = x->size[0] * x->size[1]; emlrtPushRtStackR2012b(&gc_emlrtRSI, emlrtRootTLSGlobal); if (1 > n) { y = 0.0; } else { emlrtPushRtStackR2012b(&ic_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&kc_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal); emlrt_checkEscapedGlobals(); n_t = (ptrdiff_t)(n); emlrtPopRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&kc_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&lc_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal); emlrt_checkEscapedGlobals(); incx_t = (ptrdiff_t)(1); emlrtPopRtStackR2012b(&nc_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&lc_emlrtRSI, emlrtRootTLSGlobal); emlrtPushRtStackR2012b(&mc_emlrtRSI, emlrtRootTLSGlobal); emlrt_checkEscapedGlobals(); xix0_t = (double *)(&x->data[0]); emlrtPopRtStackR2012b(&mc_emlrtRSI, emlrtRootTLSGlobal); emlrt_checkEscapedGlobals(); y = dnrm2(&n_t, xix0_t, &incx_t); emlrtPopRtStackR2012b(&ic_emlrtRSI, emlrtRootTLSGlobal); } emlrtPopRtStackR2012b(&gc_emlrtRSI, emlrtRootTLSGlobal); emlrtPopRtStackR2012b(&fi_emlrtRSI, emlrtRootTLSGlobal); } return y; }
static void c_eml_qrsolve(const emlrtStack *sp, const emxArray_real_T *A, emxArray_real_T *B, emxArray_real_T *Y) { emxArray_real_T *b_A; emxArray_real_T *work; int32_T mn; int32_T i51; int32_T ix; emxArray_real_T *tau; emxArray_int32_T *jpvt; int32_T m; int32_T n; int32_T b_mn; emxArray_real_T *vn1; emxArray_real_T *vn2; int32_T k; boolean_T overflow; boolean_T b12; int32_T i; int32_T i_i; int32_T nmi; int32_T mmi; int32_T pvt; int32_T iy; boolean_T b13; real_T xnorm; int32_T i52; real_T atmp; real_T d16; boolean_T b14; boolean_T b_i; ptrdiff_t n_t; ptrdiff_t incx_t; double * xix0_t; boolean_T exitg1; const mxArray *y; static const int32_T iv78[2] = { 1, 8 }; const mxArray *m14; char_T cv76[8]; static const char_T cv77[8] = { '%', '%', '%', 'd', '.', '%', 'd', 'e' }; char_T cv78[14]; uint32_T unnamed_idx_0; emlrtStack st; emlrtStack b_st; emlrtStack c_st; emlrtStack d_st; emlrtStack e_st; emlrtStack f_st; emlrtStack g_st; emlrtStack h_st; st.prev = sp; st.tls = sp->tls; b_st.prev = &st; b_st.tls = st.tls; c_st.prev = &b_st; c_st.tls = b_st.tls; d_st.prev = &c_st; d_st.tls = c_st.tls; e_st.prev = &d_st; e_st.tls = d_st.tls; f_st.prev = &e_st; f_st.tls = e_st.tls; g_st.prev = &f_st; g_st.tls = f_st.tls; h_st.prev = &g_st; h_st.tls = g_st.tls; emlrtHeapReferenceStackEnterFcnR2012b(sp); emxInit_real_T(sp, &b_A, 2, &m_emlrtRTEI, true); b_emxInit_real_T(sp, &work, 1, &rb_emlrtRTEI, true); mn = (int32_T)muDoubleScalarMin(A->size[0], A->size[1]); st.site = &mc_emlrtRSI; b_st.site = &nc_emlrtRSI; c_st.site = &oc_emlrtRSI; i51 = b_A->size[0] * b_A->size[1]; b_A->size[0] = A->size[0]; b_A->size[1] = A->size[1]; emxEnsureCapacity(&c_st, (emxArray__common *)b_A, i51, (int32_T)sizeof(real_T), &m_emlrtRTEI); ix = A->size[0] * A->size[1]; for (i51 = 0; i51 < ix; i51++) { b_A->data[i51] = A->data[i51]; } b_emxInit_real_T(&c_st, &tau, 1, &m_emlrtRTEI, true); b_emxInit_int32_T(&c_st, &jpvt, 2, &m_emlrtRTEI, true); m = b_A->size[0]; n = b_A->size[1]; b_mn = muIntScalarMin_sint32(b_A->size[0], b_A->size[1]); i51 = tau->size[0]; tau->size[0] = b_mn; emxEnsureCapacity(&c_st, (emxArray__common *)tau, i51, (int32_T)sizeof(real_T), &n_emlrtRTEI); d_st.site = &mf_emlrtRSI; e_st.site = &rb_emlrtRSI; f_st.site = &sb_emlrtRSI; g_st.site = &tb_emlrtRSI; eml_signed_integer_colon(&g_st, b_A->size[1], jpvt); if ((b_A->size[0] == 0) || (b_A->size[1] == 0)) { } else { ix = b_A->size[1]; i51 = work->size[0]; work->size[0] = ix; emxEnsureCapacity(&c_st, (emxArray__common *)work, i51, (int32_T)sizeof (real_T), &m_emlrtRTEI); for (i51 = 0; i51 < ix; i51++) { work->data[i51] = 0.0; } b_emxInit_real_T(&c_st, &vn1, 1, &pb_emlrtRTEI, true); b_emxInit_real_T(&c_st, &vn2, 1, &qb_emlrtRTEI, true); d_st.site = &tc_emlrtRSI; ix = b_A->size[1]; i51 = vn1->size[0]; vn1->size[0] = ix; emxEnsureCapacity(&c_st, (emxArray__common *)vn1, i51, (int32_T)sizeof (real_T), &pb_emlrtRTEI); i51 = vn2->size[0]; vn2->size[0] = ix; emxEnsureCapacity(&c_st, (emxArray__common *)vn2, i51, (int32_T)sizeof (real_T), &qb_emlrtRTEI); k = 1; d_st.site = &nf_emlrtRSI; overflow = (b_A->size[1] > 2147483646); if (overflow) { e_st.site = &db_emlrtRSI; check_forloop_overflow_error(&e_st); } for (ix = 0; ix + 1 <= b_A->size[1]; ix++) { d_st.site = &sc_emlrtRSI; vn1->data[ix] = b_eml_xnrm2(&d_st, b_A->size[0], b_A, k); vn2->data[ix] = vn1->data[ix]; k += b_A->size[0]; } d_st.site = &rc_emlrtRSI; if (1 > b_mn) { b12 = false; } else { b12 = (b_mn > 2147483646); } if (b12) { e_st.site = &db_emlrtRSI; check_forloop_overflow_error(&e_st); } for (i = 1; i <= b_mn; i++) { i_i = (i + (i - 1) * m) - 1; nmi = n - i; mmi = m - i; d_st.site = &of_emlrtRSI; ix = eml_ixamax(&d_st, 1 + nmi, vn1, i); pvt = (i + ix) - 2; if (pvt + 1 != i) { d_st.site = &pf_emlrtRSI; e_st.site = &bc_emlrtRSI; f_st.site = &cc_emlrtRSI; ix = 1 + m * pvt; iy = 1 + m * (i - 1); g_st.site = &dc_emlrtRSI; if (1 > m) { b13 = false; } else { b13 = (m > 2147483646); } if (b13) { h_st.site = &db_emlrtRSI; check_forloop_overflow_error(&h_st); } for (k = 1; k <= m; k++) { i51 = b_A->size[0] * b_A->size[1]; xnorm = b_A->data[emlrtDynamicBoundsCheckFastR2012b(ix, 1, i51, &le_emlrtBCI, &f_st) - 1]; i51 = b_A->size[0] * b_A->size[1]; i52 = b_A->size[0] * b_A->size[1]; b_A->data[emlrtDynamicBoundsCheckFastR2012b(ix, 1, i51, &le_emlrtBCI, &f_st) - 1] = b_A->data[emlrtDynamicBoundsCheckFastR2012b(iy, 1, i52, &le_emlrtBCI, &f_st) - 1]; i51 = b_A->size[0] * b_A->size[1]; b_A->data[emlrtDynamicBoundsCheckFastR2012b(iy, 1, i51, &le_emlrtBCI, &f_st) - 1] = xnorm; ix++; iy++; } ix = jpvt->data[pvt]; jpvt->data[pvt] = jpvt->data[i - 1]; jpvt->data[i - 1] = ix; vn1->data[pvt] = vn1->data[i - 1]; vn2->data[pvt] = vn2->data[i - 1]; } if (i < m) { d_st.site = &qc_emlrtRSI; atmp = b_A->data[i_i]; d16 = 0.0; if (1 + mmi <= 0) { } else { e_st.site = &wc_emlrtRSI; xnorm = b_eml_xnrm2(&e_st, mmi, b_A, i_i + 2); if (xnorm != 0.0) { xnorm = muDoubleScalarHypot(b_A->data[i_i], xnorm); if (b_A->data[i_i] >= 0.0) { xnorm = -xnorm; } if (muDoubleScalarAbs(xnorm) < 1.0020841800044864E-292) { ix = 0; do { ix++; e_st.site = &xc_emlrtRSI; b_eml_xscal(&e_st, mmi, 9.9792015476736E+291, b_A, i_i + 2); xnorm *= 9.9792015476736E+291; atmp *= 9.9792015476736E+291; } while (!(muDoubleScalarAbs(xnorm) >= 1.0020841800044864E-292)); e_st.site = &yc_emlrtRSI; xnorm = b_eml_xnrm2(&e_st, mmi, b_A, i_i + 2); xnorm = muDoubleScalarHypot(atmp, xnorm); if (atmp >= 0.0) { xnorm = -xnorm; } d16 = (xnorm - atmp) / xnorm; e_st.site = &ad_emlrtRSI; b_eml_xscal(&e_st, mmi, 1.0 / (atmp - xnorm), b_A, i_i + 2); e_st.site = &bd_emlrtRSI; if (1 > ix) { b14 = false; } else { b14 = (ix > 2147483646); } if (b14) { f_st.site = &db_emlrtRSI; check_forloop_overflow_error(&f_st); } for (k = 1; k <= ix; k++) { xnorm *= 1.0020841800044864E-292; } atmp = xnorm; } else { d16 = (xnorm - b_A->data[i_i]) / xnorm; atmp = 1.0 / (b_A->data[i_i] - xnorm); e_st.site = &cd_emlrtRSI; b_eml_xscal(&e_st, mmi, atmp, b_A, i_i + 2); atmp = xnorm; } } } tau->data[i - 1] = d16; } else { atmp = b_A->data[i_i]; d_st.site = &pc_emlrtRSI; tau->data[i - 1] = eml_matlab_zlarfg(); } b_A->data[i_i] = atmp; if (i < n) { atmp = b_A->data[i_i]; b_A->data[i_i] = 1.0; d_st.site = &qf_emlrtRSI; eml_matlab_zlarf(&d_st, mmi + 1, nmi, i_i + 1, tau->data[i - 1], b_A, i + i * m, m, work); b_A->data[i_i] = atmp; } d_st.site = &rf_emlrtRSI; if (i + 1 > n) { b_i = false; } else { b_i = (n > 2147483646); } if (b_i) { e_st.site = &db_emlrtRSI; check_forloop_overflow_error(&e_st); } for (ix = i; ix + 1 <= n; ix++) { if (vn1->data[ix] != 0.0) { xnorm = muDoubleScalarAbs(b_A->data[(i + b_A->size[0] * ix) - 1]) / vn1->data[ix]; xnorm = 1.0 - xnorm * xnorm; if (xnorm < 0.0) { xnorm = 0.0; } atmp = vn1->data[ix] / vn2->data[ix]; atmp = xnorm * (atmp * atmp); if (atmp <= 1.4901161193847656E-8) { if (i < m) { d_st.site = &sf_emlrtRSI; e_st.site = &uc_emlrtRSI; if (mmi < 1) { xnorm = 0.0; } else { f_st.site = &vc_emlrtRSI; g_st.site = &vc_emlrtRSI; n_t = (ptrdiff_t)(mmi); g_st.site = &vc_emlrtRSI; incx_t = (ptrdiff_t)(1); i51 = b_A->size[0] * b_A->size[1]; i52 = (i + m * ix) + 1; xix0_t = (double *)(&b_A->data[emlrtDynamicBoundsCheckFastR2012b (i52, 1, i51, &vb_emlrtBCI, &f_st) - 1]); xnorm = dnrm2(&n_t, xix0_t, &incx_t); } vn1->data[ix] = xnorm; vn2->data[ix] = vn1->data[ix]; } else { vn1->data[ix] = 0.0; vn2->data[ix] = 0.0; } } else { d_st.site = &tf_emlrtRSI; vn1->data[ix] *= muDoubleScalarSqrt(xnorm); } } } } emxFree_real_T(&vn2); emxFree_real_T(&vn1); } atmp = 0.0; if (mn > 0) { xnorm = muDoubleScalarMax(A->size[0], A->size[1]) * muDoubleScalarAbs (b_A->data[0]) * 2.2204460492503131E-16; k = 0; exitg1 = false; while ((!exitg1) && (k <= mn - 1)) { if (muDoubleScalarAbs(b_A->data[k + b_A->size[0] * k]) <= xnorm) { st.site = &lc_emlrtRSI; y = NULL; m14 = emlrtCreateCharArray(2, iv78); for (i = 0; i < 8; i++) { cv76[i] = cv77[i]; } emlrtInitCharArrayR2013a(&st, 8, m14, cv76); emlrtAssign(&y, m14); b_st.site = &tg_emlrtRSI; emlrt_marshallIn(&b_st, c_sprintf(&b_st, b_sprintf(&b_st, y, emlrt_marshallOut(14.0), emlrt_marshallOut(6.0), &o_emlrtMCI), emlrt_marshallOut(xnorm), &p_emlrtMCI), "sprintf", cv78); st.site = &kc_emlrtRSI; b_eml_warning(&st, atmp, cv78); exitg1 = true; } else { atmp++; k++; } } } unnamed_idx_0 = (uint32_T)A->size[1]; i51 = Y->size[0]; Y->size[0] = (int32_T)unnamed_idx_0; emxEnsureCapacity(sp, (emxArray__common *)Y, i51, (int32_T)sizeof(real_T), &m_emlrtRTEI); ix = (int32_T)unnamed_idx_0; for (i51 = 0; i51 < ix; i51++) { Y->data[i51] = 0.0; } for (ix = 0; ix < mn; ix++) { if (tau->data[ix] != 0.0) { xnorm = B->data[ix]; i51 = A->size[0] + (int32_T)(1.0 - ((1.0 + (real_T)ix) + 1.0)); emlrtForLoopVectorCheckR2012b((1.0 + (real_T)ix) + 1.0, 1.0, A->size[0], mxDOUBLE_CLASS, i51, &ac_emlrtRTEI, sp); for (i = 0; i < i51; i++) { unnamed_idx_0 = ((uint32_T)ix + i) + 2U; xnorm += b_A->data[((int32_T)unnamed_idx_0 + b_A->size[0] * ix) - 1] * B->data[(int32_T)unnamed_idx_0 - 1]; } xnorm *= tau->data[ix]; if (xnorm != 0.0) { B->data[ix] -= xnorm; i51 = A->size[0] + (int32_T)(1.0 - ((1.0 + (real_T)ix) + 1.0)); emlrtForLoopVectorCheckR2012b((1.0 + (real_T)ix) + 1.0, 1.0, A->size[0], mxDOUBLE_CLASS, i51, &yb_emlrtRTEI, sp); for (i = 0; i < i51; i++) { unnamed_idx_0 = ((uint32_T)ix + i) + 2U; B->data[(int32_T)unnamed_idx_0 - 1] -= b_A->data[((int32_T) unnamed_idx_0 + b_A->size[0] * ix) - 1] * xnorm; } } } } emxFree_real_T(&tau); emlrtForLoopVectorCheckR2012b(1.0, 1.0, atmp, mxDOUBLE_CLASS, (int32_T)atmp, &xb_emlrtRTEI, sp); for (i = 0; i < (int32_T)atmp; i++) { Y->data[jpvt->data[i] - 1] = B->data[i]; } emlrtForLoopVectorCheckR2012b(atmp, -1.0, 1.0, mxDOUBLE_CLASS, (int32_T)-(1.0 + (-1.0 - atmp)), &wb_emlrtRTEI, sp); for (ix = 0; ix < (int32_T)-(1.0 + (-1.0 - atmp)); ix++) { xnorm = atmp + -(real_T)ix; Y->data[jpvt->data[(int32_T)xnorm - 1] - 1] = eml_div(Y->data[jpvt->data [(int32_T)xnorm - 1] - 1], b_A->data[((int32_T)xnorm + b_A->size[0] * ((int32_T)xnorm - 1)) - 1]); for (i = 0; i < (int32_T)(xnorm - 1.0); i++) { Y->data[jpvt->data[i] - 1] -= Y->data[jpvt->data[(int32_T)xnorm - 1] - 1] * b_A->data[i + b_A->size[0] * ((int32_T)xnorm - 1)]; } } emxFree_int32_T(&jpvt); emxFree_real_T(&work); emxFree_real_T(&b_A); emlrtHeapReferenceStackLeaveFcnR2012b(sp); }
double pdslen(int ndim, int type, double *s, double scale, double *work) { /******************************************************************* * * THIS IS A SERVICE FUNCTION USED TO DETERMINE THE LENGTH OF THE * LONGEST EDGE IN THE INITIAL SIMPLEX. NOTE THAT THE JOB IS MADE * CONSIDERABLY EASIER IF THE USER SPECIFIED EITHER A RIGHT-ANGLED * OR REGULAR SIMPLEX. WRITTEN BY VIRGINIA TORCZON. LAST * MODIFICATION: MAY 18, 1992. * * INPUT * N DIMENSION OF THE PROBLEM TO BE SOLVED * * TYPE TYPE OF SIMPLEX SPECIFIED BY THE USER CONTAINS * THE N+1 VERTICES IN THE INITIAL SIMPLEX (STORED * BY COLUMN) * * SCALE (RELATIVE) LENGTH OF EDGES IN THE INITIAL * SIMPLEX IF THE USER REQUESTED THAT THE SIMPLEX * BE CONSTRUCTED (RATHER THAN ENTERED DIRECTLY) * * WORK ONE-DIMENSIONAL WORK ARRAY OF LENGTH N USED TO * COMPUTE THE DIFFERENCE BETWEEN VERTICES * *******************************************************************/ /* System generated locals */ double ret_val; /* Local variables */ static double temp; static int i, j, k; int incx = 1; if (type == 1) { /* THE USER CHOSE A SIMPLE RIGHT-ANGLED SIMPLEX SO THE MAXIMUM * LENGTH IS EASY TO COMPUTE. */ ret_val = fabs(scale) * sqrt(2.); } else if (type == 2) { /* THE USER CHOSE A REGULAR SIMPLEX; I.E., THE LENGTH OF EVERY * EDGE IN THE SIMPLEX IS SPECIFIED BY THE CHOICE OF SCALE. */ ret_val = fabs(scale); } else { /* THE USER EITHER ENTERED THE SIMPLEX OR CHOSE A SCALED * RIGHT-ANGLED SIMPLEX. COMPUTE THE LENGTHS OF ALL THE EDGES. */ ret_val = 0.; for (i = 0; i <= ndim; i++) { for (j = i + 1; j <= ndim ; j++) { for (k = 0; k < ndim; k++) { work[k] = s[k + j * ndim] - s[k + i * ndim]; } temp = dnrm2(&ndim, work, &incx); if (temp > ret_val) { ret_val = temp; } } } } return ret_val; }
/*---------------------------------------------------------------------------*/ int main (void) { /*---------------------------------------------------------------------------*/ /* Define arrays for the upper triangle of the coefficient matrix and */ /* preconditioner as well as an array for rhs vector */ /* Compressed sparse row storage is used for sparse representation */ /*---------------------------------------------------------------------------*/ MKL_INT n = 100, rci_request, itercount, lexpected_itercount = 15, uexpected_itercount = 19, i; double rhs[100]; MKL_INT ia[100 + 1]; MKL_INT ja[100 - 1]; double a[100 - 1], a1[100 - 1]; /*---------------------------------------------------------------------------*/ /* Allocate storage for the solver ?par and temporary storage tmp */ /*---------------------------------------------------------------------------*/ MKL_INT length = 128; MKL_INT ipar[128]; double dpar[128], tmp[4 * 100]; /*---------------------------------------------------------------------------*/ /* Some additional variables to use with the RCI (P)CG solver */ /* OMEGA is the relaxation parameter, NITER_SSOR is the maximum number of */ /* iterations for the SSOR preconditioner */ /*---------------------------------------------------------------------------*/ double solution[100]; double expected_sol[100]; double omega = 0.5E0, one = 1.E0, zero = 0.E0, om = 1.E0 - omega; double euclidean_norm, temp[100]; MKL_INT niter_ssor = 20; char matdes[6]; char tr = 'n'; double eone = -1.E0; MKL_INT ione = 1; /*---------------------------------------------------------------------------*/ /* Initialize the coefficient matrix and expected solution */ /*---------------------------------------------------------------------------*/ for (i = 0; i < n; i++) expected_sol[i] = 1.E0; for (i = 0; i < n - 1; i++) { ja[i] = i + 2; ia[i] = i + 1; a[i] = 0.5E0; a1[i] = omega * a[i]; } ia[n - 1] = n; ia[n] = ia[n - 1]; matdes[0] = 's'; matdes[1] = 'u'; matdes[2] = 'u'; matdes[3] = 'f'; /*---------------------------------------------------------------------------*/ /* Initialize vectors rhs, temp, and tmp[n:2*n-1] with zeros as mkl_dcsrmv */ /* routine does not set NAN to zero. Thus, if any of the values in the */ /* vectors above accidentally happens to be NAN, the example will fail */ /* to complete. */ /* Initialize the right hand side through matrix-vector product */ /*---------------------------------------------------------------------------*/ for (i = 0; i < n; i++) { rhs[i] = zero; temp[i] = zero; tmp[n + i] = zero; } mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], expected_sol, &zero, rhs); /*---------------------------------------------------------------------------*/ /* Initialize the initial guess */ /*---------------------------------------------------------------------------*/ for (i = 0; i < n; i++) solution[i] = zero; /*---------------------------------------------------------------------------*/ /* Initialize the solver */ /*---------------------------------------------------------------------------*/ dcg_init (&n, solution, rhs, &rci_request, ipar, dpar, tmp); if (rci_request != 0) goto failure; /*---------------------------------------------------------------------------*/ /* Set the desired parameters: */ /* INTEGER parameters: */ /* set the maximal number of iterations to 100 */ /* LOGICAL parameters: */ /* run the Preconditioned version of RCI (P)CG with preconditioner C_inverse */ /* DOUBLE parameters */ /* - */ /*---------------------------------------------------------------------------*/ ipar[4] = 100; ipar[10] = 1; /*---------------------------------------------------------------------------*/ /* Check the correctness and consistency of the newly set parameters */ /*---------------------------------------------------------------------------*/ dcg_check (&n, solution, rhs, &rci_request, ipar, dpar, tmp); if (rci_request != 0) goto failure; /*---------------------------------------------------------------------------*/ /* Compute the solution by RCI (P)CG solver */ /* Reverse Communications starts here */ /*---------------------------------------------------------------------------*/ rci:dcg (&n, solution, rhs, &rci_request, ipar, dpar, tmp); /*---------------------------------------------------------------------------*/ /* If rci_request=0, then the solution was found according to the requested */ /* stopping tests. In this case, this means that it was found after 100 */ /* iterations. */ /*---------------------------------------------------------------------------*/ if (rci_request == 0) goto getsln; /*---------------------------------------------------------------------------*/ /* If rci_request=1, then compute the vector A*tmp[0] */ /* and put the result in vector tmp[n] */ /*---------------------------------------------------------------------------*/ if (rci_request == 1) { matdes[0] = 's'; mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], tmp, &zero, &tmp[n]); goto rci; } /*---------------------------------------------------------------------------*/ /* If rci_request=2, then do the user-defined stopping test: compute the */ /* Euclidean norm of the actual residual using MKL routines and check if */ /* it is less than 1.E-8 */ /*---------------------------------------------------------------------------*/ if (rci_request == 2) { matdes[0] = 's'; mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], solution, &zero, temp); daxpy (&n, &eone, rhs, &ione, temp, &ione); euclidean_norm = dnrm2 (&n, temp, &ione); /*---------------------------------------------------------------------------*/ /* The solution has not been found yet according to the user-defined stopping */ /* test. Continue RCI (P)CG iterations. */ /*---------------------------------------------------------------------------*/ if (euclidean_norm > 1.E-6) goto rci; /*---------------------------------------------------------------------------*/ /* The solution has been found according to the user-defined stopping test */ /*---------------------------------------------------------------------------*/ else goto getsln; } /*---------------------------------------------------------------------------*/ /* If rci_request=3, then apply the simplest SSOR preconditioning */ /* on vector tmp[2*n] and put the result in vector tmp[3*n] */ /*---------------------------------------------------------------------------*/ if (rci_request == 3) { dcopy (&n, &tmp[2 * n], &ione, &tmp[3 * n], &ione); matdes[0] = 't'; for (i = 1; i <= niter_ssor; i++) { dcopy (&n, &tmp[2 * n], &ione, temp, &ione); matdes[2] = 'n'; tr = 'n'; mkl_dcsrmv (&tr, &n, &n, &eone, matdes, a1, ja, ia, &ia[1], &tmp[3 * n], &omega, temp); daxpy (&n, &om, &tmp[3 * n], &ione, temp, &ione); matdes[2] = 'u'; tr = 't'; mkl_dcsrsv (&tr, &n, &one, matdes, a1, ja, ia, &ia[1], temp, &tmp[3 * n]); } goto rci; } /*---------------------------------------------------------------------------*/ /* If rci_request=anything else, then dcg subroutine failed */ /* to compute the solution vector: solution[n] */ /*---------------------------------------------------------------------------*/ goto failure; /*---------------------------------------------------------------------------*/ /* Reverse Communication ends here */ /* Get the current iteration number into itercount */ /*---------------------------------------------------------------------------*/ getsln:dcg_get (&n, solution, rhs, &rci_request, ipar, dpar, tmp, &itercount); /*---------------------------------------------------------------------------*/ /* Print solution vector: solution[n] and number of iterations: itercount */ /*---------------------------------------------------------------------------*/ printf ("The system has been solved\n"); printf ("The following solution obtained\n"); for (i = 0; i < n / 4; i++) { printf ("%6.3f %6.3f %6.3f %6.3f", solution[4 * i], solution[4 * i + 1], solution[4 * i + 2], solution[4 * i + 3]); printf ("\n"); } printf ("\nExpected solution is\n"); for (i = 0; i < n / 4; i++) { printf ("%6.3f %6.3f %6.3f %6.3f", expected_sol[4 * i], expected_sol[4 * i + 1], expected_sol[4 * i + 2], expected_sol[4 * i + 3]); expected_sol[4 * i] -= solution[4 * i]; printf ("\n"); } printf ("\nNumber of iterations: %d\n", itercount); i = 4; n /= 4; euclidean_norm = dnrm2 (&n, expected_sol, &i); /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ MKL_Free_Buffers (); if (lexpected_itercount <= itercount <= uexpected_itercount && euclidean_norm < 1.0e-4) { printf ("This example has successfully PASSED through all steps of computation!"); printf ("\n"); return 0; } else { printf ("This example may have FAILED as either the number of iterations differs"); printf ("\nfrom the expected number of iterations %d-", lexpected_itercount); printf ("-%d, or the computed solution\ndiffers much from ", uexpected_itercount); printf ("the expected solution (Euclidean norm is %e), or both.\n", euclidean_norm); return 1; } /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ failure:printf ("This example FAILED as the solver has returned the ERROR "); printf ("code %d", rci_request); MKL_Free_Buffers (); return 1; }