NLuint nlSolve_CG() { NLdouble* b = nlCurrentContext->b ; NLdouble* x = nlCurrentContext->x ; NLdouble eps = nlCurrentContext->threshold ; NLuint max_iter = nlCurrentContext->max_iterations ; NLint N = nlCurrentContext->n ; NLdouble *g = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *r = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *p = NL_NEW_ARRAY(NLdouble, N) ; NLuint its=0; NLint i; NLdouble t, tau, sig, rho, gam; NLdouble b_square=ddot(N,b,1,b,1); NLdouble err=eps*eps*b_square; NLdouble accu =0.0; NLdouble * Ax=NL_NEW_ARRAY(NLdouble,nlCurrentContext->n); NLdouble curr_err; nlCurrentContext->matrix_vector_prod(x,g); daxpy(N,-1.,b,1,g,1); dscal(N,-1.,g,1); dcopy(N,g,1,r,1); curr_err = ddot(N,g,1,g,1); while ( curr_err >err && its < max_iter) { if(!(its % 100)) { printf ( "%d : %.10e -- %.10e\n", its, curr_err, err ) ; } nlCurrentContext->matrix_vector_prod(r,p); rho=ddot(N,p,1,p,1); sig=ddot(N,r,1,p,1); tau=ddot(N,g,1,r,1); t=tau/sig; daxpy(N,t,r,1,x,1); daxpy(N,-t,p,1,g,1); gam=(t*t*rho-tau)/tau; dscal(N,gam,r,1); daxpy(N,1.,g,1,r,1); ++its; curr_err = ddot(N,g,1,g,1); } nlCurrentContext->matrix_vector_prod(x,Ax); for(i = 0 ; i < N ; ++i) accu+=(Ax[i]-b[i])*(Ax[i]-b[i]); printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square)); NL_DELETE_ARRAY(Ax); NL_DELETE_ARRAY(g) ; NL_DELETE_ARRAY(r) ; NL_DELETE_ARRAY(p) ; return its; }
/* Prepare the matrices for the lcp call pA = [-M -1 q] */ PT_Matrix lcp_Matrix_Init(ptrdiff_t m, double *M, double *q) { ptrdiff_t i,n,incx; double tmp; PT_Matrix pA; /* Build column-major matrix A = [-M -1 q] */ pA = Matrix_Init(m,m+2,"A"); /* A(:,1:m) = M */ memcpy(pMAT(pA), M, m*m*sizeof(double)); /* A(:,1:m) = -A(:,1:m) */ tmp = -1.0; incx = 1; n = m*m; dscal(&n, &tmp, pMAT(pA), &incx); /* A(:,m+1) = -1 */ for(i=0;i<m;i++) C_SEL(pA,i,m) = -1.0; /* A(:,m+2) = q */ memcpy(&(C_SEL(pA,0,m+1)),q,m*sizeof(double)); return pA; }
void Update_lbfgs (int n, /* I num unknowns */ int t, /* I num vectors to store */ real *s_vec, /* I x_new - x_old */ real *y_vec, /* I g_new - g_old */ real *Bs_vec, /* IO scratch space */ int *NUPDT, /* IO num updates made to B/H */ real *CMPS, /* IO storage for t s_vecs */ real *CMPY) /* IO storage for t y_vecs */ /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * This routine updates the limited memory BFGS approximations for * the Hessian B and its inverse H. The update uses s_vec and * y_vec, whose product should be negative to preserve the positive * definiteness of the BFGS approximations. If s'y is not a descent * direction, then y is "damped", an idea due to Powell. * Then y_vec becomes: * y_damped = alpha*y + (1-alpha)*Bs * * 0.8*s'Bs * where alpha = ---------- * s'Bs - s'y * * If 0 <= s'y <= 1.0E-8 y'y then the update is skipped to prevent * division by a small number. * * The L-BFGS structures are currently FORTRAN subroutines that * require the variables NUPDT, CMPS, and CMPY. These are used as * work space storage and should not be altered between FORTRAN calls. *********************************************************************/ { real s_dot_y, sBs, y_dot_y, alpha; /*-- Damp the estimate if necessary. */ s_dot_y = ddot (n, s_vec, 1, y_vec, 1); multbv_ (&n, &t, s_vec, Bs_vec, NUPDT, CMPS, CMPY); sBs = ddot (n, s_vec, 1, Bs_vec, 1); if (s_dot_y < (0.2 * sBs)) { fprintf (bfgs_fp, "--- damping L-BFGS update\n"); alpha = 0.8 * sBs / (sBs - s_dot_y); dscal (n, alpha, y_vec, 1); daxpy (n, (1.0 - alpha), Bs_vec, 1, y_vec, 1); s_dot_y = ddot (n, s_vec, 1, y_vec, 1); } /*-- Decide whether to skip the update. */ y_dot_y = ddot (n, y_vec, 1, y_vec, 1); if ((s_dot_y >= 0.0) && (s_dot_y <= (sqrt(MCHEPS) * y_dot_y))) { fprintf (bfgs_fp, "--- skipping L-BFGS update\n"); return; } /*-- Make the updates. */ updtbh_ (&n, &t, s_vec, y_vec, NUPDT, CMPS, CMPY); return; }
void KNITRO_EXPORT KTR_dscal (const int n, const double alpha, double * const x, const int incx) { dscal (n, alpha, x, incx); return; }
INT NS_DIM_PREFIX descal (MULTIGRID *mg, INT fl, INT tl, INT mode, EVECDATA_DESC *x, DOUBLE a) { INT ret,level,i; ret=dscal(mg,fl,tl,mode,x->vd,a); if (ret!=NUM_OK) return ret; for (level=fl; level<=tl; level++) for (i=0; i<x->n; i++) EVDD_E(x,level,i)*=a; return NUM_OK; }
NLuint nlSolve_CG_precond() { NLdouble* b = nlCurrentContext->b ; NLdouble* x = nlCurrentContext->x ; NLdouble eps = nlCurrentContext->threshold ; NLuint max_iter = nlCurrentContext->max_iterations ; NLint N = nlCurrentContext->n ; NLdouble* r = NL_NEW_ARRAY(NLdouble, N) ; NLdouble* d = NL_NEW_ARRAY(NLdouble, N) ; NLdouble* h = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *Ad = h; NLuint its=0; NLdouble rh, alpha, beta; NLdouble b_square = ddot(N,b,1,b,1); NLdouble err=eps*eps*b_square; NLint i; NLdouble * Ax=NL_NEW_ARRAY(NLdouble,nlCurrentContext->n); NLdouble accu =0.0; NLdouble curr_err; nlCurrentContext->matrix_vector_prod(x,r); daxpy(N,-1.,b,1,r,1); nlCurrentContext->precond_vector_prod(r,d); dcopy(N,d,1,h,1); rh=ddot(N,r,1,h,1); curr_err = ddot(N,r,1,r,1); while ( curr_err >err && its < max_iter) { if(!(its % 100)) { printf ( "%d : %.10e -- %.10e\n", its, curr_err, err ) ; } nlCurrentContext->matrix_vector_prod(d,Ad); alpha=rh/ddot(N,d,1,Ad,1); daxpy(N,-alpha,d,1,x,1); daxpy(N,-alpha,Ad,1,r,1); nlCurrentContext->precond_vector_prod(r,h); beta=1./rh; rh=ddot(N,r,1,h,1); beta*=rh; dscal(N,beta,d,1); daxpy(N,1.,h,1,d,1); ++its; // calcul de l'erreur courante curr_err = ddot(N,r,1,r,1); } nlCurrentContext->matrix_vector_prod(x,Ax); for(i = 0 ; i < N ; ++i) accu+=(Ax[i]-b[i])*(Ax[i]-b[i]); printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square)); NL_DELETE_ARRAY(Ax); NL_DELETE_ARRAY(r) ; NL_DELETE_ARRAY(d) ; NL_DELETE_ARRAY(h) ; return its; }
/* ************************************************************ TIME-CRITICAL PROCEDURE -- isscalarmul(x,alpha,n) Computes x *= alpha using BLAS. ************************************************************ */ void isscalarmul(double *x, const double alpha, const mwIndex n) { mwIndex one=1; #ifdef PC dscal(&n,&alpha,x,&one); #endif #ifdef UNIX dscal_(&n,&alpha,x,&one); #endif return; }
static vl::Error scal(vl::Context & context, ptrdiff_t n, type alpha, type *x, ptrdiff_t incx) { dscal(&n, &alpha, (double*)x, &incx) ; return vl::vlSuccess ; }
// Factors a double precision matrix by gaussian elimination. void dgefa(double ** a, int * ipvt) { double temp; int k, j; for (k = 0; k < NM1; k++) { double * col_k = a[k]; int kp1 = k + 1; // find l = pivot index int l = idamax(N - k, col_k, k, 1) + k; ipvt[k] = l; // zero pivot implies this column already triangularized if (col_k[l] != 0) { // interchange if necessary if (l != k) { temp = col_k[l]; col_k[l] = col_k[k]; col_k[k] = temp; } // compute multipliers temp = -1.0 / col_k[k]; dscal(N - kp1, temp, col_k, kp1, 1); // row elimination with column indexing for (j = kp1; j < N; j++) { double * col_j = a[j]; temp = col_j[l]; if (l != k) { col_j[l] = col_j[k]; col_j[k] = temp; } daxpy(N - kp1, temp, col_k, kp1, 1, col_j, kp1, 1); } } } ipvt[N - 1] = N - 1; }
/* This routine integrates the physical values */ void Integrate_CNAM(int Je, double dt, Element_List *U, Element_List *Uf, double **uf) { register int i; int nq; double theta = dparam("THETA"); nq = U->htot*U->nz; dcopy(nq, Uf->base_h, 1, uf[0], 1); /* multiply u^n by theta factor */ dscal(nq, 1.0/(1-theta),U->base_h,1); for(i = 0; i < Je; ++i) daxpy(nq, Beta_Int[i]*dt, uf[i], 1, U->base_h,1); reshuffle(uf,Je); }
double Quad::get_1diag_massmat(int ID){ double *wa, *wb; double *wvec = dvector(0, qtot-1), vmmat; Mode mw,*m; #ifndef PCONTBASE double **ba, **bb; Mode m1; get_moda_GL (qa, &ba); get_moda_GL (qb, &bb); m1.a = ba[ID]; m1.b = bb[ID]; m = &m1; #else Basis *b = getbasis(); m = b->vert+ID; #endif getzw(qa,&wa,&wa,'a'); getzw(qb,&wb,&wb,'a'); mw.a = wa; mw.b = wb; fillvec(&mw, wvec); if(curvX) dvmul(qtot, wvec, 1, geom->jac.p, 1, wvec, 1); else dscal(qtot, geom->jac.d, wvec, 1); vmmat = Quad_mass_mprod(this, m, wvec); free(wvec); return vmmat; }
/* This function takes complex numbers x, y, and z defined by an eigenvector for a multiwavelet (would work for Fourier transforms too, however) and returns a pointer to a structure that defines the major and minor axes of the particle motion vectors defined by those three complex numbers. The up vector (assumed to be three element vector) defines the direction used to resolve the sign ambiguity inherent in defining an ellipse. That is, both the major and minor component directions are required to have a positive projection in the up direction. If they aren't the sign is flipped before returning. Normally up would point [0,0,1] or in the up radial direction for P waves. For S, it becomes more ambiguous and should be sorted out by a more complicated method. The polarization information (defined by the Particle_Motion_Ellipse structure) is allocated within this routine. Author: G. L. Pavlis Written: October 1999 */ Particle_Motion_Ellipse compute_particle_motion(complex x, complex y, complex z, double *up) { double rx,ry,rz,thetax,thetay,thetaz; /* polar forms of x,y,z*/ double a,b; double phi1,phi2; double x1[3],x2[3]; double nrmx1,nrmx2; Particle_Motion_Ellipse e; rx = hypot((double)x.r,(double)x.i); ry = hypot((double)y.r,(double)y.i); rz = hypot((double)z.r,(double)z.i); thetax = atan2((double)x.i,(double)x.r); thetay = atan2((double)y.i,(double)y.r); thetaz = atan2((double)z.i,(double)z.r); a = rx*rx*cos(2.0*thetax) + ry*ry*cos(2.0*thetay) + rz*rz*cos(2.0*thetaz); b = rx*rx*sin(2.0*thetax) + ry*ry*sin(2.0*thetay) + rz*rz*sin(2.0*thetaz); phi1 = atan2(-b,a)/2.0; phi2 = phi1 + M_PI_2; x1[0] = rx*cos(phi1+thetax); x1[1] = ry*cos(phi1+thetay); x1[2] = rz*cos(phi1+thetaz); x2[0] = rx*cos(phi2+thetax); x2[1] = ry*cos(phi2+thetay); x2[2] = rz*cos(phi2+thetaz); nrmx1 = dnrm2(3,x1,1); nrmx2 = dnrm2(3,x2,1); /* normalize to unit vectors */ dscal(3,1.0/nrmx1,x1,1); dscal(3,1.0/nrmx2,x2,1); if(nrmx1>nrmx2) { dcopy(3,x1,1,e.major,1); dcopy(3,x2,1,e.minor,1); e.rectilinearity = (1.0 - nrmx2/nrmx1); } else { dcopy(3,x2,1,e.major,1); dcopy(3,x1,1,e.minor,1); e.rectilinearity = (1.0 - nrmx1/nrmx2); } /* Choose the positive sign direction */ if(ddot(3,up,1,e.major,1) < 0.0) dscal(3,-1.0,e.major,1); if(ddot(3,up,1,e.minor,1) < 0.0) dscal(3,-1.0,e.minor,1); return(e); }
/*! Given a vector of pointers to dynamic bodies, and the number of bodies in the vector, this routine will move those bodies in the direction of their current velocity for the length of the timestep, \a h. It uses the pre-computed velocities and accelerations computed by iterateDynamics and stored for each body. */ int moveBodies(int numBodies,std::vector<DynamicBody *> bodyVec,double h) { static double V[42]; static double tmp12[12]; static double B[12]; static double R_N_B[9]; static double newPos[7]; static mat3 Rot; int bn; double currq[7]; double currv[6]; int errCode=SUCCESS; for (bn=0;bn<numBodies;bn++) { memcpy(currq,bodyVec[bn]->getPos(),7*sizeof(double)); memcpy(currv,bodyVec[bn]->getVelocity(),6*sizeof(double));; Quaternion tmpQuat(currq[3],currq[4],currq[5],currq[6]); tmpQuat.ToRotationMatrix(Rot); // The rotation matrix returned by ToRotationMatrix is expressed as // a graphics style rot matrix (new axes are in rows), the R_N_B matrix // is a robotics style rot matrix (new axes in columns) R_N_B[0] = Rot[0]; R_N_B[3] = Rot[1]; R_N_B[6] = Rot[2]; R_N_B[1] = Rot[3]; R_N_B[4] = Rot[4]; R_N_B[7] = Rot[5]; R_N_B[2] = Rot[6]; R_N_B[5] = Rot[7]; R_N_B[8] = Rot[8]; // B relates the angular velocity of the body (expressed in // the body frame) to the time derivative of the Euler parameters B[0] = -currq[4]; B[4] = -currq[5]; B[8] = -currq[6]; B[1] = currq[3]; B[5] = -currq[6]; B[9] = currq[5]; B[2] = currq[6]; B[6] = currq[3]; B[10]= -currq[4]; B[3] = -currq[5]; B[7] = currq[4]; B[11]= currq[3]; dscal(12,0.5,B,1); // V is a list of matrices. Each matrix (V_bn) can be multiplied by // body bn's 6x1 velocity vector to get the 7x1 time derivative // of the body's position. // V_bn = [ eye(3,3) zeros(3,3); // zeros(4,3) B*R_N_B' ]; // This list of matrices will be used at the end to compute the new // position from the new velocity dgemm("N","T",4,3,3,1.0,B,4,R_N_B,3,0.0,tmp12,4); V[0] = 1.0; V[8] = 1.0; V[16]= 1.0; fillMatrixBlock(tmp12,4,3,3,6,5,V,7); dcopy(7,currq,1,newPos,1); dgemv("N",7,6,h,V,7,currv,1,1.0,newPos,1); #ifdef GRASPITDBG fprintf(stdout,"object %s new velocity: \n", bodyVec[bn]->getName().latin1()); for (int i=0;i<6;i++) fprintf(stdout,"%le ",currv[i]); printf("\n"); fprintf(stdout,"object %s new position: \n", bodyVec[bn]->getName().latin1()); disp_mat(stdout,newPos,1,7,0); #endif //should we bother to check if the object has moved? (for optimization) if (!bodyVec[bn]->setPos(newPos)) { DBGP("requested position is out of bounds for this object!"); errCode = FAILURE; } } return errCode; }
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { /* Variables */ int k,nSamples,maxIter,sparse=0,*iVals,*covered,*lastVisited; long i,j,nVars,one=1; mwIndex *jc,*ir; double *w, *Xt, *y, lambda, alpha, innerProd, sig,c=1,*g,*d,nCovered=0,*cumSum,scaling; if (nrhs != 9) mexErrMsgTxt("Function needs nine arguments: {w,Xt,y,lambda,alpha,iVals,d,g,covered}"); /* Input */ w = mxGetPr(prhs[0]); Xt = mxGetPr(prhs[1]); y = mxGetPr(prhs[2]); lambda = mxGetScalar(prhs[3]); alpha = mxGetScalar(prhs[4]); iVals = (int*)mxGetPr(prhs[5]); if (!mxIsClass(prhs[5],"int32")) mexErrMsgTxt("iVals must be int32"); d = mxGetPr(prhs[6]); g = mxGetPr(prhs[7]); covered = (int*)mxGetPr(prhs[8]); /* Compute Sizes */ nVars = mxGetM(prhs[1]); nSamples = mxGetN(prhs[1]); maxIter = mxGetM(prhs[5]); if (nVars != mxGetM(prhs[0])) mexErrMsgTxt("w and Xt must have the same number of rows"); if (nSamples != mxGetM(prhs[2])) mexErrMsgTxt("number of columns of Xt must be the same as the number of rows in y"); if (nVars != mxGetM(prhs[6])) mexErrMsgTxt("w and d must have the same number of rows"); if (nSamples != mxGetM(prhs[7])) mexErrMsgTxt("w and g must have the same number of rows"); if (nSamples != mxGetM(prhs[8])) mexErrMsgTxt("covered and y must hvae the same number of rows"); if (mxIsSparse(prhs[1])) { sparse = 1; jc = mxGetJc(prhs[1]); ir = mxGetIr(prhs[1]); } if (sparse && alpha*lambda==1) mexErrMsgTxt("Sorry, I don't like it when Xt is sparse and alpha*lambda=1\n"); /* Allocate memory needed for lazy updates */ if (sparse) { lastVisited = mxCalloc(nVars,sizeof(int)); cumSum = mxCalloc(maxIter,sizeof(double)); /*for(j=0;j<nVars;j++) lastVisited[j] = -1;*/ } for(i=0; i<nSamples; i++) { if (covered[i]!=0) nCovered++; } for(k=0; k<maxIter; k++) { /* Select next training example */ i = iVals[k]-1; /* Compute current values of needed parameters */ if (sparse && k > 0) { for(j=jc[i]; j<jc[i+1]; j++) { if (lastVisited[ir[j]]==0) { w[ir[j]] -= d[ir[j]]*cumSum[k-1]; } else { w[ir[j]] -= d[ir[j]]*(cumSum[k-1]-cumSum[lastVisited[ir[j]]-1]); } lastVisited[ir[j]] = k; } } /* Compute derivative of loss */ if (sparse) { innerProd = 0; for(j=jc[i]; j<jc[i+1]; j++) innerProd += w[ir[j]]*Xt[j]; innerProd *= c; } else innerProd = ddot(&nVars,w,&one,&Xt[nVars*i],&one); sig = -y[i]/(1+exp(y[i]*innerProd)); /* Update direction */ if (sparse) { for(j=jc[i]; j<jc[i+1]; j++) d[ir[j]] += Xt[j]*(sig - g[i]); } else { scaling = sig-g[i]; daxpy(&nVars,&scaling,&Xt[i*nVars],&one,d,&one); } /* Store derivative of loss */ g[i] = sig; /* Update the number of examples that we have seen */ if (covered[i]==0) { covered[i]=1; nCovered++; } /* Update parameters */ if (sparse) { c *= 1-alpha*lambda; if (k==0) cumSum[0] = alpha/(c*nCovered); else cumSum[k] = cumSum[k-1] + alpha/(c*nCovered); } else { scaling = 1-alpha*lambda; dscal(&nVars,&scaling,w,&one); scaling = -alpha/nCovered; daxpy(&nVars,&scaling,d,&one,w,&one); } } if (sparse) { for(j=0; j<nVars; j++) { if (lastVisited[j]==0) { w[j] -= d[j]*cumSum[maxIter-1]; } else { w[j] -= d[j]*(cumSum[maxIter-1]-cumSum[lastVisited[j]-1]); } } scaling = c; dscal(&nVars,&scaling,w,&one); mxFree(lastVisited); mxFree(cumSum); } }
// ------------------------------- mexFunction ---------------------------------------// void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { /* Check for proper number of arguments */ if(nrhs!=5) { Usage(); mexErrMsgIdAndTxt("MyToolbox:arrayProduct:nrhs", "Five inputs required."); } if(nlhs!=4) { Usage(); mexErrMsgIdAndTxt("MyToolbox:arrayProduct:nlhs", "Four output required."); } // Input argument and dimensions const mwSize DataR = mxGetM(prhs[0]), DataC = mxGetN(prhs[0]), MeanIR = mxGetM(prhs[1]), MeanIC = mxGetN(prhs[1]); const double *Data = mxGetPr(prhs[0]), *MeanI = mxGetPr(prhs[1]); const double MaxIter = mxGetScalar(prhs[2]), Thresh = mxGetScalar(prhs[3]); const int Verbose = (int) mxGetScalar(prhs[4]); // Set the number of means elements, dimension and number of data points int NM = MeanIC, ND = DataC, Dim = DataR; // Check appropriate sizes and modes if( Dim != MeanIR ) { Usage(); mexErrMsgIdAndTxt("KMeans:Means","Means matrix dimension"); } // Data elements double *Prior = NULL, *Mean = NULL, *Cov = NULL, *Assign; // ** KM parameters double *SqrTemp = new double[Dim]; double *DistTemp = new double[NM]; double alpha = 1, beta = 1, Scale = 0; double Term, TermP = 0; int AssignTemp; int *AssignNum; AssignNum = new int[NM]; mwSize CSize[] = {Dim,Dim,NM}; int CNumSize = 3; int i,k,j; int inc = 1; int index = 0; Assign = new double[ND]; Prior = new double[NM]; Mean = new double[Dim*NM]; Cov = new double[Dim*Dim*NM]; memcpy(Mean, MeanI, Dim*NM*sizeof(double)); for(int E = 0; E < MaxIter; E++){ // ** Assign features to each center #pragma omp parallel for shared(Data, Mean, Dim, inc, Assign) private(i,j, SqrTemp, DistTemp, AssignTemp) for(i = 0; i < ND; i++){ for(j = 0; j < NM; j++){ // Calculate elements of Data-Mean vdSub( Dim, &Data[i*Dim], &Mean[j*Dim], SqrTemp ); // Square elements vdSqr( Dim, SqrTemp, SqrTemp ); // Sum distances DistTemp[j] = dasum( &Dim, SqrTemp, &inc); } // Find minimum distance index AssignTemp = idamin(&NM, DistTemp, &inc); Assign[i] = double(AssignTemp-1); } // ** Calculate new center values // Zero out centers memset(Mean, 0, Dim*NM*sizeof(double)); memset(AssignNum, 0, NM*sizeof(int)); for(i = 0; i < ND; i++){ index = Assign[i]; daxpy(&Dim, &alpha, &Data[i*Dim], &inc, &Mean[index*Dim], &inc); AssignNum[index] = AssignNum[index] + 1; } for(i = 0; i < NM; i++){ // If no point was assigned to an average, assign a different point if(AssignNum[i] == 0){ memcpy(&Mean[i*Dim], &Data[(rand()%ND)*Dim], Dim*sizeof(double)); Scale = 1; dscal(&Dim, &Scale, &Mean[i*Dim], &inc); } else{ Scale = 1 / double(AssignNum[i]); dscal(&Dim, &Scale, &Mean[i*Dim], &inc); } } // Termination conditions k = Dim*NM; Term = dasum(&k, Mean, &inc); if(Verbose == 1){ printf("Iteration %d - Shift %3.9f\n",E, fabs(Term-TermP)); mexEvalString("pause(.000001);"); } if(fabs(Term-TermP) <= Thresh) break; TermP = Term; } // ** Create and assign output variables double *Out; plhs[0] = mxCreateDoubleMatrix(1, ND ,mxREAL ); plhs[1] = mxCreateDoubleMatrix(Dim, NM, mxREAL ); plhs[2] = mxCreateDoubleMatrix(1, NM, mxREAL ); plhs[3] = mxCreateNumericArray( CNumSize, CSize, mxDOUBLE_CLASS, mxREAL); // Priors k = 0; for(i = 0; i < NM; i++ ) k = k + AssignNum[i]; for(i = 0; i < NM; i++){ Prior[i] = double(AssignNum[i])/k; } // Covariances memset(Cov, 0, Dim*Dim*NM*sizeof(double)); char trans = 'N'; k = 1; for(i = 0; i < ND; i++){ index = Assign[i]; Scale = 1/(Prior[index]*ND); // Calculate elements of Data-Mean vdSub( Dim, &Data[i*Dim], &Mean[index*Dim], SqrTemp ); // covariance dgemm(&trans, &trans, &Dim, &Dim, &k, &Scale, SqrTemp, &Dim, SqrTemp, &k, &beta, &Cov[index*Dim*Dim], &Dim); } Out = mxGetPr(plhs[0]); memcpy(Out, Assign, ND*sizeof(double)); Out = mxGetPr(plhs[1]); memcpy(Out, Mean, Dim*NM*sizeof(double)); Out = mxGetPr(plhs[2]); memcpy(Out, Prior, NM*sizeof(double)); Out = mxGetPr(plhs[3]); memcpy(Out, Cov, Dim*Dim*NM*sizeof(double)); //** Delete dynamic variables delete [] Prior; delete [] Mean; delete [] Cov; delete [] Assign; delete [] AssignNum; }
/* Function: mdlOutputs ======================================================= * do the main optimization routine here * no discrete states are considered */ static void mdlOutputs(SimStruct *S, int_T tid) { int_T i, j, Result, qinfeas=0; real_T *z = ssGetOutputPortRealSignal(S,0); real_T *w = ssGetOutputPortRealSignal(S,1); real_T *I = ssGetOutputPortRealSignal(S,2); real_T *exitflag = ssGetOutputPortRealSignal(S,3); real_T *pivots = ssGetOutputPortRealSignal(S,4); real_T *time = ssGetOutputPortRealSignal(S,5); InputRealPtrsType M = ssGetInputPortRealSignalPtrs(S,0); InputRealPtrsType q = ssGetInputPortRealSignalPtrs(S,1); ptrdiff_t pivs, info, m, n, inc=1; char_T T='N'; double total_time, alpha=1.0, tmp=-1.0, *x, *Mn, *qn, *r, *c, s=0.0, sn=0.0; PT_Matrix pA; /* Problem data A = [M -1 q] */ PT_Basis pB; /* The basis */ T_Options options; /* options structure defined in lcp_matrix.h */ /* for RTW we do not need these variables */ #ifdef MATLAB_MEX_FILE const char *fname; mxArray *fval; int_T nfields; clock_t t1,t2; #endif UNUSED_ARG(tid); /* not used in single tasking mode */ /* default options */ options.zerotol = 1e-10; /* zero tolerance */ options.lextol = 1e-10; /* lexicographic tolerance - a small treshold to determine if values are equal */ options.maxpiv = INT_MAX; /* maximum number of pivots */ /* if LUMOD routine is chosen, this options refactorizes the basis after n steps using DGETRF routine from lapack to avoid numerical problems */ options.nstepf = 50; options.clock = 0; /* 0 or 1 - to print computational time */ options.verbose = 0; /* 0 or 1 - verbose output */ /* which routine in Basis_solve should solve a set of linear equations: 0 - corresponds to LUmod package that performs factorization in the form L*A = U. Depending on the change in A factors L, U are updated. This is the fastest method. 1 - corresponds to DGESV simple driver which solves the system AX = B by factorizing A and overwriting B with the solution X 2 - corresponds to DGELS simple driver which solves overdetermined or underdetermined real linear systems min ||b - Ax||_2 involving an M-by-N matrix A, or its transpose, using a QR or LQ factorization of A. */ options.routine = 0; options.timelimit = 3600; /* time limit in seconds to interrupt iterations */ options.normalize = 1; /* 0 or 1 - perform scaling of input matrices M, q */ options.normalizethres = 1e6; /* If the normalize option is on, then the matrix scaling is performed only if 1 norm of matrix M (maximum absolute column sum) is above this threshold. This enforce additional control over normalization since it seems to be more aggressive also for well-conditioned problems. */ #ifdef MATLAB_MEX_FILE /* overwriting default options by the user */ if (ssGetSFcnParamsCount(S)==3) { nfields = mxGetNumberOfFields(ssGetSFcnParam(S,2)); for(i=0; i<nfields; i++){ fname = mxGetFieldNameByNumber(ssGetSFcnParam(S,2), i); fval = mxGetField(ssGetSFcnParam(S,2), 0, fname); if ( strcmp(fname,"zerotol")==0 ) options.zerotol = mxGetScalar(fval); if ( strcmp(fname,"lextol")==0 ) options.lextol = mxGetScalar(fval); if ( strcmp(fname,"maxpiv")==0 ) { if (mxGetScalar(fval)>=(double)INT_MAX) options.maxpiv = INT_MAX; else options.maxpiv = (int_T)mxGetScalar(fval); } if ( strcmp(fname,"nstepf")==0 ) options.nstepf = (int_T)mxGetScalar(fval); if ( strcmp(fname,"timelimit")==0 ) options.timelimit = mxGetScalar(fval); if ( strcmp(fname,"clock")==0 ) options.clock = (int_T)mxGetScalar(fval); if ( strcmp(fname,"verbose")==0 ) options.verbose = (int_T)mxGetScalar(fval); if ( strcmp(fname,"routine")==0 ) options.routine = (int_T)mxGetScalar(fval); if ( strcmp(fname,"normalize")==0 ) options.normalize = (int_T)mxGetScalar(fval); if ( strcmp(fname, "normalizethres")==0 ) options.normalizethres = mxGetScalar(fval); } } #endif /* Normalize M, q to avoid numerical problems if possible Mn = diag(r)*M*diag(c) , qn = diag(r)*q */ /* initialize Mn, qn */ Mn = (double *)ssGetPWork(S)[0]; qn = (double *)ssGetPWork(S)[1]; /* initialize vectors r, c */ r = (double *)ssGetPWork(S)[2]; c = (double *)ssGetPWork(S)[3]; /* initialize auxiliary vector x */ x = (double *)ssGetPWork(S)[4]; /* initialize to ones */ for (i=0; i<NSTATES(S); i++) { r[i] = 1.0; c[i] = 1.0; } m = NSTATES(S); n = m*m; /* write data to Mn = M */ memcpy(Mn, *M, n*sizeof(double)); /* write data to qn = q */ memcpy(qn, *q, m*sizeof(double)); /* check out the 1-norm of matrix M (maximum column sum) */ for (i=0; i<m; i++) { sn = dasum(&m, &Mn[i*m], &inc); if (sn>s) { s = sn; } } /* scale matrix M, q and write scaling factors to r (rows) and c (columns) */ if (options.normalize && s>options.normalizethres) { NormalizeMatrix (m, m, Mn, qn, r, c, options); } /* Setup the problem */ pA = ssGetPWork(S)[5]; /* A(:,1:m) = M */ memcpy(pMAT(pA), Mn, n*sizeof(double)); /* A(:,1:m) = -A(:,1:m) */ dscal(&n, &tmp, pMAT(pA), &inc); /* A(:,m+1) = -1 */ for(i=0;i<m;i++) C_SEL(pA,i,m) = -1.0; /* A(:,m+2) = q */ memcpy(&(C_SEL(pA,0,m+1)),qn,m*sizeof(double)); /* initialize basis */ pB = ssGetPWork(S)[6]; /* check if the problem is not feasible at the beginning */ for (i=0; i<m; i++) { if (qn[i]<-options.zerotol) { qinfeas = 1; break; } } /* Solve the LCP */ if (qinfeas) { #ifdef MATLAB_MEX_FILE t1 = clock(); #endif /* main LCP rouinte */ Result = lcp(pB, pA, &pivs, options); #ifdef MATLAB_MEX_FILE t2 = clock(); total_time = ((double)(t2-t1))/CLOCKS_PER_SEC; #else total_time = -1; #endif } else { pivs = 0; total_time = 0; Result = LCP_FEASIBLE; } #ifdef MATLAB_MEX_FILE if (options.clock) { printf("Time needed to perform pivoting:\n time= %i (%lf seconds)\n", t2-t1,total_time); printf("Pivots: %ld\n", pivs); printf("CLOCKS_PER_SEC = %i\n",CLOCKS_PER_SEC); } #endif /* initialize values to 0 */ for(i=0;i<NSTATES(S);i++) { w[i] = 0.0; z[i] = 0.0; I[i] = 0.0; } /* for a feasible basis, compute the solution */ if ( Result == LCP_FEASIBLE || Result == LCP_PRETERMINATED ) { #ifdef MATLAB_MEX_FILE t1 = clock(); #endif info = Basis_Solve(pB, &(C_SEL(pA,0,m+1)), x, options); for (j=0,i=0;i<Index_Length(pB->pW);i++,j++) { w[Index_Get(pB->pW,i)] = x[j]; /* add 1 due to matlab 1-indexing */ I[j] = Index_Get(pB->pW,i)+1; } for(i=0;i<Index_Length(pB->pZ);i++,j++) { /* take only positive values */ if (x[j] > options.zerotol ) { z[Index_Get(pB->pZ,i)] = x[j]; } /* add 1 due to matlab 1-indexing */ I[j] = Index_Get(pB->pZ, i)+m+1; } #ifdef MATLAB_MEX_FILE t2 = clock(); total_time+=(double)(t2-t1)/(double)CLOCKS_PER_SEC; if (options.clock) { printf("Time in total needed to solve LCP: %lf seconds\n", total_time + (double)(t2-t1)/(double)CLOCKS_PER_SEC); } #endif if (options.normalize) { /* do the backward normalization */ /* z = diag(c)*zn */ for (i=0; i<m; i++) { z[i] = c[i]*z[i]; } /* since the normalization does not compute w properly, we recalculate it from * recovered z */ /* write data to Mn = M */ memcpy(Mn, *M, n*sizeof(double)); /* write data to qn = q */ memcpy(qn, *q, m*sizeof(double)); /* copy w <- q; */ dcopy(&m, qn, &inc, w, &inc); /* compute w = M*z + q */ dgemv(&T, &m, &m, &alpha, Mn, &m, z, &inc, &alpha, w, &inc); /* if w is less than eps, consider it as zero */ for (i=0; i<m; i++) { if (w[i]<options.zerotol) { w[i] = 0.0; } } } } /* outputs */ *exitflag = (real_T )Result; *pivots =(real_T)pivs; *time = (real_T)total_time; /* reset dimensions and values in basis for a recursive call */ Reinitialize_Basis(m, pB); }
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { /* Variables */ int k,nSamples,maxIter,sparse=0,*iVals,*covered,*lastVisited,stepSizeType=1,temp; long i,j,nVars,one=1; mwIndex *jc,*ir; double *w, *Xt, *y, lambda, *Li, alpha, innerProd, sig,c=1,*g,*d,nCovered=0,*cumSum,fi,fi_new,gg,precision,scaling,wtx,*xtx; if (nrhs < 9) mexErrMsgTxt("Function needs nine arguments: {w,Xt,y,lambda,alpha,iVals,d,g,covered[,stepSizeType,xtx]}"); /* Input */ w = mxGetPr(prhs[0]); Xt = mxGetPr(prhs[1]); y = mxGetPr(prhs[2]); lambda = mxGetScalar(prhs[3]); Li = mxGetPr(prhs[4]); iVals = (int*)mxGetPr(prhs[5]); if (!mxIsClass(prhs[5],"int32")) mexErrMsgTxt("iVals must be int32"); d = mxGetPr(prhs[6]); g = mxGetPr(prhs[7]); covered = (int*)mxGetPr(prhs[8]); if (nrhs >= 10) { stepSizeType = (int)mxGetScalar(prhs[9]); if (!mxIsClass(prhs[9],"int32")) mexErrMsgTxt("stepSizeType must be int32"); } /* Compute Sizes */ nVars = mxGetM(prhs[1]); nSamples = mxGetN(prhs[1]); maxIter = mxGetM(prhs[5]); precision = 1.490116119384765625e-8; if (nVars != mxGetM(prhs[0])) mexErrMsgTxt("w and Xt must have the same number of rows"); if (nSamples != mxGetM(prhs[2])) mexErrMsgTxt("number of columns of Xt must be the same as the number of rows in y"); if (nVars != mxGetM(prhs[6])) mexErrMsgTxt("w and d must have the same number of rows"); if (nSamples != mxGetM(prhs[7])) mexErrMsgTxt("w and g must have the same number of rows"); if (nSamples != mxGetM(prhs[8])) mexErrMsgTxt("covered and y must have the same number of rows"); if (mxIsSparse(prhs[1])) { sparse = 1; jc = mxGetJc(prhs[1]); ir = mxGetIr(prhs[1]); } if (sparse && alpha*lambda==1) mexErrMsgTxt("Sorry, I don't like it when Xt is sparse and alpha*lambda=1\n"); /* Allocate memory needed for lazy updates */ if (sparse) { lastVisited = mxCalloc(nVars,sizeof(double)); cumSum = mxCalloc(maxIter,sizeof(double)); /*for(j=0;j<nVars;j++) lastVisited[j] = -1;*/ } for(i=0;i<nSamples;i++) { if (covered[i]!=0) nCovered++; } if (nrhs >= 11) { xtx = mxGetPr(prhs[10]); if (nSamples != mxGetM(prhs[10])) mexErrMsgTxt("covered and xtx must have the same number or rows"); } else { xtx = mxCalloc(nSamples,sizeof(double)); for(i = 0; i < nSamples;i++) { if (sparse) { xtx[i] = 0; for(j=jc[i];j<jc[i+1];j++) xtx[i] += Xt[j]*Xt[j]; } else xtx[i] = ddot(&nVars,&Xt[i*nVars],&one,&Xt[i*nVars],&one); } } for(k=0;k<maxIter;k++) { /* Select next training example */ i = iVals[k]-1; /* Compute current values of needed parameters */ if (sparse && k > 0) { for(j=jc[i];j<jc[i+1];j++) { if (lastVisited[ir[j]]==0) { w[ir[j]] -= d[ir[j]]*cumSum[k-1]; } else { w[ir[j]] -= d[ir[j]]*(cumSum[k-1]-cumSum[lastVisited[ir[j]]-1]); } lastVisited[ir[j]] = k; } } /* Compute derivative of loss */ if (sparse) { innerProd = 0; for(j=jc[i];j<jc[i+1];j++) innerProd += w[ir[j]]*Xt[j]; innerProd *= c; } else innerProd = ddot(&nVars,w,&one,&Xt[nVars*i],&one); sig = -y[i]/(1+exp(y[i]*innerProd)); /* Update direction */ if (sparse) { for(j=jc[i];j<jc[i+1];j++) d[ir[j]] += Xt[j]*(sig - g[i]); } else { scaling = sig-g[i]; daxpy(&nVars,&scaling,&Xt[i*nVars],&one,d,&one); } /* Store derivative of loss */ g[i] = sig; /* Update the number of examples that we have seen */ if (covered[i]==0) { covered[i]=1; nCovered++; } /* Line-search for Li */ fi = log(1 + exp(-y[i]*innerProd)); /* Compute f_new as the function value obtained by taking * a step size of 1/Li in the gradient direction */ wtx = innerProd; gg = sig*sig*xtx[i]; innerProd = wtx - xtx[i]*sig/(*Li); fi_new = log(1 + exp(-y[i]*innerProd)); /*printf("fi = %e, fi_new = %e, gg = %e\n",fi,fi_new,gg);*/ while (gg > precision && fi_new > fi - gg/(2*(*Li))) { /*printf("Lipschitz Backtracking (k = %d, fi = %e, fi_new = %e, 1/Li = %e)\n",k+1,fi,fi_new,1/(*Li));*/ *Li *= 2; innerProd = wtx - xtx[i]*sig/(*Li); fi_new = log(1 + exp(-y[i]*innerProd)); } /* Compute step size */ if (stepSizeType == 1) alpha = 1/(*Li + lambda); else alpha = 2/(*Li + (nSamples+1)*lambda); /* Update parameters */ if (sparse) { c *= 1-alpha*lambda; if (k==0) cumSum[0] = alpha/(c*nCovered); else cumSum[k] = cumSum[k-1] + alpha/(c*nCovered); } else { scaling = 1-alpha*lambda; dscal(&nVars,&scaling,w,&one); scaling = -alpha/nCovered; daxpy(&nVars,&scaling,d,&one,w,&one); } /* Decrease value of Lipschitz constant */ *Li *= pow(2.0,-1.0/nSamples); } if (sparse) { for(j=0;j<nVars;j++) { if (lastVisited[j]==0) { w[j] -= d[j]*cumSum[maxIter-1]; } else { w[j] -= d[j]*(cumSum[maxIter-1]-cumSum[lastVisited[j]-1]); } } scaling = c; dscal(&nVars,&scaling,w,&one); mxFree(lastVisited); mxFree(cumSum); } if (nrhs < 11) mxFree(xtx); }
NLuint nlSolve_BICGSTAB_precond() { NLdouble* b = nlCurrentContext->b ; NLdouble* x = nlCurrentContext->x ; NLdouble eps = nlCurrentContext->threshold ; NLuint max_iter = nlCurrentContext->max_iterations ; NLint N = nlCurrentContext->n ; NLint i; NLdouble *rT = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *d = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *h = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *u = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *Sd = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *t = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *aux = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *s = h; NLdouble rTh, rTSd, rTr, alpha, beta, omega, st, tt; NLuint its=0; NLdouble b_square = ddot(N,b,1,b,1); NLdouble err = eps*eps*b_square; NLdouble *r = NL_NEW_ARRAY(NLdouble, N); NLdouble * Ax = NL_NEW_ARRAY(NLdouble,nlCurrentContext->n); NLdouble accu =0.0; nlCurrentContext->matrix_vector_prod(x,r); daxpy(N,-1.,b,1,r,1); nlCurrentContext->precond_vector_prod(r,d); dcopy(N,d,1,h,1); dcopy(N,h,1,rT,1); nl_assert( ddot(N,rT,1,rT,1)>1e-40 ); rTh=ddot(N,rT,1,h,1); rTr=ddot(N,r,1,r,1); while ( rTr>err && its < max_iter) { if(!(its % 100)) { printf ( "%d : %.10e -- %.10e\n", its, rTr, err ) ; } nlCurrentContext->matrix_vector_prod(d,aux); nlCurrentContext->precond_vector_prod(aux,Sd); rTSd=ddot(N,rT,1,Sd,1); nl_assert( fabs(rTSd)>1e-40 ); alpha=rTh/rTSd; daxpy(N,-alpha,aux,1,r,1); dcopy(N,h,1,s,1); daxpy(N,-alpha,Sd,1,s,1); nlCurrentContext->matrix_vector_prod(s,aux); nlCurrentContext->precond_vector_prod(aux,t); daxpy(N,1.,t,1,u,1); dscal(N,alpha,u,1); st=ddot(N,s,1,t,1); tt=ddot(N,t,1,t,1); if ( fabs(st)<1e-40 || fabs(tt)<1e-40 ) { omega = 0.; } else { omega = st/tt; } daxpy(N,-omega,aux,1,r,1); daxpy(N,-alpha,d,1,x,1); daxpy(N,-omega,s,1,x,1); dcopy(N,s,1,h,1); daxpy(N,-omega,t,1,h,1); beta=(alpha/omega)/rTh; rTh=ddot(N,rT,1,h,1); beta*=rTh; dscal(N,beta,d,1); daxpy(N,1.,h,1,d,1); daxpy(N,-beta*omega,Sd,1,d,1); rTr=ddot(N,r,1,r,1); ++its; } nlCurrentContext->matrix_vector_prod(x,Ax); for(i = 0 ; i < N ; ++i){ accu+=(Ax[i]-b[i])*(Ax[i]-b[i]); } printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square)); NL_DELETE_ARRAY(Ax); NL_DELETE_ARRAY(r); NL_DELETE_ARRAY(rT); NL_DELETE_ARRAY(d); NL_DELETE_ARRAY(h); NL_DELETE_ARRAY(u); NL_DELETE_ARRAY(Sd); NL_DELETE_ARRAY(t); NL_DELETE_ARRAY(aux); return its; }
double bundle_guess (bundle_t* bundle, int max_qp_iterations) /* * Guesses where the x giving the estimated best z lies by solving the penalized bundle QP. * * If the resolution of the QP takes more than max_qp_iterations mask guesses, it is aborted, * and the bundle is repopulated as if it were aggregated in the previous bundle iteration, * in other words reduced to only 2 subgradients, the previous aggregate and the previous evaluated subgradient. * The QP is then solved anew, and completely. * The procedure then computes x, the aggregate subgradient and the linearization error for this QP. * */ { int solved; int i, n_iter = max_qp_iterations; bundle->time_qp -= getutime(1); bundle->kkt_z = INFINITY; solved = bundle_qp_solve(bundle, 0ULL, bundle->m, &n_iter); if (!solved) { bundle->time_qp += getutime(1); ++bundle->n_iterations; //printf("trim\nit: %i\tpen: %f\t", bundle->n_iterations, bundle->scale); // trim a if (bundle->m != 2) memcpy(&bundle->a[bundle->n], &bundle->a[(bundle->m - 1) * bundle->n], bundle->n * sizeof(double)); memcpy(&bundle->a[0], bundle->agg_subg, bundle->n * sizeof(double)); // recompute aat bundle->aat[0] = ddot(bundle->n, bundle->agg_subg, bundle->agg_subg); bundle->aat[bundle->max_m + 1] = bundle->aat[(bundle->m - 1) * bundle->max_m + bundle->m - 1]; bundle->aat[1] = bundle->aat[bundle->max_m] = ddot(bundle->n, bundle->agg_subg, &bundle->a[bundle->n]); // trim b bundle->b[0] = (bundle->most_recent_i == -1) ? bundle->agg_next_b : bundle->agg_b; bundle->b[1] = bundle->b[bundle->m - 1]; // update bundle information and solve again bundle->most_recent_i = -1; bundle->m = 2; bundle->time_qp -= getutime(1); n_iter = (max_qp_iterations < 3) ? 3 : max_qp_iterations; bundle->kkt_z = INFINITY; solved = bundle_qp_solve(bundle, 0ULL, bundle->m, &n_iter); assert(solved); } // compute the aggregate subgradient using BLAS (in bundle->agg_subg[]) bundle->agg_b = bundle->agg_next_b = 0.0; bzero(bundle->agg_subg, bundle->n * sizeof(double)); for (i = 0; i < bundle->kkt_m; i++) { daxpy(bundle->n, bundle->kkt_mul[i], &bundle->a[bundle->kkt_i[i] * bundle->n], bundle->agg_subg); bundle->agg_b += bundle->kkt_mul[i] * bundle->b[bundle->kkt_i[i]]; bundle->agg_next_b += bundle->kkt_mul[i] * bundle->next_b[bundle->kkt_i[i]]; } // compute local x memcpy(bundle->kkt_x, bundle->agg_subg, bundle->n * sizeof(double)); dscal(bundle->n, 1.0 / bundle->scale, bundle->kkt_x); // compute global x memcpy(bundle->x, bundle->best_x, bundle->n * sizeof(double)); daxpy(bundle->n, 1.0, bundle->kkt_x, bundle->x); bundle->time_qp += getutime(1); // return guessed z return bundle->kkt_mul[bundle->kkt_m]; }
void cblas_dscal( mwSignedIndex N, double alpha, double *X, mwSignedIndex incX) { dscal(&N,&alpha,X,&incX); }
static void setupRHS (Element_List **V, Element_List **Vf,double *rhs, double *u0, Bndry **Vbc, Bsystem **Vbsys){ register int i,k; int N,nbl; int eDIM = V[0]->flist[0]->dim(); Bsystem *PB = Vbsys[eDIM],*B = Vbsys[0]; int nel = B->nel,info; int **ipiv = B->Gmat->cipiv; double **binvc = B->Gmat->binvc; double **invc = B->Gmat->invc; double ***dbinvc = B->Gmat->dbinvc; double **p_binvc = PB->Gmat->binvc; Element *E,*E1; Bndry *Ebc; double *tmp; if(eDIM == 2) tmp = dvector(0,max(8*LGmax,(LGmax-2)*(LGmax-2))); else tmp = dvector(0,18*LGmax*LGmax); B = Vbsys[0]; PB = Vbsys[eDIM]; #ifdef __LIBCATAMOUNT__ st1 = dclock(); #else st1 = clock(); #endif /* save initial condition */ saveinit(V,u0,Vbsys); Timing1("saveinit.........."); /* take inner product if in physical space */ for(i = 0; i < eDIM; ++i){ if(Vf[i]->fhead->state == 'p') Vf[i]->Iprod(Vf[i]); } /* zero pressure field */ dzero(Vf[eDIM]->hjtot,Vf[eDIM]->base_hj,1); Timing1("zeroing..........."); /* condense out interior from u-vel + p */ for(i = 0; i < eDIM; ++i) for(E=Vf[i]->fhead;E;E=E->next){ nbl = E->Nbmodes; N = E->Nmodes - nbl; if(N) dgemv('T', N, nbl, -1., binvc[E->geom->id], N, E->vert->hj+nbl, 1, 1., E->vert->hj,1); } Timing1("first condense(v)."); for(i = 0; i < eDIM; ++i) for(E=Vf[i]->fhead;E;E=E->next){ nbl = E->Nbmodes; N = E->Nmodes - nbl; if(N) { E1 = Vf[eDIM]->flist[E->id]; if(B->lambda->wave){ dcopy(N,E->vert->hj+nbl,1,tmp,1); dgetrs('N', N, 1, invc[E->geom->id], N,ipiv[E->geom->id],tmp,N,info); dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N, tmp, 1, 1., E1->vert->hj,1); } else{ dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N, E->vert->hj+nbl, 1, 1., E1->vert->hj,1); } } } Timing1("first condense(p)."); /* add flux terms */ for(i = 0; i < eDIM; ++i) for(Ebc = Vbc[i]; Ebc; Ebc = Ebc->next) if(Ebc->type == 'F' || Ebc->type == 'R') Vf[i]->flist[Ebc->elmt->id]->Add_flux_terms(Ebc); /* second level of factorisation to orthogonalise basis to p */ for(E=Vf[eDIM]->fhead;E;E=E->next){ E1 = Vf[0]->flist[E->id]; nbl = eDIM*E1->Nbmodes + 1; N = E->Nmodes-1; dgemv('T', N, nbl, -1.0, p_binvc[E->geom->id], N, E->vert->hj+1, 1, 0.0, tmp,1); for(i = 0; i < eDIM; ++i){ E1 = Vf[i]->flist[E->id]; dvadd(E1->Nbmodes,tmp+i*E1->Nbmodes,1,E1->vert->hj,1,E1->vert->hj,1); } E->vert->hj[0] += tmp[nbl-1]; } Timing1("second condense..."); /* subtract boundary initial conditions */ if(PB->smeth == iterative){ double **wk; double **a = PB->Gmat->a; if(eDIM == 2) wk = dmatrix(0,1,0,eDIM*4*LGmax); else wk = dmatrix(0,1,0,eDIM*6*LGmax*LGmax); for(k = 0; k < nel; ++k){ nbl = V[0]->flist[k]->Nbmodes; /* gather vector */ for(i = 0; i < eDIM; ++i) dcopy(nbl,V[i]->flist[k]->vert->hj,1,wk[0]+i*nbl,1); dspmv('U',eDIM*nbl+1,1.0,a[V[0]->flist[k]->geom->id], wk[0],1,0.0,wk[1],1); /* subtract of Vf */ for(i = 0; i < eDIM; ++i) dvsub(nbl,Vf[i]->flist[k]->vert->hj,1,wk[1]+i*nbl,1, Vf[i]->flist[k]->vert->hj,1); Vf[eDIM]->flist[k]->vert->hj[0] -= wk[1][eDIM*nbl]; } GathrBndry_Stokes(Vf,rhs,Vbsys); free_dmatrix(wk,0,0); } else{ if(Vbc[0]->DirRHS){ GathrBndry_Stokes(Vf,rhs,Vbsys); /* subtract of bcs */ dvsub(PB->nsolve,rhs,1,Vbc[0]->DirRHS,1,rhs,1); /* zero ic vector */ dzero(PB->nsolve,u0,1); } else{ /* zero out interior components since only deal with boundary initial conditions (interior is always direct) */ for(i = 0; i < eDIM; ++i) for(E = V[i]->fhead; E; E = E->next){ nbl = E->Nbmodes; N = E->Nmodes - nbl; dzero(N, E->vert->hj + nbl, 1); } /* inner product of divergence for pressure forcing */ for(i = 0; i < eDIM; ++i) V[i]->Trans(V[i], J_to_Q); V[0]->Grad(V[eDIM],0,0,'x'); V[1]->Grad(0,Vf[eDIM],0,'y'); dvadd(V[1]->htot,V[eDIM]->base_h,1,Vf[eDIM]->base_h,1, V[eDIM]->base_h,1); if(eDIM == 3){ V[2]->Grad(0,V[eDIM],0,'z'); dvadd(V[2]->htot,V[eDIM]->base_h,1,Vf[eDIM]->base_h,1, V[eDIM]->base_h,1); } #ifndef PCONTBASE for(k = 0; k < nel; ++k) V[eDIM]->flist[k]->Ofwd(*V[eDIM]->flist[k]->h, V[eDIM]->flist[k]->vert->hj, V[eDIM]->flist[k]->dgL); #else V[eDIM]->Iprod(V[eDIM]); #endif for(i = 0; i < eDIM; ++i){ for(k = 0; k < nel; ++k){ E = V[i]->flist[k]; nbl = E->Nbmodes; N = E->Nmodes - nbl; E->HelmHoltz(PB->lambda+k); dscal(E->Nmodes, -B->lambda[k].d, E->vert->hj, 1); if(N) { /* condense out interior terms in velocity */ dgemv('T', N, nbl, -1., binvc[E->geom->id], N, E->vert->hj+nbl, 1, 1., E->vert->hj,1); /* condense out interior terms in pressure*/ E1 = V[eDIM]->flist[k]; if(B->lambda->wave){ dcopy(N,E->vert->hj+nbl,1,tmp,1); dgetrs('N',N,1,invc[E->geom->id],N,ipiv[E->geom->id],tmp,N,info); dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N, tmp, 1, 1., E1->vert->hj,1); } else{ dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N, E->vert->hj+nbl, 1, 1., E1->vert->hj,1); } } } } /* second level of factorisation to orthogonalise basis to p */ /* p - vel */ for(E=V[eDIM]->fhead;E;E=E->next){ E1 = V[0]->flist[E->id]; nbl = eDIM*E1->Nbmodes + 1; N = E->Nmodes-1; dgemv('T', N, nbl, -1.0, p_binvc[E->geom->id], N, E->vert->hj+1, 1, 0.0, tmp,1); for(i = 0; i < eDIM; ++i){ E1 = V[i]->flist[E->id]; dvadd(E1->Nbmodes,tmp+i*E1->Nbmodes,1,E1->vert->hj,1,E1->vert->hj,1); dvadd(E1->Nbmodes,E1->vert->hj,1,Vf[i]->flist[E->id]->vert->hj,1, Vf[i]->flist[E->id]->vert->hj,1); } Vf[eDIM]->flist[E->id]->vert->hj[0] += E->vert->hj[0] + tmp[nbl-1]; } Timing1("bc condense......."); GathrBndry_Stokes(Vf,rhs,Vbsys); Timing1("GatherBndry......."); } } /* finally copy inner product of f into v for inner solve */ for(i = 0; i < eDIM; ++i) for(E = V[i]->fhead; E; E= E->next){ nbl = E->Nbmodes; N = E->Nmodes - nbl; E1 = Vf[i]->flist[E->id]; dcopy(N, E1->vert->hj+nbl, 1, E->vert->hj+nbl, 1); } for(E = Vf[eDIM]->fhead; E; E = E->next){ E1 = V[eDIM]->flist[E->id]; dcopy(E->Nmodes,E->vert->hj,1,E1->vert->hj,1); } dzero(PB->nglobal-PB->nsolve, rhs + PB->nsolve, 1); free(tmp); }
void scaleVector(Vector x, double alpha) { dscal(&x->len, &alpha, x->data, &x->stride); }
/*----------------------*/ void dgefa(REAL a[], int lda, int n, int ipvt[], int *info) /* We would like to declare a[][lda], but c does not allow it. In this function, references to a[i][j] are written a[lda*i+j]. */ /* dgefa factors a double precision matrix by gaussian elimination. dgefa is usually called by dgeco, but it can be called directly with a saving in time if rcond is not needed. (time for dgeco) = (1 + 9/n)*(time for dgefa) . on entry a REAL precision[n][lda] the matrix to be factored. lda integer the leading dimension of the array a . n integer the order of the matrix a . on return a an upper triangular matrix and the multipliers which were used to obtain it. the factorization can be written a = l*u where l is a product of permutation and unit lower triangular matrices and u is upper triangular. ipvt integer[n] an integer vector of pivot indices. info integer = 0 normal value. = k if u[k][k] .eq. 0.0 . this is not an error condition for this subroutine, but it does indicate that dgesl or dgedi will divide by zero if called. use rcond in dgeco for a reliable indication of singularity. linpack. this version dated 08/14/78 . cleve moler, university of new mexico, argonne national lab. functions blas daxpy,dscal,idamax */ { /* internal variables */ REAL t; int j,k,kp1,l,nm1; /* gaussian elimination with partial pivoting */ *info = 0; nm1 = n - 1; if (nm1 >= 0) { for (k = 0; k < nm1; k++) { kp1 = k + 1; /* find l = pivot index */ l = idamax(n-k,&a[lda*k+k],1) + k; ipvt[k] = l; /* zero pivot implies this column already triangularized */ if (a[lda*k+l] != ZERO) { /* interchange if necessary */ if (l != k) { t = a[lda*k+l]; a[lda*k+l] = a[lda*k+k]; a[lda*k+k] = t; } /* compute multipliers */ t = -ONE/a[lda*k+k]; dscal(n-(k+1),t,&a[lda*k+k+1],1); /* row elimination with column indexing */ for (j = kp1; j < n; j++) { t = a[lda*j+l]; if (l != k) { a[lda*j+l] = a[lda*j+k]; a[lda*j+k] = t; } daxpy(n-(k+1),t,&a[lda*k+k+1],1, &a[lda*j+k+1],1); } } else { *info = k; } } } ipvt[n-1] = n-1; if (a[lda*(n-1)+(n-1)] == ZERO) *info = n-1; return; }
void lbfgs (int n, /* I num unknowns = 3 * num atoms */ real stop_tol, /* I tol for ||g||/sqrt(n) */ int itmax, /* I max num iterations allowed */ int itmax_line, int* iter, /* IO iters required to find min */ real* fret, /* O minimum value */ int iprint, int last_call) /* not used */ /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Called by exec_minimization in "options.c". * This routine is modelled on mm_nlcg in "nlcg.c", by JC Meza. * It finds the minimum of the unconstrained molecular potential * energy using trust region methods and a limited memory BFGS * approximation of the Hessian. Each trust region subproblem is * solved by Powell's dogleg method. * * The L-BFGS structures are currently FORTRAN subroutines that * require memory allocation of CMPS and CMPY for work space. * These variables must be passed to the FORTRAN subroutines and * not altered anywhere else. * * The outline of the algorithm is as follows: * Allocate memory, initialize parameters * Compute the gradient g_vec * LOOP * IF ||g_vec|| < tol THEN RETURN * Update L-BFGS matrices B and H * Compute the dogleg step d_vec * Compute the predicted reduction of the quadratic model * Compute the actual potential energy reduction * IF ared/pred > eta * THEN x_vec = x_vec + d_vec * TR_size >= TR_size * Compute the gradient g_vec at the new point * ELSE TR_size < ||d_vec|| * CONTINUE *********************************************************************/ { real *x_vec, *new_x_vec, *g_vec, *d_vec, *y_vec; int i; int iter_num, NUPDT; real obj_val, new_obj_val; real dd1, dd2; real eta, pred, ared; real TR_size; real gnorm, xnorm, dnorm; real *CMPS, *CMPY, *tmp_vec; /*-- Open the status file for saving output. */ bfgs_fp = fopen (status_file, "w"); if (bfgs_fp == NULL) { fprintf (stderr, "lbfgs: cannot open %s\n", status_file); printf ("*** lbfgs: cannot open %s\n", status_file); exit (1); } /*-------------------------------------------------------------------- * Allocate memory and set up files. *-------------------------------------------------------------------*/ fprintf (stderr, "Allocate space in lbfgs n=%d\n",n); x_vec = (real *) calloc ((n+1) , sizeof(real)); new_x_vec = (real *) calloc ((n+1) , sizeof(real)); g_vec = (real *) calloc ((n+1) , sizeof(real)); d_vec = (real *) calloc ((n+1) , sizeof(real)); y_vec = (real *) calloc ((n+1) , sizeof(real)); CMPS = (real *) calloc ((n*T_LBFGS) , sizeof(real)); CMPY = (real *) calloc ((n*T_LBFGS) , sizeof(real)); tmp_vec = (real *) calloc ((n+1) , sizeof(real)); /*-------------------------------------------------------------------- * Evaluate the objective and its gradient at the start point. *-------------------------------------------------------------------*/ /* obj_val = potential (str, coor, force); force_to_grad0 (str, force, g_vec); */ /*-------------------------------------------------------------------- * Initialize the trust region algorithm parameters. *-------------------------------------------------------------------*/ eta = 0.3; /*-- step acceptance threshold */ TR_size = 1.0; iter_num = 0; /*-- number inner iterations */ NUPDT = 1; /*-- number L-BFGS B updates */ gnorm = dnrm2 (n, g_vec, 1); fprintf(bfgs_fp, "\n\t\t Steepest descent with Trust Regions\n"); fprintf(bfgs_fp, " Iter f(x) ||grad||/n Delta ||step|| ared pred\n"); fprintf(bfgs_fp," %5d %12.4e %12.4e %12.4e\n", iter_num, obj_val, gnorm/sqrt(n), TR_size); /*-------------------------------------------------------------------- * Begin the main loop. *-------------------------------------------------------------------*/ while (iter_num < itmax) { iter_num++; dd1 = g_vec[0]; for (i=1; i<n; i++) if (g_vec[i] > dd1) dd1 = g_vec[i]; if (dd1 <= stop_tol) { /*-- Exit main loop IF ||g||_2 sufficiently small. */ /* if (gnorm <= (stop_tol * sqrt(n))) {*/ xnorm = dnrm2 (n, x_vec, 1); fprintf (stderr, " step_size: %5d energy: %12.4f |Grad|: %10.4f |X|: %10.4f\n", iter_num, obj_val, gnorm/sqrt((real)(n)), xnorm/sqrt((real)(n))); break; } /*-- Solve the dogleg subproblem. */ Dogleg (n, g_vec, gnorm, TR_size, NUPDT, T_LBFGS, CMPS, CMPY, tmp_vec, d_vec, &dnorm); /*-- Compute pred = - g'd - 1/2 d'Bd. */ dd1 = ddot (n, g_vec, 1, d_vec, 1); i = T_LBFGS; multbv_ (&n, &i, d_vec, tmp_vec, &NUPDT, CMPS, CMPY); dd2 = ddot (n, d_vec, 1, tmp_vec, 1); pred = -dd1 - (0.5 * dd2); if (pred < sqrt(MMIN)) { printf ("*** Predicted reduction not positive. <lbfgs>\n"); printf (" pred = %15.6e\n", pred); exit (1); } /*-- Compute ared by evaluating the objective at the trial point. */ dcopy (n, x_vec, 1, new_x_vec, 1); daxpy (n, 1.0, d_vec, 1, new_x_vec, 1); /* call evalf p_to_coor0 (str, new_x_vec, coor); new_obj_val = potential (str, coor, force); */ ared = obj_val - new_obj_val; /*-- Decide whether to take the step. *-- If yes, then increase TR_size, compute the gradient, and *-- update the L-BFGS approximations. *-- If no, then decrease TR_size to a fraction of ||d||_2. */ if ((ared / pred) >= eta) { /*-- Increase the trust region size. */ if (ared / pred >= 0.9) { dd1 = 10.0 * dnorm; if (dd1 > TR_size) TR_size = dd1; } else { dd1 = 2.0 * dnorm; if (dd1 > TR_size) TR_size = dd1; } if (TR_size > 1.0e3) TR_size = 1.0e3; /*-- Get the gradient from the previously calculated force vector. *-- Set y_vec = new gradient - old gradient. */ dcopy (n, g_vec, 1, y_vec, 1); dscal (n, -1.0, y_vec, 1); /* get grad force_to_grad0 (str, force, g_vec); */ daxpy (n, 1.0, g_vec, 1, y_vec, 1); /*-- Update the L-BFGS and inverse L-BFGS approximations. */ Update_lbfgs (n, T_LBFGS, d_vec, y_vec, tmp_vec, &NUPDT, CMPS, CMPY); dcopy (n, new_x_vec, 1, x_vec, 1); gnorm = dnrm2 (n, g_vec, 1); obj_val = new_obj_val; /* p_to_coor0 (str, x_vec, coor); */ fprintf(bfgs_fp," %5d %12.4e %12.4e %12.4e %12.4e %12.3e %12.3e\n", iter_num, obj_val, gnorm/sqrt(n), TR_size, dnorm, ared, pred); } else { /*-- Decrease the trust region size by linear interpolation. */ dd1 = (1.0 - eta) / (1.0 - (ared / pred)); if (dd1 < 0.1) dd1 = 0.1; if (dd1 > 0.5) dd1 = 0.5; TR_size = dd1 * dnorm; fprintf(bfgs_fp,"rej %5d %12.4e %12.4e %12.4e %12.4e %12.3e %12.3e\n", iter_num, obj_val, gnorm/sqrt(n), TR_size, dnorm, ared, pred); if (TR_size < (100.0 * MCHEPS)) { printf ("*** Trust region too small to continue. <lbfgs>\n"); fprintf (bfgs_fp, "*** Trust region too small to continue.\n"); fflush (bfgs_fp); exit (1); } } } /*** end of while(iter_num < itmax) ***/ /*-------------------------------------------------------------------- * Clean up and exit. *-------------------------------------------------------------------*/ *iter = iter_num; *fret = obj_val; free (x_vec); free (new_x_vec); free (g_vec); free (d_vec); return; }
int iterateDynamics(std::vector<Robot *> robotVec, std::vector<DynamicBody *> bodyVec, DynamicParameters *dp) { double h = dp->timeStep; bool useContactEps = dp->useContactEps; static double Jcg_tmp[9],Jcg_B[9],Jcg_N[9],Jcg_N_inv[9],R_N_B[9]; static double db0=0.0,tmp3[3]; static mat3 Rot; static int info; World *myWorld; KinematicChain *chain; int numBodies = bodyVec.size(),errCode = 0; int numRobots = robotVec.size(); int numJoints=0; int numDOF=0; int bn,cn,i,j; int Mrows,Dcols,Arows,Hrows,Hcols,Nurows,Nucols; int numDOFLimits=0; std::list<Contact *> contactList; std::list<Contact *> objContactList; std::list<Contact *>::iterator cp; // unsigned long dmark = dmalloc_mark(); double *ql = new double[7*numBodies]; double *qnew = new double[7*numBodies]; double *vl = new double[6*numBodies]; double *vlnew = new double[6*numBodies]; double *M = new double[(6*numBodies)*(6*numBodies)]; double *M_i = new double[(6*numBodies)*(6*numBodies)]; double *fext = new double[6*numBodies]; // LCP matrix double *A; // LCP vectors double *g,*lambda; double *predLambda = NULL; //used for debugging the prediction of LCP basis // main matrices for contact constraints double *H; // main matrices for joint constraints double *Nu; // main vector for contact constraints double *k; // main vectors for joint constraints double *eps; // intermediate matrices for contact constraints double *HtM_i,*v1; // intermediate matrices for contact constraints double *v2; // intermediate matrices for case of both joint and contact constraints double *NutM_i,*NutM_iNu,*INVNutM_iNu,*INVNutM_iNuNut; double *INVNutM_iNuNutM_i,*INVNutM_iNuNutM_iH; // intermediate vectors for case of both joint and contact constraints double *NutM_ikminuseps,*INVNutM_iNuNutM_ikminuseps; double *currq,*currM; Mrows = 6*numBodies; myWorld = bodyVec[0]->getWorld(); std::map<Body*, int> islandIndices; for (i=0;i<myWorld->getNumBodies();i++) { islandIndices.insert( std::pair<Body*, int>(myWorld->getBody(i), -1) ); } for (i=0;i<numBodies;i++) { islandIndices[ bodyVec[i] ] = i; } // count the joints and DOF, and the joint coupling constraints int numCouplingConstraints = 0; for (i=0;i<numRobots;i++) { numDOF += robotVec[i]->getNumDOF(); for (j=0;j<robotVec[i]->getNumChains();j++) { chain = robotVec[i]->getChain(j); numJoints += chain->getNumJoints(); } for (j=0;j<robotVec[i]->getNumDOF();j++) { numCouplingConstraints += robotVec[i]->getDOF(j)->getNumCouplingConstraints(); numDOFLimits += robotVec[i]->getDOF(j)->getNumLimitConstraints(); } } DBGP("Dynamics time step: " << h); DBGP("numJoints: " << numJoints); // count the total number of joints and contacts int numContacts = 0; int numTotalFrictionEdges = 0; int numDynJointConstraints=0; for (bn=0;bn<numBodies;bn++) { //count joints if (bodyVec[bn]->getDynJoint()) { int numCon = bodyVec[bn]->getDynJoint()->getNumConstraints(); numDynJointConstraints += numCon; DBGP(bodyVec[bn]->getName().latin1() << ": " << numCon << " constraints"); } //count contacts objContactList = bodyVec[bn]->getContacts(); for (cp=objContactList.begin();cp!=objContactList.end();cp++) { // check if the mate of this contact is already in the contact list if (std::find(contactList.begin(),contactList.end(),(*cp)->getMate()) == contactList.end()) { numContacts++; numTotalFrictionEdges += (*cp)->numFrictionEdges; contactList.push_back(*cp); } } } DBGP("Num contacts: " << numContacts); DBGP("Num friction edges: " << numTotalFrictionEdges); DBGP("Num dynjoint: " << numDynJointConstraints); // zero out matrices dcopy(Mrows*Mrows,&db0,0,M,1); dcopy(Mrows*Mrows,&db0,0,M_i,1); dcopy(Mrows,&db0,0,fext,1); //allocate the joint constraint matrices if (numJoints) { Nurows = Mrows; Nucols = numDynJointConstraints + numCouplingConstraints; DBGP("Nucols: " << Nucols); Nu = new double[Nurows * Nucols]; dcopy(Nurows*Nucols,&db0,0,Nu,1); eps = new double[Nucols]; dcopy(Nucols,&db0,0,eps,1); Arows = Mrows+Nucols; } // allocate the LCP matrix if (numContacts || numDOFLimits) { Dcols = numTotalFrictionEdges; DBGP("numContacts " << numContacts); DBGP("Dcols " << Dcols); DBGP("numDOFLimits " << numDOFLimits); Hrows = Mrows; Hcols = Dcols + 2*numContacts + numDOFLimits; H = new double[Hrows * Hcols]; dcopy(Hrows*Hcols,&db0,0,H,1); v1 = new double[Hrows * Hcols]; v2 = new double[Hrows]; dcopy(Hrows*Hcols,&db0,0,v1,1); dcopy(Hrows,&db0,0,v2,1); k = new double[Mrows]; //holds mass*previous velocity and external impulses Arows = Hcols; lambda = new double[Arows]; // the LCP solution } else { Dcols = 0; } // allocate the constraint matrix if (numJoints || numContacts) { A = new double[Arows*Arows]; g = new double[Arows]; dcopy(Arows*Arows,&db0,0,A,1); dcopy(Arows,&db0,0,g,1); } // compute mass matrix and external forces for (bn=0;bn<numBodies;bn++) { memcpy(vl+6*bn,bodyVec[bn]->getVelocity(),6*sizeof(double)); memcpy(vlnew+6*bn,bodyVec[bn]->getVelocity(),6*sizeof(double)); memcpy(ql+7*bn,bodyVec[bn]->getPos(),7*sizeof(double)); memcpy(qnew+7*bn,bodyVec[bn]->getPos(),7*sizeof(double)); currq = qnew + 7*bn; Quaternion tmpQuat(currq[3],currq[4],currq[5],currq[6]); tmpQuat.ToRotationMatrix(Rot); // The rotation matrix returned by ToRotationMatrix is expressed as // a graphics style rot matrix (new axes are in rows), the R_N_B matrix // is a robotics style rot matrix (new axes in columns) R_N_B[0] = Rot[0]; R_N_B[3] = Rot[1]; R_N_B[6] = Rot[2]; R_N_B[1] = Rot[3]; R_N_B[4] = Rot[4]; R_N_B[7] = Rot[5]; R_N_B[2] = Rot[6]; R_N_B[5] = Rot[7]; R_N_B[8] = Rot[8]; // Jcg_N = R_N_B * Jcg_B * R_N_B'; // where Jcg_B is inertia matrix in body coords // Jcg_N is inertia matrix in world coords ? memcpy(Jcg_B,bodyVec[bn]->getInertia(),9*sizeof(double)); //multiply by mass dscal(9, bodyVec[bn]->getMass(), Jcg_B, 1); dgemm("N","N",3,3,3,1.0,R_N_B,3,Jcg_B,3,0.0,Jcg_tmp,3); dgemm("N","T",3,3,3,1.0,Jcg_tmp,3,R_N_B,3,0.0,Jcg_N,3); if ((info = invertMatrix(3,Jcg_N,Jcg_N_inv))) { printf("In iterateDynamics, inertia matrix inversion failed (info is %d)\n",info); fprintf(stderr,"%f %f %f\n",Jcg_B[0], Jcg_B[1], Jcg_B[2]); fprintf(stderr,"%f %f %f\n",Jcg_B[3], Jcg_B[4], Jcg_B[5]); fprintf(stderr,"%f %f %f\n",Jcg_B[6], Jcg_B[7], Jcg_B[8]); fprintf(stderr,"Body is %s\n",bodyVec[bn]->getName().latin1()); } currM = M+((6*bn)*Mrows + bn*6); //point to the correct block of M currM[0] = bodyVec[bn]->getMass(); currM[6*numBodies+1] = bodyVec[bn]->getMass(); currM[12*numBodies+2] = bodyVec[bn]->getMass(); fillMatrixBlock(Jcg_N,3,3,3,5,5,currM,Mrows); currM = M_i+((6*bn)*Mrows + bn*6);//point to correct block of M_i currM[0] = 1.0/bodyVec[bn]->getMass(); currM[Mrows+1] = 1.0/bodyVec[bn]->getMass(); currM[2*Mrows+2] = 1.0/bodyVec[bn]->getMass(); fillMatrixBlock(Jcg_N_inv,3,3,3,5,5,currM,Mrows); // compute external wrench // fext = [ 0 0 -9810.0*mass -[ang_vel_N x (Jcg_N * ang_vel_N)] ] //based on this, it would appear that graspit force units are N*1.0e6 fext[6*bn+2] = -9810.0 * bodyVec[bn]->getMass() * dp->gravityMultiplier; // force of gravity // fext[6*bn+2] = 0; // NO force of gravity dgemv("N",3,3,1.0,Jcg_N,3,&vl[6*bn+3],1,0.0,tmp3,1); // inertial moments fext[6*bn+3] = - (vl[6*bn+4]*tmp3[2] - vl[6*bn+5]*tmp3[1]); fext[6*bn+4] = - (vl[6*bn+5]*tmp3[0] - vl[6*bn+3]*tmp3[2]); fext[6*bn+5] = - (vl[6*bn+3]*tmp3[1] - vl[6*bn+4]*tmp3[0]); double ForcesToBodyFrame[36]; transf invBody = bodyVec[bn]->getTran().inverse(); vec3 invBodyTransl = invBody.translation(); buildForceTransform(invBody,invBodyTransl,ForcesToBodyFrame); DBGP("fext initial: "); DBGST( disp_mat(stdout,&fext[6*bn],1,6,0) ); // add any other wrenches that have accumulated on the body daxpy(6,1.0,bodyVec[bn]->getExtWrenchAcc(),1,&fext[6*bn],1); DBGP("fext with accumulated wrench: "); DBGST( disp_mat(stdout,&fext[6*bn],1,6,0) ); if (numContacts||numDOFLimits) { // k = Mv_l + hfext currM = M+((6*bn)*Mrows + bn*6); //point to the correct block of M dgemv("N",6,6,1.0,currM,Mrows,vl+6*bn,1,0.0,k+6*bn,1); } } if (numJoints) { int ncn = 0; int hcn = 0; for (i=0;i<numBodies;i++) { if (bodyVec[i]->getDynJoint()) bodyVec[i]->getDynJoint()-> buildConstraints(Nu,eps,numBodies,islandIndices,ncn); } for (i=0;i<numRobots;i++) { robotVec[i]->buildDOFLimitConstraints(islandIndices,numBodies,H,g,hcn); robotVec[i]->buildDOFCouplingConstraints(islandIndices,numBodies,Nu,eps,ncn); } for (i=0;i<Nucols;i++) { eps[i] *= ERP/h; } for (i=0; i<hcn; i++) { g[i] *= ERP/h; } } // add contacts to the LCP if (!contactList.empty()) { DBGP("processing contacts"); double Ftform_N_C[36]; // A is square double *Wn = &H[numDOFLimits*Hrows]; double *D = &H[(numDOFLimits+numContacts)*Hrows]; double *E = &A[(numDOFLimits+numContacts+Dcols)*Arows + numDOFLimits+numContacts]; double *negET = &A[(numDOFLimits+numContacts)*Arows + numDOFLimits+numContacts+Dcols]; double *MU = &A[numDOFLimits*Arows + numDOFLimits+numContacts+Dcols]; double *contactEps = &g[numDOFLimits]; int frictionEdgesCount = 0; for (cp=contactList.begin(),cn=0; cp!=contactList.end(); cp++,cn++){ //DBGP("contact " << cn); transf cf = (*cp)->getContactFrame() * (*cp)->getBody1Tran(); transf cf2 = (*cp)->getMate()->getContactFrame() * (*cp)->getBody2Tran(); DBGP("CONTACT DISTANCE: " << (cf.translation() - cf2.translation()).len()); if (useContactEps) { contactEps[cn] = MIN(0.0,-ERP/h * (Contact::THRESHOLD/2.0 - (cf.translation() - cf2.translation()).len())); } DBGP(" EPS: " << contactEps[cn]); vec3 normal(cf.affine().element(2,0), cf.affine().element(2,1), cf.affine().element(2,2)); // find which body is this contact from for (bn=0;bn<numBodies;bn++) if ((*cp)->getBody1() == bodyVec[bn]) break; if (bn<numBodies) { //????? this doesn't seem correct vec3 radius = cf.translation() - ( bodyVec[bn]->getCoG() * (*cp)->getBody1Tran() - position::ORIGIN ); // radius = radius / 1000.0; // convert to meters vec3 RcrossN = radius * normal; DBGP("body1 normal: " << normal); DBGP("body1 radius: " << radius); Wn[cn*Hrows+6*bn] = normal.x(); Wn[cn*Hrows+6*bn+1] = normal.y(); Wn[cn*Hrows+6*bn+2] = normal.z(); Wn[cn*Hrows+6*bn+3] = RcrossN.x(); Wn[cn*Hrows+6*bn+4] = RcrossN.y(); Wn[cn*Hrows+6*bn+5] = RcrossN.z(); vec3 bodyOrigin = bodyVec[bn]->getCoG() * (*cp)->getBody1Tran() - position::ORIGIN; buildForceTransform(cf,bodyOrigin,Ftform_N_C); /* dgemm("N","N", 6,Contact::numFrictionEdges,6, 1.0,Ftform_N_C,6, Contact::frictionEdges,6, 0.0,&D[Contact::numFrictionEdges*cn*Hrows+6*bn],Hrows); */ dgemm("N","N", 6,(*cp)->numFrictionEdges,6, //m, n, k 1.0,Ftform_N_C,6, //alfa, A, lda (*cp)->frictionEdges,6, //B, ldb 0.0,&D[ frictionEdgesCount*Hrows+6*bn],Hrows); //beta, C, ldc } //find the other body for(bn=0;bn<numBodies;bn++) if ((*cp)->getBody2() == bodyVec[bn]) break; if (bn<numBodies) { //normal = vec3(cf2.affine().element(2,0), cf2.affine().element(2,1),cf2.affine().element(2,2)); normal = -normal; //vec3 radius = cf2.translation() - (bodyVec[bn]->getCoG() * (*cp)->getBody2Tran() - position::ORIGIN); vec3 radius = cf.translation() - (bodyVec[bn]->getCoG() * (*cp)->getBody2Tran() - position::ORIGIN); vec3 RcrossN = radius * normal; DBGP("body2 normal: " << normal); DBGP("body2 radius: " << radius); Wn[cn*Hrows+6*bn] = normal.x(); Wn[cn*Hrows+6*bn+1] = normal.y(); Wn[cn*Hrows+6*bn+2] = normal.z(); Wn[cn*Hrows+6*bn+3] = RcrossN.x(); Wn[cn*Hrows+6*bn+4] = RcrossN.y(); Wn[cn*Hrows+6*bn+5] = RcrossN.z(); vec3 bodyOrigin = bodyVec[bn]->getCoG()*(*cp)->getBody2Tran() - position::ORIGIN; buildForceTransform(cf,bodyOrigin,Ftform_N_C); //buildForceTransform(cf2,bodyOrigin,Ftform_N_C); /* dgemm("N","N",6,Contact::numFrictionEdges,6,-1.0,Ftform_N_C,6, Contact::frictionEdges,6, 0.0,&D[Contact::numFrictionEdges*cn*Hrows+6*bn],Hrows);*/ //original graspit had a -1.0 here in front of Ftform_N_C dgemm("N","N", 6,(*cp)->numFrictionEdges,6, -1.0,Ftform_N_C,6, (*cp)->frictionEdges,6, 0.0,&D[ frictionEdgesCount*Hrows+6*bn ],Hrows); } //for (i=cn*Contact::numFrictionEdges; i<(cn+1)*Contact::numFrictionEdges; i++) { for (i=frictionEdgesCount; i<frictionEdgesCount+(*cp)->numFrictionEdges; i++) { E[cn*Arows+i] = 1.0; negET[i*Arows+cn] = -1.0; } MU[cn*Arows + cn] = (*cp)->getCof(); frictionEdgesCount += (*cp)->numFrictionEdges; } } if (numContacts || numDOFLimits) daxpy(Mrows,h,fext,1,k,1); if (numJoints && (numContacts || numDOFLimits)) { // Cnu1 = INV(Nu'M_iNu)Nu'M_iH // Cnu2 = INV(Nu'M_iNu)(Nu'M_ik-eps) // v1 = -NuCnu1 // v2 = -NuCnu2 NutM_i = new double[Nucols*Mrows]; NutM_iNu = new double[Nucols*Nucols]; INVNutM_iNu = new double[Nucols*Nucols]; INVNutM_iNuNut = new double[Nucols*Nurows]; INVNutM_iNuNutM_i = new double[Nucols*Mrows]; INVNutM_iNuNutM_iH = new double[Nucols*Hcols]; NutM_ikminuseps = new double[Nucols]; INVNutM_iNuNutM_ikminuseps = new double[Nucols]; dgemm("T","N",Nucols,Mrows,Mrows,1.0,Nu,Nurows,M_i,Mrows, 0.0,NutM_i,Nucols); dgemm("N","N",Nucols,Nucols,Mrows,1.0,NutM_i,Nucols,Nu,Nurows, 0.0,NutM_iNu,Nucols); if ((info = invertMatrix(Nucols,NutM_iNu,INVNutM_iNu))) printf("In iterateDynamics, NutM_iNu matrix inversion failed (info is %d)\n",info); dgemm("N","T",Nucols,Nurows,Nucols,1.0,INVNutM_iNu,Nucols,Nu,Nurows, 0.0,INVNutM_iNuNut,Nucols); dgemm("N","N",Nucols,Mrows,Mrows,1.0,INVNutM_iNuNut,Nucols,M_i,Mrows, 0.0,INVNutM_iNuNutM_i,Nucols); dgemm("N","N",Nucols,Hcols,Mrows,1.0,INVNutM_iNuNutM_i,Nucols,H,Hrows, 0.0,INVNutM_iNuNutM_iH,Nucols); dgemm("N","N",Nurows,Hcols,Nucols,-1.0,Nu,Nurows,INVNutM_iNuNutM_iH,Nucols, 0.0,v1,Nurows); dgemv("N",Nucols,Mrows,1.0,NutM_i,Nucols,k,1,0.0,NutM_ikminuseps,1); daxpy(Nucols,-1.0,eps,1,NutM_ikminuseps,1); dgemv("N",Nucols,Nucols,1.0,INVNutM_iNu,Nucols,NutM_ikminuseps,1, 0.0,INVNutM_iNuNutM_ikminuseps,1); dgemv("N",Nurows,Nucols,-1.0,Nu,Nurows,INVNutM_iNuNutM_ikminuseps,1, 0.0,v2,1); } if (numContacts || numDOFLimits) { // in the simple case without joint constraints // A = H'M_iv1 + N // g = H'M_iv2 // where N is already stored in A // v1 is the first term of v_(l+1) and v2 is the second term // v_l+1 = M_i(v1 lambda + v2) = M_i(H lambda + k) // k is (Mv_l + hfext) //add H to v1 //add k to v2 DBGP("k:"); DBGST( disp_mat(stdout,k,1,Mrows,0) ); DBGP("first g:"); DBGST( disp_mat(stdout,g,1,Arows,0) ); daxpy(Mrows*Hcols,1.0,H,1,v1,1); daxpy(Mrows,1.0,k,1,v2,1); // build A and g HtM_i = new double[Hcols*Mrows]; dgemm("T","N",Hcols,Mrows,Hrows,1.0,H,Hrows,M_i,Mrows,0.0,HtM_i,Hcols); dgemm("N","N",Hcols,Hcols,Mrows,1.0,HtM_i,Hcols,v1,Mrows,1.0,A,Arows); // dgemv("N",Hcols,Mrows,1.0,HtM_i,Hcols,v2,1,0.0,g,1); dgemv("N",Hcols,Mrows,1.0,HtM_i,Hcols,v2,1,1.0,g,1); } int frictionEdgesCount; //debug information; can be removed if (numContacts || numDOFLimits) { bool lemkePredict = false; if (lemkePredict) { //try to use information from previous time steps to guess a good starting basis for Lemke's algorithm assembleLCPPrediction(lambda, Arows, numDOFLimits, &contactList); predLambda = new double[Arows]; // keep a copy of the prediction so we can check it later dcopy(Arows, lambda, 1, predLambda, 1); // fprintf(stderr,"Prediction: \n"); // printLCPBasis(predLambda, Arows, numDOFLimits, numContacts); } // double startTime; // startTime = getTime(); DBGP("g:"); DBGST( for (i=0;i<Arows;i++) printf("%le ",g[i]); );
NLuint nlSolve_GMRES() { NLdouble* b = nlCurrentContext->b ; NLdouble* x = nlCurrentContext->x ; NLdouble eps = nlCurrentContext->threshold ; NLint max_iter = nlCurrentContext->max_iterations ; NLint n = nlCurrentContext->n ; NLint m = nlCurrentContext->inner_iterations ; typedef NLdouble *NLdoubleP; NLdouble *V = NL_NEW_ARRAY(NLdouble, n*(m+1) ) ; NLdouble *U = NL_NEW_ARRAY(NLdouble, m*(m+1)/2 ) ; NLdouble *r = NL_NEW_ARRAY(NLdouble, n ) ; NLdouble *y = NL_NEW_ARRAY(NLdouble, m+1 ) ; NLdouble *c = NL_NEW_ARRAY(NLdouble, m ) ; NLdouble *s = NL_NEW_ARRAY(NLdouble, m ) ; NLdouble **v = NL_NEW_ARRAY(NLdoubleP, m+1 ) ; NLdouble * Ax = NL_NEW_ARRAY(NLdouble,nlCurrentContext->n); NLdouble accu =0.0; NLint i, j, io, uij, u0j ; NLint its = -1 ; NLdouble beta, h, rd, dd, nrm2b ; for ( i=0; i<=m; ++i ){ v[i]=V+i*n ; } nrm2b=dnrm2(n,b,1); io=0; do { /* outer loop */ ++io; nlCurrentContext->matrix_vector_prod(x,r); daxpy(n,-1.,b,1,r,1); beta=dnrm2(n,r,1); dcopy(n,r,1,v[0],1); dscal(n,1./beta,v[0],1); y[0]=beta; j=0; uij=0; do { /* inner loop: j=0,...,m-1 */ u0j=uij; nlCurrentContext->matrix_vector_prod(v[j],v[j+1]); dgemv( Transpose,n,j+1,1.,V,n,v[j+1],1,0.,U+u0j,1 ); dgemv( NoTranspose,n,j+1,-1.,V,n,U+u0j,1,1.,v[j+1],1 ); h=dnrm2(n,v[j+1],1); dscal(n,1./h,v[j+1],1); for (i=0; i<j; ++i ) { /* rotiere neue Spalte */ double tmp = c[i]*U[uij]-s[i]*U[uij+1]; U[uij+1] = s[i]*U[uij]+c[i]*U[uij+1]; U[uij] = tmp; ++uij; } { /* berechne neue Rotation */ rd = U[uij]; dd = sqrt(rd*rd+h*h); c[j] = rd/dd; s[j] = -h/dd; U[uij] = dd; ++uij; } { /* rotiere rechte Seite y (vorher: y[j+1]=0) */ y[j+1] = s[j]*y[j]; y[j] = c[j]*y[j]; } ++j; } while ( j<m && fabs(y[j])>=eps*nrm2b ) ; { /* minimiere bzgl Y */ dtpsv( UpperTriangle, NoTranspose, NotUnitTriangular, j,U,y,1 ); /* correct X */ dgemv(NoTranspose,n,j,-1.,V,n,y,1,1.,x,1); } } while ( fabs(y[j])>=eps*nrm2b && (m*(io-1)+j) < max_iter); /* Count the inner iterations */ its = m*(io-1)+j; nlCurrentContext->matrix_vector_prod(x,Ax); for(i = 0 ; i < n ; ++i) accu+=(Ax[i]-b[i])*(Ax[i]-b[i]); printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/nrm2b); NL_DELETE_ARRAY(Ax); NL_DELETE_ARRAY(V) ; NL_DELETE_ARRAY(U) ; NL_DELETE_ARRAY(r) ; NL_DELETE_ARRAY(y) ; NL_DELETE_ARRAY(c) ; NL_DELETE_ARRAY(s) ; NL_DELETE_ARRAY(v) ; return its; }
int main (int argc, char** argv) /* ------------------------------------------------------------------------- * * Wrapper. * ------------------------------------------------------------------------- */ { char buf[STR_MAX], fields[STR_MAX], fmt[STR_MAX]; int i, j, n, np, nz, nel, swab = 0; int nfields, nplane; FILE *fp_in = stdin, *fp_out = stdout; double **idata, **odata, *vcmpt1, *vcmpt2; getargs (argc, argv, &fp_in); format (fmt); while (fgets (buf, STR_MAX, fp_in)) { /* -- Process header. */ fputs (buf, fp_out); fgets (buf, STR_MAX, fp_in); fputs (buf, fp_out); fgets (buf, STR_MAX, fp_in); if (sscanf (buf, "%d%*s%d%d", &np, &nz, &nel) != 3) message (prog, "unable to read the file size", ERROR); if (nz != 2) { sprintf (fields, "input must have nz = 2 (here %1d)", nz); message (prog, fields, ERROR); } fprintf (fp_out, hdr_fmt[2], np, np, 1, nel); n = 6; while (--n) { fgets (buf, STR_MAX, fp_in); fputs (buf, fp_out); } fgets(fields, STR_MAX, fp_in); memset(fields+25, '\0', STR_MAX-25); for (nfields = 0, i = 0; i < 25; i++) if (isalpha(fields[i])) nfields++; if (!((nfields == 4) && (strstr (fields, "uvwp")))) message (prog, "input must have only fields u v w p.", ERROR); fprintf (fp_out, hdr_fmt[8], "uvwpABCDEF"); fgets (buf, STR_MAX, fp_in); for (i = 0; i < strlen (buf); i++) buf[i] = tolower (buf[i]); if (!strstr(buf, "binary")) message (prog, "input file not binary format", ERROR); if (!strstr (buf, "endian")) message (prog, "input field file in unknown binary format", WARNING); else swab = ((strstr (buf, "big") && strstr (fmt, "little")) || (strstr (fmt, "big") && strstr (buf, "little")) ); sprintf (buf, "%s %s", "binary", fmt); fprintf (fp_out, hdr_fmt[9], buf); /* -- Set sizes, allocate storage, set to zero. */ nplane = np * np * nel; idata = dmatrix (0, 3, 0, nplane * 2); odata = dmatrix (0, 9, 0, nplane); /* -- uvwpABCDEF = 10 */ dzero (4*nplane, idata[0], 1); dzero (10*nplane, odata[0], 1); /* -- Read in all data fields. */ for (i = 0; i < nfields; i++) { if (fread (idata[i], sizeof (double), nplane * 2, fp_in) != nplane * 2) message (prog, "an error occured while reading", ERROR); if (swab) dbrev (nplane*2, idata[i], 1, idata[i], 1); } /* -- Compute A. */ vcmpt1 = idata[0]; /* -- Real part of u. */ vcmpt2 = idata[0] + nplane; /* -- Imag part of u. */ dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[4], 1, odata[4], 1); dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[4], 1, odata[4], 1); dscal (nplane, 2.0, odata[4], 1); /* -- Compute B . */ vcmpt1 = idata[0]; /* -- Real part of u. */ vcmpt2 = idata[1]; /* -- Real part of v. */ dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[5], 1, odata[5], 1); vcmpt1 = idata[0] + nplane; /* -- Imag part of u. */ vcmpt2 = idata[1] + nplane; /* -- Imag part of v. */ dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[5], 1, odata[5], 1); dscal (nplane, 2.0, odata[5], 1); /* -- Compute C. */ vcmpt1 = idata[1]; /* -- Real part of v. */ vcmpt2 = idata[1] + nplane; /* -- Imag part of v. */ dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[6], 1, odata[6], 1); dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[6], 1, odata[6], 1); dscal (nplane, 2.0, odata[6], 1); /* -- Compute D . */ vcmpt1 = idata[0]; /* -- Real part of u. */ vcmpt2 = idata[2]; /* -- Real part of w. */ dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[7], 1, odata[7], 1); vcmpt1 = idata[0] + nplane; /* -- Imag part of u. */ vcmpt2 = idata[2] + nplane; /* -- Imag part of w. */ dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[7], 1, odata[7], 1); dscal (nplane, 2.0, odata[7], 1); /* -- Compute E . */ vcmpt1 = idata[1]; /* -- Real part of v. */ vcmpt2 = idata[2]; /* -- Real part of w. */ dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[8], 1, odata[8], 1); vcmpt1 = idata[1] + nplane; /* -- Imag part of v. */ vcmpt2 = idata[2] + nplane; /* -- Imag part of w. */ dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[8], 1, odata[8], 1); dscal (nplane, 2.0, odata[8], 1); /* -- Compute F. */ vcmpt1 = idata[2]; /* -- Real part of w. */ vcmpt2 = idata[2] + nplane; /* -- Imag part of w. */ dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[9], 1, odata[9], 1); dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[9], 1, odata[9], 1); dscal (nplane, 2.0, odata[9], 1); /* -- Compute p. */ vcmpt1 = idata[3]; /* -- Real part of p. */ vcmpt2 = idata[3] + nplane; /* -- Imag part of p. */ dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[2], 1, odata[2], 1); dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[2], 1, odata[2], 1); dvsqrt (nplane, odata[2], 1, odata[2], 1); /* -- Compute u & v. */ vcmpt1 = idata[0]; /* -- Real part of u. */ vcmpt2 = idata[0] + nplane; /* -- Imag part of u. */ dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[0], 1, odata[0], 1); dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[0], 1, odata[0], 1); dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[1], 1, odata[1], 1); dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[1], 1, odata[1], 1); vcmpt1 = idata[1]; /* -- Real part of v. */ vcmpt2 = idata[1] + nplane; /* -- Imag part of v. */ dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[0], 1, odata[0], 1); dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[0], 1, odata[0], 1); dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[1], 1, odata[1], 1); dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[1], 1, odata[1], 1); vcmpt1 = idata[1]; /* -- Real part of w. */ vcmpt2 = idata[1] + nplane; /* -- Imag part of w. */ dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[1], 1, odata[1], 1); dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[1], 1, odata[1], 1); dvsqrt (nplane, odata[0], 1, odata[0], 1); dsmul (nplane, 0.5, odata[0], 1, odata[0], 1); dvsqrt (nplane, odata[1], 1, odata[1], 1); dsmul (nplane, 0.5, odata[1], 1, odata[1], 1); /* FIXME: compute w if needed in future. */ /* -- Write out uvwpABCDEF in binary. */ for (i = 0; i < 10; i++) if (fwrite (odata[i], sizeof (double), nplane, fp_out) != nplane) message (prog, "an error occured while writing", ERROR); freeDmatrix (idata, 0, 0); freeDmatrix (odata, 0, 0); } return EXIT_SUCCESS; }
int dgefa ( double a[], int lda, int n, int ipvt[] ) /******************************************************************************/ /* Purpose: DGEFA factors a real general matrix. Modified: 16 May 2005 Author: C version by John Burkardt. Reference: Jack Dongarra, Cleve Moler, Jim Bunch and Pete Stewart, LINPACK User's Guide, SIAM, (Society for Industrial and Applied Mathematics), 3600 University City Science Center, Philadelphia, PA, 19104-2688. ISBN 0-89871-172-X Parameters: Input/output, double A[LDA*N]. On intput, the matrix to be factored. On output, an upper triangular matrix and the multipliers used to obtain it. The factorization can be written A=L*U, where L is a product of permutation and unit lower triangular matrices, and U is upper triangular. Input, int LDA, the leading dimension of A. Input, int N, the order of the matrix A. Output, int IPVT[N], the pivot indices. Output, int DGEFA, singularity indicator. 0, normal value. K, if U(K,K) == 0. This is not an error condition for this subroutine, but it does indicate that DGESL or DGEDI will divide by zero if called. Use RCOND in DGECO for a reliable indication of singularity. */ { int info; int j; int k; int l; double t; /* Gaussian elimination with partial pivoting. */ info = 0; for ( k = 1; k <= n-1; k++ ) { /* Find L = pivot index. */ l = idamax ( n-k+1, a+(k-1)+(k-1)*lda, 1 ) + k - 1; ipvt[k-1] = l; /* Zero pivot implies this column already triangularized. */ if ( a[l-1+(k-1)*lda] == 0.0 ) { info = k; continue; } /* Interchange if necessary. */ if ( l != k ) { t = a[l-1+(k-1)*lda]; a[l-1+(k-1)*lda] = a[k-1+(k-1)*lda]; a[k-1+(k-1)*lda] = t; } /* Compute multipliers. */ t = -1.0 / a[k-1+(k-1)*lda]; dscal ( n-k, t, a+k+(k-1)*lda, 1 ); /* Row elimination with column indexing. */ for ( j = k+1; j <= n; j++ ) { t = a[l-1+(j-1)*lda]; if ( l != k ) { a[l-1+(j-1)*lda] = a[k-1+(j-1)*lda]; a[k-1+(j-1)*lda] = t; } daxpy ( n-k, t, a+k+(k-1)*lda, 1, a+k+(j-1)*lda, 1 ); } } ipvt[n-1] = n; if ( a[n-1+(n-1)*lda] == 0.0 ) { info = n; } return info; }
void dqrdc(double a[], int lda, int n, int p, double qraux[], int jpvt[], double work[], int job) /******************************************************************************/ /* Purpose: DQRDC computes the QR factorization of a real rectangular matrix. Discussion: DQRDC uses Householder transformations. Column pivoting based on the 2-norms of the reduced columns may be performed at the user's option. Licensing: This code is distributed under the GNU LGPL license. Modified: 07 June 2005 Author: C version by John Burkardt. Reference: Jack Dongarra, Cleve Moler, Jim Bunch and Pete Stewart, LINPACK User's Guide, SIAM, (Society for Industrial and Applied Mathematics), 3600 University City Science Center, Philadelphia, PA, 19104-2688. ISBN 0-89871-172-X Parameters: Input/output, double A(LDA,P). On input, the N by P matrix whose decomposition is to be computed. On output, A contains in its upper triangle the upper triangular matrix R of the QR factorization. Below its diagonal A contains information from which the orthogonal part of the decomposition can be recovered. Note that if pivoting has been requested, the decomposition is not that of the original matrix A but that of A with its columns permuted as described by JPVT. Input, int LDA, the leading dimension of the array A. LDA must be at least N. Input, int N, the number of rows of the matrix A. Input, int P, the number of columns of the matrix A. Output, double QRAUX[P], contains further information required to recover the orthogonal part of the decomposition. Input/output, integer JPVT[P]. On input, JPVT contains integers that control the selection of the pivot columns. The K-th column A(*,K) of A is placed in one of three classes according to the value of JPVT(K). > 0, then A(K) is an initial column. = 0, then A(K) is a free column. < 0, then A(K) is a final column. Before the decomposition is computed, initial columns are moved to the beginning of the array A and final columns to the end. Both initial and final columns are frozen in place during the computation and only free columns are moved. At the K-th stage of the reduction, if A(*,K) is occupied by a free column it is interchanged with the free column of largest reduced norm. JPVT is not referenced if JOB == 0. On output, JPVT(K) contains the index of the column of the original matrix that has been interchanged into the K-th column, if pivoting was requested. Workspace, double WORK[P]. WORK is not referenced if JOB == 0. Input, int JOB, initiates column pivoting. 0, no pivoting is done. nonzero, pivoting is done. */ { int jp; int j; int lup; int maxj; double maxnrm, nrmxl, t, tt; int pl = 1, pu = 0; /* If pivoting is requested, rearrange the columns. */ if (job != 0) { for (j = 1; j <= p; j++) { int swapj = (0 < jpvt[j - 1]); jpvt[j - 1] = (jpvt[j - 1] < 0) ? -j : j; if (swapj) { if (j != pl) dswap(n, a + 0 + (pl - 1)*lda, 1, a + 0 + (j - 1), 1); jpvt[j - 1] = jpvt[pl - 1]; jpvt[pl - 1] = j; pl++; } } pu = p; for (j = p; 1 <= j; j--) { if (jpvt[j - 1] < 0) { jpvt[j - 1] = -jpvt[j - 1]; if (j != pu) { dswap(n, a + 0 + (pu - 1)*lda, 1, a + 0 + (j - 1)*lda, 1); jp = jpvt[pu - 1]; jpvt[pu - 1] = jpvt[j - 1]; jpvt[j - 1] = jp; } pu = pu - 1; } } } /* Compute the norms of the free columns. */ for (j = pl; j <= pu; j++) qraux[j - 1] = dnrm2(n, a + 0 + (j - 1) * lda, 1); for (j = pl; j <= pu; j++) work[j - 1] = qraux[j - 1]; /* Perform the Householder reduction of A. */ lup = i4_min(n, p); for (int l = 1; l <= lup; l++) { /* Bring the column of largest norm into the pivot position. */ if (pl <= l && l < pu) { maxnrm = 0.0; maxj = l; for (j = l; j <= pu; j++) { if (maxnrm < qraux[j - 1]) { maxnrm = qraux[j - 1]; maxj = j; } } if (maxj != l) { dswap(n, a + 0 + (l - 1)*lda, 1, a + 0 + (maxj - 1)*lda, 1); qraux[maxj - 1] = qraux[l - 1]; work[maxj - 1] = work[l - 1]; jp = jpvt[maxj - 1]; jpvt[maxj - 1] = jpvt[l - 1]; jpvt[l - 1] = jp; } } /* Compute the Householder transformation for column L. */ qraux[l - 1] = 0.0; if (l != n) { nrmxl = dnrm2(n - l + 1, a + l - 1 + (l - 1) * lda, 1); if (nrmxl != 0.0) { if (a[l - 1 + (l - 1)*lda] != 0.0) nrmxl = nrmxl * r8_sign(a[l - 1 + (l - 1) * lda]); dscal(n - l + 1, 1.0 / nrmxl, a + l - 1 + (l - 1)*lda, 1); a[l - 1 + (l - 1)*lda] = 1.0 + a[l - 1 + (l - 1) * lda]; /* Apply the transformation to the remaining columns, updating the norms. */ for (j = l + 1; j <= p; j++) { t = -ddot(n - l + 1, a + l - 1 + (l - 1) * lda, 1, a + l - 1 + (j - 1) * lda, 1) / a[l - 1 + (l - 1) * lda]; daxpy(n - l + 1, t, a + l - 1 + (l - 1)*lda, 1, a + l - 1 + (j - 1)*lda, 1); if (pl <= j && j <= pu) { if (qraux[j - 1] != 0.0) { tt = 1.0 - pow(r8_abs(a[l - 1 + (j - 1) * lda]) / qraux[j - 1], 2); tt = r8_max(tt, 0.0); t = tt; tt = 1.0 + 0.05 * tt * pow(qraux[j - 1] / work[j - 1], 2); if (tt != 1.0) qraux[j - 1] = qraux[j - 1] * sqrt(t); else { qraux[j - 1] = dnrm2(n - l, a + l + (j - 1) * lda, 1); work[j - 1] = qraux[j - 1]; } } } } /* Save the transformation. */ qraux[l - 1] = a[l - 1 + (l - 1) * lda]; a[l - 1 + (l - 1)*lda] = -nrmxl; } } } }
/* This function uses m-estimators to estimate a three vector from a cloud of points in n-space. The approach is comparable to computing the center of mass of cloud of equal mass particles. We use a component median for the initial estimate of center, then compute the m-estimator based on a Rayleigh distribution keyed on the l2 norm amplitude of the vector residual. This is comparable to the estimation of phase in robust estimates of transfer functions described by Chave and Thomson as we use a penalty function based on a Rayleigh distribution. It is not clear to the author if this is valid for other than 2 vectors, but it surely is not a bad approximation for 3-vectors. Higher order spaces should use this with care. Arguments: v - n by nv matrix containing ensemble of data n-vectors. These are assumed stored as in the blas in a pseudofortran sense as a continuous vector of floats with columns sequential (i.e. first column of v is elements v[0], v[1], ... , v[n-1] and second column starts at v[n]. n - length of vectors (number of rows in v) nv - number of vectors (number of columns in v) mode - switch (see below) minscale - minimum error scale allowed. Use depends on setting of the mode variable with which it is associated. Scale factor is determined here from interquartiles. mode is as defined in M_estimator_float above using symbols SCALE_IQ_RELATIVE and SCALE_IQ_ABSOLUTE. The former uses a relative scale derived from the scale factor determined on the first pass. That is, the minimum scale alllowed is vmag0*minscale (e.g. 0.01 would limit the minimum scale to 1% of the magnitude of the total n vector initial estimate. Absolute scaling uses the limit straight up. mean - vector of length n to hold result. Blindly assumed to be already allocated of length n. weight - vector of length to hold final weights used in robust estimation. Function returns a 3 vector it estimates from v. This is an array alloced in this function that needs to be managed externally. Author: G Pavlis Written: January 2000 */ #define EPSILON 0.01 /* convergence parameter*/ #define HUBER_LIMIT 2 /* Number of Huber weight iterations */ #define THOMPSON_LIMIT 25 /* Limit on number of iterations using Thompson formula */ #define MIN_DGF 10 /* I've seen the Thompson formula work badly with small degrees of freedom. When dgf are less than this the Thomson section is skipped */ void M_estimator_double_n_vector(double *v, int n, int nv, int mode, double minscale, double *mean, double *weight) { double *col, *row; /* work spaces for columns and rows respectively*/ double sum_weights; double *residuals; double *delta_mean; int i, j, iteration; double vmag,dvmag; double scale, fminsc; int ndgf; MW_scalar_statistics stats; double beta; allot(double *,col,n); allot(double *,row,nv); allot(double *,delta_mean, n); allot(double *,residuals,n*nv); /* We compute component medians to obtain initial estimate of vector */ for(i=0;i<n;++i) { dcopy(nv,v+i,n,row,1); stats=MW_calc_statistics_double(row,nv); mean[i] = stats.median; } /* We first do a few passes with the huber formula which is less aggressive on outliers, but helps establish a solid value for the scale factor.*/ iteration =0; vmag = dnrm2(n,mean,1); for(j=0;j<nv;++j) weight[j] = 1.0; /* done to make sure scale is computed correctly on first pass*/ if(mode==IQ_SCALE_RELATIVE) if(vmag<FLT_EPSILON) fminsc= minscale; else fminsc = vmag*minscale; else fminsc = minscale; do { compute_nvector_residuals(v,n,nv,residuals,mean); /* This produces weighted residuals -- requires weighting formula to use weights 0<w<1 */ for(j=0;j<nv;++j) dscal(n,weight[i],residuals+j,1); scale = compute_nvector_scale(residuals,n,nv,row); if(scale < fminsc) scale = fminsc; for(i=0;i<n;++i) delta_mean[i] = 0.0; for(j=0,sum_weights=0.0;j<nv;++j) { dcopy(n,residuals+j*n,1,col,1); dvmag = dnrm2(n,col,1); weight[j] = dhuber(dvmag/scale); daxpy(n,weight[j],col,1,delta_mean,1); sum_weights += weight[j]; } dscal(n,1.0/sum_weights,delta_mean,1); dvmag = dnrm2(n,delta_mean,1); for(i=0;i<n;++i) mean[i] += delta_mean[i]; ++iteration; }while( ((dvmag/scale) > EPSILON) && (iteration < HUBER_LIMIT) ); /* Now we use Thomson's redescending formula which is the opposite of the huber formula being extremely aggressive on outliers and works only if the scale factor is not too out of line. It also works badly with low degrees of freedom. Consequently, we return immediately when degrees of freedom are below a frozen threshold*/ ndgf = nv - n; if(ndgf<MIN_DGF) { free(col); free(row); free(delta_mean); free(residuals); return; } /* This is the value of beta recommended by chave and thomson, 1987, based on the nvth quantile of the Rayleigh distribution. I use number of degrees of freedom here instead to perhaps more properly work with higher order spaces, but use a minimum on ndgf to avoid making the formula unstable. The thomson formula becomes exponential-like with low degrees of freedom, which we need to avoid. */ beta = sqrt(2.0*log(2.0*((double)ndgf))); iteration = 0; do { compute_nvector_residuals(v,n,nv,residuals,mean); for(i=0;i<n;++i) delta_mean[i] = 0.0; for(j=0;j<nv;++j) { dcopy(n,residuals+j*n,1,col,1); dvmag = dnrm2(n,col,1); weight[j] = dthomson(dvmag/scale,beta); daxpy(n,weight[j],col,1,delta_mean,1); sum_weights += weight[j]; } dscal(n,1.0/sum_weights,delta_mean,1); dvmag = dnrm2(n,delta_mean,1); for(i=0;i<n;++i) mean[i] += delta_mean[i]; ++iteration; }while( ((dvmag/scale) > EPSILON) && (iteration < THOMPSON_LIMIT) ); free(col); free(row); free(delta_mean); free(residuals); }