NLuint nlSolve_CG() {
    NLdouble* b        = nlCurrentContext->b ;
    NLdouble* x        = nlCurrentContext->x ;
    NLdouble  eps      = nlCurrentContext->threshold ;
    NLuint    max_iter = nlCurrentContext->max_iterations ;
    NLint     N        = nlCurrentContext->n ;

    NLdouble *g = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble *r = NL_NEW_ARRAY(NLdouble, N) ; 
    NLdouble *p = NL_NEW_ARRAY(NLdouble, N) ;
    NLuint its=0;
    NLint i;
    NLdouble t, tau, sig, rho, gam;
    NLdouble b_square=ddot(N,b,1,b,1);
    NLdouble err=eps*eps*b_square;
    NLdouble accu =0.0;
    NLdouble * Ax=NL_NEW_ARRAY(NLdouble,nlCurrentContext->n);
    NLdouble curr_err;
    
    nlCurrentContext->matrix_vector_prod(x,g);
    daxpy(N,-1.,b,1,g,1);
    dscal(N,-1.,g,1);
    dcopy(N,g,1,r,1);
    curr_err = ddot(N,g,1,g,1);
    while ( curr_err >err && its < max_iter) {
	    if(!(its % 100)) {
	        printf ( "%d : %.10e -- %.10e\n", its, curr_err, err ) ;
	    }
        nlCurrentContext->matrix_vector_prod(r,p);
        rho=ddot(N,p,1,p,1);
        sig=ddot(N,r,1,p,1);
        tau=ddot(N,g,1,r,1);
        t=tau/sig;
        daxpy(N,t,r,1,x,1);
        daxpy(N,-t,p,1,g,1);
        gam=(t*t*rho-tau)/tau;
        dscal(N,gam,r,1);
        daxpy(N,1.,g,1,r,1);
        ++its;
        curr_err = ddot(N,g,1,g,1); 
    }
    nlCurrentContext->matrix_vector_prod(x,Ax);
    for(i = 0 ; i < N ; ++i)
        accu+=(Ax[i]-b[i])*(Ax[i]-b[i]);
    printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square));
    NL_DELETE_ARRAY(Ax);
    NL_DELETE_ARRAY(g) ;
    NL_DELETE_ARRAY(r) ;
    NL_DELETE_ARRAY(p) ;
    return its;
} 
Exemplo n.º 2
0
/* Prepare the matrices for the lcp call

   pA = [-M -1 q]

*/
PT_Matrix lcp_Matrix_Init(ptrdiff_t m, double *M, double *q)
{
	ptrdiff_t i,n,incx;
	double tmp;

	PT_Matrix pA;
      
	/* Build column-major matrix A = [-M -1 q] */
	pA = Matrix_Init(m,m+2,"A");

	/* A(:,1:m) = M */
	memcpy(pMAT(pA), M, m*m*sizeof(double));

	/* A(:,1:m) = -A(:,1:m) */
	tmp  = -1.0;
	incx = 1;
	n = m*m;
	dscal(&n, &tmp, pMAT(pA), &incx);

	/* A(:,m+1) = -1 */
	for(i=0;i<m;i++)
		C_SEL(pA,i,m) = -1.0;

	/* A(:,m+2) = q */
	memcpy(&(C_SEL(pA,0,m+1)),q,m*sizeof(double));

	return pA;
}
Exemplo n.º 3
0
void Update_lbfgs
   (int   n,               /* I  num unknowns              */
    int   t,               /* I  num vectors to store      */
    real  *s_vec,          /* I  x_new - x_old             */
    real  *y_vec,          /* I  g_new - g_old             */
    real  *Bs_vec,         /* IO scratch space             */
    int   *NUPDT,          /* IO num updates made to B/H   */
    real  *CMPS,           /* IO storage for t s_vecs      */
    real  *CMPY)           /* IO storage for t y_vecs      */
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*   This routine updates the limited memory BFGS approximations for
*   the Hessian B and its inverse H.  The update uses s_vec and
*   y_vec, whose product should be negative to preserve the positive
*   definiteness of the BFGS approximations.  If s'y is not a descent
*   direction, then y is "damped", an idea due to Powell.
*   Then y_vec becomes:
*        y_damped = alpha*y + (1-alpha)*Bs
*
*                                      0.8*s'Bs
*                       where alpha = ----------
*                                     s'Bs - s'y
*
*   If 0 <= s'y <= 1.0E-8 y'y then the update is skipped to prevent
*   division by a small number.
*
*   The L-BFGS structures are currently FORTRAN subroutines that
*   require the variables NUPDT, CMPS, and CMPY.  These are used as
*   work space storage and should not be altered between FORTRAN calls.
*********************************************************************/
{
  real  s_dot_y, sBs, y_dot_y, alpha;


/*-- Damp the estimate if necessary. */

  s_dot_y = ddot (n, s_vec, 1, y_vec, 1);
  multbv_ (&n, &t, s_vec, Bs_vec, NUPDT, CMPS, CMPY);
  sBs = ddot (n, s_vec, 1, Bs_vec, 1);

  if (s_dot_y < (0.2 * sBs)) {
    fprintf (bfgs_fp, "--- damping L-BFGS update\n");
    alpha = 0.8 * sBs / (sBs - s_dot_y);
    dscal (n, alpha, y_vec, 1);
    daxpy (n, (1.0 - alpha), Bs_vec, 1, y_vec, 1);
    s_dot_y = ddot (n, s_vec, 1, y_vec, 1);
  }

/*-- Decide whether to skip the update. */

  y_dot_y = ddot (n, y_vec, 1, y_vec, 1);
  if ((s_dot_y >= 0.0) && (s_dot_y <= (sqrt(MCHEPS) * y_dot_y))) {
    fprintf (bfgs_fp, "--- skipping L-BFGS update\n");
    return;
  }

/*-- Make the updates. */
  updtbh_ (&n, &t, s_vec, y_vec, NUPDT, CMPS, CMPY);

  return;
}
Exemplo n.º 4
0
void    KNITRO_EXPORT  KTR_dscal (const int             n,
                                  const double          alpha,
                                        double * const  x,
                                  const int             incx)
{
    dscal (n, alpha, x, incx);
    return;
}
Exemplo n.º 5
0
Arquivo: ugeblas.c Projeto: rolk/ug
INT NS_DIM_PREFIX descal (MULTIGRID *mg, INT fl, INT tl, INT mode, EVECDATA_DESC *x, DOUBLE a)
{
  INT ret,level,i;

  ret=dscal(mg,fl,tl,mode,x->vd,a); if (ret!=NUM_OK) return ret;
  for (level=fl; level<=tl; level++)
    for (i=0; i<x->n; i++) EVDD_E(x,level,i)*=a;

  return NUM_OK;
}
NLuint nlSolve_CG_precond()  {
    NLdouble* b        = nlCurrentContext->b ;
    NLdouble* x        = nlCurrentContext->x ;
    NLdouble  eps      = nlCurrentContext->threshold ;
    NLuint    max_iter = nlCurrentContext->max_iterations ;
    NLint     N        = nlCurrentContext->n ;

    NLdouble* r = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble* d = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble* h = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble *Ad = h;
    NLuint its=0;
    NLdouble rh, alpha, beta;
    NLdouble b_square = ddot(N,b,1,b,1);
    NLdouble err=eps*eps*b_square;
    NLint i;
    NLdouble * Ax=NL_NEW_ARRAY(NLdouble,nlCurrentContext->n);
    NLdouble accu =0.0;
    NLdouble curr_err;
    

    nlCurrentContext->matrix_vector_prod(x,r);
    daxpy(N,-1.,b,1,r,1);
    nlCurrentContext->precond_vector_prod(r,d);
    dcopy(N,d,1,h,1);
    rh=ddot(N,r,1,h,1);
    curr_err = ddot(N,r,1,r,1);
    while ( curr_err >err && its < max_iter) {
	if(!(its % 100)) {
	   printf ( "%d : %.10e -- %.10e\n", its, curr_err, err ) ;
	}
        nlCurrentContext->matrix_vector_prod(d,Ad);
        alpha=rh/ddot(N,d,1,Ad,1);
        daxpy(N,-alpha,d,1,x,1);
        daxpy(N,-alpha,Ad,1,r,1);
        nlCurrentContext->precond_vector_prod(r,h);
        beta=1./rh; rh=ddot(N,r,1,h,1); beta*=rh;
        dscal(N,beta,d,1);
        daxpy(N,1.,h,1,d,1);
        ++its;
        // calcul de l'erreur courante
        curr_err = ddot(N,r,1,r,1);

    }
    nlCurrentContext->matrix_vector_prod(x,Ax);
    for(i = 0 ; i < N ; ++i)
        accu+=(Ax[i]-b[i])*(Ax[i]-b[i]);
    printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square));
    NL_DELETE_ARRAY(Ax);
    NL_DELETE_ARRAY(r) ;
    NL_DELETE_ARRAY(d) ;
    NL_DELETE_ARRAY(h) ;
    
    return its;
}
Exemplo n.º 7
0
/* ************************************************************
   TIME-CRITICAL PROCEDURE -- isscalarmul(x,alpha,n)
   Computes x *= alpha using BLAS.
   ************************************************************ */
void isscalarmul(double *x, const double alpha, const mwIndex n)
{
    mwIndex one=1;
    #ifdef PC
    dscal(&n,&alpha,x,&one);
    #endif
    #ifdef UNIX
    dscal_(&n,&alpha,x,&one);
    #endif
    return;
}
Exemplo n.º 8
0
 static vl::Error
 scal(vl::Context & context,
      ptrdiff_t n,
      type alpha,
      type *x, ptrdiff_t incx)
 {
   dscal(&n,
         &alpha,
         (double*)x, &incx) ;
   return vl::vlSuccess ;
 }
Exemplo n.º 9
0
// Factors a double precision matrix by gaussian elimination.
void dgefa(double ** a, int * ipvt)
{
    double temp;
    int k, j;

    for (k = 0; k < NM1; k++)
    {
        double * col_k = a[k];
        int kp1 = k + 1;

        // find l = pivot index
        int l = idamax(N - k, col_k, k, 1) + k;
        ipvt[k] = l;

        // zero pivot implies this column already triangularized
        if (col_k[l] != 0)
        {
            // interchange if necessary
            if (l != k)
            {
                temp     = col_k[l];
                col_k[l] = col_k[k];
                col_k[k] = temp;
            }

            // compute multipliers
            temp = -1.0 / col_k[k];
            dscal(N - kp1, temp, col_k, kp1, 1);

            // row elimination with column indexing
            for (j = kp1; j < N; j++)
            {
                double * col_j = a[j];
                temp = col_j[l];

                if (l != k)
                {
                    col_j[l] = col_j[k];
                    col_j[k] = temp;
                }

                daxpy(N - kp1, temp, col_k, kp1, 1, col_j, kp1, 1);
            }
        }
    }

    ipvt[N - 1] = N - 1;
}
Exemplo n.º 10
0
/* This routine integrates the physical values */
void Integrate_CNAM(int Je, double dt, Element_List *U, Element_List *Uf,
                    double **uf)
{
	register  int i;
	int       nq;
	double    theta = dparam("THETA");

	nq = U->htot*U->nz;
	dcopy(nq, Uf->base_h, 1, uf[0], 1);

	/* multiply u^n by theta factor */
	dscal(nq, 1.0/(1-theta),U->base_h,1);
	for(i = 0; i < Je; ++i)
		daxpy(nq, Beta_Int[i]*dt,  uf[i], 1, U->base_h,1);

	reshuffle(uf,Je);
}
Exemplo n.º 11
0
double Quad::get_1diag_massmat(int ID){
  double *wa, *wb;
  double *wvec = dvector(0, qtot-1), vmmat;
  Mode mw,*m;

#ifndef PCONTBASE
  double **ba, **bb;
  Mode m1;
  get_moda_GL (qa, &ba);
  get_moda_GL (qb, &bb);
  m1.a = ba[ID];
  m1.b = bb[ID];
  m = &m1;
#else
  Basis *b = getbasis();
  m = b->vert+ID;
#endif

  getzw(qa,&wa,&wa,'a');
  getzw(qb,&wb,&wb,'a');

  mw.a = wa;  mw.b = wb;

  fillvec(&mw, wvec);

  if(curvX)
    dvmul(qtot, wvec, 1, geom->jac.p, 1, wvec, 1);
  else
    dscal(qtot, geom->jac.d, wvec, 1);

  vmmat = Quad_mass_mprod(this, m, wvec);

  free(wvec);

  return vmmat;
}
Exemplo n.º 12
0
/* This function takes complex numbers x, y, and z defined by an 
eigenvector for a multiwavelet (would work for Fourier transforms
too, however) and returns a pointer to a structure that defines
the major and minor axes of the particle motion vectors defined
by those three complex numbers.  The up vector (assumed to be
three element vector) defines the direction used to resolve
the sign ambiguity inherent in defining an ellipse.  That is,
both the major and minor component directions are required
to have a positive projection in the up direction.  If they 
aren't the sign is flipped before returning.  Normally up 
would point [0,0,1] or in the up radial direction for P waves.
For S, it becomes more ambiguous and should be sorted out 
by a more complicated method.

The polarization information (defined by the Particle_Motion_Ellipse 
structure) is allocated within this routine. 

Author:  G. L. Pavlis
Written:  October 1999
*/
Particle_Motion_Ellipse compute_particle_motion(complex x, 
						complex y, 
						complex z,
						double *up)
{
	double rx,ry,rz,thetax,thetay,thetaz;  /* polar forms of x,y,z*/
	double a,b;
	double phi1,phi2;
	double x1[3],x2[3];
	double nrmx1,nrmx2;
	Particle_Motion_Ellipse e;


	rx = hypot((double)x.r,(double)x.i);
	ry = hypot((double)y.r,(double)y.i);
	rz = hypot((double)z.r,(double)z.i);
	thetax = atan2((double)x.i,(double)x.r);
	thetay = atan2((double)y.i,(double)y.r);
	thetaz = atan2((double)z.i,(double)z.r);

	a = rx*rx*cos(2.0*thetax) 
		+ ry*ry*cos(2.0*thetay) 
		+ rz*rz*cos(2.0*thetaz);
	b = rx*rx*sin(2.0*thetax) 
		+ ry*ry*sin(2.0*thetay) 
		+ rz*rz*sin(2.0*thetaz);

	phi1 = atan2(-b,a)/2.0;
	phi2 = phi1 + M_PI_2;

	x1[0] = rx*cos(phi1+thetax);
	x1[1] = ry*cos(phi1+thetay);
	x1[2] = rz*cos(phi1+thetaz);
	x2[0] = rx*cos(phi2+thetax);
	x2[1] = ry*cos(phi2+thetay);
	x2[2] = rz*cos(phi2+thetaz);

	nrmx1 = dnrm2(3,x1,1);
	nrmx2 = dnrm2(3,x2,1);
	/* normalize to unit vectors */
	dscal(3,1.0/nrmx1,x1,1);
	dscal(3,1.0/nrmx2,x2,1);

	if(nrmx1>nrmx2)
	{
		dcopy(3,x1,1,e.major,1);
		dcopy(3,x2,1,e.minor,1);
		e.rectilinearity = (1.0 - nrmx2/nrmx1);
	}
	else
	{
		dcopy(3,x2,1,e.major,1);
		dcopy(3,x1,1,e.minor,1);
		e.rectilinearity = (1.0 - nrmx1/nrmx2);
	}
	/* Choose the positive sign direction */
	if(ddot(3,up,1,e.major,1) < 0.0)
		dscal(3,-1.0,e.major,1);
	if(ddot(3,up,1,e.minor,1) < 0.0)
		dscal(3,-1.0,e.minor,1);
	return(e);
}
Exemplo n.º 13
0
/*! Given a vector of pointers to dynamic bodies, and the number of bodies in
	the vector, this routine will move those bodies in the direction of their
	current velocity for the length of the timestep, \a h. It uses the 
	pre-computed velocities and accelerations computed by iterateDynamics and
	stored for each body. 
*/
int
moveBodies(int numBodies,std::vector<DynamicBody *> bodyVec,double h)
{
  static double V[42];
  static double tmp12[12];
  static double B[12];
  static double R_N_B[9];
  static double newPos[7];
  static mat3 Rot;
  int bn;
  double currq[7];
  double currv[6];
  int errCode=SUCCESS;
  
  for (bn=0;bn<numBodies;bn++) {
    memcpy(currq,bodyVec[bn]->getPos(),7*sizeof(double));
    memcpy(currv,bodyVec[bn]->getVelocity(),6*sizeof(double));;
    
    Quaternion tmpQuat(currq[3],currq[4],currq[5],currq[6]);
    tmpQuat.ToRotationMatrix(Rot);   
    
    // The rotation matrix returned by ToRotationMatrix is expressed as
    // a graphics style rot matrix (new axes are in rows), the R_N_B matrix
    // is a robotics style rot matrix (new axes in columns)
    
    R_N_B[0] = Rot[0];  R_N_B[3] = Rot[1];  R_N_B[6] = Rot[2];
    R_N_B[1] = Rot[3];  R_N_B[4] = Rot[4];  R_N_B[7] = Rot[5];
    R_N_B[2] = Rot[6];  R_N_B[5] = Rot[7];  R_N_B[8] = Rot[8];
    
    // B relates the angular velocity of the body (expressed in
    // the body frame) to the time derivative of the Euler parameters
    B[0] = -currq[4];  B[4] = -currq[5];  B[8] = -currq[6];
    B[1] =  currq[3];  B[5] = -currq[6];  B[9] =  currq[5];
    B[2] =  currq[6];  B[6] =  currq[3];  B[10]= -currq[4];
    B[3] = -currq[5];  B[7] =  currq[4];  B[11]=  currq[3];
    dscal(12,0.5,B,1);
    
    // V is a list of matrices.  Each matrix (V_bn) can be multiplied by
    // body bn's 6x1 velocity vector to get the 7x1 time derivative 
    // of the body's position.
    // V_bn = [ eye(3,3)  zeros(3,3);
    //         zeros(4,3)  B*R_N_B'  ];
    // This list of matrices will be used at the end to compute the new
    // position from the new velocity
    dgemm("N","T",4,3,3,1.0,B,4,R_N_B,3,0.0,tmp12,4);
    V[0] = 1.0;
    V[8] = 1.0;
    V[16]= 1.0;
    fillMatrixBlock(tmp12,4,3,3,6,5,V,7);
    
    dcopy(7,currq,1,newPos,1);
    
    dgemv("N",7,6,h,V,7,currv,1,1.0,newPos,1);
    
#ifdef GRASPITDBG
    fprintf(stdout,"object %s new velocity: \n", bodyVec[bn]->getName().latin1());
    for (int i=0;i<6;i++) fprintf(stdout,"%le   ",currv[i]);
    printf("\n");
    fprintf(stdout,"object %s new position: \n", bodyVec[bn]->getName().latin1());
    disp_mat(stdout,newPos,1,7,0);
#endif
    
    //should we bother to check if the object has moved? (for optimization)
    if (!bodyVec[bn]->setPos(newPos)) {
      DBGP("requested position is out of bounds for this object!");
      errCode = FAILURE;
    }
  }
  return errCode;
}
Exemplo n.º 14
0
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
    /* Variables */
    int k,nSamples,maxIter,sparse=0,*iVals,*covered,*lastVisited;
    long i,j,nVars,one=1;

    mwIndex *jc,*ir;

    double *w, *Xt, *y, lambda, alpha, innerProd, sig,c=1,*g,*d,nCovered=0,*cumSum,scaling;

    if (nrhs != 9)
        mexErrMsgTxt("Function needs nine arguments: {w,Xt,y,lambda,alpha,iVals,d,g,covered}");

    /* Input */

    w = mxGetPr(prhs[0]);
    Xt = mxGetPr(prhs[1]);
    y = mxGetPr(prhs[2]);
    lambda = mxGetScalar(prhs[3]);
    alpha = mxGetScalar(prhs[4]);
    iVals = (int*)mxGetPr(prhs[5]);
    if (!mxIsClass(prhs[5],"int32"))
        mexErrMsgTxt("iVals must be int32");
    d = mxGetPr(prhs[6]);
    g = mxGetPr(prhs[7]);
    covered = (int*)mxGetPr(prhs[8]);

    /* Compute Sizes */
    nVars = mxGetM(prhs[1]);
    nSamples = mxGetN(prhs[1]);
    maxIter = mxGetM(prhs[5]);

    if (nVars != mxGetM(prhs[0]))
        mexErrMsgTxt("w and Xt must have the same number of rows");
    if (nSamples != mxGetM(prhs[2]))
        mexErrMsgTxt("number of columns of Xt must be the same as the number of rows in y");
    if (nVars != mxGetM(prhs[6]))
        mexErrMsgTxt("w and d must have the same number of rows");
    if (nSamples != mxGetM(prhs[7]))
        mexErrMsgTxt("w and g must have the same number of rows");
    if (nSamples != mxGetM(prhs[8]))
        mexErrMsgTxt("covered and y must hvae the same number of rows");

    if (mxIsSparse(prhs[1])) {
        sparse = 1;
        jc = mxGetJc(prhs[1]);
        ir = mxGetIr(prhs[1]);
    }

    if (sparse && alpha*lambda==1)
        mexErrMsgTxt("Sorry, I don't like it when Xt is sparse and alpha*lambda=1\n");

    /* Allocate memory needed for lazy updates */
    if (sparse) {
        lastVisited = mxCalloc(nVars,sizeof(int));
        cumSum = mxCalloc(maxIter,sizeof(double));

        /*for(j=0;j<nVars;j++)
            lastVisited[j] = -1;*/
    }

    for(i=0; i<nSamples; i++) {
        if (covered[i]!=0)
            nCovered++;
    }

    for(k=0; k<maxIter; k++)
    {
        /* Select next training example */
        i = iVals[k]-1;

        /* Compute current values of needed parameters */
        if (sparse && k > 0) {
            for(j=jc[i]; j<jc[i+1]; j++) {
                if (lastVisited[ir[j]]==0) {
                    w[ir[j]] -= d[ir[j]]*cumSum[k-1];
                }
                else {
                    w[ir[j]] -= d[ir[j]]*(cumSum[k-1]-cumSum[lastVisited[ir[j]]-1]);
                }
                lastVisited[ir[j]] = k;
            }
        }

        /* Compute derivative of loss */
        if (sparse) {
            innerProd = 0;
            for(j=jc[i]; j<jc[i+1]; j++)
                innerProd += w[ir[j]]*Xt[j];
            innerProd *= c;
        }
        else
            innerProd = ddot(&nVars,w,&one,&Xt[nVars*i],&one);

        sig = -y[i]/(1+exp(y[i]*innerProd));

        /* Update direction */
        if (sparse) {
            for(j=jc[i]; j<jc[i+1]; j++)
                d[ir[j]] += Xt[j]*(sig - g[i]);
        }
        else {
            scaling = sig-g[i];
            daxpy(&nVars,&scaling,&Xt[i*nVars],&one,d,&one);
        }

        /* Store derivative of loss */
        g[i] = sig;

        /* Update the number of examples that we have seen */
        if (covered[i]==0) {
            covered[i]=1;
            nCovered++;
        }

        /* Update parameters */
        if (sparse)
        {
            c *= 1-alpha*lambda;

            if (k==0)
                cumSum[0] = alpha/(c*nCovered);
            else
                cumSum[k] = cumSum[k-1] + alpha/(c*nCovered);
        }
        else {
            scaling = 1-alpha*lambda;
            dscal(&nVars,&scaling,w,&one);
            scaling = -alpha/nCovered;
            daxpy(&nVars,&scaling,d,&one,w,&one);
        }

    }

    if (sparse) {
        for(j=0; j<nVars; j++) {
            if (lastVisited[j]==0) {
                w[j] -= d[j]*cumSum[maxIter-1];
            }
            else
            {
                w[j] -= d[j]*(cumSum[maxIter-1]-cumSum[lastVisited[j]-1]);
            }
        }
        scaling = c;
        dscal(&nVars,&scaling,w,&one);
        mxFree(lastVisited);
        mxFree(cumSum);
    }

}
// -------------------------------  mexFunction ---------------------------------------//
void mexFunction( int nlhs, mxArray *plhs[],
		int nrhs, const mxArray *prhs[]) {

	/* Check for proper number of arguments */
	if(nrhs!=5) {
		Usage();
		mexErrMsgIdAndTxt("MyToolbox:arrayProduct:nrhs",
				"Five inputs required.");
	}
	if(nlhs!=4) {
		Usage();
		mexErrMsgIdAndTxt("MyToolbox:arrayProduct:nlhs",
				"Four output required.");
	}

	// Input argument and dimensions
	const mwSize 	DataR = mxGetM(prhs[0]),
					DataC = mxGetN(prhs[0]),
					MeanIR = mxGetM(prhs[1]),
					MeanIC = mxGetN(prhs[1]);
	const double 	*Data = mxGetPr(prhs[0]),
					*MeanI = mxGetPr(prhs[1]);
	const double 	MaxIter = mxGetScalar(prhs[2]),
					Thresh = mxGetScalar(prhs[3]);
	const int 		Verbose = (int) mxGetScalar(prhs[4]);


	// Set the number of means elements, dimension and number of data points
	int 	NM = MeanIC,
			ND = DataC,
			Dim = DataR;

	// Check appropriate sizes and modes
	if( Dim != MeanIR ) {
		Usage();
		mexErrMsgIdAndTxt("KMeans:Means","Means matrix dimension");
	}

	// Data elements
	double 	*Prior = NULL,
			*Mean = NULL,
			*Cov = NULL,
			*Assign;

	// ** KM parameters
	double	*SqrTemp = new double[Dim];
	double  *DistTemp = new double[NM];
	double	alpha = 1,
			beta = 1,
			Scale = 0;
	double  Term, TermP = 0;
	int     AssignTemp;
	int		*AssignNum; AssignNum = new int[NM];
	mwSize	CSize[] = {Dim,Dim,NM};
	int     CNumSize = 3;
	int 	i,k,j;
	int		inc = 1;
	int 	index = 0;

	Assign = new double[ND];
	Prior = new double[NM];
	Mean = new double[Dim*NM];
	Cov = new double[Dim*Dim*NM];

	memcpy(Mean, MeanI, Dim*NM*sizeof(double));

	for(int E = 0; E < MaxIter; E++){

		// ** Assign features to each center
#pragma omp parallel for shared(Data, Mean, Dim, inc, Assign) private(i,j, SqrTemp, DistTemp, AssignTemp)
		for(i = 0; i < ND; i++){
			for(j = 0; j < NM; j++){

				// Calculate elements of Data-Mean
				vdSub( Dim, &Data[i*Dim], &Mean[j*Dim], SqrTemp );

				// Square elements
				vdSqr( Dim, SqrTemp, SqrTemp );

				// Sum distances
				DistTemp[j] = dasum( &Dim, SqrTemp, &inc);
			}

			// Find minimum distance index
			AssignTemp = idamin(&NM, DistTemp, &inc);

			Assign[i] = double(AssignTemp-1);
		}

		// ** Calculate new center values
		// Zero out centers
		memset(Mean, 0, Dim*NM*sizeof(double));
		memset(AssignNum, 0, NM*sizeof(int));

		for(i = 0; i < ND; i++){
			index = Assign[i];
			daxpy(&Dim, &alpha, &Data[i*Dim], &inc, &Mean[index*Dim], &inc);
			AssignNum[index] = AssignNum[index] + 1;
		}

		for(i = 0; i < NM; i++){

			// If no point was assigned to an average, assign a different point
			if(AssignNum[i] == 0){
				memcpy(&Mean[i*Dim], &Data[(rand()%ND)*Dim], Dim*sizeof(double));
				Scale = 1;
				dscal(&Dim, &Scale, &Mean[i*Dim], &inc);
			}
			else{
				Scale = 1 / double(AssignNum[i]);
				dscal(&Dim, &Scale, &Mean[i*Dim], &inc);
			}
		}

		// Termination conditions
		k = Dim*NM;
		Term = dasum(&k, Mean, &inc);

		if(Verbose == 1){
			printf("Iteration %d - Shift %3.9f\n",E, fabs(Term-TermP));
			mexEvalString("pause(.000001);");
		}

		if(fabs(Term-TermP) <= Thresh)
			break;

		TermP = Term;
	}


	// ** Create and assign output variables
	double *Out;
	plhs[0] = mxCreateDoubleMatrix(1, ND ,mxREAL );
	plhs[1] = mxCreateDoubleMatrix(Dim, NM, mxREAL );
	plhs[2] = mxCreateDoubleMatrix(1, NM, mxREAL );
	plhs[3] = mxCreateNumericArray( CNumSize, CSize,
			mxDOUBLE_CLASS, mxREAL);

	// Priors
	k = 0;
	for(i = 0; i < NM; i++ )
		k = k + AssignNum[i];

	for(i = 0; i < NM; i++){
		Prior[i] = double(AssignNum[i])/k;
	}

	// Covariances
	memset(Cov, 0, Dim*Dim*NM*sizeof(double));
	char trans = 'N';
	k = 1;
	for(i = 0; i < ND; i++){

		index = Assign[i];
		Scale = 1/(Prior[index]*ND);

		// Calculate elements of Data-Mean
		vdSub( Dim, &Data[i*Dim], &Mean[index*Dim], SqrTemp );

		// covariance
		dgemm(&trans, &trans, &Dim, &Dim, &k, &Scale, SqrTemp, &Dim, SqrTemp,
				&k, &beta, &Cov[index*Dim*Dim], &Dim);

	}

	Out = mxGetPr(plhs[0]);
	memcpy(Out, Assign, ND*sizeof(double));
	Out = mxGetPr(plhs[1]);
	memcpy(Out, Mean, Dim*NM*sizeof(double));
	Out = mxGetPr(plhs[2]);
	memcpy(Out, Prior, NM*sizeof(double));
	Out = mxGetPr(plhs[3]);
	memcpy(Out, Cov, Dim*Dim*NM*sizeof(double));

	//** Delete dynamic variables
	delete [] Prior;
	delete [] Mean;
	delete [] Cov;
	delete [] Assign;
	delete [] AssignNum;

}
Exemplo n.º 16
0
/* Function: mdlOutputs =======================================================
 * do the main optimization routine here
 * no discrete states are considered
 */
static void mdlOutputs(SimStruct *S, int_T tid)
{
	int_T i, j, Result, qinfeas=0;
	real_T  *z = ssGetOutputPortRealSignal(S,0);
	real_T  *w = ssGetOutputPortRealSignal(S,1);
	real_T  *I = ssGetOutputPortRealSignal(S,2);
	real_T  *exitflag = ssGetOutputPortRealSignal(S,3);
	real_T  *pivots = ssGetOutputPortRealSignal(S,4);
	real_T  *time = ssGetOutputPortRealSignal(S,5);

	InputRealPtrsType M = ssGetInputPortRealSignalPtrs(S,0);
	InputRealPtrsType q = ssGetInputPortRealSignalPtrs(S,1);

	ptrdiff_t pivs, info, m, n, inc=1;
	char_T T='N';
	double total_time,  alpha=1.0, tmp=-1.0, *x, *Mn, *qn, *r, *c, s=0.0, sn=0.0; 
	PT_Matrix pA; /* Problem data A = [M -1 q] */
	PT_Basis  pB; /* The basis */  
	T_Options options;  /* options structure defined in lcp_matrix.h */          

/* for RTW we do not need these variables */
#ifdef MATLAB_MEX_FILE
	const char *fname;
	mxArray *fval;
	int_T nfields;
	clock_t t1,t2;
#endif

    
	UNUSED_ARG(tid); /* not used in single tasking mode */
	
	/* default options */
	options.zerotol = 1e-10; /* zero tolerance */
	options.lextol = 1e-10; /* lexicographic tolerance - a small treshold to determine if values are equal */
	options.maxpiv = INT_MAX; /* maximum number of pivots */
	/* if LUMOD routine is chosen, this options refactorizes the basis after n steps using DGETRF
	   routine from lapack to avoid numerical problems */
	options.nstepf = 50;
	options.clock = 0; /* 0 or 1 - to print computational time */
	options.verbose = 0; /* 0 or 1 - verbose output */
	/*  which routine in Basis_solve should solve a set of linear equations: 
	    0 - corresponds to LUmod package that performs factorization in the form L*A = U. Depending on
	    the change in A factors L, U are updated. This is the fastest method.
	    1 - corresponds to DGESV simple driver 
	    which solves the system AX = B by factorizing A and overwriting B with the solution X
	    2 - corresponds to DGELS simple driver 
	    which solves overdetermined or underdetermined real linear systems min ||b - Ax||_2
	    involving an M-by-N matrix A, or its transpose, using a QR or LQ  factorization of A.  */
	options.routine = 0; 
	options.timelimit = 3600; /* time limit in seconds to interrupt iterations */
	options.normalize = 1; /* 0 or 1 - perform scaling of input matrices M, q */
    options.normalizethres = 1e6; 
    /* If the normalize option is on, then the matrix scaling is performed 
      only if 1 norm of matrix M (maximum absolute column sum) is above this threshold.
      This enforce additional control over normalization since it seems to be more
      aggressive also for well-conditioned problems. */

#ifdef MATLAB_MEX_FILE
	/* overwriting default options by the user */
	if (ssGetSFcnParamsCount(S)==3) {
		nfields = mxGetNumberOfFields(ssGetSFcnParam(S,2));
		for(i=0; i<nfields; i++){
			fname = mxGetFieldNameByNumber(ssGetSFcnParam(S,2), i);   
			fval = mxGetField(ssGetSFcnParam(S,2), 0, fname);
			if ( strcmp(fname,"zerotol")==0 )
				options.zerotol = mxGetScalar(fval);
			if ( strcmp(fname,"lextol")==0 )
				options.lextol = mxGetScalar(fval);
			if ( strcmp(fname,"maxpiv")==0 ) {
				if (mxGetScalar(fval)>=(double)INT_MAX)
					options.maxpiv = INT_MAX;
				else
					options.maxpiv = (int_T)mxGetScalar(fval);
			}
			if ( strcmp(fname,"nstepf")==0 )
				options.nstepf = (int_T)mxGetScalar(fval);
			if ( strcmp(fname,"timelimit")==0 )
				options.timelimit = mxGetScalar(fval);
			if ( strcmp(fname,"clock")==0 )
				options.clock = (int_T)mxGetScalar(fval);
			if ( strcmp(fname,"verbose")==0 )
				options.verbose = (int_T)mxGetScalar(fval);
			if ( strcmp(fname,"routine")==0 )
				options.routine = (int_T)mxGetScalar(fval);
			if ( strcmp(fname,"normalize")==0 )
				options.normalize = (int_T)mxGetScalar(fval);
            if ( strcmp(fname, "normalizethres")==0 )
                options.normalizethres = mxGetScalar(fval);          
		}
	}
#endif
    

	/* Normalize M, q to avoid numerical problems if possible 
	   Mn = diag(r)*M*diag(c) , qn = diag(r)*q  */
	/* initialize Mn, qn */
	Mn = (double *)ssGetPWork(S)[0];
	qn = (double *)ssGetPWork(S)[1];
	/* initialize vectors r, c */
	r = (double *)ssGetPWork(S)[2];
	c = (double *)ssGetPWork(S)[3];
	/* initialize auxiliary vector x */
	x = (double *)ssGetPWork(S)[4];
	/* initialize to ones */
	for (i=0; i<NSTATES(S); i++) {
		r[i] = 1.0;
		c[i] = 1.0;
	}
	m = NSTATES(S);
	n = m*m;
	/* write data to Mn = M */
	memcpy(Mn, *M, n*sizeof(double));
	/* write data to qn = q */
	memcpy(qn, *q, m*sizeof(double));
    /* check out the 1-norm of matrix M (maximum column sum) */
    for (i=0; i<m; i++) {
        sn = dasum(&m, &Mn[i*m], &inc);
        if (sn>s) {
            s = sn;
        }
    }

	/* scale matrix M, q and write scaling factors to r (rows) and c (columns) */
	if (options.normalize && s>options.normalizethres) {
		NormalizeMatrix (m, m, Mn, qn, r, c,  options);
    }
    
	/* Setup the problem */
	pA = ssGetPWork(S)[5];
	/* A(:,1:m) = M */
	memcpy(pMAT(pA), Mn, n*sizeof(double));

	/* A(:,1:m) = -A(:,1:m) */
	dscal(&n, &tmp, pMAT(pA), &inc);

	/* A(:,m+1) = -1 */
	for(i=0;i<m;i++)
		C_SEL(pA,i,m) = -1.0;

	/* A(:,m+2) = q */
	memcpy(&(C_SEL(pA,0,m+1)),qn,m*sizeof(double));

	/* initialize basis */
	pB = ssGetPWork(S)[6];

    /* check if the problem is not feasible at the beginning */
    for (i=0; i<m; i++) {
        if (qn[i]<-options.zerotol) {
            qinfeas = 1;
            break;
        }
    }


    /* Solve the LCP */
    if (qinfeas) {
#ifdef MATLAB_MEX_FILE
	t1 = clock();
#endif

    /* main LCP rouinte */    
    Result = lcp(pB, pA, &pivs, options);
        
#ifdef MATLAB_MEX_FILE
    t2 = clock();
    total_time = ((double)(t2-t1))/CLOCKS_PER_SEC;
#else
    total_time = -1;
#endif
    } else {
        pivs = 0;
        total_time = 0;
        Result = LCP_FEASIBLE;    
    }

#ifdef MATLAB_MEX_FILE
	if (options.clock) {
		printf("Time needed to perform pivoting:\n time= %i  (%lf seconds)\n",
		       t2-t1,total_time);
		printf("Pivots: %ld\n", pivs);
		printf("CLOCKS_PER_SEC = %i\n",CLOCKS_PER_SEC);
	}
#endif

	/* initialize values to 0 */
	for(i=0;i<NSTATES(S);i++)
	{
		w[i] = 0.0;
		z[i] = 0.0;
		I[i] = 0.0;
	}

	/* for a feasible basis, compute the solution */
	if ( Result == LCP_FEASIBLE || Result == LCP_PRETERMINATED )
	{
#ifdef MATLAB_MEX_FILE
		t1 = clock();
#endif
		info = Basis_Solve(pB, &(C_SEL(pA,0,m+1)), x, options);
		for (j=0,i=0;i<Index_Length(pB->pW);i++,j++)
		{
			w[Index_Get(pB->pW,i)] = x[j];
			/* add 1 due to matlab 1-indexing */
			I[j] = Index_Get(pB->pW,i)+1;
		}
		for(i=0;i<Index_Length(pB->pZ);i++,j++)
		{
			/* take only positive values */
			if (x[j] > options.zerotol ) {
				z[Index_Get(pB->pZ,i)] = x[j];
			}
			/* add 1 due to matlab 1-indexing */
			I[j] = Index_Get(pB->pZ, i)+m+1;
		}
#ifdef MATLAB_MEX_FILE
		t2 = clock();
		total_time+=(double)(t2-t1)/(double)CLOCKS_PER_SEC;
		if (options.clock) {
			printf("Time in total needed to solve LCP: %lf seconds\n",
			       total_time + (double)(t2-t1)/(double)CLOCKS_PER_SEC);
		}
#endif
		
		if (options.normalize) {
			/* do the backward normalization */
			/* z = diag(c)*zn */
			for (i=0; i<m; i++) {
				z[i] = c[i]*z[i];
			}
			
			/* since the normalization does not compute w properly, we recalculate it from
			 * recovered z */
			/* write data to Mn = M */
			memcpy(Mn, *M, n*sizeof(double));
			/* write data to qn = q */
			memcpy(qn, *q, m*sizeof(double));
			/* copy w <- q; */
			dcopy(&m, qn, &inc, w, &inc);
			/* compute w = M*z + q */
			dgemv(&T, &m, &m, &alpha, Mn, &m, z, &inc, &alpha, w, &inc);
			/* if w is less than eps, consider it as zero */
			for (i=0; i<m; i++) {
				if (w[i]<options.zerotol) {
					w[i] = 0.0;
				}
			}
		}	       
	}


	/* outputs */
	*exitflag = (real_T )Result;
	*pivots =(real_T)pivs;
	*time = (real_T)total_time;

	/* reset dimensions and values in basis for a recursive call */
	Reinitialize_Basis(m, pB);
	
}
Exemplo n.º 17
0
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
    /* Variables */
    int k,nSamples,maxIter,sparse=0,*iVals,*covered,*lastVisited,stepSizeType=1,temp;
    long i,j,nVars,one=1;
    
    mwIndex *jc,*ir;
    
    double *w, *Xt, *y, lambda, *Li, alpha, innerProd, sig,c=1,*g,*d,nCovered=0,*cumSum,fi,fi_new,gg,precision,scaling,wtx,*xtx;
    
    if (nrhs < 9)
        mexErrMsgTxt("Function needs nine arguments: {w,Xt,y,lambda,alpha,iVals,d,g,covered[,stepSizeType,xtx]}");
    
    /* Input */
    
    w = mxGetPr(prhs[0]);
    Xt = mxGetPr(prhs[1]);
    y = mxGetPr(prhs[2]);
    lambda = mxGetScalar(prhs[3]);
    Li = mxGetPr(prhs[4]);
    iVals = (int*)mxGetPr(prhs[5]);
    if (!mxIsClass(prhs[5],"int32"))
        mexErrMsgTxt("iVals must be int32");
    d = mxGetPr(prhs[6]);
    g = mxGetPr(prhs[7]);
    covered = (int*)mxGetPr(prhs[8]);
    
    if (nrhs >= 10) {
        stepSizeType = (int)mxGetScalar(prhs[9]);
        if (!mxIsClass(prhs[9],"int32"))
            mexErrMsgTxt("stepSizeType must be int32");
    }    
    
    /* Compute Sizes */
    nVars = mxGetM(prhs[1]);
    nSamples = mxGetN(prhs[1]);
    maxIter = mxGetM(prhs[5]);
    precision = 1.490116119384765625e-8;
    
    if (nVars != mxGetM(prhs[0]))
        mexErrMsgTxt("w and Xt must have the same number of rows");
    if (nSamples != mxGetM(prhs[2]))
        mexErrMsgTxt("number of columns of Xt must be the same as the number of rows in y");
    if (nVars != mxGetM(prhs[6]))
        mexErrMsgTxt("w and d must have the same number of rows");
    if (nSamples != mxGetM(prhs[7]))
        mexErrMsgTxt("w and g must have the same number of rows");
    if (nSamples != mxGetM(prhs[8]))
        mexErrMsgTxt("covered and y must have the same number of rows");
        
    if (mxIsSparse(prhs[1])) {
        sparse = 1;
        jc = mxGetJc(prhs[1]);
        ir = mxGetIr(prhs[1]);
    }
    
    if (sparse && alpha*lambda==1)
        mexErrMsgTxt("Sorry, I don't like it when Xt is sparse and alpha*lambda=1\n");
    
    /* Allocate memory needed for lazy updates */
    if (sparse) {
        lastVisited = mxCalloc(nVars,sizeof(double));
        cumSum = mxCalloc(maxIter,sizeof(double));
        
        /*for(j=0;j<nVars;j++)
            lastVisited[j] = -1;*/
    }
    
    for(i=0;i<nSamples;i++) {
        if (covered[i]!=0)
            nCovered++;
    }
    
    if (nrhs >= 11) {
        xtx = mxGetPr(prhs[10]);
        if (nSamples != mxGetM(prhs[10]))
            mexErrMsgTxt("covered and xtx must have the same number or rows");
    }
    else {
        xtx = mxCalloc(nSamples,sizeof(double));
        for(i = 0; i < nSamples;i++) {
            if (sparse) {
                xtx[i] = 0;
                for(j=jc[i];j<jc[i+1];j++)
                    xtx[i] += Xt[j]*Xt[j];
            }
            else
                xtx[i] = ddot(&nVars,&Xt[i*nVars],&one,&Xt[i*nVars],&one);
        }
    }
    
    for(k=0;k<maxIter;k++)
    {
        /* Select next training example */
        i = iVals[k]-1;
        
        /* Compute current values of needed parameters */
        if (sparse && k > 0) {
            for(j=jc[i];j<jc[i+1];j++) {
                if (lastVisited[ir[j]]==0) {
                    w[ir[j]] -= d[ir[j]]*cumSum[k-1];
                }
                else {
                    w[ir[j]] -= d[ir[j]]*(cumSum[k-1]-cumSum[lastVisited[ir[j]]-1]);
                }
                lastVisited[ir[j]] = k;
            }
        }
        
        /* Compute derivative of loss */
        if (sparse) {
            innerProd = 0;
            for(j=jc[i];j<jc[i+1];j++)
                innerProd += w[ir[j]]*Xt[j];
            innerProd *= c;
        }
        else
            innerProd = ddot(&nVars,w,&one,&Xt[nVars*i],&one);
        
        sig = -y[i]/(1+exp(y[i]*innerProd));
        
        /* Update direction */
        if (sparse) {
            for(j=jc[i];j<jc[i+1];j++)
                d[ir[j]] += Xt[j]*(sig - g[i]);
        }
        else {
            scaling = sig-g[i];
            daxpy(&nVars,&scaling,&Xt[i*nVars],&one,d,&one);
        }
        
        /* Store derivative of loss */
        g[i] = sig;
            
        /* Update the number of examples that we have seen */
        if (covered[i]==0) {
            covered[i]=1;
            nCovered++;
        }
        
        /* Line-search for Li */
        fi = log(1 + exp(-y[i]*innerProd));
        /* Compute f_new as the function value obtained by taking 
         * a step size of 1/Li in the gradient direction */
        wtx = innerProd;
        gg = sig*sig*xtx[i];
        innerProd = wtx - xtx[i]*sig/(*Li);
        fi_new = log(1 + exp(-y[i]*innerProd));
        /*printf("fi = %e, fi_new = %e, gg = %e\n",fi,fi_new,gg);*/
        while (gg > precision && fi_new > fi - gg/(2*(*Li))) {
            /*printf("Lipschitz Backtracking (k = %d, fi = %e, fi_new = %e, 1/Li = %e)\n",k+1,fi,fi_new,1/(*Li));*/
            *Li *= 2;
            innerProd = wtx - xtx[i]*sig/(*Li);
            fi_new = log(1 + exp(-y[i]*innerProd));
            
        }

        /* Compute step size */
        if (stepSizeType == 1)
            alpha = 1/(*Li + lambda);
        else
            alpha = 2/(*Li + (nSamples+1)*lambda);

        
        /* Update parameters */
        if (sparse)
        {
            c *= 1-alpha*lambda;
            
            if (k==0)
                cumSum[0] = alpha/(c*nCovered);
            else
                cumSum[k] = cumSum[k-1] + alpha/(c*nCovered);
        }
        else {
            scaling = 1-alpha*lambda;
            dscal(&nVars,&scaling,w,&one);
            scaling = -alpha/nCovered;
            daxpy(&nVars,&scaling,d,&one,w,&one);
        }
        
        /* Decrease value of Lipschitz constant */
        *Li *= pow(2.0,-1.0/nSamples);
                
    }
    
    if (sparse) {
        for(j=0;j<nVars;j++) {
            if (lastVisited[j]==0) {
                w[j] -= d[j]*cumSum[maxIter-1];
            }
            else
            {
                w[j] -= d[j]*(cumSum[maxIter-1]-cumSum[lastVisited[j]-1]);
            }
        }
        scaling = c;
        dscal(&nVars,&scaling,w,&one);
        mxFree(lastVisited);
        mxFree(cumSum);
    }
    if (nrhs < 11)
        mxFree(xtx);
}
NLuint nlSolve_BICGSTAB_precond() {

    NLdouble* b        = nlCurrentContext->b ;
    NLdouble* x        = nlCurrentContext->x ;
    NLdouble  eps      = nlCurrentContext->threshold ;
    NLuint    max_iter = nlCurrentContext->max_iterations ;
    NLint     N        = nlCurrentContext->n ;
    NLint     i;

    NLdouble *rT  = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble *d   = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble *h   = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble *u   = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble *Sd  = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble *t   = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble *aux = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble *s   = h;
    NLdouble rTh, rTSd, rTr, alpha, beta, omega, st, tt;
    NLuint its=0;
    NLdouble b_square = ddot(N,b,1,b,1);
    NLdouble err  = eps*eps*b_square;
    NLdouble *r   = NL_NEW_ARRAY(NLdouble, N);
    NLdouble * Ax = NL_NEW_ARRAY(NLdouble,nlCurrentContext->n);
    NLdouble accu =0.0;

    nlCurrentContext->matrix_vector_prod(x,r);
    daxpy(N,-1.,b,1,r,1);
    nlCurrentContext->precond_vector_prod(r,d);
    dcopy(N,d,1,h,1);
    dcopy(N,h,1,rT,1);
    nl_assert( ddot(N,rT,1,rT,1)>1e-40 );
    rTh=ddot(N,rT,1,h,1);
    rTr=ddot(N,r,1,r,1);
    while ( rTr>err && its < max_iter) {
	    if(!(its % 100)) {
	       printf ( "%d : %.10e -- %.10e\n", its, rTr, err ) ;
	    }
        nlCurrentContext->matrix_vector_prod(d,aux);
        nlCurrentContext->precond_vector_prod(aux,Sd);
        rTSd=ddot(N,rT,1,Sd,1);
        nl_assert( fabs(rTSd)>1e-40 );
        alpha=rTh/rTSd;
        daxpy(N,-alpha,aux,1,r,1);
        dcopy(N,h,1,s,1);
        daxpy(N,-alpha,Sd,1,s,1);
        nlCurrentContext->matrix_vector_prod(s,aux);
        nlCurrentContext->precond_vector_prod(aux,t);
        daxpy(N,1.,t,1,u,1);
        dscal(N,alpha,u,1);
        st=ddot(N,s,1,t,1);
        tt=ddot(N,t,1,t,1);
        if ( fabs(st)<1e-40 || fabs(tt)<1e-40 ) {
            omega = 0.;
        } else {
            omega = st/tt;
        }
        daxpy(N,-omega,aux,1,r,1);
        daxpy(N,-alpha,d,1,x,1);
        daxpy(N,-omega,s,1,x,1);
        dcopy(N,s,1,h,1);
        daxpy(N,-omega,t,1,h,1);
        beta=(alpha/omega)/rTh; rTh=ddot(N,rT,1,h,1); beta*=rTh;
        dscal(N,beta,d,1);
        daxpy(N,1.,h,1,d,1);
        daxpy(N,-beta*omega,Sd,1,d,1);
        rTr=ddot(N,r,1,r,1);
        ++its;
    }

    nlCurrentContext->matrix_vector_prod(x,Ax);
    for(i = 0 ; i < N ; ++i){
        accu+=(Ax[i]-b[i])*(Ax[i]-b[i]);
    }
    printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square));
    NL_DELETE_ARRAY(Ax);
    NL_DELETE_ARRAY(r);
    NL_DELETE_ARRAY(rT);
    NL_DELETE_ARRAY(d);
    NL_DELETE_ARRAY(h);
    NL_DELETE_ARRAY(u);
    NL_DELETE_ARRAY(Sd);
    NL_DELETE_ARRAY(t);
    NL_DELETE_ARRAY(aux);

    return its;
}
Exemplo n.º 19
0
double bundle_guess (bundle_t* bundle, int max_qp_iterations)
	/*
	 * Guesses where the x giving the estimated best z lies by solving the penalized bundle QP.
	 *
	 * If the resolution of the QP takes more than max_qp_iterations mask guesses, it is aborted,
	 *   and the bundle is repopulated as if it were aggregated in the previous bundle iteration,
	 *   in other words reduced to only 2 subgradients, the previous aggregate and the previous evaluated subgradient.
	 *   The QP is then solved anew, and completely.
	 * The procedure then computes x, the aggregate subgradient and the linearization error for this QP.
	 *
	 */
{
	int solved;
	int i, n_iter = max_qp_iterations;

	bundle->time_qp -= getutime(1);
	bundle->kkt_z = INFINITY;
	solved = bundle_qp_solve(bundle, 0ULL, bundle->m, &n_iter);

	if (!solved) {
		bundle->time_qp += getutime(1);
		++bundle->n_iterations;
		//printf("trim\nit: %i\tpen: %f\t", bundle->n_iterations, bundle->scale);
		// trim a
		if (bundle->m != 2)
			memcpy(&bundle->a[bundle->n], &bundle->a[(bundle->m - 1) * bundle->n], bundle->n * sizeof(double));
		memcpy(&bundle->a[0], bundle->agg_subg, bundle->n * sizeof(double));
		// recompute aat
		bundle->aat[0] = ddot(bundle->n, bundle->agg_subg, bundle->agg_subg);
		bundle->aat[bundle->max_m + 1] = bundle->aat[(bundle->m - 1) * bundle->max_m + bundle->m - 1];
		bundle->aat[1] = bundle->aat[bundle->max_m] = ddot(bundle->n, bundle->agg_subg, &bundle->a[bundle->n]);
		// trim b
		bundle->b[0] = (bundle->most_recent_i == -1) ? bundle->agg_next_b : bundle->agg_b;
		bundle->b[1] = bundle->b[bundle->m - 1];
		// update bundle information and solve again
		bundle->most_recent_i = -1;
		bundle->m = 2;
		bundle->time_qp -= getutime(1);
		n_iter = (max_qp_iterations < 3) ? 3 : max_qp_iterations;
		bundle->kkt_z = INFINITY;
		solved = bundle_qp_solve(bundle, 0ULL, bundle->m, &n_iter);
		assert(solved);
	}
	
	// compute the aggregate subgradient using BLAS (in bundle->agg_subg[])
	bundle->agg_b = bundle->agg_next_b = 0.0;
	bzero(bundle->agg_subg, bundle->n * sizeof(double));
	for (i = 0; i < bundle->kkt_m; i++) {
		daxpy(bundle->n, bundle->kkt_mul[i], &bundle->a[bundle->kkt_i[i] * bundle->n], bundle->agg_subg);
		bundle->agg_b += bundle->kkt_mul[i] * bundle->b[bundle->kkt_i[i]];
		bundle->agg_next_b += bundle->kkt_mul[i] * bundle->next_b[bundle->kkt_i[i]];
	}
	// compute local x 
	memcpy(bundle->kkt_x, bundle->agg_subg, bundle->n * sizeof(double));
	dscal(bundle->n, 1.0 / bundle->scale, bundle->kkt_x);
	// compute global x
	memcpy(bundle->x, bundle->best_x, bundle->n * sizeof(double));
	daxpy(bundle->n, 1.0, bundle->kkt_x, bundle->x);

	bundle->time_qp += getutime(1);
	// return guessed z
	return bundle->kkt_mul[bundle->kkt_m];
}
Exemplo n.º 20
0
void cblas_dscal( mwSignedIndex N, double alpha, double *X, mwSignedIndex incX)
{
	dscal(&N,&alpha,X,&incX);
}
Exemplo n.º 21
0
static void setupRHS (Element_List **V, Element_List **Vf,double *rhs,
          double *u0, Bndry **Vbc, Bsystem **Vbsys){
  register int i,k;
  int      N,nbl;
  int      eDIM = V[0]->flist[0]->dim();
  Bsystem *PB   = Vbsys[eDIM],*B = Vbsys[0];
  int      nel  = B->nel,info;
  int      **ipiv    = B->Gmat->cipiv;
  double   **binvc   = B->Gmat->binvc;
  double   **invc    = B->Gmat->invc;
  double   ***dbinvc = B->Gmat->dbinvc;
  double   **p_binvc  = PB->Gmat->binvc;
  Element  *E,*E1;
  Bndry    *Ebc;
  double   *tmp;

  if(eDIM == 2)
    tmp = dvector(0,max(8*LGmax,(LGmax-2)*(LGmax-2)));
  else
    tmp = dvector(0,18*LGmax*LGmax);

  B  = Vbsys[0];
  PB = Vbsys[eDIM];

#ifdef __LIBCATAMOUNT__
  st1 = dclock();
#else
  st1 = clock();
#endif

  /* save initial condition */
  saveinit(V,u0,Vbsys);
  Timing1("saveinit..........");

  /* take inner product if in physical space */
  for(i = 0; i < eDIM; ++i){
    if(Vf[i]->fhead->state == 'p')
      Vf[i]->Iprod(Vf[i]);
  }

  /* zero pressure field */
  dzero(Vf[eDIM]->hjtot,Vf[eDIM]->base_hj,1);
  Timing1("zeroing...........");

  /* condense out interior from u-vel + p */
  for(i = 0; i < eDIM; ++i)
    for(E=Vf[i]->fhead;E;E=E->next){
      nbl = E->Nbmodes;
      N   = E->Nmodes - nbl;
      if(N)
  dgemv('T', N, nbl, -1., binvc[E->geom->id], N,
      E->vert->hj+nbl, 1, 1., E->vert->hj,1);
    }
  Timing1("first condense(v).");

  for(i = 0; i < eDIM; ++i)
    for(E=Vf[i]->fhead;E;E=E->next){
      nbl = E->Nbmodes;
      N   = E->Nmodes - nbl;
      if(N) {
  E1 = Vf[eDIM]->flist[E->id];
  if(B->lambda->wave){
    dcopy(N,E->vert->hj+nbl,1,tmp,1);
    dgetrs('N', N, 1, invc[E->geom->id], N,ipiv[E->geom->id],tmp,N,info);
    dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N,
    tmp, 1, 1., E1->vert->hj,1);
  }
  else{
     dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N,
    E->vert->hj+nbl, 1, 1., E1->vert->hj,1);
  }
      }
   }

  Timing1("first condense(p).");

  /* add flux terms */
  for(i = 0; i < eDIM; ++i)
    for(Ebc = Vbc[i]; Ebc; Ebc = Ebc->next)
      if(Ebc->type == 'F' || Ebc->type == 'R')
  Vf[i]->flist[Ebc->elmt->id]->Add_flux_terms(Ebc);

  /* second level of factorisation to orthogonalise basis to p */
  for(E=Vf[eDIM]->fhead;E;E=E->next){

    E1 = Vf[0]->flist[E->id];

    nbl = eDIM*E1->Nbmodes + 1;
    N   = E->Nmodes-1;

    dgemv('T', N, nbl, -1.0, p_binvc[E->geom->id], N,
    E->vert->hj+1, 1, 0.0, tmp,1);

    for(i = 0; i < eDIM; ++i){
      E1 = Vf[i]->flist[E->id];
      dvadd(E1->Nbmodes,tmp+i*E1->Nbmodes,1,E1->vert->hj,1,E1->vert->hj,1);
    }

    E->vert->hj[0] += tmp[nbl-1];
  }

  Timing1("second condense...");

  /* subtract boundary initial conditions */
  if(PB->smeth == iterative){
    double **wk;
    double **a = PB->Gmat->a;

    if(eDIM == 2)
      wk = dmatrix(0,1,0,eDIM*4*LGmax);
    else
      wk = dmatrix(0,1,0,eDIM*6*LGmax*LGmax);

    for(k = 0; k < nel; ++k){
      nbl = V[0]->flist[k]->Nbmodes;

      /* gather vector */
      for(i = 0; i < eDIM; ++i)
  dcopy(nbl,V[i]->flist[k]->vert->hj,1,wk[0]+i*nbl,1);

      dspmv('U',eDIM*nbl+1,1.0,a[V[0]->flist[k]->geom->id],
    wk[0],1,0.0,wk[1],1);

      /* subtract of Vf */
      for(i = 0; i < eDIM; ++i)
  dvsub(nbl,Vf[i]->flist[k]->vert->hj,1,wk[1]+i*nbl,1,
        Vf[i]->flist[k]->vert->hj,1);
      Vf[eDIM]->flist[k]->vert->hj[0] -= wk[1][eDIM*nbl];
    }

    GathrBndry_Stokes(Vf,rhs,Vbsys);

    free_dmatrix(wk,0,0);
  }
  else{
    if(Vbc[0]->DirRHS){
      GathrBndry_Stokes(Vf,rhs,Vbsys);

      /* subtract of bcs */
      dvsub(PB->nsolve,rhs,1,Vbc[0]->DirRHS,1,rhs,1);

      /* zero ic vector */
      dzero(PB->nsolve,u0,1);
    }
    else{

      /* zero out interior components since only deal with boundary initial
   conditions (interior is always direct) */

      for(i = 0; i < eDIM; ++i)
  for(E = V[i]->fhead; E; E = E->next){
    nbl = E->Nbmodes;
    N   = E->Nmodes - nbl;
    dzero(N, E->vert->hj + nbl, 1);
  }

      /* inner product of divergence for pressure forcing */
      for(i = 0; i < eDIM; ++i)
  V[i]->Trans(V[i], J_to_Q);

      V[0]->Grad(V[eDIM],0,0,'x');
      V[1]->Grad(0,Vf[eDIM],0,'y');
      dvadd(V[1]->htot,V[eDIM]->base_h,1,Vf[eDIM]->base_h,1,
      V[eDIM]->base_h,1);

      if(eDIM == 3){
  V[2]->Grad(0,V[eDIM],0,'z');
  dvadd(V[2]->htot,V[eDIM]->base_h,1,Vf[eDIM]->base_h,1,
        V[eDIM]->base_h,1);
      }

#ifndef PCONTBASE
      for(k = 0; k < nel; ++k)
  V[eDIM]->flist[k]->Ofwd(*V[eDIM]->flist[k]->h,
        V[eDIM]->flist[k]->vert->hj,
        V[eDIM]->flist[k]->dgL);
#else
      V[eDIM]->Iprod(V[eDIM]);
#endif

      for(i = 0; i < eDIM; ++i){
  for(k = 0; k < nel; ++k){
    E   = V[i]->flist[k];
    nbl = E->Nbmodes;
    N   = E->Nmodes - nbl;

    E->HelmHoltz(PB->lambda+k);

    dscal(E->Nmodes, -B->lambda[k].d, E->vert->hj, 1);

    if(N) {
      /* condense out interior terms in velocity */
      dgemv('T', N, nbl, -1., binvc[E->geom->id], N,
      E->vert->hj+nbl, 1, 1., E->vert->hj,1);

      /* condense out interior terms in pressure*/
      E1 = V[eDIM]->flist[k];
      if(B->lambda->wave){
        dcopy(N,E->vert->hj+nbl,1,tmp,1);
        dgetrs('N',N,1,invc[E->geom->id],N,ipiv[E->geom->id],tmp,N,info);
        dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N,
        tmp, 1, 1., E1->vert->hj,1);
      }
      else{
        dgemv('T', N, E1->Nmodes, -1., dbinvc[i][E->geom->id], N,
        E->vert->hj+nbl, 1, 1., E1->vert->hj,1);
      }
    }
  }
      }

      /* second level of factorisation to orthogonalise basis to  p */
      /* p - vel */
      for(E=V[eDIM]->fhead;E;E=E->next){

  E1 = V[0]->flist[E->id];

  nbl = eDIM*E1->Nbmodes + 1;
  N   = E->Nmodes-1;

  dgemv('T', N, nbl, -1.0, p_binvc[E->geom->id], N,
        E->vert->hj+1, 1, 0.0, tmp,1);

  for(i = 0; i < eDIM; ++i){
    E1 = V[i]->flist[E->id];
    dvadd(E1->Nbmodes,tmp+i*E1->Nbmodes,1,E1->vert->hj,1,E1->vert->hj,1);
    dvadd(E1->Nbmodes,E1->vert->hj,1,Vf[i]->flist[E->id]->vert->hj,1,
    Vf[i]->flist[E->id]->vert->hj,1);
  }

  Vf[eDIM]->flist[E->id]->vert->hj[0] += E->vert->hj[0] + tmp[nbl-1];
      }
      Timing1("bc condense.......");

      GathrBndry_Stokes(Vf,rhs,Vbsys);
      Timing1("GatherBndry.......");
    }
  }

  /* finally copy inner product of f into v for inner solve */
  for(i = 0; i < eDIM; ++i)
    for(E  = V[i]->fhead; E; E= E->next){
      nbl = E->Nbmodes;
      N   = E->Nmodes - nbl;
      E1 = Vf[i]->flist[E->id];
      dcopy(N, E1->vert->hj+nbl, 1, E->vert->hj+nbl, 1);
    }
  for(E = Vf[eDIM]->fhead; E; E = E->next){
    E1 = V[eDIM]->flist[E->id];
    dcopy(E->Nmodes,E->vert->hj,1,E1->vert->hj,1);
  }

  dzero(PB->nglobal-PB->nsolve, rhs + PB->nsolve, 1);

  free(tmp);
}
Exemplo n.º 22
0
void scaleVector(Vector x, double alpha)
{
  dscal(&x->len, &alpha, x->data, &x->stride);
}
/*----------------------*/ 
void dgefa(REAL a[], int lda, int n, int ipvt[], int *info)


/* We would like to declare a[][lda], but c does not allow it.  In this
function, references to a[i][j] are written a[lda*i+j].  */
/*
     dgefa factors a double precision matrix by gaussian elimination.

     dgefa is usually called by dgeco, but it can be called
     directly with a saving in time if  rcond  is not needed.
     (time for dgeco) = (1 + 9/n)*(time for dgefa) .

     on entry

        a       REAL precision[n][lda]
                the matrix to be factored.

        lda     integer
                the leading dimension of the array  a .

        n       integer
                the order of the matrix  a .

     on return

        a       an upper triangular matrix and the multipliers
                which were used to obtain it.
                the factorization can be written  a = l*u  where
                l  is a product of permutation and unit lower
                triangular matrices and  u  is upper triangular.

        ipvt    integer[n]
                an integer vector of pivot indices.

        info    integer
                = 0  normal value.
                = k  if  u[k][k] .eq. 0.0 .  this is not an error
                     condition for this subroutine, but it does
                     indicate that dgesl or dgedi will divide by zero
                     if called.  use  rcond  in dgeco for a reliable
                     indication of singularity.

     linpack. this version dated 08/14/78 .
     cleve moler, university of new mexico, argonne national lab.

     functions

     blas daxpy,dscal,idamax
*/

{
/*     internal variables       */

REAL t;
int j,k,kp1,l,nm1;


/*     gaussian elimination with partial pivoting       */

        *info = 0;
        nm1 = n - 1;
        if (nm1 >=  0) {
                for (k = 0; k < nm1; k++) {
                        kp1 = k + 1;

                        /* find l = pivot index */

                        l = idamax(n-k,&a[lda*k+k],1) + k;
                        ipvt[k] = l;

                        /* zero pivot implies this column already 
                           triangularized */

                        if (a[lda*k+l] != ZERO) {

                                /* interchange if necessary */

                                if (l != k) {
                                        t = a[lda*k+l];
                                        a[lda*k+l] = a[lda*k+k];
                                        a[lda*k+k] = t; 
                                }

                                /* compute multipliers */

                                t = -ONE/a[lda*k+k];
                                dscal(n-(k+1),t,&a[lda*k+k+1],1);

                                /* row elimination with column indexing */

                                for (j = kp1; j < n; j++) {
                                        t = a[lda*j+l];
                                        if (l != k) {
                                                a[lda*j+l] = a[lda*j+k];
                                                a[lda*j+k] = t;
                                        }
                                        daxpy(n-(k+1),t,&a[lda*k+k+1],1,
                                              &a[lda*j+k+1],1);
                                } 
                        }
                        else { 
                                *info = k;
                        }
                } 
        }
        ipvt[n-1] = n-1;
        if (a[lda*(n-1)+(n-1)] == ZERO) *info = n-1;
        return;
}
Exemplo n.º 24
0
void lbfgs
   (int          n,              /* I  num unknowns = 3 * num atoms */
    real         stop_tol,       /* I  tol for ||g||/sqrt(n)        */
    int          itmax,          /* I  max num iterations allowed   */
    int          itmax_line,
    int*         iter,           /* IO iters required to find min   */
    real*        fret,           /*  O minimum value                */
    int          iprint,
    int          last_call)      /*    not used                     */
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*   Called by exec_minimization in "options.c".
*   This routine is modelled on mm_nlcg in "nlcg.c", by JC Meza.
*   It finds the minimum of the unconstrained molecular potential
*   energy using trust region methods and a limited memory BFGS
*   approximation of the Hessian.  Each trust region subproblem is
*   solved by Powell's dogleg method.
*
*   The L-BFGS structures are currently FORTRAN subroutines that
*   require memory allocation of CMPS and CMPY for work space.
*   These variables must be passed to the FORTRAN subroutines and
*   not altered anywhere else.
*
*   The outline of the algorithm is as follows:
*      Allocate memory, initialize parameters
*      Compute the gradient g_vec
*      LOOP
*        IF ||g_vec|| < tol THEN RETURN
*        Update L-BFGS matrices B and H
*        Compute the dogleg step d_vec
*        Compute the predicted reduction of the quadratic model
*        Compute the actual potential energy reduction
*        IF ared/pred > eta
*          THEN x_vec = x_vec + d_vec
*               TR_size >= TR_size
*               Compute the gradient g_vec at the new point
*          ELSE TR_size < ||d_vec||
*      CONTINUE
*********************************************************************/
{
  real         *x_vec, *new_x_vec, *g_vec, *d_vec, *y_vec;
  int          i;
  int          iter_num, NUPDT;
  real         obj_val, new_obj_val;
  real         dd1, dd2;
  real         eta, pred, ared;
  real         TR_size;
  real         gnorm, xnorm, dnorm;
  real         *CMPS, *CMPY, *tmp_vec;


/*-- Open the status file for saving output. */
  bfgs_fp = fopen (status_file, "w");
  if (bfgs_fp == NULL) {
    fprintf (stderr, "lbfgs: cannot open %s\n", status_file);
    printf ("*** lbfgs: cannot open %s\n", status_file);
    exit (1);
  }

/*--------------------------------------------------------------------
 *   Allocate memory and set up files.
 *-------------------------------------------------------------------*/

  fprintf (stderr, "Allocate space in lbfgs n=%d\n",n);
  x_vec     = (real *) calloc ((n+1) , sizeof(real));
  new_x_vec = (real *) calloc ((n+1) , sizeof(real));
  g_vec     = (real *) calloc ((n+1) , sizeof(real));
  d_vec     = (real *) calloc ((n+1) , sizeof(real));
  y_vec     = (real *) calloc ((n+1) , sizeof(real));
  CMPS      = (real *) calloc ((n*T_LBFGS) , sizeof(real));
  CMPY      = (real *) calloc ((n*T_LBFGS) , sizeof(real));
  tmp_vec   = (real *) calloc ((n+1) , sizeof(real));

/*--------------------------------------------------------------------
 *   Evaluate the objective and its gradient at the start point.
 *-------------------------------------------------------------------*/

/*  obj_val = potential (str, coor, force);
  force_to_grad0 (str, force, g_vec);
 */

/*--------------------------------------------------------------------
 *   Initialize the trust region algorithm parameters.
 *-------------------------------------------------------------------*/

  eta = 0.3;                   /*-- step acceptance threshold */

  TR_size = 1.0;

  iter_num = 0;                /*-- number inner iterations   */
  NUPDT    = 1;                /*-- number L-BFGS B updates   */

  gnorm = dnrm2 (n, g_vec, 1);

  fprintf(bfgs_fp, "\n\t\t Steepest descent with Trust Regions\n");
  fprintf(bfgs_fp, "     Iter     f(x)       ||grad||/n      Delta      ||step||       ared         pred\n");
  fprintf(bfgs_fp,"    %5d %12.4e %12.4e %12.4e\n",
          iter_num, obj_val, gnorm/sqrt(n), TR_size);

/*--------------------------------------------------------------------
 *   Begin the main loop.
 *-------------------------------------------------------------------*/

  while (iter_num < itmax) {

    iter_num++;

    dd1 = g_vec[0];
    for (i=1; i<n; i++)
      if (g_vec[i] > dd1)  dd1 = g_vec[i];
    if (dd1 <= stop_tol) {
/*-- Exit main loop IF ||g||_2 sufficiently small. */
/*    if (gnorm <= (stop_tol * sqrt(n))) {*/
      xnorm = dnrm2 (n, x_vec, 1);
      fprintf (stderr,
               " step_size: %5d   energy: %12.4f  |Grad|: %10.4f |X|: %10.4f\n",
               iter_num, obj_val, gnorm/sqrt((real)(n)),
               xnorm/sqrt((real)(n)));
      break;
    }

/*-- Solve the dogleg subproblem. */
    Dogleg (n, g_vec, gnorm, TR_size, NUPDT, T_LBFGS, CMPS, CMPY,
            tmp_vec, d_vec, &dnorm);

/*-- Compute pred = - g'd - 1/2 d'Bd. */

    dd1 = ddot (n, g_vec, 1, d_vec, 1);
    i = T_LBFGS;
    multbv_ (&n, &i, d_vec, tmp_vec, &NUPDT, CMPS, CMPY);
    dd2 = ddot (n, d_vec, 1, tmp_vec, 1);

    pred = -dd1 - (0.5 * dd2);

    if (pred < sqrt(MMIN)) {
      printf ("*** Predicted reduction not positive.  <lbfgs>\n");
      printf ("    pred = %15.6e\n", pred);
      exit (1);
    }

/*-- Compute ared by evaluating the objective at the trial point. */

    dcopy (n, x_vec, 1, new_x_vec, 1);
    daxpy (n, 1.0, d_vec, 1, new_x_vec, 1);

/*  call evalf
    p_to_coor0 (str, new_x_vec, coor);
    new_obj_val = potential (str, coor, force);
*/
    ared = obj_val - new_obj_val;

/*-- Decide whether to take the step.
 *-- If yes, then increase TR_size, compute the gradient, and
 *-- update the L-BFGS approximations.
 *-- If no, then decrease TR_size to a fraction of ||d||_2. */

    if ((ared / pred) >= eta) {

/*-- Increase the trust region size. */
      if (ared / pred >= 0.9) {
        dd1 = 10.0 * dnorm;
        if (dd1 > TR_size) TR_size = dd1;
      } else {
        dd1 = 2.0 * dnorm;
        if (dd1 > TR_size) TR_size = dd1;
      }
      if (TR_size > 1.0e3)  TR_size = 1.0e3;

/*-- Get the gradient from the previously calculated force vector.
 *-- Set y_vec = new gradient - old gradient. */
      dcopy (n, g_vec, 1, y_vec, 1);
      dscal (n, -1.0, y_vec, 1);
/* get grad
      force_to_grad0 (str, force, g_vec);
 */
      daxpy (n, 1.0, g_vec, 1, y_vec, 1);

/*-- Update the L-BFGS and inverse L-BFGS approximations. */
      Update_lbfgs (n, T_LBFGS, d_vec, y_vec, tmp_vec, &NUPDT, CMPS, CMPY);

      dcopy (n, new_x_vec, 1, x_vec, 1);

      gnorm = dnrm2 (n, g_vec, 1);
      obj_val = new_obj_val;

/*      p_to_coor0 (str, x_vec, coor); */
      fprintf(bfgs_fp,"    %5d %12.4e %12.4e %12.4e %12.4e %12.3e %12.3e\n",
              iter_num, obj_val, gnorm/sqrt(n), TR_size, dnorm, ared, pred);

    } else {

/*-- Decrease the trust region size by linear interpolation. */
      dd1 = (1.0 - eta) / (1.0 - (ared / pred));
      if (dd1 < 0.1)  dd1 = 0.1;
      if (dd1 > 0.5)  dd1 = 0.5;
      TR_size = dd1 * dnorm;

      fprintf(bfgs_fp,"rej %5d %12.4e %12.4e %12.4e %12.4e %12.3e %12.3e\n",
              iter_num, obj_val, gnorm/sqrt(n), TR_size, dnorm, ared, pred);

      if (TR_size < (100.0 * MCHEPS)) {
        printf ("*** Trust region too small to continue.  <lbfgs>\n");
        fprintf (bfgs_fp, "*** Trust region too small to continue.\n");
        fflush (bfgs_fp);
        exit (1);
      }
    }

  }  /***  end of while(iter_num < itmax)  ***/

/*--------------------------------------------------------------------
 *   Clean up and exit.
 *-------------------------------------------------------------------*/

  *iter = iter_num;
  *fret = obj_val;

  free (x_vec);
  free (new_x_vec);
  free (g_vec);
  free (d_vec);

  return;
}
Exemplo n.º 25
0
int
iterateDynamics(std::vector<Robot *> robotVec,
		std::vector<DynamicBody *> bodyVec,
		DynamicParameters *dp)		
{
  double h = dp->timeStep;
  bool useContactEps = dp->useContactEps;
  static double Jcg_tmp[9],Jcg_B[9],Jcg_N[9],Jcg_N_inv[9],R_N_B[9];
  static double db0=0.0,tmp3[3];
  static mat3 Rot;
  static int info;
  World *myWorld;
  KinematicChain *chain;
  int numBodies = bodyVec.size(),errCode = 0;
  int numRobots = robotVec.size();
  int numJoints=0;
  int numDOF=0;
  int bn,cn,i,j;
  int Mrows,Dcols,Arows,Hrows,Hcols,Nurows,Nucols;
  int numDOFLimits=0;

  std::list<Contact *> contactList;
  std::list<Contact *> objContactList;
  std::list<Contact *>::iterator cp;

  //  unsigned long dmark = dmalloc_mark();

  double *ql = new double[7*numBodies];
  double *qnew = new double[7*numBodies];
  double *vl = new double[6*numBodies];
  double *vlnew = new double[6*numBodies];
  double *M = new double[(6*numBodies)*(6*numBodies)];
  double *M_i = new double[(6*numBodies)*(6*numBodies)];
  double *fext = new double[6*numBodies];

  // LCP matrix
  double *A;

  // LCP vectors
  double *g,*lambda;
  double *predLambda = NULL; //used for debugging the prediction of LCP basis

  // main matrices for contact constraints
  double *H;

  // main matrices for joint constraints
  double *Nu;

  // main vector for contact constraints
  double *k;
  
  // main vectors for joint constraints
  double *eps;

  // intermediate matrices for contact constraints
  double *HtM_i,*v1;

  // intermediate matrices for contact constraints
  double *v2;

  // intermediate matrices for case of both joint and contact constraints
  double *NutM_i,*NutM_iNu,*INVNutM_iNu,*INVNutM_iNuNut;
  double *INVNutM_iNuNutM_i,*INVNutM_iNuNutM_iH;

  // intermediate vectors for case of both joint and contact constraints
  double *NutM_ikminuseps,*INVNutM_iNuNutM_ikminuseps;

  double *currq,*currM;

  Mrows = 6*numBodies;

  myWorld = bodyVec[0]->getWorld();

  std::map<Body*, int> islandIndices;
  for (i=0;i<myWorld->getNumBodies();i++) {
	islandIndices.insert( std::pair<Body*, int>(myWorld->getBody(i), -1) );
  }
  for (i=0;i<numBodies;i++) {
	islandIndices[ bodyVec[i] ] = i;
  }

  // count the joints and DOF, and the joint coupling constraints
  int numCouplingConstraints = 0;
  for (i=0;i<numRobots;i++) {
    numDOF += robotVec[i]->getNumDOF();
    for (j=0;j<robotVec[i]->getNumChains();j++) {
      chain = robotVec[i]->getChain(j);
      numJoints += chain->getNumJoints();
    }
	for (j=0;j<robotVec[i]->getNumDOF();j++) {
	  numCouplingConstraints += robotVec[i]->getDOF(j)->getNumCouplingConstraints();
	  numDOFLimits += robotVec[i]->getDOF(j)->getNumLimitConstraints();
	}
  }

  DBGP("Dynamics time step: " << h);
  DBGP("numJoints: " << numJoints);

  // count the total number of joints and contacts
  int numContacts = 0;
  int numTotalFrictionEdges = 0;
  int numDynJointConstraints=0;
  for (bn=0;bn<numBodies;bn++) {
    //count joints
    if (bodyVec[bn]->getDynJoint()) {
	  int numCon = bodyVec[bn]->getDynJoint()->getNumConstraints();
	  numDynJointConstraints += numCon;
	  DBGP(bodyVec[bn]->getName().latin1() << ": " << numCon << " constraints");
    }
	//count contacts
    objContactList = bodyVec[bn]->getContacts();
    for (cp=objContactList.begin();cp!=objContactList.end();cp++) {
      // check if the mate of this contact is already in the contact list
      if (std::find(contactList.begin(),contactList.end(),(*cp)->getMate()) == contactList.end()) {
		numContacts++;
		numTotalFrictionEdges += (*cp)->numFrictionEdges;
		contactList.push_back(*cp);
      }
	}
  }

  DBGP("Num contacts: " << numContacts);
  DBGP("Num friction edges: " << numTotalFrictionEdges);
  DBGP("Num dynjoint: " << numDynJointConstraints);

  // zero out matrices
  dcopy(Mrows*Mrows,&db0,0,M,1);
  dcopy(Mrows*Mrows,&db0,0,M_i,1);
  dcopy(Mrows,&db0,0,fext,1);

  //allocate the joint constraint matrices
  if (numJoints) {
    Nurows = Mrows;
    Nucols = numDynJointConstraints + numCouplingConstraints;
    DBGP("Nucols: " << Nucols);

    Nu = new double[Nurows * Nucols];
    dcopy(Nurows*Nucols,&db0,0,Nu,1);

    eps = new double[Nucols];
    dcopy(Nucols,&db0,0,eps,1);
    Arows = Mrows+Nucols;
  }
    
  // allocate the LCP matrix
  if (numContacts || numDOFLimits) {
	Dcols = numTotalFrictionEdges;

    DBGP("numContacts " << numContacts);	
    DBGP("Dcols " << Dcols);
    DBGP("numDOFLimits " << numDOFLimits);

    Hrows = Mrows;
    Hcols = Dcols + 2*numContacts + numDOFLimits;
    H = new double[Hrows * Hcols];

    dcopy(Hrows*Hcols,&db0,0,H,1);

    v1 = new double[Hrows * Hcols];
    v2 = new double[Hrows];
    dcopy(Hrows*Hcols,&db0,0,v1,1);
    dcopy(Hrows,&db0,0,v2,1);

    k = new double[Mrows]; //holds mass*previous velocity and external impulses
    Arows = Hcols;
    lambda = new double[Arows];  // the LCP solution    
  } else {
    Dcols = 0;
  }

  // allocate the constraint matrix
  if (numJoints || numContacts) {    
    A = new double[Arows*Arows];
    g = new double[Arows];

    dcopy(Arows*Arows,&db0,0,A,1); 
    dcopy(Arows,&db0,0,g,1); 
  }

  // compute mass matrix and external forces
  for (bn=0;bn<numBodies;bn++) {
	memcpy(vl+6*bn,bodyVec[bn]->getVelocity(),6*sizeof(double));
	memcpy(vlnew+6*bn,bodyVec[bn]->getVelocity(),6*sizeof(double));

    memcpy(ql+7*bn,bodyVec[bn]->getPos(),7*sizeof(double));    
    memcpy(qnew+7*bn,bodyVec[bn]->getPos(),7*sizeof(double));

    currq = qnew + 7*bn;    
    Quaternion tmpQuat(currq[3],currq[4],currq[5],currq[6]);
    tmpQuat.ToRotationMatrix(Rot);   

    // The rotation matrix returned by ToRotationMatrix is expressed as
    // a graphics style rot matrix (new axes are in rows), the R_N_B matrix
    // is a robotics style rot matrix (new axes in columns)
    
    R_N_B[0] = Rot[0];  R_N_B[3] = Rot[1];  R_N_B[6] = Rot[2];
    R_N_B[1] = Rot[3];  R_N_B[4] = Rot[4];  R_N_B[7] = Rot[5];
    R_N_B[2] = Rot[6];  R_N_B[5] = Rot[7];  R_N_B[8] = Rot[8];

    // Jcg_N = R_N_B * Jcg_B * R_N_B'; 
    // where Jcg_B is inertia matrix in body coords
    //       Jcg_N is inertia matrix in world coords ?
    memcpy(Jcg_B,bodyVec[bn]->getInertia(),9*sizeof(double));
	//multiply by mass
	dscal(9, bodyVec[bn]->getMass(), Jcg_B, 1);
    dgemm("N","N",3,3,3,1.0,R_N_B,3,Jcg_B,3,0.0,Jcg_tmp,3);
    dgemm("N","T",3,3,3,1.0,Jcg_tmp,3,R_N_B,3,0.0,Jcg_N,3);

	if ((info = invertMatrix(3,Jcg_N,Jcg_N_inv))) {
      printf("In iterateDynamics, inertia matrix inversion failed (info is %d)\n",info);
	  fprintf(stderr,"%f %f %f\n",Jcg_B[0], Jcg_B[1], Jcg_B[2]);
	  fprintf(stderr,"%f %f %f\n",Jcg_B[3], Jcg_B[4], Jcg_B[5]);
	  fprintf(stderr,"%f %f %f\n",Jcg_B[6], Jcg_B[7], Jcg_B[8]);
	  fprintf(stderr,"Body is %s\n",bodyVec[bn]->getName().latin1());
	}
    
    currM = M+((6*bn)*Mrows + bn*6);  //point to the correct block of M
    
    currM[0]              = bodyVec[bn]->getMass();
    currM[6*numBodies+1]  = bodyVec[bn]->getMass();
    currM[12*numBodies+2] = bodyVec[bn]->getMass();
    fillMatrixBlock(Jcg_N,3,3,3,5,5,currM,Mrows);
  
    currM = M_i+((6*bn)*Mrows + bn*6);//point to correct block of M_i

    currM[0]         = 1.0/bodyVec[bn]->getMass();
    currM[Mrows+1]   = 1.0/bodyVec[bn]->getMass();
    currM[2*Mrows+2] = 1.0/bodyVec[bn]->getMass();
    fillMatrixBlock(Jcg_N_inv,3,3,3,5,5,currM,Mrows);

    // compute external wrench
    // fext = [ 0 0 -9810.0*mass -[ang_vel_N x (Jcg_N * ang_vel_N)] ]
	//based on this, it would appear that graspit force units are N*1.0e6
	fext[6*bn+2] = -9810.0 * bodyVec[bn]->getMass() * dp->gravityMultiplier;  // force of gravity
	// fext[6*bn+2] = 0;  // NO force of gravity

    dgemv("N",3,3,1.0,Jcg_N,3,&vl[6*bn+3],1,0.0,tmp3,1);  // inertial moments
    fext[6*bn+3] = - (vl[6*bn+4]*tmp3[2] - vl[6*bn+5]*tmp3[1]);
    fext[6*bn+4] = - (vl[6*bn+5]*tmp3[0] - vl[6*bn+3]*tmp3[2]);
    fext[6*bn+5] = - (vl[6*bn+3]*tmp3[1] - vl[6*bn+4]*tmp3[0]);

    double ForcesToBodyFrame[36];
    transf invBody = bodyVec[bn]->getTran().inverse();
    vec3 invBodyTransl = invBody.translation();
    buildForceTransform(invBody,invBodyTransl,ForcesToBodyFrame);
	DBGP("fext initial: ");
    DBGST( disp_mat(stdout,&fext[6*bn],1,6,0) );

    // add any other wrenches that have accumulated on the body
    daxpy(6,1.0,bodyVec[bn]->getExtWrenchAcc(),1,&fext[6*bn],1);
	DBGP("fext with accumulated wrench: ");
    DBGST( disp_mat(stdout,&fext[6*bn],1,6,0) );

	if (numContacts||numDOFLimits) {
      // k = Mv_l + hfext
      currM = M+((6*bn)*Mrows + bn*6);  //point to the correct block of M
      dgemv("N",6,6,1.0,currM,Mrows,vl+6*bn,1,0.0,k+6*bn,1);
    }
  }

  if (numJoints) {
    int ncn = 0;
	int hcn = 0;
	for (i=0;i<numBodies;i++) {
	  if (bodyVec[i]->getDynJoint())
		bodyVec[i]->getDynJoint()-> buildConstraints(Nu,eps,numBodies,islandIndices,ncn);
	}
	for (i=0;i<numRobots;i++) {
      robotVec[i]->buildDOFLimitConstraints(islandIndices,numBodies,H,g,hcn);
      robotVec[i]->buildDOFCouplingConstraints(islandIndices,numBodies,Nu,eps,ncn);
	}
	for (i=0;i<Nucols;i++) {
	  eps[i] *= ERP/h;
	}
	for (i=0; i<hcn; i++) {
		g[i] *= ERP/h;
	}
  }

  // add contacts to the LCP
  if (!contactList.empty()) {
    DBGP("processing contacts");
    double Ftform_N_C[36];
    
    // A is square
    double *Wn = &H[numDOFLimits*Hrows];
    double *D  = &H[(numDOFLimits+numContacts)*Hrows];
    
    double *E =		&A[(numDOFLimits+numContacts+Dcols)*Arows + numDOFLimits+numContacts];
    double *negET = &A[(numDOFLimits+numContacts)*Arows + numDOFLimits+numContacts+Dcols]; 
    double *MU    = &A[numDOFLimits*Arows + numDOFLimits+numContacts+Dcols];
    double *contactEps = &g[numDOFLimits];

	int frictionEdgesCount = 0;
    for (cp=contactList.begin(),cn=0; cp!=contactList.end(); cp++,cn++){

      //DBGP("contact " << cn);
      transf cf  = (*cp)->getContactFrame() *  (*cp)->getBody1Tran();
      transf cf2 = (*cp)->getMate()->getContactFrame() * (*cp)->getBody2Tran();

      DBGP("CONTACT DISTANCE: " << (cf.translation() - cf2.translation()).len());
      if (useContactEps) {
            contactEps[cn] = MIN(0.0,-ERP/h *
      			(Contact::THRESHOLD/2.0 - (cf.translation() - cf2.translation()).len()));
	  }
      DBGP(" EPS: " << contactEps[cn]);
      vec3 normal(cf.affine().element(2,0), cf.affine().element(2,1), cf.affine().element(2,2));
        
      // find which body is this contact from
      for (bn=0;bn<numBodies;bn++)
	    if ((*cp)->getBody1() == bodyVec[bn]) break;
      if (bn<numBodies) {
		//????? this doesn't seem correct
       	vec3 radius = cf.translation() - ( bodyVec[bn]->getCoG() * (*cp)->getBody1Tran() - position::ORIGIN );

	    //	radius = radius / 1000.0;  // convert to meters

		vec3 RcrossN = radius * normal;
		DBGP("body1 normal: " << normal);
		DBGP("body1 radius: " << radius);

		Wn[cn*Hrows+6*bn]   = normal.x();
		Wn[cn*Hrows+6*bn+1] = normal.y();
		Wn[cn*Hrows+6*bn+2] = normal.z();
		Wn[cn*Hrows+6*bn+3] = RcrossN.x();
		Wn[cn*Hrows+6*bn+4] = RcrossN.y();
		Wn[cn*Hrows+6*bn+5] = RcrossN.z();
	
		vec3 bodyOrigin = bodyVec[bn]->getCoG() * (*cp)->getBody1Tran() - position::ORIGIN;
		buildForceTransform(cf,bodyOrigin,Ftform_N_C);

		/* dgemm("N","N", 6,Contact::numFrictionEdges,6, 1.0,Ftform_N_C,6, Contact::frictionEdges,6,
			    0.0,&D[Contact::numFrictionEdges*cn*Hrows+6*bn],Hrows); */
				
		dgemm("N","N",
				6,(*cp)->numFrictionEdges,6,  //m, n, k
				1.0,Ftform_N_C,6,			 //alfa, A, lda
				(*cp)->frictionEdges,6,		//B, ldb
			    0.0,&D[ frictionEdgesCount*Hrows+6*bn],Hrows);	//beta, C, ldc
	  }

      //find the other body
      for(bn=0;bn<numBodies;bn++)
		if ((*cp)->getBody2() == bodyVec[bn]) break;
      if (bn<numBodies) {

        //normal = vec3(cf2.affine().element(2,0), cf2.affine().element(2,1),cf2.affine().element(2,2));
		normal = -normal;

		//vec3 radius = cf2.translation() - (bodyVec[bn]->getCoG() * (*cp)->getBody2Tran() - position::ORIGIN);
		vec3 radius = cf.translation() - (bodyVec[bn]->getCoG() * (*cp)->getBody2Tran() - position::ORIGIN);
		vec3 RcrossN = radius * normal;
		DBGP("body2 normal: " << normal);
		DBGP("body2 radius: " << radius);

		Wn[cn*Hrows+6*bn]   = normal.x();
		Wn[cn*Hrows+6*bn+1] = normal.y();
		Wn[cn*Hrows+6*bn+2] = normal.z();
		Wn[cn*Hrows+6*bn+3] = RcrossN.x();
		Wn[cn*Hrows+6*bn+4] = RcrossN.y();
		Wn[cn*Hrows+6*bn+5] = RcrossN.z();
	
		vec3 bodyOrigin = bodyVec[bn]->getCoG()*(*cp)->getBody2Tran() - position::ORIGIN;
		buildForceTransform(cf,bodyOrigin,Ftform_N_C);
		//buildForceTransform(cf2,bodyOrigin,Ftform_N_C);

/*		dgemm("N","N",6,Contact::numFrictionEdges,6,-1.0,Ftform_N_C,6, Contact::frictionEdges,6,
			  0.0,&D[Contact::numFrictionEdges*cn*Hrows+6*bn],Hrows);*/
		//original graspit had a -1.0 here in front of Ftform_N_C
		dgemm("N","N",
				6,(*cp)->numFrictionEdges,6,
				-1.0,Ftform_N_C,6,
				(*cp)->frictionEdges,6,
				0.0,&D[ frictionEdgesCount*Hrows+6*bn ],Hrows);
      }

      //for (i=cn*Contact::numFrictionEdges; i<(cn+1)*Contact::numFrictionEdges; i++) {
	  for (i=frictionEdgesCount; i<frictionEdgesCount+(*cp)->numFrictionEdges; i++) {
		E[cn*Arows+i] = 1.0;
		negET[i*Arows+cn] = -1.0;
      }      
      MU[cn*Arows + cn] = (*cp)->getCof();
	  frictionEdgesCount += (*cp)->numFrictionEdges;
    }
  }
  
  if (numContacts || numDOFLimits)
    daxpy(Mrows,h,fext,1,k,1);

  if (numJoints && (numContacts || numDOFLimits)) {
    // Cnu1 = INV(Nu'M_iNu)Nu'M_iH
    // Cnu2 = INV(Nu'M_iNu)(Nu'M_ik-eps)
    // v1 = -NuCnu1
    // v2 = -NuCnu2
    
    NutM_i = new double[Nucols*Mrows];
    NutM_iNu = new double[Nucols*Nucols];
    INVNutM_iNu = new double[Nucols*Nucols];
    INVNutM_iNuNut = new double[Nucols*Nurows];
    INVNutM_iNuNutM_i = new double[Nucols*Mrows];
    INVNutM_iNuNutM_iH = new double[Nucols*Hcols];
    

    NutM_ikminuseps = new double[Nucols];
    INVNutM_iNuNutM_ikminuseps = new double[Nucols];
    
    dgemm("T","N",Nucols,Mrows,Mrows,1.0,Nu,Nurows,M_i,Mrows, 0.0,NutM_i,Nucols);
    dgemm("N","N",Nucols,Nucols,Mrows,1.0,NutM_i,Nucols,Nu,Nurows, 0.0,NutM_iNu,Nucols);
    if ((info = invertMatrix(Nucols,NutM_iNu,INVNutM_iNu)))
      printf("In iterateDynamics, NutM_iNu matrix inversion failed (info is %d)\n",info);
    
    dgemm("N","T",Nucols,Nurows,Nucols,1.0,INVNutM_iNu,Nucols,Nu,Nurows,
	  0.0,INVNutM_iNuNut,Nucols);
    dgemm("N","N",Nucols,Mrows,Mrows,1.0,INVNutM_iNuNut,Nucols,M_i,Mrows,
	  0.0,INVNutM_iNuNutM_i,Nucols);
    dgemm("N","N",Nucols,Hcols,Mrows,1.0,INVNutM_iNuNutM_i,Nucols,H,Hrows,
	  0.0,INVNutM_iNuNutM_iH,Nucols);
    dgemm("N","N",Nurows,Hcols,Nucols,-1.0,Nu,Nurows,INVNutM_iNuNutM_iH,Nucols,
	  0.0,v1,Nurows);

    dgemv("N",Nucols,Mrows,1.0,NutM_i,Nucols,k,1,0.0,NutM_ikminuseps,1);
    daxpy(Nucols,-1.0,eps,1,NutM_ikminuseps,1);

    dgemv("N",Nucols,Nucols,1.0,INVNutM_iNu,Nucols,NutM_ikminuseps,1,
	  0.0,INVNutM_iNuNutM_ikminuseps,1);

    dgemv("N",Nurows,Nucols,-1.0,Nu,Nurows,INVNutM_iNuNutM_ikminuseps,1,
	  0.0,v2,1);
  }

  if (numContacts || numDOFLimits) {
    // in the simple case without joint constraints
    // A = H'M_iv1 + N
    // g = H'M_iv2
    // where N is already stored in A
    // v1 is the first term of v_(l+1) and v2 is the second term
    // v_l+1 = M_i(v1 lambda + v2) = M_i(H lambda + k)
    // k is (Mv_l + hfext)

    //add H to v1
    //add k to v2
    DBGP("k:");
    DBGST( disp_mat(stdout,k,1,Mrows,0) );
    DBGP("first g:");
    DBGST( disp_mat(stdout,g,1,Arows,0) );

	daxpy(Mrows*Hcols,1.0,H,1,v1,1);
    daxpy(Mrows,1.0,k,1,v2,1);

    // build A and g
    HtM_i = new double[Hcols*Mrows];
    dgemm("T","N",Hcols,Mrows,Hrows,1.0,H,Hrows,M_i,Mrows,0.0,HtM_i,Hcols);

    dgemm("N","N",Hcols,Hcols,Mrows,1.0,HtM_i,Hcols,v1,Mrows,1.0,A,Arows);
    //    dgemv("N",Hcols,Mrows,1.0,HtM_i,Hcols,v2,1,0.0,g,1);
    dgemv("N",Hcols,Mrows,1.0,HtM_i,Hcols,v2,1,1.0,g,1);
  }

	int frictionEdgesCount;
	//debug information; can be removed

	if (numContacts || numDOFLimits) {
		bool lemkePredict = false;
		if (lemkePredict) {
			//try to use information from previous time steps to guess a good starting basis for Lemke's algorithm
			assembleLCPPrediction(lambda, Arows, numDOFLimits, &contactList);
	        predLambda = new double[Arows];  // keep a copy of the prediction so we can check it later
			dcopy(Arows, lambda, 1, predLambda, 1);
//			fprintf(stderr,"Prediction: \n");
//			printLCPBasis(predLambda, Arows, numDOFLimits, numContacts);
		}

	    //    double startTime;   
	    //    startTime = getTime();
   
		DBGP("g:");
		DBGST( for (i=0;i<Arows;i++) printf("%le ",g[i]); );
NLuint nlSolve_GMRES() {

    NLdouble* b        = nlCurrentContext->b ;
    NLdouble* x        = nlCurrentContext->x ;
    NLdouble  eps      = nlCurrentContext->threshold ;
    NLint    max_iter  = nlCurrentContext->max_iterations ;
    NLint    n         = nlCurrentContext->n ;
    NLint    m         = nlCurrentContext->inner_iterations ;

    typedef NLdouble *NLdoubleP;
    NLdouble *V   = NL_NEW_ARRAY(NLdouble, n*(m+1)   ) ;
    NLdouble *U   = NL_NEW_ARRAY(NLdouble, m*(m+1)/2 ) ;
    NLdouble *r   = NL_NEW_ARRAY(NLdouble, n         ) ;
    NLdouble *y   = NL_NEW_ARRAY(NLdouble, m+1       ) ;
    NLdouble *c   = NL_NEW_ARRAY(NLdouble, m         ) ;
    NLdouble *s   = NL_NEW_ARRAY(NLdouble, m         ) ;
    NLdouble **v  = NL_NEW_ARRAY(NLdoubleP, m+1      ) ;
    NLdouble * Ax = NL_NEW_ARRAY(NLdouble,nlCurrentContext->n);
    NLdouble accu =0.0;
    NLint i, j, io, uij, u0j ; 
    NLint its = -1 ;
    NLdouble beta, h, rd, dd, nrm2b ;

    for ( i=0; i<=m; ++i ){
        v[i]=V+i*n ;
    }
    
    nrm2b=dnrm2(n,b,1);
    io=0;
    do  { /* outer loop */
        ++io;
        nlCurrentContext->matrix_vector_prod(x,r);
        daxpy(n,-1.,b,1,r,1);
        beta=dnrm2(n,r,1);
        dcopy(n,r,1,v[0],1);
        dscal(n,1./beta,v[0],1);
        
        y[0]=beta;
        j=0;
        uij=0;
        do { /* inner loop: j=0,...,m-1 */
            u0j=uij;
            nlCurrentContext->matrix_vector_prod(v[j],v[j+1]);
            dgemv(
                Transpose,n,j+1,1.,V,n,v[j+1],1,0.,U+u0j,1
            );
            dgemv(
                NoTranspose,n,j+1,-1.,V,n,U+u0j,1,1.,v[j+1],1
            );
            h=dnrm2(n,v[j+1],1);
            dscal(n,1./h,v[j+1],1);
            for (i=0; i<j; ++i ) { /* rotiere neue Spalte */
                double tmp = c[i]*U[uij]-s[i]*U[uij+1];
                U[uij+1]   = s[i]*U[uij]+c[i]*U[uij+1];
                U[uij]     = tmp;
                ++uij;
            }
            { /* berechne neue Rotation */
                rd     = U[uij];
                dd     = sqrt(rd*rd+h*h);
                c[j]   = rd/dd;
                s[j]   = -h/dd;
                U[uij] = dd;
                ++uij;
            }
            { /* rotiere rechte Seite y (vorher: y[j+1]=0) */
                y[j+1] = s[j]*y[j];
                y[j]   = c[j]*y[j];
            }
            ++j;
        } while ( 
            j<m && fabs(y[j])>=eps*nrm2b 
        ) ;
        { /* minimiere bzgl Y */
            dtpsv(
                UpperTriangle,
                NoTranspose,
                NotUnitTriangular,
                j,U,y,1
            );
            /* correct X */
            dgemv(NoTranspose,n,j,-1.,V,n,y,1,1.,x,1);
        }
    } while ( fabs(y[j])>=eps*nrm2b && (m*(io-1)+j) < max_iter);
    
    /* Count the inner iterations */
    its = m*(io-1)+j;

    nlCurrentContext->matrix_vector_prod(x,Ax);
    for(i = 0 ; i < n ; ++i)
        accu+=(Ax[i]-b[i])*(Ax[i]-b[i]);
    printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/nrm2b);
    NL_DELETE_ARRAY(Ax);
    NL_DELETE_ARRAY(V) ;
    NL_DELETE_ARRAY(U) ;
    NL_DELETE_ARRAY(r) ;
    NL_DELETE_ARRAY(y) ;
    NL_DELETE_ARRAY(c) ;
    NL_DELETE_ARRAY(s) ;
    NL_DELETE_ARRAY(v) ;
    
    return its;
}
Exemplo n.º 27
0
int main (int    argc,
	  char** argv)
/* ------------------------------------------------------------------------- *
 * Wrapper.
 * ------------------------------------------------------------------------- */
{
  char   buf[STR_MAX], fields[STR_MAX], fmt[STR_MAX];
  int    i, j, n, np, nz, nel, swab = 0;
  int    nfields, nplane;
  FILE   *fp_in = stdin, *fp_out = stdout;
  double **idata, **odata, *vcmpt1, *vcmpt2;

  getargs (argc, argv, &fp_in);
  format  (fmt);

  while (fgets (buf, STR_MAX, fp_in)) {

    /* -- Process header. */

    fputs (buf, fp_out); fgets (buf, STR_MAX, fp_in);
    fputs (buf, fp_out); fgets (buf, STR_MAX, fp_in);

    if (sscanf (buf, "%d%*s%d%d", &np, &nz, &nel) != 3)
      message (prog, "unable to read the file size", ERROR);

    if (nz != 2) {
      sprintf (fields, "input must have nz = 2 (here %1d)", nz);
      message (prog, fields, ERROR);
    }
    fprintf (fp_out, hdr_fmt[2], np, np, 1, nel);

    n = 6;
    while (--n) {
      fgets (buf, STR_MAX, fp_in);
      fputs (buf, fp_out);
    }

    fgets(fields, STR_MAX, fp_in);
    memset(fields+25, '\0', STR_MAX-25);
    for (nfields = 0, i = 0; i < 25; i++) if (isalpha(fields[i])) nfields++;
    if (!((nfields == 4) && (strstr (fields, "uvwp"))))
	message (prog, "input must have only fields u v w p.", ERROR);
    fprintf (fp_out, hdr_fmt[8], "uvwpABCDEF");

    fgets (buf, STR_MAX, fp_in);
    for (i = 0; i < strlen (buf); i++) buf[i] = tolower (buf[i]);

    if (!strstr(buf, "binary"))
      message (prog, "input file not binary format", ERROR);
    if (!strstr (buf, "endian"))
      message (prog, "input field file in unknown binary format", WARNING);
    else
      swab = ((strstr (buf, "big") && strstr (fmt, "little")) ||
	      (strstr (fmt, "big") && strstr (buf, "little")) );
    sprintf (buf, "%s %s", "binary", fmt);
    fprintf (fp_out, hdr_fmt[9], buf);

    /* -- Set sizes, allocate storage, set to zero. */

    nplane = np * np * nel;

    idata = dmatrix (0, 3, 0, nplane * 2);
    odata = dmatrix (0, 9, 0, nplane); /* -- uvwpABCDEF = 10 */

    dzero (4*nplane,  idata[0], 1);
    dzero (10*nplane, odata[0], 1);

    /* -- Read in all data fields. */

    for (i = 0; i < nfields; i++) {
      if (fread (idata[i], sizeof (double), nplane * 2, fp_in) != nplane * 2)
	message (prog, "an error occured while reading", ERROR);
      if (swab) dbrev (nplane*2, idata[i], 1, idata[i], 1);
    }

    /* -- Compute A. */

    vcmpt1 = idata[0];		/* -- Real part of u. */
    vcmpt2 = idata[0] + nplane; /* -- Imag part of u. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[4], 1, odata[4], 1);
    dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[4], 1, odata[4], 1);
    dscal  (nplane, 2.0, odata[4], 1);

    /* -- Compute B . */

    vcmpt1 = idata[0];		/* -- Real part of u. */
    vcmpt2 = idata[1];		/* -- Real part of v. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[5], 1, odata[5], 1);

    vcmpt1 = idata[0] + nplane; /* -- Imag part of u. */
    vcmpt2 = idata[1] + nplane;	/* -- Imag part of v. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[5], 1, odata[5], 1);

    dscal  (nplane, 2.0, odata[5], 1);

    /* -- Compute C. */

    vcmpt1 = idata[1];		/* -- Real part of v. */
    vcmpt2 = idata[1] + nplane; /* -- Imag part of v. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[6], 1, odata[6], 1);
    dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[6], 1, odata[6], 1);
    dscal  (nplane, 2.0, odata[6], 1);

    /* -- Compute D . */

    vcmpt1 = idata[0];          /* -- Real part of u. */
    vcmpt2 = idata[2];          /* -- Real part of w. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[7], 1, odata[7], 1);

    vcmpt1 = idata[0] + nplane; /* -- Imag part of u. */
    vcmpt2 = idata[2] + nplane; /* -- Imag part of w. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[7], 1, odata[7], 1);

    dscal  (nplane, 2.0, odata[7], 1);

    /* -- Compute E . */

    vcmpt1 = idata[1];          /* -- Real part of v. */
    vcmpt2 = idata[2];          /* -- Real part of w. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[8], 1, odata[8], 1);

    vcmpt1 = idata[1] + nplane; /* -- Imag part of v. */
    vcmpt2 = idata[2] + nplane; /* -- Imag part of w. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt2, 1, odata[8], 1, odata[8], 1);

    dscal  (nplane, 2.0, odata[8], 1);

    /* -- Compute F. */

    vcmpt1 = idata[2];          /* -- Real part of w. */
    vcmpt2 = idata[2] + nplane; /* -- Imag part of w. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[9], 1, odata[9], 1);
    dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[9], 1, odata[9], 1);
    dscal  (nplane, 2.0, odata[9], 1);

    /* -- Compute p. */

    vcmpt1 = idata[3];		/* -- Real part of p. */
    vcmpt2 = idata[3] + nplane; /* -- Imag part of p. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[2], 1, odata[2], 1);
    dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[2], 1, odata[2], 1);
    dvsqrt (nplane, odata[2], 1, odata[2], 1);

    /* -- Compute u & v. */

    vcmpt1 = idata[0];		/* -- Real part of u. */
    vcmpt2 = idata[0] + nplane; /* -- Imag part of u. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[0], 1, odata[0], 1);
    dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[0], 1, odata[0], 1);
    dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[1], 1, odata[1], 1);
    dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[1], 1, odata[1], 1);
    vcmpt1 = idata[1];		/* -- Real part of v. */
    vcmpt2 = idata[1] + nplane; /* -- Imag part of v. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[0], 1, odata[0], 1);
    dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[0], 1, odata[0], 1);
    dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[1], 1, odata[1], 1);
    dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[1], 1, odata[1], 1);
    vcmpt1 = idata[1];		/* -- Real part of w. */
    vcmpt2 = idata[1] + nplane; /* -- Imag part of w. */
    dvvtvp (nplane, vcmpt1, 1, vcmpt1, 1, odata[1], 1, odata[1], 1);
    dvvtvp (nplane, vcmpt2, 1, vcmpt2, 1, odata[1], 1, odata[1], 1);

    dvsqrt (nplane, odata[0], 1, odata[0], 1);
    dsmul  (nplane, 0.5, odata[0], 1, odata[0], 1);
    dvsqrt (nplane, odata[1], 1, odata[1], 1);
    dsmul  (nplane, 0.5, odata[1], 1, odata[1], 1);

    /* FIXME: compute w if needed in future. */

    /* -- Write out uvwpABCDEF in binary. */

    for (i = 0; i < 10; i++)
      if (fwrite (odata[i], sizeof (double), nplane, fp_out) != nplane)
	message (prog, "an error occured while writing", ERROR);

    freeDmatrix (idata, 0, 0);
    freeDmatrix (odata, 0, 0);
  }

  return EXIT_SUCCESS;
}
Exemplo n.º 28
0
int dgefa ( double a[], int lda, int n, int ipvt[] )

/******************************************************************************/
/*
  Purpose:

    DGEFA factors a real general matrix.

  Modified:

    16 May 2005

  Author:

    C version by John Burkardt.

  Reference:

    Jack Dongarra, Cleve Moler, Jim Bunch and Pete Stewart,
    LINPACK User's Guide,
    SIAM, (Society for Industrial and Applied Mathematics),
    3600 University City Science Center,
    Philadelphia, PA, 19104-2688.
    ISBN 0-89871-172-X

  Parameters:

    Input/output, double A[LDA*N].
    On intput, the matrix to be factored.
    On output, an upper triangular matrix and the multipliers used to obtain
    it.  The factorization can be written A=L*U, where L is a product of
    permutation and unit lower triangular matrices, and U is upper triangular.

    Input, int LDA, the leading dimension of A.

    Input, int N, the order of the matrix A.

    Output, int IPVT[N], the pivot indices.

    Output, int DGEFA, singularity indicator.
    0, normal value.
    K, if U(K,K) == 0.  This is not an error condition for this subroutine,
    but it does indicate that DGESL or DGEDI will divide by zero if called.
    Use RCOND in DGECO for a reliable indication of singularity.
*/
{
  int info;
  int j;
  int k;
  int l;
  double t;
/*
  Gaussian elimination with partial pivoting.
*/
  info = 0;

  for ( k = 1; k <= n-1; k++ )
  {
/*
  Find L = pivot index.
*/
    l = idamax ( n-k+1, a+(k-1)+(k-1)*lda, 1 ) + k - 1;
    ipvt[k-1] = l;
/*
  Zero pivot implies this column already triangularized.
*/
    if ( a[l-1+(k-1)*lda] == 0.0 )
    {
      info = k;
      continue;
    }
/*
  Interchange if necessary.
*/
    if ( l != k )
    {
      t = a[l-1+(k-1)*lda];
      a[l-1+(k-1)*lda] = a[k-1+(k-1)*lda];
      a[k-1+(k-1)*lda] = t;
    }
/*
  Compute multipliers.
*/
    t = -1.0 / a[k-1+(k-1)*lda];

    dscal ( n-k, t, a+k+(k-1)*lda, 1 );
/*
  Row elimination with column indexing.
*/
    for ( j = k+1; j <= n; j++ )
    {
      t = a[l-1+(j-1)*lda];
      if ( l != k )
      {
        a[l-1+(j-1)*lda] = a[k-1+(j-1)*lda];
        a[k-1+(j-1)*lda] = t;
      }
      daxpy ( n-k, t, a+k+(k-1)*lda, 1, a+k+(j-1)*lda, 1 );
    }

  }

  ipvt[n-1] = n;

  if ( a[n-1+(n-1)*lda] == 0.0 )
  {
    info = n;
  }

  return info;
}
Exemplo n.º 29
0
void dqrdc(double a[], int lda, int n, int p, double qraux[], int jpvt[],
           double work[], int job)

/******************************************************************************/
/*
  Purpose:

    DQRDC computes the QR factorization of a real rectangular matrix.

  Discussion:

    DQRDC uses Householder transformations.

    Column pivoting based on the 2-norms of the reduced columns may be
    performed at the user's option.

  Licensing:

    This code is distributed under the GNU LGPL license.

  Modified:

    07 June 2005

  Author:

    C version by John Burkardt.

  Reference:

    Jack Dongarra, Cleve Moler, Jim Bunch and Pete Stewart,
    LINPACK User's Guide,
    SIAM, (Society for Industrial and Applied Mathematics),
    3600 University City Science Center,
    Philadelphia, PA, 19104-2688.
    ISBN 0-89871-172-X

  Parameters:

    Input/output, double A(LDA,P).  On input, the N by P matrix
    whose decomposition is to be computed.  On output, A contains in
    its upper triangle the upper triangular matrix R of the QR
    factorization.  Below its diagonal A contains information from
    which the orthogonal part of the decomposition can be recovered.
    Note that if pivoting has been requested, the decomposition is not that
    of the original matrix A but that of A with its columns permuted
    as described by JPVT.

    Input, int LDA, the leading dimension of the array A.  LDA must
    be at least N.

    Input, int N, the number of rows of the matrix A.

    Input, int P, the number of columns of the matrix A.

    Output, double QRAUX[P], contains further information required
    to recover the orthogonal part of the decomposition.

    Input/output, integer JPVT[P].  On input, JPVT contains integers that
    control the selection of the pivot columns.  The K-th column A(*,K) of A
    is placed in one of three classes according to the value of JPVT(K).
      > 0, then A(K) is an initial column.
      = 0, then A(K) is a free column.
      < 0, then A(K) is a final column.
    Before the decomposition is computed, initial columns are moved to
    the beginning of the array A and final columns to the end.  Both
    initial and final columns are frozen in place during the computation
    and only free columns are moved.  At the K-th stage of the
    reduction, if A(*,K) is occupied by a free column it is interchanged
    with the free column of largest reduced norm.  JPVT is not referenced
    if JOB == 0.  On output, JPVT(K) contains the index of the column of the
    original matrix that has been interchanged into the K-th column, if
    pivoting was requested.

    Workspace, double WORK[P].  WORK is not referenced if JOB == 0.

    Input, int JOB, initiates column pivoting.
    0, no pivoting is done.
    nonzero, pivoting is done.
*/
{
  int jp;
  int j;
  int lup;
  int maxj;
  double maxnrm, nrmxl, t, tt;

  int pl = 1, pu = 0;
  /*
    If pivoting is requested, rearrange the columns.
  */
  if (job != 0) {
    for (j = 1; j <= p; j++) {
      int swapj = (0 < jpvt[j - 1]);
      jpvt[j - 1] = (jpvt[j - 1] < 0) ? -j : j;
      if (swapj) {
        if (j != pl)
          dswap(n, a + 0 + (pl - 1)*lda, 1, a + 0 + (j - 1), 1);
        jpvt[j - 1] = jpvt[pl - 1];
        jpvt[pl - 1] = j;
        pl++;
      }
    }
    pu = p;
    for (j = p; 1 <= j; j--) {
      if (jpvt[j - 1] < 0) {
        jpvt[j - 1] = -jpvt[j - 1];
        if (j != pu) {
          dswap(n, a + 0 + (pu - 1)*lda, 1, a + 0 + (j - 1)*lda, 1);
          jp = jpvt[pu - 1];
          jpvt[pu - 1] = jpvt[j - 1];
          jpvt[j - 1] = jp;
        }
        pu = pu - 1;
      }
    }
  }
  /*
    Compute the norms of the free columns.
  */
  for (j = pl; j <= pu; j++)
    qraux[j - 1] = dnrm2(n, a + 0 + (j - 1) * lda, 1);
  for (j = pl; j <= pu; j++)
    work[j - 1] = qraux[j - 1];
  /*
    Perform the Householder reduction of A.
  */
  lup = i4_min(n, p);
  for (int l = 1; l <= lup; l++) {
    /*
      Bring the column of largest norm into the pivot position.
    */
    if (pl <= l && l < pu) {
      maxnrm = 0.0;
      maxj = l;
      for (j = l; j <= pu; j++) {
        if (maxnrm < qraux[j - 1]) {
          maxnrm = qraux[j - 1];
          maxj = j;
        }
      }
      if (maxj != l) {
        dswap(n, a + 0 + (l - 1)*lda, 1, a + 0 + (maxj - 1)*lda, 1);
        qraux[maxj - 1] = qraux[l - 1];
        work[maxj - 1] = work[l - 1];
        jp = jpvt[maxj - 1];
        jpvt[maxj - 1] = jpvt[l - 1];
        jpvt[l - 1] = jp;
      }
    }
    /*
      Compute the Householder transformation for column L.
    */
    qraux[l - 1] = 0.0;
    if (l != n) {
      nrmxl = dnrm2(n - l + 1, a + l - 1 + (l - 1) * lda, 1);
      if (nrmxl != 0.0) {
        if (a[l - 1 + (l - 1)*lda] != 0.0)
          nrmxl = nrmxl * r8_sign(a[l - 1 + (l - 1) * lda]);
        dscal(n - l + 1, 1.0 / nrmxl, a + l - 1 + (l - 1)*lda, 1);
        a[l - 1 + (l - 1)*lda] = 1.0 + a[l - 1 + (l - 1) * lda];
        /*
          Apply the transformation to the remaining columns, updating the norms.
        */
        for (j = l + 1; j <= p; j++) {
          t = -ddot(n - l + 1, a + l - 1 + (l - 1) * lda, 1, a + l - 1 + (j - 1) * lda, 1)
              / a[l - 1 + (l - 1) * lda];
          daxpy(n - l + 1, t, a + l - 1 + (l - 1)*lda, 1, a + l - 1 + (j - 1)*lda, 1);
          if (pl <= j && j <= pu) {
            if (qraux[j - 1] != 0.0) {
              tt = 1.0 - pow(r8_abs(a[l - 1 + (j - 1) * lda]) / qraux[j - 1], 2);
              tt = r8_max(tt, 0.0);
              t = tt;
              tt = 1.0 + 0.05 * tt * pow(qraux[j - 1] / work[j - 1], 2);
              if (tt != 1.0)
                qraux[j - 1] = qraux[j - 1] * sqrt(t);
              else {
                qraux[j - 1] = dnrm2(n - l, a + l + (j - 1) * lda, 1);
                work[j - 1] = qraux[j - 1];
              }
            }
          }
        }
        /*
          Save the transformation.
        */
        qraux[l - 1] = a[l - 1 + (l - 1) * lda];
        a[l - 1 + (l - 1)*lda] = -nrmxl;
      }
    }
  }
}
Exemplo n.º 30
0
/* This function uses m-estimators to estimate a three vector from 
a cloud of points in n-space.  The approach is comparable to 
computing the center of mass of cloud of equal mass particles.
We use a component median for the initial estimate of center, then compute
the m-estimator based on a Rayleigh distribution keyed on the 
l2 norm amplitude of the vector residual.  This is comparable to the 
estimation of phase in robust estimates of transfer functions
described by Chave and Thomson as we use a penalty function based on
a Rayleigh distribution.  It is not clear to the author if this is
valid for other than 2 vectors, but it surely is not a bad approximation
for 3-vectors.  Higher order spaces should use this with care. 

Arguments:

v -  n by nv matrix containing ensemble of data n-vectors.  These are assumed
	stored as in the blas in a pseudofortran sense as a continuous
	vector of floats with columns sequential (i.e. first column
	of v is elements v[0], v[1], ... , v[n-1] and second column
	starts at v[n].
n - length of vectors (number of rows in v)
nv - number of vectors (number of columns in v)
mode - switch (see below)
minscale - minimum error scale allowed.  Use depends on setting of 
	the mode variable with which it is associated.  Scale factor
	is determined here from interquartiles.  mode is as defined
	in M_estimator_float above using symbols SCALE_IQ_RELATIVE
	and SCALE_IQ_ABSOLUTE.  The former uses a relative scale derived
	from the scale factor determined on the first pass.  That is, 
	the minimum scale alllowed is vmag0*minscale (e.g. 0.01 would
	limit the minimum scale to 1% of the magnitude of the total n
	vector initial estimate.  Absolute scaling
	uses the limit straight up.    
mean - vector of length n to hold result.  Blindly assumed to be already
	allocated of length n.
weight - vector of length to hold final weights used in robust
	estimation.

Function returns a 3 vector it estimates from v.  This is an array
alloced in this function that needs to be managed externally.  

Author:  G Pavlis
Written:  January 2000
*/
#define EPSILON 0.01 /* convergence parameter*/
#define HUBER_LIMIT 2 /* Number of Huber weight iterations */
#define THOMPSON_LIMIT 25 /* Limit on number of iterations using Thompson formula */
#define MIN_DGF 10  /* I've seen the Thompson formula work badly with 
		small degrees of freedom.  When dgf are less than 
		this the Thomson section is skipped */
void M_estimator_double_n_vector(double *v, 
	int n, 
	int nv,
	int mode, 
	double minscale,
	double *mean,
	double *weight)
{
	double *col, *row;  /* work spaces for columns and rows respectively*/
	double sum_weights;
	double *residuals;
	double *delta_mean;  
	int i, j, iteration;
	double vmag,dvmag; 
	double scale, fminsc;
	int ndgf;
	MW_scalar_statistics stats;
	double beta;

	allot(double *,col,n);
	allot(double *,row,nv);
	allot(double *,delta_mean, n);
	allot(double *,residuals,n*nv);

	/* We compute component medians to obtain initial estimate 
	of vector */
	for(i=0;i<n;++i)
	{
		dcopy(nv,v+i,n,row,1);
		stats=MW_calc_statistics_double(row,nv);
		mean[i] = stats.median;
	}
	/* We first do a few passes with the huber formula
	which is less aggressive on outliers, but helps 
	establish a solid value for the scale factor.*/
	iteration =0;
	vmag = dnrm2(n,mean,1);
	for(j=0;j<nv;++j) weight[j] = 1.0; /* done to make sure scale
					is computed correctly on first pass*/
	if(mode==IQ_SCALE_RELATIVE)
	    if(vmag<FLT_EPSILON)
		fminsc= minscale;
	    else
		fminsc = vmag*minscale;
	else
		fminsc = minscale;
	do
	{
		compute_nvector_residuals(v,n,nv,residuals,mean);
		/* This produces weighted residuals -- requires
		weighting formula to use weights 0<w<1 */
		for(j=0;j<nv;++j) dscal(n,weight[i],residuals+j,1);
		scale = compute_nvector_scale(residuals,n,nv,row);
		if(scale < fminsc) scale = fminsc;
		for(i=0;i<n;++i) delta_mean[i] = 0.0;
		for(j=0,sum_weights=0.0;j<nv;++j)
		{
			dcopy(n,residuals+j*n,1,col,1);
			dvmag = dnrm2(n,col,1);
			weight[j] = dhuber(dvmag/scale);
			daxpy(n,weight[j],col,1,delta_mean,1);
			sum_weights += weight[j];
		}
		dscal(n,1.0/sum_weights,delta_mean,1);
		dvmag = dnrm2(n,delta_mean,1);
		for(i=0;i<n;++i) mean[i] += delta_mean[i];
		++iteration;
	}while( ((dvmag/scale) > EPSILON)
		&& (iteration < HUBER_LIMIT) );
	
	/* Now we use Thomson's redescending formula which is the 
	opposite of the huber formula being extremely aggressive 
	on outliers and works only if the scale factor is not 
	too out of line.  It also works badly with low degrees
	of freedom.  Consequently, we return immediately when
	degrees of freedom are below a frozen threshold*/

	ndgf = nv - n;
	if(ndgf<MIN_DGF)
	{
		free(col);
		free(row);
		free(delta_mean);
                free(residuals);
		return;
	}
	/* This is the value of beta recommended by chave and thomson, 1987,
	based on the nvth quantile of the Rayleigh distribution.  I use
	number of degrees of freedom here instead to perhaps more properly
	work with higher order spaces, but use a minimum on ndgf to 
	avoid making the formula unstable.  The thomson formula becomes
	exponential-like with low degrees of freedom, which we need to 
	avoid. */
	beta = sqrt(2.0*log(2.0*((double)ndgf)));
	iteration = 0;
	do
	{
		compute_nvector_residuals(v,n,nv,residuals,mean);
		for(i=0;i<n;++i) delta_mean[i] = 0.0;
		for(j=0;j<nv;++j)
		{
			dcopy(n,residuals+j*n,1,col,1);
			dvmag = dnrm2(n,col,1);
			weight[j] = dthomson(dvmag/scale,beta);
			daxpy(n,weight[j],col,1,delta_mean,1);
			sum_weights += weight[j];
		}
		dscal(n,1.0/sum_weights,delta_mean,1);
		dvmag = dnrm2(n,delta_mean,1);
		for(i=0;i<n;++i) mean[i] += delta_mean[i];
		++iteration;
	}while( ((dvmag/scale) > EPSILON)
		&& (iteration < THOMPSON_LIMIT) );

	free(col);
	free(row);
	free(delta_mean);
	free(residuals);
}