コード例 #1
0
ファイル: ceigs_cs.c プロジェクト: bobbens/ceigs
/**
 * @brief Computes w <- A*v for a sparse matrix A and dense vectors w, v.
 *
 * The output vector is cleared before the product is formed because
 * cs_gaxpy() adds its result onto the destination vector.
 *
 *    @param[out] w Output vector (n entries are zeroed, then overwritten).
 *    @param[in] n Length of vectors.
 *    @param[in] A Sparse matrix to multiply v by.
 *    @param[in] v Vector to multiply the sparse matrix by.
 *    @return 0 when cs_gaxpy() reports success (returns 1).
 */
static int eigs_w_Av_cs( double *w, int n, const cs *A, const double *v )
{
    int status;

    /* Start from w = 0 so that the accumulation below yields exactly A*v. */
    memset( w, 0, n*sizeof(double) );

    status = cs_gaxpy( A, v, w );
    if (status == 1)
        return 0;

    fprintf( stderr, "error while running cs_gaxpy\n" );
    return !status;
}
コード例 #2
0
ファイル: mexHelperBLAS.c プロジェクト: hosna/m-files
/*
 * Computes: y <- alpha A^T*x + beta y
 *
 * A  real rA-by-cA matrix, sparse or full.
 * x  real full column vector with rA rows.
 * y  real full column vector with cA rows; updated in place.
 *
 * Type or dimension violations, and allocation failures in the sparse path,
 * are reported through mexErrMsgIdAndTxt(). Returns EXIT_SUCCESS otherwise.
 */
int mfiles_dgemv1(double alpha, const mxArray *A, const mxArray *x,
                  double beta, mxArray *y) {
    size_t rA = mxGetM(A);
    size_t cA = mxGetN(A);
    size_t rx = mxGetM(x);
    size_t cx = mxGetN(x);
    size_t ry = mxGetM(y);
    size_t cy = mxGetN(y);

    if (mxIsSparse(x) || mxIsSparse(y)) {
        mexErrMsgIdAndTxt("mfiles:BadType",
                          "Sparse vectors are not supported.");
    }

    if (mxIsComplex(A) || mxIsComplex(x) || mxIsComplex(y)) {
        mexErrMsgIdAndTxt("mfiles:BadType",
                          "Complex data is not supported.");
    }

    if ((rA != rx) || (cA != ry) || (cx != 1) || (cy != 1)) {
        mexErrMsgIdAndTxt("mfiles:BadDim",
                          "Dimensions of matrices do not match.");
    }

    if (mxIsSparse(A)) {
        double *px = mxGetPr(x);
        double *py = mxGetPr(y);
        /* z accumulates A^T*x; it must start at zero because cs_gaxpy adds. */
        double *pz = mxCalloc(ry, sizeof (double));

        cs *cs_A = cs_calloc(1, sizeof (cs));
        if (cs_A == NULL) {
            mxFree(pz);
            mexErrMsgIdAndTxt("mfiles:OutOfMemory",
                              "Unable to allocate the sparse matrix header.");
        }
        mfiles_mx2cs(A, cs_A);

        /* Transpose A (with numerical values) */
        cs *cs_AT = cs_transpose(cs_A, 1);
        if (cs_AT == NULL) {
            /* Without this check cs_gaxpy would fail silently and z stay 0. */
            cs_free(cs_A);
            mxFree(pz);
            mexErrMsgIdAndTxt("mfiles:OutOfMemory",
                              "Unable to transpose the sparse matrix.");
        }

        /* Compute z <- A^T*x */
        cs_gaxpy(cs_AT, px, pz);

        /* Compute y <- beta y */
        cblas_dscal(ry, beta, py, 1);
        /* Compute y <- alpha*z+y */
        cblas_daxpy(ry, alpha, pz, 1, py, 1);

        /* NOTE(review): only the header is released with cs_free; this
         * presumes mfiles_mx2cs points cs_A at MATLAB-owned arrays rather
         * than copying them -- confirm against mfiles_mx2cs. */
        cs_free(cs_A);
        cs_spfree(cs_AT);
        mxFree(pz);
    } else {
        double *pA = mxGetPr(A);
        double *px = mxGetPr(x);
        double *py = mxGetPr(y);
        /* MATLAB stores full matrices column-major with leading dimension
         * rA, so the layout must be CblasColMajor. Declaring the same buffer
         * row-major made the call operate on A^T (yielding A*x for square A)
         * and passed an invalid leading dimension whenever rA < cA. */
        cblas_dgemv(CblasColMajor, CblasTrans,
                    rA, cA, alpha, pA, rA, px, 1, beta, py, 1);

    }

    return EXIT_SUCCESS;
}
コード例 #3
0
/* Print the scaled residual of a solve,
 *   norm(A*x-b,inf) / (norm(A,1)*norm(x,inf) + norm(b,inf)),
 * or "(failed)" when ok is zero. resid is caller-supplied workspace with at
 * least A->m entries; it is overwritten with A*x-b. */
static void print_resid (int ok, cs *A, double *x, double *b, double *resid)
{
    int k, nrows, ncols ;
    double scale ;
    if (!ok)
    {
        printf ("    (failed)\n") ;
        return ;
    }
    nrows = A->m ;
    ncols = A->n ;
    /* resid = -b, so that the accumulating product below leaves A*x-b */
    for (k = 0 ; k < nrows ; k++)
    {
        resid [k] = -b [k] ;
    }
    cs_gaxpy (A, x, resid) ;
    /* a matrix with no columns is scaled by 1 instead of the norm expression */
    scale = 1 ;
    if (ncols != 0)
    {
        scale = cs_norm (A) * norm (x,ncols) + norm (b,nrows) ;
    }
    printf ("resid: %8.2e\n", norm (resid,nrows) / scale) ;
}
コード例 #4
0
ファイル: utils.c プロジェクト: dsimba/glmgen
/*
 * Fills beta_max[0..n-1] from the responses y and weights w.
 *
 * Steps, as performed below:
 *   1. temp_n <- sqrt(w) .* y
 *   2. temp_n <- glmgen_qrsol(Dt_qr, temp_n)   (solved in place)
 *   3. beta_max <- Dt * temp_n
 *   4. beta_max <- y - beta_max ./ sqrt(w)
 *
 * temp_n is caller-supplied scratch space of length n and is overwritten.
 */
void calc_beta_max(double * y, double * w, int n, gqr * Dt_qr, cs * Dt,
    double * temp_n, double * beta_max)
{
  int idx;

  for (idx = 0; idx < n; ++idx)
  {
    temp_n[idx] = sqrt(w[idx]) * y[idx];
  }
  glmgen_qrsol (Dt_qr, temp_n);

  /* cs_gaxpy accumulates into its output, so clear beta_max first. */
  for (idx = 0; idx < n; ++idx)
  {
    beta_max[idx] = 0;
  }
  cs_gaxpy(Dt, temp_n, beta_max);

  /* Dt has a W^{-1/2}, so in the next step divide by sqrt(w) instead of w. */
  for (idx = 0; idx < n; ++idx)
  {
    double scaled = beta_max[idx]/sqrt(w[idx]);
    beta_max[idx] = y[idx] - scaled;
  }
}
コード例 #5
0
ファイル: cs_gaxpy_mex.c プロジェクト: Al-th/matlab
/* MEX gateway: z = cs_gaxpy (A,x,y) computes z = A*x+y.
 * Expects exactly three inputs (A, x, y) and at most one output. */
void mexFunction
(
    int nargout,
    mxArray *pargout [ ],
    int nargin,
    const mxArray *pargin [ ]
)
{
    cs Amatrix ;
    cs *A ;
    double *x ;
    double *y ;
    double *z ;
    int bad_usage = (nargin != 3) || (nargout > 1) ;

    if (bad_usage)
    {
        mexErrMsgTxt ("Usage: z = cs_gaxpy(A,x,y)") ;
    }

    /* fetch the operands: A sparse, x of length A->n, y of length A->m */
    A = cs_mex_get_sparse (&Amatrix, 0, 1, pargin [0]) ;
    x = cs_mex_get_double (A->n, pargin [1]) ;
    y = cs_mex_get_double (A->m, pargin [2]) ;

    /* the output starts as a copy of y and then accumulates A*x */
    z = cs_mex_put_double (A->m, y, &(pargout [0])) ;
    cs_gaxpy (A, x, z) ;
}
コード例 #6
0
/*
 * Solves A*x = b with a diagonally-scaled bi-conjugate gradient iteration.
 *
 *   A     sparse matrix in compressed-column form; columns 0..n-1 are read
 *         (assumes A is n-by-n -- TODO confirm against callers).
 *   b     right-hand side, n entries.
 *   x     in: initial guess; out: approximate solution, n entries.
 *   n     number of unknowns; nothing is done when n <= 0.
 *   itol  iteration stops once ||r||_2 / ||b||_2 <= itol (||b|| is replaced
 *         by 1 when it is zero) or after n iterations.
 *
 * The process is terminated with exit(42) when rho or omega becomes exactly
 * zero, and with exit(EXIT_FAILURE) when the workspace or the transpose of A
 * cannot be created.
 */
void bi_conjugate_gradient_sparse(cs *A, double *b, double* x, int n, double itol){

    int i,j,pp,iter;
    double rho,rho1 = 0.0,alpha,beta,omega;
    double r_norm,b_norm;
    double *work;
    double *r,*r_t,*z,*z_t,*q,*q_t,*temp_q,*p,*p_t,*temp_p,*res,*precond;
    cs *AT;

    if (n <= 0)
        return;

    /* One zero-filled heap block instead of twelve stack VLAs: large systems
     * would otherwise overflow the stack. */
    work = calloc((size_t)n * 12, sizeof *work);
    AT = cs_transpose (A, 1) ;
    if (work == NULL || AT == NULL){
        fprintf(stderr, "Bi-CG: unable to allocate workspace or transpose A\n");
        exit(EXIT_FAILURE);
    }
    r       = work;
    r_t     = r + n;
    z       = r_t + n;
    z_t     = z + n;
    q       = z_t + n;
    q_t     = q + n;
    temp_q  = q_t + n;
    p       = temp_q + n;
    p_t     = p + n;
    temp_p  = p_t + n;
    res     = temp_p + n;
    precond = res + n;

    /* Preconditioner: reciprocal of the largest-magnitude entry of each
     * column (this equals the diagonal entry only when the diagonal
     * dominates its column). An empty or all-zero column falls back to 1 so
     * that no out-of-range entry is read and no division by zero occurs. */
    for(j = 0; j < n; ++j){
        double max = 0.0;
        for(pp = A->p[j]; pp < A->p[j+1]; pp++)
            if(fabs(A->x[pp]) > max)
                max = fabs(A->x[pp]);
        precond[j] = (max > 0.0) ? 1/max : 1.0;
    }

    /* The iterate is kept in res so that x is only written on completion. */
    cblas_dcopy (n, x, 1, res, 1);

    //r=b-Ax (p is zero here and serves as scratch for A*x)
    cblas_dcopy (n, b, 1, r, 1);
    cs_gaxpy (A, x, p);
    for(i=0;i<n;i++){
        r[i]=r[i]-p[i];
    }

    cblas_dcopy (n, r, 1, r_t, 1);

    r_norm = cblas_dnrm2 (n, r, 1);
    b_norm = cblas_dnrm2 (n, b, 1);
    if(!b_norm)
        b_norm = 1;

    iter = 0;

    while( r_norm/b_norm > itol && iter < n ){

        iter++;

        /* z = M^-1 r and z_t = M^-1 r_t, leaving r and r_t untouched */
        for(i=0;i<n;i++){
            z[i]=precond[i]*r[i];
            z_t[i]=precond[i]*r_t[i];
        }

        rho = cblas_ddot (n, z, 1, r_t, 1);
        if (fpclassify(fabs(rho)) == FP_ZERO){
            printf("RHO aborting Bi-CG due to EPS...\n");
            exit(42);
        }

        if (iter == 1){
            cblas_dcopy (n, z, 1, p, 1);
            cblas_dcopy (n, z_t, 1, p_t, 1);
        }
        else{
            //p = z + beta*p;
            beta = rho/rho1;

            cblas_dscal (n, beta, p, 1);        //rescale p by beta
            cblas_dscal (n, beta, p_t, 1);      //rescale p_t by beta

            cblas_daxpy (n, 1, z, 1, p, 1);     //p = 1*z + p
            cblas_daxpy (n, 1, z_t, 1, p_t, 1); //p_t = 1*z_t + p_t
        }

        rho1 = rho;

        //q = Ap
        //q_t = trans(A)*p_t
        memset(q, 0, n*sizeof(double));
        cs_gaxpy (A, p, q);
        memset(q_t, 0, n*sizeof(double));
        cs_gaxpy(AT, p_t, q_t);

        omega = cblas_ddot (n, p_t, 1, q, 1);
        if (fpclassify(fabs(omega)) == FP_ZERO){
            printf("OMEGA aborting Bi-CG due to EPS...\n");
            exit(42);
        }

        alpha = rho/omega;

        //x = x + alpha*p;
        cblas_dcopy (n, p, 1, temp_p, 1);
        cblas_dscal (n, alpha, temp_p, 1);
        cblas_daxpy (n, 1, temp_p, 1, res, 1);

        //r = r - alpha*q;
        cblas_dcopy (n, q, 1, temp_q, 1);
        cblas_dscal (n, -alpha, temp_q, 1);
        cblas_daxpy (n, 1, temp_q, 1, r, 1);

        //r_t = r_t - alpha*q_t
        cblas_dcopy (n, q_t, 1, temp_q, 1);
        cblas_dscal (n, -alpha, temp_q, 1);
        cblas_daxpy (n, 1, temp_q, 1, r_t, 1);

        r_norm = cblas_dnrm2 (n, r, 1); //next step
    }
    cblas_dcopy (n, res, 1, x, 1);

    cs_spfree(AT);
    free(work);
}
コード例 #7
0
/*
 * Solves A*x = b with a diagonally-scaled conjugate gradient iteration.
 *
 *   A     sparse matrix in compressed-column form; columns 0..n-1 are read
 *         (assumes A is n-by-n -- TODO confirm against callers).
 *   b     right-hand side, n entries.
 *   x     in: initial guess; out: approximate solution, n entries.
 *   n     number of unknowns; nothing is done when n <= 0.
 *   itol  iteration stops once ||r||_2 / ||b||_2 <= itol (||b|| is replaced
 *         by 1 when it is zero) or after n iterations.
 *
 * The process is terminated with exit(42) when rho or omega becomes exactly
 * zero, and with exit(EXIT_FAILURE) when the workspace cannot be allocated.
 */
void conjugate_gradient_sparse(cs *A, double *b, double* x, int n, double itol)
{
    int i,j,pp;
    int iter;
    double rho,rho1 = 0.0,alpha,beta,omega;
    double r_norm,b_norm;
    double *work;
    double *r,*z,*q,*temp_q,*p,*temp_p,*res,*precond;

    if (n <= 0)
        return;

    /* One zero-filled heap block instead of eight stack VLAs: large systems
     * would otherwise overflow the stack. */
    work = calloc((size_t)n * 8, sizeof *work);
    if (work == NULL){
        fprintf(stderr, "CG: unable to allocate workspace\n");
        exit(EXIT_FAILURE);
    }
    r       = work;
    z       = r + n;
    q       = z + n;
    temp_q  = q + n;
    p       = temp_q + n;
    temp_p  = p + n;
    res     = temp_p + n;
    precond = res + n;

    /* Preconditioner: reciprocal of the largest-magnitude entry of each
     * column (this equals the diagonal entry only when the diagonal
     * dominates its column). An empty or all-zero column falls back to 1 so
     * that no out-of-range entry is read and no division by zero occurs. */
    for(j = 0; j < n; ++j){
        double max = 0.0;
        for(pp = A->p[j]; pp < A->p[j+1]; pp++)
            if(fabs(A->x[pp]) > max)
                max = fabs(A->x[pp]);
        precond[j] = (max > 0.0) ? 1/max : 1.0;
    }

    /* The iterate is kept in res so that x is only written on completion. */
    cblas_dcopy (n, x, 1, res, 1);

    //r=b-Ax (p is zero here and serves as scratch for A*x)
    cblas_dcopy (n, b, 1, r, 1);
    cs_gaxpy (A, x, p);
    for(i=0;i<n;i++){
        r[i]=r[i]-p[i];
    }

    r_norm = cblas_dnrm2 (n, r, 1);
    b_norm = cblas_dnrm2 (n, b, 1);
    if(!b_norm)
        b_norm = 1;

    iter = 0;

    while( r_norm/b_norm > itol && iter < n )
    {
        iter++;

        /* z = M^-1 r, leaving r untouched */
        for(i=0;i<n;i++){
            z[i]=precond[i]*r[i];
        }

        rho = cblas_ddot (n, z, 1, r, 1);
        if (fpclassify(fabs(rho)) == FP_ZERO){
            printf("RHO aborting CG due to EPS...\n");
            exit(42);
        }

        if (iter == 1){
            cblas_dcopy (n, z, 1, p, 1);
        }
        else{
            beta = rho/rho1;

            //p = z + beta*p;
            cblas_dscal (n, beta, p, 1);    //rescale
            cblas_daxpy (n, 1, z, 1, p, 1); //p = 1*z + p
        }
        rho1 = rho;

        //q = Ap
        memset(q, 0, n*sizeof(double));
        cs_gaxpy (A, p, q);

        omega = cblas_ddot (n, p, 1, q, 1);
        if (fpclassify(fabs(omega)) == FP_ZERO){
            printf("OMEGA aborting CG due to EPS...\n");
            exit(42);
        }

        alpha = rho/omega;

        //x = x + alpha*p;
        cblas_dcopy (n, p, 1, temp_p, 1);
        cblas_dscal (n, alpha, temp_p, 1);
        cblas_daxpy (n, 1, temp_p, 1, res, 1);

        //r = r - alpha*q;
        cblas_dcopy (n, q, 1, temp_q, 1);
        cblas_dscal (n, -alpha, temp_q, 1);
        cblas_daxpy (n, 1, temp_q, 1, r, 1);

        //next step
        r_norm = cblas_dnrm2 (n, r, 1);
    }
    cblas_dcopy (n, res, 1, x, 1);

    free(work);
}
コード例 #8
0
/*
 * Line search on the step length alpha[0] along `direction`, using the merit
 * function q(t) = 0.5 * ||psi(solution + t*direction)||^2.
 *
 * The step is accepted when the secant slope (q(alpha) - q(0)) / alpha lies
 * between m2*q'(0) and m1*q'(0), where q'(0) = psi . (J*direction). Otherwise
 * the bracket [alphamin, alphamax] is tightened and a new trial step chosen.
 *
 * tmp is workspace of at least n + 2*m entries; psi is recomputed by ACPsi()
 * at every trial point. The mu and F arguments are not used here.
 *
 * Returns 0 when an acceptable step was found (left in alpha[0]), -1 when
 * maxiter_ls trial steps were exhausted.
 *
 * NOTE(review): the new trial step is selected on `alpha[0] < inf`, not on
 * `alphamax < inf`, and the log messages print unsigned counters with %i;
 * both are kept exactly as they were -- confirm whether they are intended.
 */
int _globalLineSearchSparseGP(
  GlobalFrictionContactProblem *problem,
  AlartCurnierFun3x3Ptr computeACFun3x3,
  double *solution,
  double *direction,
  double *mu,
  double *rho,
  double *F,
  double *psi,
  CSparseMatrix *J,
  double *tmp,
  double alpha[1],
  unsigned int maxiter_ls)
{
  const double inf = 1e10;
  const double m1 = 0.01;
  const double m2 = 0.99;

  /* current bracket for the step length */
  double alphamin = 1e-16;
  double alphamax = inf;

  unsigned int dim_M = (unsigned)NM_triplet(problem->M)->m;
  unsigned int dim_H = problem->H->size1;
  unsigned int problem_size = dim_M+2*dim_H;

  /* q(0) */
  double q0 = 0.5 * cblas_ddot(problem_size, psi, 1, psi, 1);

  /* q'(0) = psi . (J * direction); tmp is zeroed first since cs_gaxpy adds */
  cblas_dscal(problem_size, 0., tmp, 1);
  cs_gaxpy(J, direction, tmp);
  double dqdt0 = cblas_ddot(problem_size, psi, 1, tmp, 1);

  DEBUG_PRINTF("dqdt0=%e\n",dqdt0);
  DEBUG_PRINTF("q0=%e\n",q0);

  unsigned int iter = 0;
  while(iter < maxiter_ls)
  {
    /* trial point: tmp <- solution + alpha*direction */
    cblas_dcopy(problem_size, solution, 1, tmp, 1);
    cblas_daxpy(problem_size, alpha[0], direction, 1, tmp, 1);

    ACPsi(
      problem,
      computeACFun3x3,
      tmp,  /* v */
      tmp+problem->M->size0+problem->H->size1, /* P */
      tmp+problem->M->size0, /* U */
      rho, psi);

    double q  = 0.5 * cblas_ddot(problem_size, psi, 1, psi, 1);

    assert(q >= 0);

    double slope = (q - q0) / alpha[0];

    int slope_high_enough = (slope >= m2 * dqdt0);
    int slope_low_enough = (slope <= m1 * dqdt0);

    DEBUG_PRINTF("C1=%i\t C2=%i\n",slope_high_enough,slope_low_enough);

    if(!slope_high_enough)
    {
      alphamin = alpha[0];
    }
    else if(!slope_low_enough)
    {
      alphamax = alpha[0];
    }
    else
    {
      /* both conditions hold: accept the current step */
      numerics_printf_verbose(1, "---- GFC3D - NSN_AC - global line search success. Number of ls iteration = %i  alpha = %.10e, q = %.10e",
                              iter,
                              alpha[0], q);

      return 0;
    }

    alpha[0] = (alpha[0] < inf) ? 0.5 * (alphamin + alphamax) : alphamin;

    ++iter;
  }

  numerics_printf_verbose(1,"---- GFC3D - NSN_AC - global line search unsuccessful. Max number of ls iteration reached  = %i  with alpha = %.10e",
                  maxiter_ls, alpha[0]);

  return -1;
}
コード例 #9
0
ファイル: tf_admm.c プロジェクト: alexdeng/glmgen
/**
 * @brief Main wrapper for fitting a trendfilter model.
 * Fits the model for a sequence of nlambda tuning parameters, warm-starting
 * each fit from the previous one. The lambda sequence is either supplied by
 * the caller or computed here (see lam_flag). The user must supply allocated
 * memory to store the output, with the function itself returning only
 * @c void. For default values, and an example of how to call the function,
 * see the function tf_admm_default.
 *
 * @param y                    a vector of responses
 * @param x                    a vector of response locations; must be in increasing order
 * @param w                    a vector of sample weights; assumed to contain no zeros
 * @param n                    the length of y, x, and w
 * @param k                    degree of the trendfilter; i.e., k=1 linear
 * @param family               family code for the type of fit (FAMILY_GAUSSIAN,
 *                             FAMILY_LOGISTIC or FAMILY_POISSON)
 * @param max_iter             maximum number of ADMM interations
 * @param lam_flag             when 0, lambda is filled here with nlambda log-spaced
 *                             values from the maximal lambda down to
 *                             lambda_min_ratio times it; when nonzero, lambda is
 *                             used as supplied
 * @param lambda               the lambda sequence (input when lam_flag is nonzero,
 *                             output space of size nlambda when lam_flag is 0)
 * @param nlambda              number of lambda values
 * @param lambda_min_ratio     ratio between min and max lambda; only read when lam_flag is 0
 * @param beta                 allocated space of size n*nlambda to store the output coefficents
 * @param obj                  allocated space of size max_iter*nlambda to store the objective
 * @param iter                 allocated space of size nlambda to store the number of iterations
 * @param status               allocated space of size nlambda; entry i is set to 1 when
 *                             the fit for lambda[i] produced NaNs, and every entry is
 *                             set to 2 when family is not recognised during the
 *                             warm-start computation
 * @param rho                  tuning parameter for the ADMM algorithm
 * @param obj_tol              stopping criteria tolerance
 * @param alpha_ls             line search tuning parameter (non-Gaussian families)
 * @param gamma_ls             line search tuning parameter (non-Gaussian families)
 * @param max_iter_ls          max number of iterations in line search (non-Gaussian families)
 * @param max_iter_newton      max number of iterations in inner ADMM (non-Gaussian families)
 * @param verbose              0/1 flag for printing progress
 * @return void
 * @see tf_admm_default
 */
void tf_admm (double * y, double * x, double * w, int n, int k, int family,
              int max_iter, int lam_flag, double * lambda,
              int nlambda, double lambda_min_ratio, double * beta,
              double * obj, int * iter, int * status, double rho,
              double obj_tol, double alpha_ls, double gamma_ls,
              int max_iter_ls, int max_iter_newton, int verbose)
{
  int i;
  int j;
  double max_lam;
  double min_lam;
  double * temp_n;
  double * beta_max;
  double * alpha;
  double * u;

  cs * D;
  cs * Dt;
  cs * Dk;
  cs * Dkt;
  cs * DktDk;
  gqr * Dt_qr;
  gqr * Dkt_qr;

  /* Zero-initialised so that beta_max (and the tail of alpha/u) hold defined
   * values even when the warm-start computation below is skipped. */
  beta_max = (double *) calloc(n, sizeof(double));
  temp_n   = (double *) calloc(n, sizeof(double));
  alpha    = (double *) calloc(n, sizeof(double)); /* we use extra buffer (n vs n-k) */
  u        = (double *) calloc(n, sizeof(double)); /* we use extra buffer (n vs n-k) */
  if (beta_max == NULL || temp_n == NULL || alpha == NULL || u == NULL)
  {
    printf("tf_admm: unable to allocate workspace\n");
    free(temp_n);
    free(beta_max);
    free(alpha);
    free(u);
    return;
  }

  /* Assume w does not have zeros */
  for(i = 0; i < n; i++) temp_n[i] = 1/sqrt(w[i]);

  D = tf_calc_dk(n, k+1, x);
  Dk = tf_calc_dktil(n, k, x);
  Dt = cs_transpose(D, 1);
  diag_times_sparse(Dt, temp_n); /* Dt = W^{-1/2} Dt */
  Dkt = cs_transpose(Dk, 1);
  Dt_qr = glmgen_qr(Dt);
  Dkt_qr = glmgen_qr(Dkt);
  DktDk = cs_multiply(Dkt,Dk);

  /* Determine the maximum lambda in the path, and initiate the path if needed
   * using the input lambda_min_ratio and equally spaced log points.
   */
  max_lam = tf_maxlam(n, y, Dt_qr, w);
  if (!lam_flag)
  {
    min_lam = max_lam * lambda_min_ratio;
    lambda[0] = max_lam;
    for (i = 1; i < nlambda; i++)
      lambda[i] = exp((log(max_lam) * (nlambda - i -1) + log(min_lam) * i) / (nlambda-1));

  }

  rho = rho * pow( (x[n-1] - x[0])/n, (double)k);

  /* Initiate alpha and u for a warm start */
  if (lambda[0] < max_lam * 1e-5)
  {
    for (i = 0; i < n - k; i++)
    {
      alpha[i] = 0;
      u[i] = 0;
    }
  } else {

    /* beta_max */
    for (i = 0; i < n; i++) temp_n[i] = -sqrt(w[i]) * y[i];
    glmgen_qrsol (Dt_qr, temp_n);
    for (i = 0; i < n; i++) beta_max[i] = 0;
    cs_gaxpy(Dt, temp_n, beta_max);
    /* Dt has a W^{-1/2}, so in the next step divide by sqrt(w) instead of w. */
    for (i = 0; i < n; i++) beta_max[i] = y[i] - beta_max[i]/sqrt(w[i]);

    /* alpha_max */
    tf_dxtil(x, n, k, beta_max, alpha);

    /* u_max */
    switch (family)
    {
    case FAMILY_GAUSSIAN:
      for (i = 0; i < n; i++) u[i] = w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]);
      break;

    case FAMILY_LOGISTIC:
      for (i = 0; i < n; i++) {
        u[i] = logi_b2(beta_max[i]) * w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]);
      }
      break;

    case FAMILY_POISSON:
      for (i = 0; i < n; i++) {
        u[i] = pois_b2(beta_max[i]) * w[i] *(beta_max[i] - y[i]) / (rho * lambda[0]);
      }
      break;

    default:
      /* Unknown family: flag every run and release everything built so far
       * (returning directly from here used to leak all of it). */
      for (i = 0; i < nlambda; i++) status[i] = 2;
      goto cleanup;
    }

    glmgen_qrsol (Dkt_qr, u);
  }

  /* Iterate lower level functions over all lambda values;
   * the alpha and u vectors get used each time of subsequent
   * warm starts
   */
  for (i = 0; i < nlambda; i++)
  {
    /* warm start */
    double * beta_init = (i == 0) ? beta_max : beta + (i-1)*n;
    for(j = 0; j < n; j++) beta[i*n + j] = beta_init[j];

    switch (family)
    {
      case FAMILY_GAUSSIAN:
        tf_admm_gauss(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha,
                      u, obj+i*max_iter, iter+i, rho * lambda[i], obj_tol,
                      DktDk, verbose);
        break;

      case FAMILY_LOGISTIC:
        tf_admm_glm(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i,
                    rho * lambda[i], obj_tol, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton,
                    DktDk, &logi_b, &logi_b1, &logi_b2, verbose);
        break;

      case FAMILY_POISSON:
        tf_admm_glm(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i,
                    rho * lambda[i], obj_tol, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton,
                    DktDk, &pois_b, &pois_b1, &pois_b2, verbose);
        break;

      default:
        /* Reachable only when the warm-start computation was skipped with an
         * unrecognised family; beta keeps its warm-start copy. */
        break;
    }

    /* If there any NaNs in beta: reset beta, alpha, u */
    if(has_nan(beta + i * n, n))
    {
      for(j = 0; j < n; j++) beta[i*n + j] = 0;
      for(j = 0; j < n-k; j++) { alpha[j] = 0; u[j] = 0; }
      status[i] = 1;
      printf("Numerical error in lambda[%d]=%f",i,lambda[i]);
    }
  }

cleanup:
  cs_spfree(D);
  cs_spfree(Dt);
  cs_spfree(Dk);
  cs_spfree(Dkt);
  cs_spfree(DktDk);
  glmgen_gqr_free(Dt_qr);
  glmgen_gqr_free(Dkt_qr);

  free(temp_n);
  free(beta_max);
  free(alpha);
  free(u);
}
コード例 #10
0
ファイル: fcmer.c プロジェクト: xhub/fclib
/*
 * calculate merit function for a local problem
 *
 * Only MERIT_1 in space dimension 3 is implemented; every other combination
 * returns 0 (a message is printed for an unsupported dimension).
 *
 * For MERIT_1 the result is
 *   sqrt(error) / (1 + sqrt(||q||)) + error_l
 * where `error` is accumulated per contact by
 * FrictionContact3D_unitary_compute_and_add_error() from r and
 * W r + V l + q, and error_l = ||V^T r + R l + s|| / (1 + ||s||) when R has
 * columns (0 otherwise).
 */
double fclib_merit_local (struct fclib_local *problem, enum fclib_merit merit, struct fclib_solution *solution)
{

  struct fclib_matrix * W =  problem->W;
  struct fclib_matrix * V =  problem->V;
  struct fclib_matrix * R =  problem->R;
  
  double *mu = problem->mu;
  double *q = problem->q;
  double *s = problem->s;
  int d = problem->spacedim;          
  if (d !=3 )
  {
    printf("fclib_merit_local for space dimension = %i not yet implemented\n",d);
    return 0;
  }

  double *r = solution->r;
  double *l = solution->l;

  double error_l, error;
  double * tmp;

  error=0.0;
  error_l=0.0;
  int i, ic, ic3;
  if (merit == MERIT_1)
  {
    int n_e =0;
    if (R) n_e = R->n;
    /* compute V^T {r} + R \lambda + s */
    if (n_e >0)
    {
      /* The numerical values must be transposed too (second argument 1):
       * a pattern-only transpose carries no value array for cs_gaxpy to
       * multiply with. */
      cs * VT = cs_transpose((cs *)V, 1) ;
      tmp = (double *)malloc(n_e*sizeof(double));
      for (i =0; i <n_e; i++) tmp[i] = s[i] ;
      cs_gaxpy(VT, r, tmp);
      cs_gaxpy((cs *)R, l, tmp);
      error_l += dnrm2(tmp,n_e)/(1.0 +  dnrm2(s,n_e) );
      free(tmp);
      cs_spfree(VT); /* the transpose is a temporary owned by this block */
    }
    /* compute  \hat u = W {r}    + V\lambda  + q  */
    
    tmp = (double *)malloc(W->n*sizeof(double));
    for (i =0; i <W->n; i++) tmp[i] = q[i] ;
    cs_gaxpy((cs*)V, l, tmp);
    cs_gaxpy((cs*)W, r, tmp);

    /* Compute natural map: one 3-vector per contact */
    int nc = W->n/3;
    for (ic = 0, ic3 = 0 ; ic < nc ; ic++, ic3 += 3)
    {
      FrictionContact3D_unitary_compute_and_add_error(r + ic3, tmp + ic3, mu[ic], &error);
    }
          
    free(tmp);
    error = sqrt(error)/(1.0 +  sqrt(dnrm2(q,W->n)) )+error_l;  

    return error;
  }

  return 0; /* TODO */
}
コード例 #11
0
ファイル: solvers.c プロジェクト: hriskons/cirPar
/*
 * Solves A*x = b with a diagonally-scaled conjugate gradient iteration.
 *
 *   A     sparse matrix in compressed-column form; columns 0..n-1 are read
 *         (assumes A is n-by-n -- TODO confirm against callers).
 *   b     right-hand side, n entries.
 *   n     number of unknowns.
 *   x     in: initial guess; out: approximate solution, n entries.
 *   itol  currently unused, see the note at the loop condition.
 *
 * Progress is printed every 100 iterations and a summary at the end. The
 * process is terminated with exit(42) when rho or omega becomes exactly zero.
 */
void conjugate_gradient_sparse(cs *A, double *b, int n, double *x, double itol)
{
	int i,j;
	int iter;
	double rho,rho1 = 0.0,alpha,beta,omega;

	double *r;
	double *z;
	double *q, *temp_q;
	double *p, *temp_p;
	double *res;
	double *precond;	//Preconditioner


	r = (double *)safe_malloc(n * sizeof(double));
	z = (double *)safe_malloc(n * sizeof(double));
	q = (double *)safe_malloc(n * sizeof(double));
	p = (double *)safe_malloc(n * sizeof(double));
	res = (double *)safe_malloc(n * sizeof(double));
	precond = (double *)safe_malloc(n * sizeof(double));
	temp_q = (double *)safe_malloc(n * sizeof(double));
	temp_p = (double *)safe_malloc(n * sizeof(double));

	for(i = 0; i < n; i++){
		precond[i] =  0;
		r[i] = 0;
		z[i] = 0;
		q[i] = 0;
		temp_q[i] = 0;
		p[i] =0;
		temp_p[i] = 0;
	}

	/* Preconditioner: reciprocal of the largest-magnitude entry of each
	 * column (this equals the diagonal entry only when the diagonal
	 * dominates its column). An empty or all-zero column falls back to 1 so
	 * that no out-of-range entry is read and no division by zero occurs. */
	int pp;
	for(j = 0; j < n; ++j){
		double max = 0.0;
		for(pp = A->p[j]; pp < A->p[j+1]; pp++)
			if(fabs(A->x[pp]) > max)
				max = fabs(A->x[pp]);
		precond[j] = (max > 0.0) ? 1/max : 1.0;
	}

	/* The iterate is kept in res so that x is only written on completion. */
	cblas_dcopy (n, x, 1, res, 1);

	//r=b-Ax (p is zero here and serves as scratch for A*x)
	cblas_dcopy (n, b, 1, r, 1);
	cs_gaxpy (A, x, p);
	for(i=0;i<n;i++){
 		r[i]=r[i]-p[i];

	}

	double r_norm = cblas_dnrm2 (n, r, 1);
	double b_norm = cblas_dnrm2 (n, b, 1);
	if(!b_norm)
		b_norm = 1;

	iter = 0;

	/* NOTE(review): the tolerance is hard-coded to 1e-3 and the itol
	 * argument is ignored; kept as-is because callers may rely on it --
	 * confirm whether itol should be honoured here. */
	double resid;
	while((resid = r_norm/b_norm) > 1e-3 && iter < ITER_NUM )
	{
		if(!(iter % 100))
			printf("Iteration: %d %f\n",iter,resid);
		iter++;

		/* z = M^-1 r, leaving r untouched */
		for(i=0;i<n;i++){
 			z[i]=precond[i]*r[i];

		}

		rho = cblas_ddot (n, z, 1, r, 1);
		if (fpclassify(fabs(rho)) == FP_ZERO){
			printf("RHO aborting CG due to EPS...\n");
			exit(42);
		}

		if (iter == 1){
			cblas_dcopy (n, z, 1, p, 1);
		}
		else{
			beta = rho/rho1;

			//p = z + beta*p;
			cblas_dscal (n, beta, p, 1);	//rescale
			cblas_daxpy (n, 1, z, 1, p, 1);	//p = 1*z + p

		}
		rho1 = rho;

		//q = Ap
		memset(q, 0, n*sizeof(double));
		cs_gaxpy (A, p, q);

		omega = cblas_ddot (n, p, 1, q, 1);
		if (fpclassify(fabs(omega)) == FP_ZERO){
			printf("OMEGA aborting CG due to EPS...\n");
			exit(42);
		}

		alpha = rho/omega;

		//x = x + alpha*p;
		cblas_dcopy (n, p, 1, temp_p, 1);
		cblas_dscal (n, alpha, temp_p, 1);
		cblas_daxpy (n, 1, temp_p, 1, res, 1);

		//r = r - alpha*q;
		cblas_dcopy (n, q, 1, temp_q, 1);
		cblas_dscal (n, -alpha, temp_q, 1);
		cblas_daxpy (n, 1, temp_q, 1, r, 1);

		//next step
		r_norm = cblas_dnrm2 (n, r, 1);
	}
	printf("Solution approximated after %d iterations for tolerance %f\n",iter,resid);
	cblas_dcopy (n, res, 1, x, 1);

	/* Release the work buffers; they used to be leaked on every call.
	 * Assumes safe_malloc hands out malloc-compatible memory -- TODO confirm. */
	free(r);
	free(z);
	free(q);
	free(p);
	free(res);
	free(precond);
	free(temp_q);
	free(temp_p);
}