/**
 * @brief Computes w <- A*v, with A a sparse matrix and v, w dense vectors.
 *
 * The destination is cleared before the product because cs_gaxpy()
 * accumulates into the vector it is given.
 *
 * @param[out] w Output vector; its first n entries are overwritten.
 * @param[in] n Length of vectors.
 * @param[in] A Sparse matrix to multiply v by.
 * @param[in] v Vector to multiply the sparse matrix by.
 * @return 0 on success (cs_gaxpy() returned 1); otherwise the logical
 *         negation of the value cs_gaxpy() returned.
 */
static int eigs_w_Av_cs( double *w, int n, const cs *A, const double *v )
{
   int status;

   /* Start from w = 0 so that the accumulated result is exactly A*v. */
   memset( w, 0, n*sizeof(double) );

   status = cs_gaxpy( A, v, w );
   if (status == 1)
      return 0;

   fprintf( stderr, "error while running cs_gaxpy\n" );
   return !status;
}
/*
 * Computes: y <- alpha A^T*x + beta y
 *
 * A is a real rA-by-cA matrix, dense or sparse. x and y must be dense, real
 * column vectors of length rA and cA respectively; y is updated in place.
 *
 * Type and dimension violations, and allocation failures in the sparse
 * path, are reported through mexErrMsgIdAndTxt().
 *
 * Returns EXIT_SUCCESS.
 */
int mfiles_dgemv1(double alpha, const mxArray *A, const mxArray *x,
                  double beta, mxArray *y)
{
    size_t rA = mxGetM(A);
    size_t cA = mxGetN(A);
    size_t rx = mxGetM(x);
    size_t cx = mxGetN(x);
    size_t ry = mxGetM(y);
    size_t cy = mxGetN(y);

    if (mxIsSparse(x) || mxIsSparse(y)) {
        mexErrMsgIdAndTxt("mfiles:BadType",
                          "Sparse vectors are not supported.");
    }
    if (mxIsComplex(A) || mxIsComplex(x) || mxIsComplex(y)) {
        mexErrMsgIdAndTxt("mfiles:BadType",
                          "Complex data is not supported.");
    }
    if ((rA != rx) || (cA != ry) || (cx != 1) || (cy != 1)) {
        mexErrMsgIdAndTxt("mfiles:BadDim",
                          "Dimensions of matrices do not match.");
    }

    if (mxIsSparse(A)) {
        double *px = mxGetPr(x);
        double *py = mxGetPr(y);
        double *pz = mxCalloc(ry, sizeof (double));
        cs *cs_A = cs_calloc(1, sizeof (cs));
        cs *cs_AT = NULL;

        if (cs_A != NULL) {
            mfiles_mx2cs(A, cs_A);
            /* Transpose A (with numerical values) */
            cs_AT = cs_transpose(cs_A, 1);
        }
        if (cs_AT == NULL) {
            /* Release everything before raising the error, and leave y
             * untouched instead of silently returning beta*y. */
            cs_free(cs_A);
            mxFree(pz);
            mexErrMsgIdAndTxt("mfiles:OutOfMemory",
                              "Out of memory while forming A^T.");
        }

        /* Compute z <- A^T*x (pz starts zeroed, cs_gaxpy accumulates) */
        cs_gaxpy(cs_AT, px, pz);
        /* Compute y <- beta y */
        cblas_dscal((int) ry, beta, py, 1);
        /* Compute y <- alpha*z+y */
        cblas_daxpy((int) ry, alpha, pz, 1, py, 1);

        /* Only the header of cs_A is released here; presumably
         * mfiles_mx2cs() makes it borrow the arrays of the mxArray --
         * TODO confirm. cs_AT owns its arrays, hence cs_spfree(). */
        cs_free(cs_A);
        cs_spfree(cs_AT);
        mxFree(pz);
    } else {
        double *pA = mxGetPr(A);
        double *px = mxGetPr(x);
        double *py = mxGetPr(y);
        /* MATLAB stores dense matrices column-major with leading dimension
         * rA, so the layout must be CblasColMajor. Declaring the data as
         * row-major made CblasTrans compute A*x for square A and passed an
         * invalid leading dimension whenever cA > rA. */
        cblas_dgemv(CblasColMajor, CblasTrans, (int) rA, (int) cA, alpha,
                    pA, (int) (rA > 0 ? rA : 1), px, 1, beta, py, 1);
    }

    return EXIT_SUCCESS;
}
/*
 * Print the scaled residual of a linear solve:
 *   norm(A*x-b,inf) / (norm(A,1)*norm(x,inf) + norm(b,inf))
 *
 * ok     result flag of the solve; when zero only " (failed)" is printed
 * A      system matrix (A->m rows, A->n columns)
 * x      computed solution, length A->n
 * b      right-hand side, length A->m
 * resid  workspace of length A->m; overwritten with A*x - b
 */
static void print_resid (int ok, cs *A, double *x, double *b, double *resid)
{
    int row, nrows, ncols ;
    double scale ;

    if (!ok)
    {
        printf (" (failed)\n") ;
        return ;
    }

    nrows = A->m ;
    ncols = A->n ;

    /* resid = -b, then accumulate A*x on top of it */
    for (row = 0 ; row < nrows ; row++)
    {
        resid [row] = -b [row] ;
    }
    cs_gaxpy (A, x, resid) ;

    /* a matrix without columns gets a unit scale factor */
    if (ncols == 0)
    {
        scale = 1 ;
    }
    else
    {
        scale = cs_norm (A) * norm (x,ncols) + norm (b,nrows) ;
    }
    printf ("resid: %8.2e\n", norm (resid,nrows) / scale) ;
}
/*
 * Fills beta_max with  y - (Dt * sol) / sqrt(w)  (element-wise), where sol
 * is what glmgen_qrsol(Dt_qr, .) leaves in temp_n when started from
 * sqrt(w) .* y.
 *
 * y         responses, length n
 * w         sample weights, length n (sqrt(w[i]) is used as a divisor)
 * n         number of observations
 * Dt_qr     QR object handed to glmgen_qrsol()
 * Dt        sparse matrix multiplied with the solve result
 * temp_n    workspace of length n, overwritten
 * beta_max  output, length n
 */
void calc_beta_max(double * y, double * w, int n, gqr * Dt_qr, cs * Dt,
                   double * temp_n, double * beta_max)
{
  int idx;

  /* Weighted responses; temp_n is both input and output of the solve
   * (presumably glmgen_qrsol() overwrites it in place). */
  for (idx = 0; idx < n; idx++)
  {
    temp_n[idx] = sqrt(w[idx]) * y[idx];
  }
  glmgen_qrsol (Dt_qr, temp_n);

  /* cs_gaxpy() accumulates, so the destination has to start at zero. */
  for (idx = 0; idx < n; idx++)
  {
    beta_max[idx] = 0;
  }
  cs_gaxpy(Dt, temp_n, beta_max);

  /* Dt has a W^{-1/2}, so divide by sqrt(w) here instead of by w. */
  for (idx = 0; idx < n; idx++)
  {
    const double root_w = sqrt(w[idx]);
    beta_max[idx] = y[idx] - beta_max[idx]/root_w;
  }
}
/*
 * MEX gateway: z = cs_gaxpy (A,x,y) computes z = A*x+y
 *
 * Exactly three inputs (A, x, y) and at most one output are accepted;
 * anything else raises the usage error.
 */
void mexFunction
(
    int nargout,
    mxArray *pargout [ ],
    int nargin,
    const mxArray *pargin [ ]
)
{
    cs Amatrix ;
    cs *A ;
    double *x ;
    double *y ;
    double *z ;

    if (nargin != 3 || nargout > 1)
    {
        mexErrMsgTxt ("Usage: z = cs_gaxpy(A,x,y)") ;
    }

    /* fetch the operands; x must have A->n entries, y must have A->m */
    A = cs_mex_get_sparse (&Amatrix, 0, 1, pargin [0]) ;
    x = cs_mex_get_double (A->n, pargin [1]) ;
    y = cs_mex_get_double (A->m, pargin [2]) ;

    /* the output starts as a copy of y and then receives A*x on top */
    z = cs_mex_put_double (A->m, y, &(pargout [0])) ;
    cs_gaxpy (A, x, z) ;
}
/*
 * Solves A*x = b with the preconditioned Bi-Conjugate Gradient method.
 *
 * A     sparse n-by-n matrix in compressed-column form (A->p and A->x are
 *       read directly to build the preconditioner)
 * b     right-hand side, length n
 * x     initial guess on entry, computed approximation on return
 * n     dimension of the system; nothing is done when n <= 0
 * itol  iteration stops once ||r||_2 / ||b||_2 <= itol (||b||_2 is replaced
 *       by 1 when it is zero)
 *
 * At most n iterations are performed. When rho or omega becomes exactly
 * zero, or when memory cannot be obtained, a message is printed and the
 * process terminates with exit(42).
 *
 * The work vectors live in one heap block rather than in variable-length
 * arrays, so large systems cannot overflow the stack.
 */
void bi_conjugate_gradient_sparse(cs *A, double *b, double* x, int n, double itol){
    int i, j, pp, iter;
    double rho, rho1 = 0.0, alpha, beta, omega;
    double r_norm, b_norm;
    double *work;
    double *r, *r_t, *z, *z_t, *q, *q_t, *p, *p_t, *res, *precond;
    cs *AT;

    if (n <= 0)
        return;

    /* One zero-initialised block holding the ten work vectors. */
    work = calloc((size_t)10 * (size_t)n, sizeof *work);
    if (work == NULL) {
        fprintf(stderr, "Bi-CG: out of memory for work vectors\n");
        exit(42);
    }
    r       = work;
    r_t     = r + n;
    z       = r_t + n;
    z_t     = z + n;
    q       = z_t + n;
    q_t     = q + n;
    p       = q_t + n;
    p_t     = p + n;
    res     = p_t + n;
    precond = res + n;

    /* Preconditioner: reciprocal of the largest magnitude stored in each
     * column. NOTE(review): a Jacobi preconditioner would use the diagonal
     * entry A(j,j); the two agree only when the diagonal dominates.
     * Empty or all-zero columns get a factor of 1 instead of reading past
     * the column and dividing by zero. */
    for (j = 0; j < n; ++j) {
        double max = 0.0;
        for (pp = A->p[j]; pp < A->p[j+1]; pp++) {
            double a = fabs(A->x[pp]);
            if (a > max)
                max = a;
        }
        precond[j] = (max > 0.0) ? 1/max : 1.0;
    }

    AT = cs_transpose(A, 1);
    if (AT == NULL) {
        /* Without A^T the shadow residual would silently stay frozen. */
        free(work);
        fprintf(stderr, "Bi-CG: could not build the transpose of A\n");
        exit(42);
    }

    /* res accumulates the iterate; x itself is written only at the end. */
    cblas_dcopy(n, x, 1, res, 1);

    /* r = b - A*x; p is still all zeros and serves as scratch for A*x. */
    cs_gaxpy(A, x, p);
    for (i = 0; i < n; i++)
        r[i] = b[i] - p[i];
    cblas_dcopy(n, r, 1, r_t, 1);

    r_norm = cblas_dnrm2(n, r, 1);
    b_norm = cblas_dnrm2(n, b, 1);
    if (!b_norm)
        b_norm = 1;

    iter = 0;
    while (r_norm/b_norm > itol && iter < n) {
        iter++;

        /* Apply the diagonal preconditioner without modifying r and r_t. */
        for (i = 0; i < n; i++) {
            z[i]   = precond[i]*r[i];
            z_t[i] = precond[i]*r_t[i];
        }

        rho = cblas_ddot(n, z, 1, r_t, 1);
        if (fpclassify(fabs(rho)) == FP_ZERO) {
            printf("RHO aborting Bi-CG due to EPS...\n");
            exit(42);
        }

        if (iter == 1) {
            cblas_dcopy(n, z, 1, p, 1);
            cblas_dcopy(n, z_t, 1, p_t, 1);
        } else {
            /* p = z + beta*p, p_t = z_t + beta*p_t */
            beta = rho/rho1;
            cblas_dscal(n, beta, p, 1);
            cblas_dscal(n, beta, p_t, 1);
            cblas_daxpy(n, 1, z, 1, p, 1);
            cblas_daxpy(n, 1, z_t, 1, p_t, 1);
        }
        rho1 = rho;

        /* q = A*p, q_t = trans(A)*p_t (cs_gaxpy accumulates, so clear first) */
        memset(q, 0, n*sizeof(double));
        cs_gaxpy(A, p, q);
        memset(q_t, 0, n*sizeof(double));
        cs_gaxpy(AT, p_t, q_t);

        omega = cblas_ddot(n, p_t, 1, q, 1);
        if (fpclassify(fabs(omega)) == FP_ZERO) {
            printf("OMEGA aborting Bi-CG due to EPS...\n");
            exit(42);
        }
        alpha = rho/omega;

        cblas_daxpy(n, alpha, p, 1, res, 1);     /* x   = x   + alpha*p   */
        cblas_daxpy(n, -alpha, q, 1, r, 1);      /* r   = r   - alpha*q   */
        cblas_daxpy(n, -alpha, q_t, 1, r_t, 1);  /* r_t = r_t - alpha*q_t */

        r_norm = cblas_dnrm2(n, r, 1);
    }

    cblas_dcopy(n, res, 1, x, 1);

    cs_spfree(AT);
    free(work);
}
/*
 * Solves A*x = b with the preconditioned Conjugate Gradient method.
 *
 * A     sparse n-by-n matrix in compressed-column form (A->p and A->x are
 *       read directly to build the preconditioner)
 * b     right-hand side, length n
 * x     initial guess on entry, computed approximation on return
 * n     dimension of the system; nothing is done when n <= 0
 * itol  iteration stops once ||r||_2 / ||b||_2 <= itol (||b||_2 is replaced
 *       by 1 when it is zero)
 *
 * At most n iterations are performed. When rho or omega becomes exactly
 * zero, or when memory cannot be obtained, a message is printed and the
 * process terminates with exit(42).
 *
 * The work vectors live in one heap block rather than in variable-length
 * arrays, so large systems cannot overflow the stack.
 */
void conjugate_gradient_sparse(cs *A, double *b, double* x, int n, double itol)
{
    int i, j, pp, iter;
    double rho, rho1 = 0.0, alpha, beta, omega;
    double r_norm, b_norm;
    double *work;
    double *r, *z, *q, *p, *res, *precond;

    if (n <= 0)
        return;

    /* One zero-initialised block holding the six work vectors. */
    work = calloc((size_t)6 * (size_t)n, sizeof *work);
    if (work == NULL) {
        fprintf(stderr, "CG: out of memory for work vectors\n");
        exit(42);
    }
    r       = work;
    z       = r + n;
    q       = z + n;
    p       = q + n;
    res     = p + n;
    precond = res + n;

    /* Preconditioner: reciprocal of the largest magnitude stored in each
     * column. NOTE(review): a Jacobi preconditioner would use the diagonal
     * entry A(j,j); the two agree only when the diagonal dominates.
     * Empty or all-zero columns get a factor of 1 instead of reading past
     * the column and dividing by zero. */
    for (j = 0; j < n; ++j) {
        double max = 0.0;
        for (pp = A->p[j]; pp < A->p[j+1]; pp++) {
            double a = fabs(A->x[pp]);
            if (a > max)
                max = a;
        }
        precond[j] = (max > 0.0) ? 1/max : 1.0;
    }

    /* res accumulates the iterate; x itself is written only at the end. */
    cblas_dcopy(n, x, 1, res, 1);

    /* r = b - A*x; p is still all zeros and serves as scratch for A*x. */
    cs_gaxpy(A, x, p);
    for (i = 0; i < n; i++)
        r[i] = b[i] - p[i];

    r_norm = cblas_dnrm2(n, r, 1);
    b_norm = cblas_dnrm2(n, b, 1);
    if (!b_norm)
        b_norm = 1;

    iter = 0;
    while (r_norm/b_norm > itol && iter < n) {
        iter++;

        /* Apply the diagonal preconditioner without modifying r. */
        for (i = 0; i < n; i++)
            z[i] = precond[i]*r[i];

        rho = cblas_ddot(n, z, 1, r, 1);
        if (fpclassify(fabs(rho)) == FP_ZERO) {
            printf("RHO aborting CG due to EPS...\n");
            exit(42);
        }

        if (iter == 1) {
            cblas_dcopy(n, z, 1, p, 1);
        } else {
            /* p = z + beta*p */
            beta = rho/rho1;
            cblas_dscal(n, beta, p, 1);
            cblas_daxpy(n, 1, z, 1, p, 1);
        }
        rho1 = rho;

        /* q = A*p (cs_gaxpy accumulates, so clear q first) */
        memset(q, 0, n*sizeof(double));
        cs_gaxpy(A, p, q);

        omega = cblas_ddot(n, p, 1, q, 1);
        if (fpclassify(fabs(omega)) == FP_ZERO) {
            printf("OMEGA aborting CG due to EPS...\n");
            exit(42);
        }
        alpha = rho/omega;

        cblas_daxpy(n, alpha, p, 1, res, 1);   /* x = x + alpha*p */
        cblas_daxpy(n, -alpha, q, 1, r, 1);    /* r = r - alpha*q */

        r_norm = cblas_dnrm2(n, r, 1);
    }

    cblas_dcopy(n, res, 1, x, 1);
    free(work);
}
/*
 * Bracketing line search on the merit function q(t) = 0.5 * psi(t)^T psi(t)
 * along `direction`, starting from the step length stored in alpha[0].
 *
 * A step is accepted when the secant slope (q(alpha) - q(0)) / alpha lies
 * between m2 * q'(0) and m1 * q'(0), with m1 = 0.01 and m2 = 0.99.
 * Otherwise the bracket [alphamin, alphamax] is tightened and a new trial
 * step is chosen.
 *
 * problem, computeACFun3x3, rho  forwarded to ACPsi()
 * solution     current iterate
 * direction    search direction
 * mu, F        not used by this function
 * psi          merit residual; read for t = 0 and overwritten by every
 *              ACPsi() evaluation
 * J            sparse matrix used for the initial slope psi^T (J*direction)
 * tmp          workspace of length n + 2*m, where n is the row count of
 *              NM_triplet(problem->M) and m is problem->H->size1
 * alpha        in: first trial step; out: last step tried
 * maxiter_ls   maximum number of trial steps
 *
 * Returns 0 when a step is accepted, -1 when maxiter_ls trials were used up.
 */
int _globalLineSearchSparseGP(
  GlobalFrictionContactProblem *problem,
  AlartCurnierFun3x3Ptr computeACFun3x3,
  double *solution,
  double *direction,
  double *mu,
  double *rho,
  double *F,
  double *psi,
  CSparseMatrix *J,
  double *tmp,
  double alpha[1],
  unsigned int maxiter_ls)
{
  const double inf = 1e10;
  const double m1 = 0.01;
  const double m2 = 0.99;
  double alphamin = 1e-16;
  double alphamax = inf;

  unsigned int n = (unsigned)NM_triplet(problem->M)->m;
  unsigned int m = problem->H->size1;
  unsigned int problem_size = n+2*m;

  /* Merit value and directional derivative at t = 0. */
  double merit0 = 0.5 * cblas_ddot(problem_size, psi, 1, psi, 1);

  /* tmp <- J * direction (tmp is cleared first because cs_gaxpy adds) */
  cblas_dscal(problem_size, 0., tmp, 1);
  cs_gaxpy(J, direction, tmp);

  double slope0 = cblas_ddot(problem_size, psi, 1, tmp, 1);

  DEBUG_PRINTF("dqdt0=%e\n",slope0);
  DEBUG_PRINTF("q0=%e\n",merit0);

  for(unsigned int trial = 0; trial < maxiter_ls; ++trial)
  {
    /* tmp <- alpha*direction+solution */
    cblas_dcopy(problem_size, solution, 1, tmp, 1);
    cblas_daxpy(problem_size, alpha[0], direction, 1, tmp, 1);

    ACPsi(
      problem,
      computeACFun3x3,
      tmp,  /* v */
      tmp+problem->M->size0+problem->H->size1, /* P */
      tmp+problem->M->size0, /* U */
      rho, psi);

    double merit = 0.5 * cblas_ddot(problem_size, psi, 1, psi, 1);
    assert(merit >= 0);

    double secant = (merit - merit0) / alpha[0];

    int C1 = (secant >= m2 * slope0);
    int C2 = (secant <= m1 * slope0);

    DEBUG_PRINTF("C1=%i\t C2=%i\n",C1,C2);

    if(C1 && C2)
    {
      numerics_printf_verbose(1, "---- GFC3D - NSN_AC - global line search success. Number of ls iteration = %i alpha = %.10e, q = %.10e",
                              trial,
                              alpha[0], merit);
      return 0;
    }

    /* Exactly one condition failed: C1 violated raises the lower end of
     * the bracket, otherwise (C2 violated) the upper end is lowered. */
    if(C1)
    {
      alphamax = alpha[0];
    }
    else
    {
      alphamin = alpha[0];
    }

    /* NOTE(review): the test looks at alpha[0], not alphamax; while no
     * upper bound has been found the midpoint involves inf = 1e10.
     * Confirm whether `alphamax < inf` was intended. */
    alpha[0] = (alpha[0] < inf) ? 0.5 * (alphamin + alphamax) : alphamin;
  }

  numerics_printf_verbose(1,"---- GFC3D - NSN_AC - global line search unsuccessful. Max number of ls iteration reached  = %i  with alpha = %.10e",
                          maxiter_ls, alpha[0]);

  return -1;
}
/** * @brief Main wrapper for fitting a trendfilter model. * Takes as input either a sequence of lambda tuning parameters, or the number * of desired lambda values. In the latter case the function will also calculate * a lambda sequence. The user must supply allocated memory to store the output, * with the function itself returning only @c void. For default values, and an * example of how to call the function, see the function tf_admm_default. * * @param y a vector of responses * @param x a vector of response locations; must be in increasing order * @param w a vector of sample weights * @param n the length of y, x, and w * @param k degree of the trendfilter; i.e., k=1 linear * @param family family code for the type of fit; family=0 for OLS * @param max_iter maximum number of ADMM interations; ignored for k=0 * @param lam_flag 0/1 flag for whether lambda sequence needs to be estimated * @param lambda either a sequence of lambda when lam_flag=0, or empty * allocated space if lam_flag=1 * @param nlambda number of lambda values; need for both lam_flag=0 and 1 * @param lambda_min_ratio minimum ratio between min and max lambda; ignored for lam_flag=0 * @param beta allocated space of size n*nlambda to store the output coefficents * @param obj allocated space of size max_iter*nlambda to store the objective * @param iter allocated space of size nlambda to store the number of iterations * @param status allocated space of size nlambda to store the status of each run * @param rho tuning parameter for the ADMM algorithm * @param obj_tol stopping criteria tolerance * @param alpha_ls for family != 0, line search tuning parameter * @param gamma_ls for family != 0, line search tuning parameter * @param max_iter_ls for family != 0, max number of iterations in line search * @param max_iter_newton for family != 0, max number of iterations in inner ADMM * @param verbose 0/1 flag for printing progress * @return void * @see tf_admm_default */ void tf_admm (double * y, double * x, double 
* w, int n, int k, int family, int max_iter, int lam_flag, double * lambda, int nlambda, double lambda_min_ratio, double * beta, double * obj, int * iter, int * status, double rho, double obj_tol, double alpha_ls, double gamma_ls, int max_iter_ls, int max_iter_newton, int verbose) { int i; int j; double max_lam; double min_lam; double * temp_n; double * beta_max; double * alpha; double * u; cs * D; cs * Dt; cs * Dk; cs * Dkt; cs * DktDk; gqr * Dt_qr; gqr * Dkt_qr; beta_max = (double *) malloc(n * sizeof(double)); temp_n = (double *) malloc(n * sizeof(double)); alpha = (double *) malloc(n * sizeof(double)); /* we use extra buffer (n vs n-k) */ u = (double *) malloc(n * sizeof(double)); /* we use extra buffer (n vs n-k) */ /* Assume w does not have zeros */ for(i = 0; i < n; i++) temp_n[i] = 1/sqrt(w[i]); D = tf_calc_dk(n, k+1, x); Dk = tf_calc_dktil(n, k, x); Dt = cs_transpose(D, 1); diag_times_sparse(Dt, temp_n); /* Dt = W^{-1/2} Dt */ Dkt = cs_transpose(Dk, 1); Dt_qr = glmgen_qr(Dt); Dkt_qr = glmgen_qr(Dkt); DktDk = cs_multiply(Dkt,Dk); /* Determine the maximum lambda in the path, and initiate the path if needed * using the input lambda_min_ratio and equally spaced log points. */ max_lam = tf_maxlam(n, y, Dt_qr, w); if (!lam_flag) { min_lam = max_lam * lambda_min_ratio; lambda[0] = max_lam; for (i = 1; i < nlambda; i++) lambda[i] = exp((log(max_lam) * (nlambda - i -1) + log(min_lam) * i) / (nlambda-1)); } rho = rho * pow( (x[n-1] - x[0])/n, (double)k); /* Initiate alpha and u for a warm start */ if (lambda[0] < max_lam * 1e-5) { for (i = 0; i < n - k; i++) { alpha[i] = 0; u[i] = 0; } } else { /* beta_max */ for (i = 0; i < n; i++) temp_n[i] = -sqrt(w[i]) * y[i]; glmgen_qrsol (Dt_qr, temp_n); for (i = 0; i < n; i++) beta_max[i] = 0; cs_gaxpy(Dt, temp_n, beta_max); /* Dt has a W^{-1/2}, so in the next step divide by sqrt(w) instead of w. 
*/ for (i = 0; i < n; i++) beta_max[i] = y[i] - beta_max[i]/sqrt(w[i]); /* alpha_max */ tf_dxtil(x, n, k, beta_max, alpha); /* u_max */ switch (family) { case FAMILY_GAUSSIAN: for (i = 0; i < n; i++) u[i] = w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]); break; case FAMILY_LOGISTIC: for (i = 0; i < n; i++) { u[i] = logi_b2(beta_max[i]) * w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]); } break; case FAMILY_POISSON: for (i = 0; i < n; i++) { u[i] = pois_b2(beta_max[i]) * w[i] *(beta_max[i] - y[i]) / (rho * lambda[0]); } break; default: for (i = 0; i < nlambda; i++) status[i] = 2; return; } glmgen_qrsol (Dkt_qr, u); } /* Iterate lower level functions over all lambda values; * the alpha and u vectors get used each time of subsequent * warm starts */ for (i = 0; i < nlambda; i++) { /* warm start */ double * beta_init = (i == 0) ? beta_max : beta + (i-1)*n; for(j = 0; j < n; j++) beta[i*n + j] = beta_init[j]; switch (family) { case FAMILY_GAUSSIAN: tf_admm_gauss(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i, rho * lambda[i], obj_tol, DktDk, verbose); break; case FAMILY_LOGISTIC: tf_admm_glm(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i, rho * lambda[i], obj_tol, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton, DktDk, &logi_b, &logi_b1, &logi_b2, verbose); break; case FAMILY_POISSON: tf_admm_glm(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i, rho * lambda[i], obj_tol, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton, DktDk, &pois_b, &pois_b1, &pois_b2, verbose); break; } /* If there any NaNs in beta: reset beta, alpha, u */ if(has_nan(beta + i * n, n)) { for(j = 0; j < n; j++) beta[i*n + j] = 0; for(j = 0; j < n-k; j++) { alpha[j] = 0; u[j] = 0; } status[i] = 1; printf("Numerical error in lambda[%d]=%f",i,lambda[i]); } } cs_spfree(D); cs_spfree(Dt); cs_spfree(Dk); cs_spfree(Dkt); cs_spfree(DktDk); glmgen_gqr_free(Dt_qr); glmgen_gqr_free(Dkt_qr); free(temp_n); 
free(beta_max); free(alpha); free(u); }
/* calculate merit function for a local problem */ double fclib_merit_local (struct fclib_local *problem, enum fclib_merit merit, struct fclib_solution *solution) { struct fclib_matrix * W = problem->W; struct fclib_matrix * V = problem->V; struct fclib_matrix * R = problem->R; double *mu = problem->mu; double *q = problem->q; double *s = problem->s; int d = problem->spacedim; if (d !=3 ) { printf("fclib_merit_local for space dimension = %i not yet implemented\n",d); return 0; } double *v = solution->v; double *r = solution->r; double *u = solution->u; double *l = solution->l; double error_l, error; double * tmp; error=0.0; error_l=0.0; int i, ic, ic3; if (merit == MERIT_1) { /* cs M_cs; */ /* fclib_matrix_to_cssparse(W, &M_cs); */ /* cs V_cs; */ /* fclib_matrix_to_cssparse(V, &V_cs); */ /* cs R_cs; */ /* fclib_matrix_to_cssparse(R, &R_cs); */ int n_e =0; if (R) n_e = R->n; /* compute V^T {r} + R \lambda + s */ if (n_e >0) { cs * VT = cs_transpose((cs *)V, 0) ; tmp = (double *)malloc(n_e*sizeof(double)); for (i =0; i <n_e; i++) tmp[i] = s[i] ; cs_gaxpy(VT, r, tmp); cs_gaxpy((cs *)R, l, tmp); error_l += dnrm2(tmp,n_e)/(1.0 + dnrm2(s,n_e) ); free(tmp); } /* compute \hat u = W {r} + V\lambda + q */ tmp = (double *)malloc(W->n*sizeof(double)); for (i =0; i <W->n; i++) tmp[i] = q[i] ; cs_gaxpy((cs*)V, l, tmp); cs_gaxpy((cs*)W, r, tmp); /* Compute natural map */ int nc = W->n/3; for (ic = 0, ic3 = 0 ; ic < nc ; ic++, ic3 += 3) { FrictionContact3D_unitary_compute_and_add_error(r + ic3, tmp + ic3, mu[ic], &error); } free(tmp); error = sqrt(error)/(1.0 + sqrt(dnrm2(q,W->n)) )+error_l; /* printf("error_l = %12.8e", error_l); */ /* printf("norm of u = %12.8e\n", dnrm2(u,W->n)); */ /* printf("norm of r = %12.8e\n", dnrm2(r,W->n)); */ /* printf("error = %12.8e\n", error); */ return error; } return 0; /* TODO */ }
/*
 * Solves A*x = b with the preconditioned Conjugate Gradient method.
 *
 * A     sparse n-by-n matrix in compressed-column form (A->p and A->x are
 *       read directly to build the preconditioner)
 * b     right-hand side, length n
 * n     dimension of the system
 * x     initial guess on entry, computed approximation on return
 * itol  currently not used: the loop stops at a fixed relative residual of
 *       1e-3 or after ITER_NUM iterations.
 *       NOTE(review): this looks like a leftover; confirm whether itol
 *       should replace the constant.
 *
 * Progress is printed every 100 iterations and a summary at the end. When
 * rho or omega becomes exactly zero a message is printed and the process
 * terminates with exit(42).
 *
 * All work vectors are released before returning.
 */
void conjugate_gradient_sparse(cs *A, double *b, int n, double *x, double itol)
{
    int i, j, pp;
    int iter;
    double rho, rho1 = 0.0, alpha, beta, omega;
    double r_norm, b_norm, resid;
    double *r, *z, *q, *p, *res, *precond;

    (void)itol; /* see the note in the header comment */

    r       = (double *)safe_malloc(n * sizeof(double));
    z       = (double *)safe_malloc(n * sizeof(double));
    q       = (double *)safe_malloc(n * sizeof(double));
    p       = (double *)safe_malloc(n * sizeof(double));
    res     = (double *)safe_malloc(n * sizeof(double));
    precond = (double *)safe_malloc(n * sizeof(double));

    /* Preconditioner: reciprocal of the largest magnitude stored in each
     * column. NOTE(review): a Jacobi preconditioner would use the diagonal
     * entry A(j,j); the two agree only when the diagonal dominates.
     * Empty or all-zero columns get a factor of 1 instead of reading past
     * the column and dividing by zero. */
    for (j = 0; j < n; ++j) {
        double max = 0.0;
        for (pp = A->p[j]; pp < A->p[j+1]; pp++) {
            double a = fabs(A->x[pp]);
            if (a > max)
                max = a;
        }
        precond[j] = (max > 0.0) ? 1/max : 1.0;
    }

    /* res accumulates the iterate; x itself is written only at the end. */
    cblas_dcopy(n, x, 1, res, 1);

    /* r = b - A*x; p is cleared and used as scratch for A*x. */
    memset(p, 0, n*sizeof(double));
    cs_gaxpy(A, x, p);
    for (i = 0; i < n; i++)
        r[i] = b[i] - p[i];

    r_norm = cblas_dnrm2(n, r, 1);
    b_norm = cblas_dnrm2(n, b, 1);
    if (!b_norm)
        b_norm = 1;

    iter = 0;
    while ((resid = r_norm/b_norm) > 1e-3 && iter < ITER_NUM) {
        if (!(iter % 100))
            printf("Iteration: %d %f\n",iter,resid);
        iter++;

        /* Apply the diagonal preconditioner without modifying r. */
        for (i = 0; i < n; i++)
            z[i] = precond[i]*r[i];

        rho = cblas_ddot(n, z, 1, r, 1);
        if (fpclassify(fabs(rho)) == FP_ZERO) {
            printf("RHO aborting CG due to EPS...\n");
            exit(42);
        }

        if (iter == 1) {
            cblas_dcopy(n, z, 1, p, 1);
        } else {
            /* p = z + beta*p */
            beta = rho/rho1;
            cblas_dscal(n, beta, p, 1);
            cblas_daxpy(n, 1, z, 1, p, 1);
        }
        rho1 = rho;

        /* q = A*p (cs_gaxpy accumulates, so clear q first) */
        memset(q, 0, n*sizeof(double));
        cs_gaxpy(A, p, q);

        omega = cblas_ddot(n, p, 1, q, 1);
        if (fpclassify(fabs(omega)) == FP_ZERO) {
            printf("OMEGA aborting CG due to EPS...\n");
            exit(42);
        }
        alpha = rho/omega;

        cblas_daxpy(n, alpha, p, 1, res, 1);   /* x = x + alpha*p */
        cblas_daxpy(n, -alpha, q, 1, r, 1);    /* r = r - alpha*q */

        r_norm = cblas_dnrm2(n, r, 1);
    }
    printf("Solution approximated after %d iterations for tolerance %f\n",iter,resid);

    cblas_dcopy(n, res, 1, x, 1);

    /* Release the work vectors; they were previously leaked on every call. */
    free(r);
    free(z);
    free(q);
    free(p);
    free(res);
    free(precond);
}