/* * 最大最大激励化 第 unitdx 个单元 * */ double LayerWiseRBMs::maximizeUnit(int layerIdx, int unitIdx, double * unitSample, double argvNorm, int epoch){ int AMnumIn = layers[0]->numVis; // unitsample 归一化 double curNorm = squareNorm(unitSample, AMnumIn, 1); cblas_dscal(AMnumIn, argvNorm / curNorm, unitSample, 1); double maxValue =0; for(int k=0; k<epoch; k++){ // forward for(int i=0; i<=layerIdx; i++){ if(i==0) layers[i]->setInput(unitSample); else layers[i]->setInput(layers[i-1]->getOutput()); layers[i]->setBatchSize(1); layers[i]->runBatch(); } maxValue = layers[layerIdx]->getOutput()[unitIdx]; //back propagate for(int i=layerIdx; i>=0; i--){ if(i==layerIdx) layers[i]->getAMDelta(unitIdx, NULL) ; else layers[i]->getAMDelta(-1, layers[i+1]->AMDelta); } double lr = 0.01 * cblas_dasum(AMnumIn, unitSample, 1) / cblas_dasum(AMnumIn, layers[0]->AMDelta, 1); // update unitSample cblas_daxpy(AMnumIn, lr, layers[0]->AMDelta, 1, unitSample, 1); //归一化 unitSample curNorm = squareNorm(unitSample, AMnumIn, 1); cblas_dscal(AMnumIn, argvNorm / curNorm, unitSample, 1); } return maxValue; }
Result merge(Result* results) { Result r1 = results[0]; Result r2 = results[1]; if (r1.C + r1.n * r1.CM == r2.C) { // split n Result r = {r1.m, 2*r1.n, r1.CM, r1.C}; return r; } else if (r1.C + r1.m == r2.C) { // split m Result r = {2*r1.m, r1.n, r1.CM, r1.C}; return r; } else { // split k int x; for (x = 0; x < r1.n; x++) { cblas_daxpy(r2.m, 1, r2.C + r2.m * x, 1, r1.C + r1.CM * x, 1); } free(r2.C); Result r = {r1.m, r1.n, r1.CM, r1.C}; return r; } }
// Calculates p void cg_solver_calc_p( const int x, const int y, const int z, const int halo_depth, const double beta, double* vec_p, double* vec_r) { int x_inner = x - 2*halo_depth; #pragma omp parallel for for(int ii = halo_depth; ii < z-halo_depth; ++ii) { for(int jj = halo_depth; jj < y-halo_depth; ++jj) { const int offset = ii*x*y + jj*x + halo_depth; cblas_dscal(x_inner, beta, vec_p + offset, 1); cblas_daxpy(x_inner, 1.0, vec_r + offset, 1, vec_p + offset, 1); } } }
int main() { int n = 10; int in_x =1; int in_y =1; std::vector<double> x(n); std::vector<double> y(n); double alpha = 10; std::fill(x.begin(),x.end(),1.0); std::fill(y.begin(),y.end(),2.0); cblas_daxpy( n, alpha, &x[0], in_x, &y[0], in_y); //Print y for(int j=0;j<n;j++) std::cout << y[j] << "\t"; std::cout << std::endl; }
void Map::process(const double* inputs, double* outputs) { if(m_first_source > -1) { m_encoders[m_first_source]->process(inputs[m_first_source] * m_gains[m_first_source], m_harmonics_double); m_widers[m_first_source]->process(m_harmonics_double, outputs); for(unsigned int i = m_first_source+1; i < m_number_of_sources; i++) { if (!m_muted[i]) { m_encoders[i]->process(inputs[i] * m_gains[i], m_harmonics_double); m_widers[i]->process(m_harmonics_double, m_harmonics_double); cblas_daxpy(m_number_of_harmonics, 1.f, m_harmonics_double, 1, outputs, 1); } } } else { for(unsigned int i = 0; i < m_number_of_harmonics; i++) outputs[i] = 0.; } }
int main () { typedef boost::multi_array<double, 1> vector; typedef vector::index vector_index; int N = 100000000; vector x(boost::extents[N]); vector y(boost::extents[N]); vector a(boost::extents[N]); #pragma omp parallel for for (vector_index i = 0; i < N; i++) { x[i] = 1; y[i] = 1; a[i] = y[i]; } cblas_daxpy(N, 1.0, &x[0], 1, &a[0], 1); std::cout << a[0] << '\n'; return 0; }
JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_dmcsrm (JNIEnv * env, jobject calling_obj, jint M, jint N, jdoubleArray j_A, jint lda, jdoubleArray j_B, jintArray j_ir, jintArray j_jc, jdoubleArray j_C, jint ldc){ jdouble * A = (*env)->GetPrimitiveArrayCritical(env, j_A, JNI_FALSE); jdouble * B = (*env)->GetPrimitiveArrayCritical(env, j_B, JNI_FALSE); jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, JNI_FALSE); jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, JNI_FALSE); jdouble * C = (*env)->GetPrimitiveArrayCritical(env, j_C, JNI_FALSE); int ioff = jc[0]; int i, j, k; for (i = 0; i < N; i++) { for (j = jc[i]-ioff; j < jc[i+1]-ioff; j++) { k = ir[j]-ioff; cblas_daxpy(M, B[j], A+(i*lda), 1, C+(k*ldc), 1); } } (*env)->ReleasePrimitiveArrayCritical(env, j_C, C, 0); (*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0); (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0); (*env)->ReleasePrimitiveArrayCritical(env, j_B, B, 0); (*env)->ReleasePrimitiveArrayCritical(env, j_A, A, 0); }
CPS_START_NAMESPACE //-------------------------------------------------------------------- // CVS keywords // // $Source: /home/chulwoo/CPS/repo/CVS/cps_only/cps_pp/src/util/dirac_op/d_op_mobius/noarch/mobius_m.C,v $ // $State: Exp $ // //-------------------------------------------------------------------- //------------------------------------------------------------------ // mobius_m.C // // mobius_m is the fermion matrix. // The in, out fields are defined on the checkerboard lattice // //------------------------------------------------------------------ CPS_END_NAMESPACE #include<util/dwf.h> #include<util/mobius.h> #include<util/gjp.h> #include<util/vector.h> #include<util/verbose.h> #include<util/error.h> #include<util/dirac_op.h> #include<util/time_cps.h> #include "blas-subs.h" CPS_START_NAMESPACE //4d precond. mobius Dirac op: // M_5 - kappa_b^2 M4eo M_5^-1 M4oe void mobius_m(Vector *out, Matrix *gauge_field, Vector *in, Float mass, Dwf *mobius_lib_arg) { //------------------------------------------------------------------ // Initializations //------------------------------------------------------------------ const int f_size = 24 * mobius_lib_arg->vol_4d * mobius_lib_arg->ls / 2; const Float kappa_ratio = mobius_lib_arg->mobius_kappa_b/mobius_lib_arg->mobius_kappa_c; const Float minus_kappa_b_sq = -mobius_lib_arg->mobius_kappa_b * mobius_lib_arg->mobius_kappa_b; Vector *frm_tmp2 = (Vector *) mobius_lib_arg->frm_tmp2; //Vector *temp = (Vector *) smalloc(f_size * sizeof(Float)); Float norm; // out = [ 1 + kappa_b/kappa_c 1/2 dslash_5 - kappa_b^2 Meo M5inv Moe] in // (dslash_5 uses (1+-g5), not P_R,L, i.e. no factor of 1/2 which is here out front) // 1. ftmp2 = Meo M5inv Moe in // 2. out <- in // 3. out += -kappa_b^2 ftmp2 // 4. out += -kappa_b/kappa_c /2 dslash_5 in // (done by the dslash_5 with a5_inv = -kappa_b/kappa_c/2 *GJP.MobiusA5Inv() ) //-------------------------------------------------------------- // 1. ftmp2 = Meo M5inv Moe in //-------------------------------------------------------------- // Apply Dslash O <- E //------------------------------------------------------------------ time_elapse(); mobius_dslash_4(out, gauge_field, in, 0, 0, mobius_lib_arg, mass); DEBUG_MOBIUS_DSLASH("mobius_dslash_4 %e\n", time_elapse()); //------------------------------------------------------------------ // Apply M_5^-1 (hopping in 5th dir + diagonal) //------------------------------------------------------------------ mobius_m5inv(out, mass, 0, mobius_lib_arg); DEBUG_MOBIUS_DSLASH("mobius_m5inv %e\n", time_elapse()); //------------------------------------------------------------------ // Apply Dslash E <- O //------------------------------------------------------------------ mobius_dslash_4(frm_tmp2, gauge_field, out, 1, 0, mobius_lib_arg, mass); DEBUG_MOBIUS_DSLASH("mobius_dslash_4 %e\n", time_elapse()); //------------------------------------------------------------------ // 2. out <- in //------------------------------------------------------------------ #ifndef USE_BLAS moveFloat((IFloat*)out, (IFloat*)in, f_size); #else cblas_dcopy(f_size, (IFloat*)in, 1, (IFloat*)out, 1); #endif DEBUG_MOBIUS_DSLASH("out <- in %e\n", time_elapse()); //------------------------------------------------------------------ // 3. out += -kap2 ftmp2 //------------------------------------------------------------------ #ifndef USE_BLAS fTimesV1PlusV2((IFloat*)out, minus_kappa_b_sq, (IFloat*)frm_tmp2, (IFloat *)out, f_size); #else cblas_daxpy(f_size, minus_kappa_b_sq, (IFloat*)frm_tmp2,1, (IFloat *)out, 1); #endif DEBUG_MOBIUS_DSLASH("mobius out+= kap2 %e\n", time_elapse()); //------------------------------------------------------------------ // 4. out += kappa_b/kappa_c dslash_5 in //------------------------------------------------------------------ mobius_kappa_dslash_5_plus(out, in, mass, 0, mobius_lib_arg, kappa_ratio); DEBUG_MOBIUS_DSLASH("mobius_kappa_dslash_5_plus %e\n", time_elapse()); // Flops count in this function is two AXPY = 4 flops per vector elements //DiracOp::CGflops += 3*f_size; }
void plotMerit(double *z, double psi_k, double descentCondition) { int incx = 1, incy = 1; double q_0, q_tk, qp_tk, merit_k; /* double tmin = 1e-12; */ double tk = 1, aux; double m1 = 1e-4; double Nstep = 0; int i = 0; FILE *fp; (*sFphi)(sN, z, sphi_z, 0); aux = cblas_dnrm2(sN, sphi_z, 1); /* Computes merit function */ aux = 0.5 * aux * aux; printf("plot psi_z %e\n", aux); if (!sPlotMerit) return; if (sPlotMerit) { /* sPlotMerit=0;*/ strcpy(fileName, "outputLS"); (*sFphi)(sN, z, sphi_z, 0); q_0 = cblas_dnrm2(sN, sphi_z , incx); q_0 = 0.5 * q_0 * q_0; fp = fopen(fileName, "w"); /* sPlotMerit=0;*/ tk = 5e-7; aux = -tk; Nstep = 1e4; for (i = 0; i < 2 * Nstep; i++) { cblas_dcopy(sN, z, incx, sz2, incx); cblas_daxpy(sN , aux , sdir_descent , incx , sz2 , incy); (*sFphi)(sN, sz2, sphi_z, 0); q_tk = cblas_dnrm2(sN, sphi_z , incx); q_tk = 0.5 * q_tk * q_tk; (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1); /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */ cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_z, incx); qp_tk = cblas_ddot(sN, sgrad_psi_z, 1, sdir_descent, 1); merit_k = psi_k + m1 * aux * descentCondition; fprintf(fp, "%e %.16e %.16e %e\n", aux, q_tk, merit_k, qp_tk); if (i == Nstep - 1) aux = 0; else aux += tk / Nstep; } fclose(fp); } }
int lineSearch_Wolfe(double *z, double qp_0) { int incx = 1, incy = 1; double q_0, q_tk, qp_tk; double tmin = 1e-12; int maxiter = 100; int niter = 0; double tk = 1; double tg, td; double m1 = 0.1; double m2 = 0.9; (*sFphi)(sN, z, sphi_z, 0); q_0 = cblas_dnrm2(sN, sphi_z , incx); q_0 = 0.5 * q_0 * q_0; tg = 0; td = 10e5; tk = (tg + td) / 2.0; while (niter < maxiter || (td - tg) < tmin) { niter++; /*q_tk = 0.5*|| phi(z+tk*d) ||*/ cblas_dcopy(sN, z, incx, sz2, incx); cblas_daxpy(sN , tk , sdir_descent , incx , sz2 , incy); (*sFphi)(sN, sz2, sphi_z, 0); q_tk = cblas_dnrm2(sN, sphi_z , incx); q_tk = 0.5 * q_tk * q_tk; (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1); /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */ cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_z, incx); qp_tk = cblas_ddot(sN, sgrad_psi_z, 1, sdir_descent, 1); if (qp_tk < m2 * qp_0 && q_tk < q_0 + m1 * tk * qp_0) { /*too small*/ if (niter == 1) break; tg = tk; tk = (tg + td) / 2.0; continue; } else if (q_tk > q_0 + m1 * tk * qp_0) { /*too big*/ td = tk; tk = (tg + td) / 2.0; continue; } else break; } cblas_dcopy(sN, sz2, incx, z, incx); if ((td - tg) <= tmin) { printf("NonSmoothNewton2::lineSearchWolfe warning, resulting tk < tmin, linesearch stopped.\n"); return 0; } return 1; }
void caffe_axpy<double>(const int N, const double alpha, const double* X, double* Y) { cblas_daxpy(N, alpha, X, 1, Y, 1); }
void eblas_daxpy_sub(size_t iStart, size_t iStop, double a, const double* x, int incx, double* y, int incy) { cblas_daxpy(iStop-iStart, a, x+incx*iStart, incx, y+incy*iStart, incy); }
void lcp_latin(LinearComplementarityProblem* problem, double *z, double *w, int *info , SolverOptions* options) { /* matrix M of the lcp */ double * M = problem->M->matrix0; /* size of the LCP */ int n = problem->size; int n2 = n * n; int i, j, iter1, nrhs; int info2 = 0; int itt, it_end; int incx, incy; int itermax = options->iparam[0]; double tol = options->dparam[0]; double k_latin = options->dparam[2]; double alpha, beta; double err1; double res, errmax; double *wc, *zc, *kinvden1, *kinvden2, *wt; double *maxwt, *wnum1, *znum1, *ww, *zz; double *num1, *kinvnum1, *den1, *den2, *wden1, *zden1; double *kinvwden1, *kzden1; double *k, *kinv, *DPO; // char trans='T', notrans='N', uplo='U', diag='N'; incx = 1; incy = 1; /* Recup input */ errmax = tol; itt = itermax; /* Initialize output */ options->iparam[1] = 0; options->dparam[1] = 0.0; /* Allocations */ ww = (double*) malloc(n * sizeof(double)); zz = (double*) malloc(n * sizeof(double)); wc = (double*) malloc(n * sizeof(double)); zc = (double*) malloc(n * sizeof(double)); znum1 = (double*) malloc(n * sizeof(double)); wnum1 = (double*) malloc(n * sizeof(double)); kinvden1 = (double*) malloc(n * sizeof(double)); kinvden2 = (double*) malloc(n * sizeof(double)); wt = (double*) malloc(n * sizeof(double)); maxwt = (double*) malloc(n * sizeof(double)); num1 = (double*) malloc(n * sizeof(double)); kinvnum1 = (double*) malloc(n * sizeof(double)); den1 = (double*) malloc(n * sizeof(double)); den2 = (double*) malloc(n * sizeof(double)); wden1 = (double*) malloc(n * sizeof(double)); zden1 = (double*) malloc(n * sizeof(double)); kinvwden1 = (double*) malloc(n * sizeof(double)); kzden1 = (double*) malloc(n * sizeof(double)); DPO = (double*) malloc(n2 * sizeof(double)); k = (double*) malloc(n2 * sizeof(double)); kinv = (double*) malloc(n2 * sizeof(double)); /* Initialization */ for (i = 0; i < n2; i++) { if (i < n) { wc[i] = 0.0; zc[i] = 0.0; z[i] = 0.0; w[i] = 0.0; znum1[i] = 0.0; wnum1[i] = 0.0; kinvden1[i] = 0.0; kinvden2[i] = 0.0; wt[i] = 0.0; maxwt[i] = 0.0; num1[i] = 0.0; kinvnum1[i] = 0.0; den1[i] = 0.0; den2[i] = 0.0; } k[i] = 0.0; kinv[i] = 0.0; DPO[i] = 0.0; } for (i = 0 ; i < n ; i++) { k[i * n + i] = k_latin * M[i * n + i]; if (fabs(k[i * n + i]) < DBL_EPSILON) { if (verbose > 0) { printf(" Warning nul diagonal term in k matrix \n"); } free(ww); free(zz); free(wc); free(zc); free(znum1); free(wnum1); free(kinvden1); free(kinvden2); free(wt); free(maxwt); free(num1); free(kinvnum1); free(den1); free(den2); free(wden1); free(zden1); free(kinvwden1); free(kzden1); free(DPO); free(k); free(kinv); *info = 3; return; } else kinv[i + n * i] = 1.0 / k[i + n * i]; } for (i = 0; i < n; i++) for (j = 0; j < n; j++) DPO[i + n * j] = M[j * n + i] + k[i + n * j]; /* Cholesky */ DPOTRF(LA_UP, n, DPO , n, &info2); if (info2 != 0) { printf(" Matter with Cholesky Factorization \n "); free(ww); free(zz); free(wc); free(zc); free(znum1); free(wnum1); free(kinvden1); free(kinvden2); free(wt); free(maxwt); free(num1); free(kinvnum1); free(den1); free(den2); free(wden1); free(zden1); free(kinvwden1); free(kzden1); free(DPO); free(k); free(kinv); *info = 2; return; } /* End of Cholesky */ /* Iteration loops */ iter1 = 0; err1 = 1.; while ((iter1 < itt) && (err1 > errmax)) { /* Linear stage (zc,wc) -> (z,w)*/ alpha = 1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, k, n, zc, incx, beta, wc, incy); cblas_dcopy(n, problem->q, incx, znum1, incy); alpha = -1.; cblas_dscal(n , alpha , znum1 , incx); alpha = 1.; cblas_daxpy(n, alpha, wc, incx, znum1, incy); nrhs = 1; DTRTRS(LA_UP, LA_TRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info2); DTRTRS(LA_UP, LA_NOTRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info2); cblas_dcopy(n, znum1, incx, z, incy); alpha = -1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, k, n, z, incx, beta, wc, incy); cblas_dcopy(n, wc, incx, w, incy); /* Local Stage */ cblas_dcopy(n, w, incx, wt, incy); alpha = -1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, k, n, z, incx, beta, wt, incy); for (i = 0; i < n; i++) { if (wt[i] > 0.0) { wc[i] = wt[i]; zc[i] = 0.0; } else { wc[i] = 0.0; zc[i] = -wt[i] / k[i + n * i]; } } /* Convergence criterium */ cblas_dcopy(n, w, incx, wnum1, incy); alpha = -1.; cblas_daxpy(n, alpha, wc, incx, wnum1, incy); cblas_dcopy(n, z, incx, znum1, incy); cblas_daxpy(n, alpha, zc, incx, znum1, incy); alpha = 1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, k, n, znum1, incx, beta, wnum1, incy); /* wnum1(:) =(w(:)-wc(:))+matmul( k(:,:),(z(:)-zc(:))) */ alpha = 1.; beta = 0.; cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, kinv, n, wnum1, incx, beta, kinvnum1, incy); cblas_dcopy(n, z, incx, zz, incy); cblas_dcopy(n, w, incx, ww, incy); alpha = 1.; cblas_daxpy(n, alpha, wc, incx, ww, incy); cblas_daxpy(n, alpha, zc, incx, zz, incy); beta = 0.; alpha = 1.; cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, k, n, zz, incx, beta, kzden1, incy); beta = 0.; alpha = 1.; cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, kinv, n, ww, incx, beta, kinvwden1, incy); lcp_compute_error_only(n, z, w, &err1); it_end = iter1; res = err1; iter1 = iter1 + 1; options->iparam[1] = it_end; options->dparam[1] = res; } if (isnan(err1) || (err1 > errmax)) { if (verbose > 0) printf("No convergence of LATIN after %d iterations, the residue is %g\n", iter1, err1); *info = 1; } else { if (verbose > 0) printf("Convergence of LATIN after %d iterations, the residue is %g \n", iter1, err1); *info = 0; } free(wc); free(DPO); free(k); free(kinv); free(zz); free(ww); free(zc); free(znum1); free(wnum1); free(kinvden1); free(kinvden2); free(wt); free(maxwt); free(num1); free(kinvnum1); free(den1); free(den2); free(wden1); free(zden1); free(kinvwden1); free(kzden1); }
void get_top_delta(const da *m, const double *y, const double *x, double *d, const int batch_size){ cblas_dcopy(batch_size * m->n_in, y, 1, d, 1); cblas_daxpy(batch_size * m->n_in, -1, x, 1, d, 1); }
void train_da(da *m, dataset_blas *train_set, dataset_blas *expected_set, int mini_batch, int n_epcho, char* weight_filename){ int i, j, k, p, q; int epcho; double cost, total_cost; time_t start_time, end_time; FILE *weight_file; //weight_file = fopen(weight_filename, "w"); for(epcho = 0; epcho < n_epcho; epcho++){ total_cost = 0.0; start_time = time(NULL); for(k = 0; k < train_set->N / mini_batch; k++){ if((k+1) % 500 == 0){ printf("epcho %d batch %d\n", epcho + 1, k + 1); } get_hidden_values(m, train_set->input + k * mini_batch * m->n_in, h_out, mini_batch); get_reconstruct_input(m, h_out, z_out, mini_batch); get_top_delta(m, z_out, expected_set->input + k * mini_batch * m->n_in, d_high, mini_batch); get_second_delta(m, h_out, d_high, d_low, mini_batch); /* modify weight matrix W */ cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, m->n_out, m->n_in, mini_batch, 1, d_low, m->n_out, train_set->input + k * mini_batch * m->n_in, m->n_in, 0, tr1, m->n_in); cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, m->n_out, m->n_in, mini_batch, 1, h_out, m->n_out, d_high, m->n_in, 0, tr2, m->n_in); cblas_daxpy(m->n_out * m->n_in, 1, tr1, 1, tr2, 1); cblas_daxpy(m->n_out * m->n_in, -eta / mini_batch, tr2, 1, m->W, 1); /* modify bias vector */ cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, m->n_out, 1, mini_batch, 1, d_low, m->n_out, Ivec, 1, 0, tr1, 1); cblas_daxpy(m->n_out, -eta / mini_batch, tr1, 1, m->b, 1); cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, m->n_in, 1, mini_batch, 1, d_high, m->n_in, Ivec, 1, 0, tr1, 1); cblas_daxpy(m->n_in, -eta / mini_batch, tr1, 1, m->c, 1); for(i = 0; i < mini_batch * m->n_in; i++){ tr1[i] = log(z_out[i]); } total_cost -= cblas_ddot(mini_batch * m->n_in, expected_set->input + k * mini_batch * m->n_in, 1, tr1, 1) / mini_batch; for(i = 0; i < mini_batch * m->n_in; i++){ tr1[i] = log(1.0 - z_out[i]); } cblas_dcopy(mini_batch * m->n_in, Ivec, 1, tr2, 1); cblas_daxpy(mini_batch * m->n_in, -1, expected_set->input + k * mini_batch * m->n_in, 1, tr2, 1); total_cost -= cblas_ddot(mini_batch * m->n_in, tr1, 1, tr2, 1) / mini_batch; } end_time = time(NULL); printf("epcho %d cost: %.5lf\ttime: %ds\n", epcho + 1, total_cost / train_set->N * mini_batch, (int)(end_time - start_time)); } //fclose(weight_file); }
void bi_conjugate_gradient_sparse(cs *A, double *b, double* x, int n, double itol){ int i,j,iter; double rho,rho1,alpha,beta,omega; double r[n], r_t[n]; double z[n], z_t[n]; double q[n], q_t[n], temp_q[n]; double p[n], p_t[n], temp_p[n]; double res[n]; //NA VGEI! double precond[n]; //Initializations memset(precond, 0, n*sizeof(double)); memset(r, 0, n*sizeof(double)); memset(r_t, 0, n*sizeof(double)); memset(z, 0, n*sizeof(double)); memset(z_t, 0, n*sizeof(double)); memset(q, 0, n*sizeof(double)); memset(q_t, 0, n*sizeof(double)); memset(temp_q, 0, n*sizeof(double)); memset(p, 0, n*sizeof(double)); memset(p_t, 0, n*sizeof(double)); memset(temp_p, 0, n*sizeof(double)); memset(res, 0, n*sizeof(double)); /* Preconditioner */ double max; int pp; for(j = 0; j < n; ++j){ for(pp = A->p[j], max = fabs(A->x[pp]); pp < A->p[j+1]; pp++) if(fabs(A->x[pp]) > max) //vriskei to diagonio stoixeio max = fabs(A->x[pp]); precond[j] = 1/max; } cs *AT = cs_transpose (A, 1) ; cblas_dcopy (n, x, 1, res, 1); //r=b-Ax cblas_dcopy (n, b, 1, r, 1); memset(p, 0, n*sizeof(double)); cs_gaxpy (A, x, p); for(i=0;i<n;i++){ r[i]=r[i]-p[i]; } cblas_dcopy (n, r, 1, r_t, 1); double r_norm = cblas_dnrm2 (n, r, 1); double b_norm = cblas_dnrm2 (n, b, 1); if(!b_norm) b_norm = 1; iter = 0; while( r_norm/b_norm > itol && iter < n ){ iter++; cblas_dcopy (n, r, 1, z, 1); //gia na min allaksei o r cblas_dcopy (n, r_t, 1, z_t, 1); //gia na min allaksei o r_t for(i=0;i<n;i++){ z[i]=precond[i]*z[i]; z_t[i]=precond[i]*z_t[i]; } rho = cblas_ddot (n, z, 1, r_t, 1); if (fpclassify(fabs(rho)) == FP_ZERO){ printf("RHO aborting Bi-CG due to EPS...\n"); exit(42); } if (iter == 1){ cblas_dcopy (n, z, 1, p, 1); cblas_dcopy (n, z_t, 1, p_t, 1); } else{ //p = z + beta*p; beta = rho/rho1; cblas_dscal (n, beta, p, 1); //rescale p by beta cblas_dscal (n, beta, p_t, 1); //rescale p_t by beta cblas_daxpy (n, 1, z, 1, p, 1); //p = 1*z + p cblas_daxpy (n, 1, z_t, 1, p_t, 1); //p_t = 1*z_t + p_t } rho1 = rho; //q = Ap //q_t = trans(A)*p_t memset(q, 0, n*sizeof(double)); cs_gaxpy (A, p, q); memset(q_t, 0, n*sizeof(double)); cs_gaxpy(AT, p_t, q_t); omega = cblas_ddot (n, p_t, 1, q, 1); if (fpclassify(fabs(omega)) == FP_ZERO){ printf("OMEGA aborting Bi-CG due to EPS...\n"); exit(42); } alpha = rho/omega; //x = x + aplha*p; cblas_dcopy (n, p, 1, temp_p, 1); cblas_dscal (n, alpha, temp_p, 1);//rescale by aplha cblas_daxpy (n, 1, temp_p, 1, res, 1);// sum x = 1*x + temp_p //R = R - aplha*Q; cblas_dcopy (n, q, 1, temp_q, 1); cblas_dscal (n, -alpha, temp_q, 1);//rescale by -aplha cblas_daxpy (n, 1, temp_q, 1, r, 1);// sum r = 1*r - temp_p //~r=~r-alpha*~q cblas_dcopy (n, q_t, 1, temp_q, 1); cblas_dscal (n, -alpha, temp_q, 1);//rescale by -aplha cblas_daxpy (n, 1, temp_q, 1, r_t, 1);// sum r = 1*r - temp_p r_norm = cblas_dnrm2 (n, r, 1); //next step } cblas_dcopy (n, res, 1, x, 1); cs_spfree(AT); }
/* Ref: Weiss, Algorithm 11 CGS * INPUT * n : dimension of the problem * b [n] : r-h-s vector * atimes (int n, static double *x, double *b, void *param) : * calc matrix-vector product A.x = b. * atimes_param : parameters for atimes(). * it : struct iter. following entries are used * it->max = kend : max of iteration * it->eps = eps : criteria for |r^2|/|b^2| * OUTPUT * returned value : 0 == success, otherwise (-1) == failed * x [n] : solution * it->niter : # of iteration * it->res2 : |r^2| / |b^2| */ int cgs (int n, const double *b, double *x, void (*atimes) (int, const double *, double *, void *), void *atimes_param, struct iter *it) { #ifndef HAVE_CBLAS_H # ifdef HAVE_BLAS_H /* use Fortran BLAS routines */ int i_1 = 1; double d_m1 = -1.0; double d_2 = 2.0; # endif // !HAVE_BLAS_H #endif // !HAVE_CBLAS_H int ret = -1; double eps2 = it->eps * it->eps; int itmax = it->max; double *r = (double *)malloc (sizeof (double) * n); double *r0 = (double *)malloc (sizeof (double) * n); double *p = (double *)malloc (sizeof (double) * n); double *u = (double *)malloc (sizeof (double) * n); double *ap = (double *)malloc (sizeof (double) * n); double *q = (double *)malloc (sizeof (double) * n); double *t = (double *)malloc (sizeof (double) * n); CHECK_MALLOC (r, "cgs"); CHECK_MALLOC (r0, "cgs"); CHECK_MALLOC (p, "cgs"); CHECK_MALLOC (u, "cgs"); CHECK_MALLOC (ap, "cgs"); CHECK_MALLOC (q, "cgs"); CHECK_MALLOC (t, "cgs"); double r0ap; double rho, rho1; double delta; double beta; double res2 = 0.0; #ifdef HAVE_CBLAS_H /** * ATLAS version */ double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b) eps2 *= b2; // initial residue atimes (n, x, r, atimes_param); // r = A.x cblas_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b cblas_dcopy (n, r, 1, r0, 1); // r0* = r cblas_dcopy (n, r, 1, p, 1); // p = r cblas_dcopy (n, r, 1, u, 1); // u = r rho = cblas_ddot (n, r0, 1, r, 1); // rho = (r0*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p r0ap = cblas_ddot (n, r0, 1, ap, 1); // r0ap = (r0*, A.p) delta = - rho / r0ap; cblas_dcopy (n, u, 1, q, 1); // q = u cblas_dscal (n, 2.0, q, 1); // q = 2 u cblas_daxpy (n, delta, ap, 1, q, 1); // q = 2 u + delta A.p atimes (n, q, t, atimes_param); // t = A.q cblas_daxpy (n, delta, t, 1, r, 1); // r = r + delta t cblas_daxpy (n, delta, q, 1, x, 1); // x = x + delta q res2 = cblas_ddot (n, r, 1, r, 1); if (it->debug == 2) { fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } rho1 = cblas_ddot (n, r0, 1, r, 1); // rho = (r0*, r) beta = rho1 / rho; rho = rho1; // here t is not used so that this is used for working area. double *qu = t; cblas_dcopy (n, q, 1, qu, 1); // qu = q cblas_daxpy (n, -1.0, u, 1, qu, 1); // qu = q - u cblas_dcopy (n, r, 1, u, 1); // u = r cblas_daxpy (n, beta, qu, 1, u, 1); // u = r + beta (q - u) cblas_daxpy (n, beta, p, 1, qu, 1); // qu = q - u + beta * p cblas_dcopy (n, u, 1, p, 1); // p = u cblas_daxpy (n, beta, qu, 1, p, 1); // p = u + beta (q - u + b * p) } #else // !HAVE_CBLAS_H # ifdef HAVE_BLAS_H /** * BLAS version */ double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b) eps2 *= b2; // initial residue atimes (n, x, r, atimes_param); // r = A.x daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // r = A.x - b dcopy_ (&n, r, &i_1, r0, &i_1); // r0* = r dcopy_ (&n, r, &i_1, p, &i_1); // p = r dcopy_ (&n, r, &i_1, u, &i_1); // u = r rho = ddot_ (&n, r0, &i_1, r, &i_1); // rho = (r0*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p r0ap = ddot_ (&n, r0, &i_1, ap, &i_1); // r0ap = (r0*, A.p) delta = - rho / r0ap; dcopy_ (&n, u, &i_1, q, &i_1); // q = u dscal_ (&n, &d_2, q, &i_1); // q = 2 u daxpy_ (&n, &delta, ap, &i_1, q, &i_1); // q = 2 u + delta A.p atimes (n, q, t, atimes_param); // t = A.q daxpy_ (&n, &delta, t, &i_1, r, &i_1); // r = r + delta t daxpy_ (&n, &delta, q, &i_1, x, &i_1); // x = x + delta q res2 = ddot_ (&n, r, &i_1, r, &i_1); if (it->debug == 2) { fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } rho1 = ddot_ (&n, r0, &i_1, r, &i_1); // rho = (r0*, r) beta = rho1 / rho; rho = rho1; // here t is not used so that this is used for working area. double *qu = t; dcopy_ (&n, q, &i_1, qu, &i_1); // qu = q daxpy_ (&n, &d_m1, u, &i_1, qu, &i_1); // qu = q - u dcopy_ (&n, r, &i_1, u, &i_1); // u = r daxpy_ (&n, &beta, qu, &i_1, u, &i_1); // u = r + beta (q - u) daxpy_ (&n, &beta, p, &i_1, qu, &i_1); // qu = q - u + beta * p dcopy_ (&n, u, &i_1, p, &i_1); // p = u daxpy_ (&n, &beta, qu, &i_1, p, &i_1); // p = u + beta (q - u + b * p) } # else // !HAVE_BLAS_H /** * local BLAS version */ double b2 = my_ddot (n, b, 1, b, 1); // (b,b) eps2 *= b2; // initial residue atimes (n, x, r, atimes_param); // r = A.x my_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b my_dcopy (n, r, 1, r0, 1); // r0* = r my_dcopy (n, r, 1, p, 1); // p = r my_dcopy (n, r, 1, u, 1); // u = r rho = my_ddot (n, r0, 1, r, 1); // rho = (r0*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p r0ap = my_ddot (n, r0, 1, ap, 1); // r0ap = (r0*, A.p) delta = - rho / r0ap; my_dcopy (n, u, 1, q, 1); // q = u my_dscal (n, 2.0, q, 1); // q = 2 u my_daxpy (n, delta, ap, 1, q, 1); // q = 2 u + delta A.p atimes (n, q, t, atimes_param); // t = A.q my_daxpy (n, delta, t, 1, r, 1); // r = r + delta t my_daxpy (n, delta, q, 1, x, 1); // x = x + delta q res2 = my_ddot (n, r, 1, r, 1); if (it->debug == 2) { fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } rho1 = my_ddot (n, r0, 1, r, 1); // rho = (r0*, r) beta = rho1 / rho; rho = rho1; // here t is not used so that this is used for working area. double *qu = t; my_dcopy (n, q, 1, qu, 1); // qu = q my_daxpy (n, -1.0, u, 1, qu, 1); // qu = q - u my_dcopy (n, r, 1, u, 1); // u = r my_daxpy (n, beta, qu, 1, u, 1); // u = r + beta (q - u) my_daxpy (n, beta, p, 1, qu, 1); // qu = q - u + beta * p my_dcopy (n, u, 1, p, 1); // p = u my_daxpy (n, beta, qu, 1, p, 1); // p = u + beta (q - u + b * p) } # endif // !HAVE_BLAS_H #endif // !HAVE_CBLAS_H free (r); free (r0); free (p); free (u); free (ap); free (q); free (t); if (it->debug == 1) { fprintf (it->out, "libiter-cgs it= %d res^2= %e\n", i, res2); } it->niter = i; it->res2 = res2 / b2; return (ret); }
void caffe_cpu_xpasv<double>(const int M, const int N, const double alpha, double* X, const double* a, const double* b) { for (int i = 0; i < M; ++i) { cblas_daxpy(N, alpha * a[i], b, 1, X + i * N, 1); } }
void FrictionContact2D_latin(FrictionContactProblem* problem , double *reaction , double *velocity , int *info, SolverOptions* options) { int nc = problem->numberOfContacts; assert(nc>0); double * vec = problem->M->matrix0; double *qq = problem->q; double * mu = problem->mu; int info77 = 0; int i, j, kk, iter1, ino, ddl, nrhs; int info2 = 0; int n = 2 * nc; size_t idim, nbno; int incx = 1, incy = 1; size_t taille, taillet, taillen, itt; int *ddln; int *ddlt, *vectnt; assert(n>0); double errmax, alpha, beta, maxa, k_latin; double aa, nt, wn, tc, zc0; double err1, num11, err0; double den11, den22, knz0, ktz0, *ktz, *wf; double *wc, *zc, *wt, *maxwt, *wnum1, *znum1; double *zt, *maxzt; double *kn, *kt; // char trans='T', diag='N'; // char uplo='U', notrans='N'; double *k, *DPO, *kf, *kninv; double *kinvwden1, *kzden1, *kfinv, *knz, *wtnc; /* Recup input */ itt = options->iparam[0]; errmax = options->dparam[0]; k_latin = options->dparam[2]; /* Initialize output */ options->iparam[1] = 0; options->dparam[1] = 0.0; /* Allocations */ k = (double*) malloc(n * n * sizeof(double)); DPO = (double*) malloc(n * n * sizeof(double)); kf = (double*) malloc(n * n * sizeof(double)); kfinv = (double*) malloc(n * n * sizeof(double)); kninv = (double*) malloc(nc * nc * sizeof(double)); kn = (double*) malloc(nc * nc * sizeof(double)); kt = (double*) malloc(nc * nc * sizeof(double)); kinvwden1 = (double*) malloc(n * sizeof(double)); kzden1 = (double*) malloc(n * sizeof(double)); wc = (double*) malloc(n * sizeof(double)); zc = (double*) malloc(n * sizeof(double)); znum1 = (double*) malloc(n * sizeof(double)); wnum1 = (double*) malloc(n * sizeof(double)); wt = (double*) malloc(n * sizeof(double)); maxzt = (double*) malloc(n * sizeof(double)); knz = (double*) malloc(nc * sizeof(double)); wtnc = (double*) malloc(nc * sizeof(double)); ktz = (double*) malloc(nc * sizeof(double)); wf = (double*) malloc(nc * sizeof(double)); maxwt = (double*) malloc(nc * sizeof(double)); zt = (double*) malloc(nc * sizeof(double)); vectnt = (int*) malloc(n * sizeof(int)); ddln = (int*) malloc(nc * sizeof(int)); ddlt = (int*) malloc(nc * sizeof(int)); /* Initialization */ for (i = 0; i < n * n; i++) { k[i] = 0.; kf[i] = 0.; kfinv[i] = 0.; if (i < nc * nc) { kn[i] = 0.0; kt[i] = 0.0; kninv[i] = 0.0; if (i < n) { wc[i] = 0.0; zc[i] = 0.; reaction[i] = 0.; velocity[i] = 0.; znum1[i] = 0.; wnum1[i] = 0.; wt[i] = 0.; maxzt[i] = 0.; if (i < nc) { maxwt[i] = 0.; zt[i] = 0.; knz[i] = 0.; ktz[i] = 0.; wf[i] = 0.; wtnc[i] = 0.; } } } } for (i = 0; i < n; i++) { if (fabs(vec[i * n + i]) < DBL_EPSILON) { if (verbose > 0) printf("\n Warning nul diagonal term in M matrix \n"); free(k); free(DPO); free(kf); free(kfinv); free(kninv); free(kn); free(kt); free(kinvwden1); free(kzden1); free(wc); free(zc); free(znum1); free(wnum1); free(wt); free(maxzt); free(knz); free(wtnc); free(ktz); free(wf); free(maxwt); free(zt); free(vectnt); free(ddln); free(ddlt); *info = 3; return; } else { k[i + n * i] = k_latin / vec[i * n + i]; vectnt[i] = i + 1; } } for (i = 0; i < nc; i++) { ddln[i] = vectnt[2 * i]; if (i != 0) ddlt[i] = vectnt[2 * i - 1]; else ddlt[i] = 0; } for (i = 0; i < nc; i++) { kn[i + nc * i] = k[ddln[i] + n * ddln[i]]; kt[i + nc * i] = k[ddlt[i] + n * ddlt[i]]; } taillen = sizeof(ddln) / sizeof(ddln[0]); taillet = sizeof(ddlt) / sizeof(ddlt[0]); idim = 1 + taillen / taillet; taille = 0; for (i = 0; i < n; i++) taille = sizeof(qq[i]) + taille; taille = taille / sizeof(qq[0]); nbno = taille / idim; for (i = 0; i < nc; i++) { kf[ddln[i] + n * ddln[i]] = kn[i + nc * i]; kf[ddlt[i] + n * ddlt[i]] = kt[i + nc * i]; } for (i = 0; i < n; i++) { kfinv[i + n * i] = 1. / kf[i + n * i]; if (i < nc) kninv[i + nc * i] = 1. / kt[i + nc * i]; } for (i = 0; i < n; i++) for (j = 0; j < n; j++) DPO[i + n * j] = vec[j * n + i] + kfinv[i + n * j]; DPOTRF(LA_UP, n, DPO , n, &info2); if (info2 != 0) { if (verbose > 0) printf("\n Matter with Cholesky factorization \n"); free(k); free(DPO); free(kf); free(kfinv); free(kninv); free(kn); free(kt); free(kinvwden1); free(kzden1); free(wc); free(zc); free(znum1); free(wnum1); free(wt); free(maxzt); free(knz); free(wtnc); free(ktz); free(wf); free(maxwt); free(zt); free(vectnt); free(ddln); free(ddlt); *info = 2; return; } /* Iteration loops */ iter1 = 0; err1 = 1.; while ((iter1 < itt) && (err1 > errmax)) { /* Linear stage (zc,wc) -> (z,w) */ alpha = 1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, zc, incx, beta, wc, incy); cblas_dcopy(n, qq, incx, znum1, incy); alpha = -1.; cblas_dscal(n , alpha , znum1 , incx); alpha = 1.; cblas_daxpy(n, alpha, wc, incx, znum1, incy); nrhs = 1; DTRTRS(LA_UP, LA_TRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info77); DTRTRS(LA_UP, LA_NOTRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info77); cblas_dcopy(n, znum1, incx, reaction, incy); alpha = -1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, reaction, incx, beta, wc, incy); cblas_dcopy(n, wc, incx, velocity, incy); /* Local stage (z,w)->(zc,wc) */ for (i = 0; i < n; i++) { zc[i] = 0.; wc[i] = 0.0; } /* Normal party */ for (i = 0; i < nc; i++) { knz0 = 0.; for (kk = 0; kk < nc; kk++) { knz[i] = kt[i + nc * kk] * velocity[ddlt[kk]] + knz0; knz0 = knz[i]; } zt[i] = reaction[ddlt[i]] - knz[i]; if (zt[i] > 0.0) { zc[ddlt[i]] = zt[i]; maxzt[i] = 0.0; } else { zc[ddlt[i]] = 0.0; maxzt[i] = -zt[i]; } } for (i = 0; i < nc; i++) { zc0 = 0.; ktz0 = 0.; for (j = 0; j < nc; j++) { wc[ddlt[i]] = kninv[i + nc * j] * maxzt[j] + zc0; zc0 = wc[ddlt[i]]; ktz[i] = kn[i + nc * j] * velocity[ddln[j]] + ktz0; ktz0 = ktz[i]; } wf[i] = reaction[ddln[i]] - ktz[i]; } /* Loop other nodes */ for (ino = 0; ino < nbno; ino++) { ddl = ddln[ino]; nt = fabs(wf[ino]); /* Tangential vector */ if (nt < 1.e-8) tc = 0.; else tc = wf[ino] / nt; /* Tangentiel component */ wn = zc[ddlt[ino]]; aa = nt - mu[ino] * wn; if (aa > 0.0) { maxa = aa; } else { maxa = 0.0; } wc[ddl] = (maxa / (-1 * kn[ino + nc * ino])) * tc; aa = -nt + mu[ino] * wn; if (aa > 0.0) { maxa = aa; } else { maxa = 0.0; } zc[ddl] = (mu[ino] * wn - maxa) * tc; } /* Convergence criterium */ cblas_dcopy(n, reaction, incx, znum1, incy); alpha = -1.; cblas_daxpy(n, alpha, zc, incx, znum1, incy); cblas_dcopy(n, velocity, incx, wnum1, incy); cblas_daxpy(n, alpha, wc, incx, wnum1, incy); alpha = 1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, wnum1, incx, beta, znum1, incy); num11 = 0.; alpha = 1.; beta = 0.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy); num11 = cblas_ddot(n, wnum1, incx, znum1, incy); cblas_dcopy(n, reaction, incx, znum1, incy); alpha = 1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, velocity, incx, beta, znum1, incy); alpha = 1.; beta = 0.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy); den11 = cblas_ddot(n, wnum1, incx, znum1, incy); cblas_dcopy(n, zc, incx, znum1, incy); alpha = 1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, wc, incx, beta, znum1, incy); alpha = 1.; beta = 0.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy); den22 = cblas_ddot(n, znum1, incx, wnum1, incy); err0 = num11 / (den11 + den22); err1 = sqrt(err0); options->iparam[1] = iter1; options->dparam[1] = err1; iter1 = iter1 + 1; } if (err1 > errmax) { if (verbose > 0) printf("No convergence after %d iterations, the residue is %g\n", iter1, err1); *info = 1; } else { if (verbose > 0) printf("Convergence after %d iterations, the residue is %g \n", iter1, err1); *info = 0; } free(k); free(DPO); free(kf); free(kfinv); free(kninv); free(kn); free(kt); free(kinvwden1); free(kzden1); free(wc); free(zc); free(znum1); free(wnum1); free(wt); free(maxzt); free(knz); free(wtnc); free(ktz); free(wf); free(maxwt); free(zt); free(vectnt); free(ddln); free(ddlt); }
int _globalLineSearchSparseGP( GlobalFrictionContactProblem *problem, AlartCurnierFun3x3Ptr computeACFun3x3, double *solution, double *direction, double *mu, double *rho, double *F, double *psi, CSparseMatrix *J, double *tmp, double alpha[1], unsigned int maxiter_ls) { double inf = 1e10; double alphamin = 1e-16; double alphamax = inf; double m1 = 0.01, m2 = 0.99; unsigned int n = (unsigned)NM_triplet(problem->M)->m; unsigned int m = problem->H->size1; unsigned int problem_size = n+2*m; // Computation of q(t) and q'(t) for t =0 double q0 = 0.5 * cblas_ddot(problem_size, psi, 1, psi, 1); // tmp <- J * direction cblas_dscal(problem_size, 0., tmp, 1); cs_gaxpy(J, direction, tmp); double dqdt0 = cblas_ddot(problem_size, psi, 1, tmp, 1); DEBUG_PRINTF("dqdt0=%e\n",dqdt0); DEBUG_PRINTF("q0=%e\n",q0); for(unsigned int iter = 0; iter < maxiter_ls; ++iter) { // tmp <- alpha*direction+solution cblas_dcopy(problem_size, solution, 1, tmp, 1); cblas_daxpy(problem_size, alpha[0], direction, 1, tmp, 1); ACPsi( problem, computeACFun3x3, tmp, /* v */ tmp+problem->M->size0+problem->H->size1, /* P */ tmp+problem->M->size0, /* U */ rho, psi); double q = 0.5 * cblas_ddot(problem_size, psi, 1, psi, 1); assert(q >= 0); double slope = (q - q0) / alpha[0]; int C1 = (slope >= m2 * dqdt0); int C2 = (slope <= m1 * dqdt0); DEBUG_PRINTF("C1=%i\t C2=%i\n",C1,C2); if(C1 && C2) { numerics_printf_verbose(1, "---- GFC3D - NSN_AC - global line search success. Number of ls iteration = %i alpha = %.10e, q = %.10e", iter, alpha[0], q); return 0; } else if(!C1) { alphamin = alpha[0]; } else { // not(C2) alphamax = alpha[0]; } if(alpha[0] < inf) { alpha[0] = 0.5 * (alphamin + alphamax); } else { alpha[0] = alphamin; } } numerics_printf_verbose(1,"---- GFC3D - NSN_AC - global line search unsuccessful. Max number of ls iteration reached = %i with alpha = %.10e", maxiter_ls, alpha[0]); return -1; }
void fc3d_ProjectedGradientOnCylinder(FrictionContactProblem* problem, double *reaction, double *velocity, int* info, SolverOptions* options) { /* int and double parameters */ int* iparam = options->iparam; double* dparam = options->dparam; /* Number of contacts */ int nc = problem->numberOfContacts; double* q = problem->q; NumericsMatrix* M = problem->M; /* Dimension of the problem */ int n = 3 * nc; /* Maximum number of iterations */ int itermax = iparam[0]; /* Tolerance */ double tolerance = dparam[0]; /***** Projected Gradient iterations *****/ int j, iter = 0; /* Current iteration number */ double error = 1.; /* Current error */ int hasNotConverged = 1; int contact; /* Number of the current row of blocks in M */ int nLocal = 3; dparam[0] = dparam[2]; // set the tolerance for the local solver double * velocitytmp = (double *)malloc(n * sizeof(double)); double rho = 0.0; int isVariable = 0; double rhoinit, rhomin; if (dparam[3] > 0.0) { rho = dparam[3]; } else { /* Variable step in fixed*/ isVariable = 1; printf("Variable step (line search) in Projected Gradient iterations\n"); rhoinit = dparam[3]; rhomin = dparam[4]; } double * reactionold; double * direction; if (isVariable) { reactionold = (double *)malloc(n * sizeof(double)); direction = (double *)malloc(n * sizeof(double)); } double alpha = 1.0; double beta = 1.0; /* double minusrho = -1.0*rho; */ if (!isVariable) { while ((iter < itermax) && (hasNotConverged > 0)) { ++iter; cblas_dcopy(n , q , 1 , velocitytmp, 1); prodNumericsMatrix(n, n, alpha, M, reaction, beta, velocitytmp); // projection for each contact cblas_daxpy(n, -1.0, velocitytmp, 1, reaction , 1); for (contact = 0 ; contact < nc ; ++contact) projectionOnCylinder(&reaction[ contact * nLocal], options->dWork[contact]); #ifdef VERBOSE_DEBUG printf("reaction before LS\n"); for (contact = 0 ; contact < nc ; ++contact) { for (j = 0; j < 3; j++) printf("reaction[%i] = %le\t", 3 * contact + j, reaction[3 * contact + j]); printf("\n"); } printf("velocitytmp before LS\n"); for (contact = 0 ; contact < nc ; ++contact) { for (j = 0; j < 3; j++) printf("velocitytmp[%i] = %le\t", 3 * contact + j, velocitytmp[3 * contact + j]); printf("\n"); } #endif /* **** Criterium convergence **** */ fc3d_Tresca_compute_error(problem, reaction , velocity, tolerance, options, &error); if (options->callback) { options->callback->collectStatsIteration(options->callback->env, nc * 3, reaction, velocity, error, NULL); } if (verbose > 0) printf("----------------------------------- FC3D - Projected Gradient On Cylinder (PGoC) - Iteration %i rho = %14.7e \tError = %14.7e\n", iter, rho, error); if (error < tolerance) hasNotConverged = 0; *info = hasNotConverged; } } else { rho = rhoinit; cblas_dcopy(n , q , 1 , velocitytmp, 1); prodNumericsMatrix(n, n, 1.0, M, reaction, 1.0, velocitytmp); cblas_daxpy(n, rho, velocitytmp, 1, reaction, 1); for (contact = 0 ; contact < nc ; ++contact) projectionOnCylinder(&reaction[contact * nLocal], options->dWork[contact]); cblas_dcopy(n , q , 1 , velocitytmp, 1); prodNumericsMatrix(n, n, 1.0, M, reaction, 1.0, velocitytmp); double oldcriterion = cblas_ddot(n, reaction, 1, velocitytmp, 1); #ifdef VERBOSE_DEBUG printf("oldcriterion =%le \n", oldcriterion); #endif while ((iter < itermax) && (hasNotConverged > 0)) { ++iter; // store the old reaction cblas_dcopy(n , reaction , 1 , reactionold , 1); // compute the direction cblas_dcopy(n , q , 1 , velocitytmp, 1); prodNumericsMatrix(n, n, 1.0, M, reaction, 1.0, velocitytmp); cblas_dcopy(n, velocitytmp, 1, direction, 1); // start line search j = 0; if (rho <= 100 * rhoinit) rho = 10.0 * rho; double newcriterion = 1e24; do { cblas_dcopy(n , reactionold , 1 , reaction , 1); cblas_daxpy(n, rho, direction, 1, reaction , 1) ; #ifdef VERBOSE_DEBUG printf("LS iteration %i step 0 \n", j); printf("rho = %le \n", rho); for (contact = 0 ; contact < nc ; ++contact) { for (int k = 0; k < 3; k++) printf("reaction[%i] = %le\t", 3 * contact + k, reaction[3 * contact + k]); printf("\n"); } #endif for (contact = 0 ; contact < nc ; ++contact) projectionOnCylinder(&reaction[contact * nLocal], options->dWork[contact]); /* printf("options->dWork[%i] = %le\n",contact, options->dWork[contact] );} */ #ifdef VERBOSE_DEBUG printf("LS iteration %i step 1 after projection\n", j); for (contact = 0 ; contact < nc ; ++contact) { for (int k = 0; k < 3; k++) printf("reaction[%i] = %le\t", 3 * contact + k, reaction[3 * contact + k]); printf("\n"); } #endif cblas_dcopy(n , q , 1 , velocitytmp, 1); prodNumericsMatrix(n, n, 1.0, M, reaction, 1.0, velocitytmp); #ifdef VERBOSE_DEBUG printf("LS iteration %i step 3 \n", j); for (contact = 0 ; contact < nc ; ++contact) { for (int k = 0; k < 3; k++) printf("velocitytmp[%i] = %le\t", 3 * contact + k, velocitytmp[3 * contact + k]); printf("\n"); } #endif newcriterion = cblas_ddot(n, reaction, 1, velocitytmp, 1); #ifdef VERBOSE_DEBUG printf("LS iteration %i newcriterion =%le\n", j, newcriterion); #endif if (rho > rhomin) { rho = rhomin; break; } rho = 0.5 * rho; } while (newcriterion > oldcriterion && ++j <= options->iparam[2]); oldcriterion = newcriterion; /* **** Criterium convergence **** */ fc3d_Tresca_compute_error(problem, reaction , velocity, tolerance, options, &error); if (verbose > 0) printf("----------------------------------- FC3D - Projected Gradient On Cylinder (PGoC) - Iteration %i rho = %14.7e \tError = %14.7e\n", iter, rho, error); if (error < tolerance) hasNotConverged = 0; *info = hasNotConverged; } } printf("----------------------------------- FC3D - Projected Gradient On Cylinder (PGoC)- #Iteration %i Final Residual = %14.7e\n", iter, error); dparam[0] = tolerance; dparam[1] = error; free(velocitytmp); if (isVariable) { free(reactionold); free(direction); } }
void variationalInequality_ExtraGradient(VariationalInequality* problem, double *x, double *w, int* info, SolverOptions* options) { /* /\* int and double parameters *\/ */ int* iparam = options->iparam; double* dparam = options->dparam; /* Number of contacts */ int n = problem->size; /* Maximum number of iterations */ int itermax = iparam[0]; /* Tolerance */ double tolerance = dparam[0]; /***** Fixed point iterations *****/ int iter = 0; /* Current iteration number */ double error = 1.; /* Current error */ int hasNotConverged = 1; dparam[0] = dparam[2]; // set the tolerance for the local solver double * xtmp = (double *)malloc(n * sizeof(double)); double * wtmp = (double *)malloc(n * sizeof(double)); double rho = 0.0, rho_k =0.0; int isVariable = 0; if (dparam[3] > 0.0) { rho = dparam[3]; if (verbose > 0) { printf("----------------------------------- VI - Extra Gradient (EG) - Fixed stepsize with rho = %14.7e \n", rho); } } else { /* Variable step in iterations*/ isVariable = 1; rho = -dparam[3]; if (verbose > 0) { printf("----------------------------------- VI - Extra Gradient (EG) - Variable stepsize with starting rho = %14.7e \n", rho); } } /* Variable for Line_search */ int success =0; double error_k, light_error_sum =0.0; int ls_iter = 0; int ls_itermax = 10; double tau=dparam[4], tauinv=dparam[5], L= dparam[6], Lmin = dparam[7]; double a1=0.0, a2=0.0; double * x_k =0; double * w_k =0; if (isVariable) { x_k = (double *)malloc(n * sizeof(double)); w_k = (double *)malloc(n * sizeof(double)); } //isVariable=0; if (!isVariable) { /* double minusrho = -1.0*rho; */ while ((iter < itermax) && (hasNotConverged > 0)) { ++iter; /* xtmp <- x */ cblas_dcopy(n , x , 1 , xtmp, 1); /* wtmp <- F(xtmp) */ problem->F(problem, n, xtmp,wtmp); /* xtmp <- xtmp - F(xtmp) */ cblas_daxpy(n, -1.0, wtmp , 1, xtmp , 1) ; /* wtmp <- ProjectionOnX(xtmp) */ problem->ProjectionOnX(problem,xtmp,wtmp); /* x <- x - wtmp */ cblas_daxpy(n, -1.0, wtmp , 1, x , 1) ; /* x <- ProjectionOnX(x) */ cblas_dcopy(n , xtmp , 1 , x, 1); problem->ProjectionOnX(problem,xtmp,x); /* problem->F(problem,x,w); */ /* cblas_daxpy(n, -1.0, w , 1, x , 1) ; */ /* cblas_dcopy(n , x , 1 , xtmp, 1); */ /* problem->ProjectionOnX(problem,xtmp,x); */ /* **** Criterium convergence **** */ if (options->iparam[SICONOS_VI_ERROR_EVALUATION] == SICONOS_VI_ERROR_EVALUATION_FULL ) { variationalInequality_computeError(problem, x , w, tolerance, options, &error); } else if (options->iparam[SICONOS_VI_ERROR_EVALUATION] == SICONOS_VI_ERROR_EVALUATION_LIGHT ) { cblas_dcopy(n, xtmp, 1,x , 1) ; cblas_daxpy(n, -1.0, x_k , 1, xtmp , 1) ; light_error_sum = cblas_dnrm2(n,xtmp,1); double norm_x= cblas_dnrm2(n,x,1); if (fabs(norm_x) > DBL_EPSILON) light_error_sum /= norm_x; error=light_error_sum; } if (options->callback) { options->callback->collectStatsIteration(options->callback->env, n, x, w, error, NULL); } if (verbose > 0) { printf("----------------------------------- VI - Extra Gradient (EG) - Iteration %i rho = %14.7e \tError = %14.7e\n", iter, rho, error); } if (error < tolerance) hasNotConverged = 0; *info = hasNotConverged; } } if (isVariable) { if (iparam[1]==0)/* Armijo rule with Khotbotov ratio (default) */ { while ((iter < itermax) && (hasNotConverged > 0)) { ++iter; /* Store the error */ error_k = error; /* x_k <-- x store the x at the beginning of the iteration */ cblas_dcopy(n , x , 1 , x_k, 1); problem->F(problem, n, x, w_k); ls_iter = 0 ; success =0; rho_k=rho / tau; while (!success && (ls_iter < ls_itermax)) { /* if (iparam[3] && ls_iter !=0) rho_k = rho_k * tau * min(1.0,a2/(rho_k*a1)); */ /* else */ rho_k = rho_k * tau ; /* x <- x_k for the std approach*/ if (iparam[2]==0) cblas_dcopy(n, x_k, 1, x , 1) ; /* x <- x - rho_k* w_k */ cblas_daxpy(n, -rho_k, w_k , 1, x , 1) ; /* xtmp <- ProjectionOnX(x) */ problem->ProjectionOnX(problem,x,xtmp); problem->F(problem,n,xtmp,w); DEBUG_EXPR_WE( for (int i =0; i< 5 ; i++) { printf("xtmp[%i]=%12.8e\t",i,xtmp[i]); printf("w[%i]=F[%i]=%12.8e\n",i,i,w[i]);}); /* velocitytmp <- velocity */ /* cblas_dcopy(n, w, 1, wtmp , 1) ; */ /* velocity <- velocity - velocity_k */ cblas_daxpy(n, -1.0, w_k , 1, w , 1) ; /* a1 = ||w - w_k|| */ a1 = cblas_dnrm2(n, w, 1); DEBUG_PRINTF("a1 = %12.8e\n", a1); /* xtmp <- x */ cblas_dcopy(n, xtmp, 1,x , 1) ; /* xtmp <- x - x_k */ cblas_daxpy(n, -1.0, x_k , 1, xtmp , 1) ; /* a2 = || x - x_k || */ a2 = cblas_dnrm2(n, xtmp, 1) ; DEBUG_PRINTF("a2 = %12.8e\n", a2); DEBUG_PRINTF("test rho_k*a1 < L * a2 = %e < %e\n", rho_k*a1 , L * a2 ) ; success = (rho_k*a1 < L * a2)?1:0; /* printf("rho_k = %12.8e\t", rho_k); */ /* printf("a1 = %12.8e\t", a1); */ /* printf("a2 = %12.8e\t", a2); */ /* printf("norm x = %12.8e\t",cblas_dnrm2(n, x, 1) ); */ /* printf("success = %i\n", success); */ ls_iter++; } /* velocitytmp <- q */ /* cblas_dcopy(n , q , 1 , velocitytmp, 1); */ /* prodNumericsMatrix(n, n, alpha, M, reaction, beta, velocitytmp); */ problem->F(problem, n, x,wtmp); /* x <- x - rho_k* wtmp */ cblas_daxpy(n, -rho_k, wtmp , 1, x , 1) ; /* wtmp <- ProjectionOnX(xtmp) */ cblas_dcopy(n , x , 1 , xtmp, 1); problem->ProjectionOnX(problem,xtmp,x); DEBUG_EXPR_WE( for (int i =0; i< 5 ; i++) { printf("x[%i]=%12.8e\t",i,x[i]); printf("w[%i]=F[%i]=%12.8e\n",i,i,w[i]); } ); /* **** Criterium convergence **** */ if (options->iparam[SICONOS_VI_ERROR_EVALUATION] == SICONOS_VI_ERROR_EVALUATION_FULL ) { variationalInequality_computeError(problem, x , w, tolerance, options, &error); } else if (options->iparam[SICONOS_VI_ERROR_EVALUATION] == SICONOS_VI_ERROR_EVALUATION_LIGHT ) { cblas_dcopy(n, xtmp, 1,x , 1) ; cblas_daxpy(n, -1.0, x_k , 1, xtmp , 1) ; light_error_sum = cblas_dnrm2(n,xtmp,1); double norm_x= cblas_dnrm2(n,x,1); if (fabs(norm_x) > DBL_EPSILON) light_error_sum /= norm_x; error=light_error_sum; } DEBUG_PRINTF("error = %12.8e\t error_k = %12.8e\n",error,error_k); /*Update rho*/ if ((rho_k*a1 < Lmin * a2) && (error < error_k)) { rho =rho_k*tauinv; } else rho =rho_k; if (verbose > 0) { printf("----------------------------------- VI - Extra Gradient (EG) - Iteration %i rho = %14.7e \tError = %14.7e\n", iter, rho, error); } if (error < tolerance) hasNotConverged = 0; *info = hasNotConverged; } }// end iparam[1]==0
/* Linesearch */ int linesearch2_Armijo(int n, double *z, double psi_k, double descentCondition) { /* IN : psi_k (merit function for current iteration) jacobian_psi_k (jacobian of the merit function) dk: descent direction OUT: tk, z */ double m1 = 0.1; double tk = 1; double tkl, tkr, tkaux; int incx = 1, incy = 1; double merit, merit_k; double tmin = 1e-14; double qp_tk; /* cblas_dcopy(sN, z, incx,sz2,incx);*/ /* z1 = z0 + dir */ /* cblas_daxpy(n , 1.0 , sdir_descent , incx , z , incy );*/ tk = 3.25; while (tk > tmin) { /* Computes merit function = 1/2*norm(phi(z_{k+1}))^2 */ cblas_dcopy(sN, z, incx, sz2, incx); cblas_daxpy(n , tk , sdir_descent , incx , sz2 , incy); (*sFphi)(n, sz2, sphi_z, 0); merit = cblas_dnrm2(n, sphi_z , incx); merit = 0.5 * merit * merit; merit_k = psi_k + m1 * tk * descentCondition; if (merit < merit_k) { tkl = 0; tkr = tk; /*calcul merit'(tk)*/ (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1); /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */ cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_zaux, incx); qp_tk = cblas_ddot(sN, sgrad_psi_zaux, 1, sdir_descent, 1); if (qp_tk > 0) { while (fabs(tkl - tkr) > tmin) { tkaux = 0.5 * (tkl + tkr); cblas_dcopy(sN, z, incx, sz2, incx); cblas_daxpy(n , tkaux , sdir_descent , incx , sz2 , incy); /*calcul merit'(tk)*/ (*sFphi)(n, sz2, sphi_z, 0); (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1); /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */ cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_zaux, incx); qp_tk = cblas_ddot(sN, sgrad_psi_zaux, 1, sdir_descent, 1); if (qp_tk > 0) { tkr = tkaux; } else { tkl = tkaux; } } } /* printf("merit = %e, merit_k=%e,tk= %e,tkaux=%e \n",merit,merit_k,tk,tkaux);*/ cblas_dcopy(sN, sz2, incx, z, incx); break; } tk = tk * 0.5; } if (tk <= tmin) { cblas_dcopy(sN, sz2, incx, z, incx); printf("NonSmoothNewton::linesearch2_Armijo warning, resulting tk=%e < tmin, linesearch stopped.\n", tk); return 0; } return 1; }
double search_Armijo_standalone(int n, double* theta, double preRHS, search_data* ls_data) { assert(ls_data->alpha0 > 0.0); assert(ls_data->alpha0 > ls_data->alpha_min); double alpha = ls_data->alpha0; double theta_iter = *theta, theta_ref = *theta; double* z = ls_data->z; double* zc = ls_data->zc; double* F = ls_data->F; double* F_merit = ls_data->F_merit; double* desc_dir = ls_data->desc_dir; void* data = ls_data->data; bool arcsearch = ls_data->searchtype == ARCSEARCH; void* set = ls_data->set; double RHS; armijo_extra_params* aep = (armijo_extra_params*) ls_data->extra_params; assert(aep); preRHS *= aep->gamma; while (alpha >= ls_data->alpha_min) { DEBUG_PRINTF("search_Armijo :: alpha %g, ls_data->alpha_min %g \n", alpha, ls_data->alpha_min); // desc_dir contains the direction d cblas_dcopy(n, z, 1, zc, 1); cblas_daxpy(n, alpha, desc_dir, 1, zc, 1); // z + alpha*d --> z if (arcsearch) { project_on_set(n, zc, set); /* we use F as a work vector here */ cblas_dcopy(n, z, 1, F, 1); cblas_daxpy(n, -1.0, zc, 1, F, 1); /* F = z(0) - z(alpha) */ /* warning desc_dir = -JacMerit !*/ double dotprod = cblas_ddot(n, desc_dir, 1, F, 1); if (dotprod > 0.0) RHS = ls_data->sigma*dotprod; else RHS = -alpha*ls_data->sigma*theta_ref; } else { RHS = alpha*preRHS; } // compute new F_merit ls_data->compute_F(data, zc, F); ls_data->compute_F_merit(data, zc, F, F_merit); DEBUG_PRINT("z "); DEBUG_EXPR_WE(for (unsigned int i = 0; i < n; ++i) { DEBUG_PRINTF("% 2.2e ", zc[i]) } DEBUG_PRINT("\n")); DEBUG_PRINT("F "); DEBUG_EXPR_WE(for (unsigned int i = 0; i < n; ++i) { DEBUG_PRINTF("% 2.2e ", F[i]) } DEBUG_PRINT("\n")); DEBUG_PRINT("F_merit "); DEBUG_EXPR_WE(for (unsigned int i = 0; i < n; ++i) { DEBUG_PRINTF("% 2.2e ", F_merit[i]) } DEBUG_PRINT("\n")); theta_iter = 0.5 * cblas_ddot(n, F_merit, 1, F_merit, 1); DEBUG_PRINTF("search_Armijo :: alpha %g\n", alpha); DEBUG_PRINTF("search_Armijo :: theta_iter %.*e ; theta_ref %.*e \n", DECIMAL_DIG, theta_iter, DECIMAL_DIG, theta_ref); // acceptance test if (theta_iter <= theta_ref + RHS) { if (verbose > 1) printf("search_Armijo :: alpha %g\n", alpha); break; } else { // alpha too large, decrease it alpha /= 2.0; } } *theta = theta_iter; return alpha; }
int nonSmoothNewtonNeigh(int n, double* z, NewtonFunctionPtr* phi, NewtonFunctionPtr* jacobianPhi, int* iparam, double* dparam) { int itermax = iparam[0]; // maximum number of iterations allowed int iterMaxWithSameZ = itermax / 4; int niter = 0; // current iteration number double tolerance = dparam[0]; /* double coef; */ sFphi = phi; sFjacobianPhi = jacobianPhi; // verbose=1; if (verbose > 0) { printf(" ============= Starting of Newton process =============\n"); printf(" - tolerance: %14.7e\n - maximum number of iterations: %i\n", tolerance, itermax); } int incx = 1; /* int n2 = n*n; */ int infoDGESV; /** merit function and its jacobian */ double psi_z; /** The algorithm is alg 4.1 of the paper of Kanzow and Kleinmichel, "A new class of semismooth Newton-type methods for nonlinear complementarity problems", in Computational Optimization and Applications, 11, 227-251 (1998). We try to keep the same notations */ double rho = 1e-8; double descentCondition, criterion, norm_jacobian_psi_z, normPhi_z; double p = 2.1; double terminationCriterion = 1; double norm; int findNewZ, i, j, NbLookingForANewZ; /* int naux=0; */ double aux = 0; /* double aux1=0; */ int ii; int resls = 1; /* char c; */ /* double * oldz; */ /* oldz=(double*)malloc(n*sizeof(double));*/ NbLookingForANewZ = 0; /** Iterations ... */ while ((niter < itermax) && (terminationCriterion > tolerance)) { scmp++; ++niter; /** Computes phi and its jacobian */ if (sZsol) { for (ii = 0; ii < sN; ii++) szzaux[ii] = sZsol[ii] - z[ii]; printf("dist zzsol %.32e.\n", cblas_dnrm2(n, szzaux, 1)); } (*sFphi)(n, z, sphi_z, 0); (*sFjacobianPhi)(n, z, sjacobianPhi_z, 1); /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */ cblas_dgemv(CblasColMajor,CblasTrans, n, n, 1.0, sjacobianPhi_z, n, sphi_z, incx, 0.0, sgrad_psi_z, incx); norm_jacobian_psi_z = cblas_dnrm2(n, sgrad_psi_z, 1); /* Computes norm2(phi) */ normPhi_z = cblas_dnrm2(n, sphi_z, 1); /* Computes merit function */ psi_z = 0.5 * normPhi_z * normPhi_z; if (normPhi_z < tolerance) { /*it is the solution*/ terminationCriterion = tolerance / 2.0; break; } if (verbose > 0) { printf("Non Smooth Newton, iteration number %i, norm grad psi= %14.7e , psi = %14.7e, normPhi = %e .\n", niter, norm_jacobian_psi_z, psi_z, normPhi_z); printf(" -----------------------------------------------------------------------\n"); } NbLookingForANewZ++; if (niter > 2) { if (10 * norm_jacobian_psi_z < tolerance || !resls || NbLookingForANewZ > iterMaxWithSameZ) { NbLookingForANewZ = 0; resls = 1; /* if (NbLookingForANewZ % 10 ==1 && 0){ printf("Try NonMonotomnelineSearch\n"); cblas_dcopy(n,sgrad_psi_z,1,sdir_descent,1); cblas_dscal( n , -1.0 ,sdir_descent,incx); NonMonotomnelineSearch( z, phi, 10); continue; } */ /* FOR DEBUG ONLY*/ if (sZsol) { printf("begin plot prev dir\n"); plotMerit(z, 0, 0); printf("end\n"); /* gets(&c);*/ (*sFphi)(n, sZsol, szaux, 0); printf("value psi(zsol)=%e\n", cblas_dnrm2(n, szaux, 1)); cblas_dcopy(n, sZsol, incx, szaux, incx); cblas_daxpy(n , -1 , z , 1 , szaux , 1); printf("dist to sol %e \n", cblas_dnrm2(n, szaux, 1)); for (ii = 0; ii < n; ii++) sdir_descent[ii] = sZsol[ii] - z[ii]; aux = norm; norm = 1; printf("begin plot zzsol dir\n"); plotMerit(z, 0, 0); printf("end\n"); /* gets(&c);*/ norm = aux; } printf("looking for a new Z...\n"); /*may be a local minimal*/ /*find a gradiant going out of this cul-de-sac.*/ norm = n / 2; findNewZ = 0; for (j = 0; j < 20; j++) { for (i = 0; i < n; i++) { if (sZsol) { /* FOR DEBUG ONLY*/ (*sFphi)(n, sZsol, sphi_zaux, 0); norm = cblas_dnrm2(n, sphi_zaux, 1); printf("Norm of the sol %e.\n", norm); for (ii = 0; ii < n; ii++) sdir_descent[ii] = sZsol[ii] - z[ii]; norm = 1; } else { for (ii = 0; ii < n; ii++) { sdir_descent[ii] = 1.0 * rand(); } cblas_dscal(n, 1 / cblas_dnrm2(n, sdir_descent, 1), sdir_descent, incx); cblas_dscal(n, norm, sdir_descent, incx); } cblas_dcopy(n, z, incx, szaux, incx); // cblas_dscal(n,0.0,zaux,incx); /* zaux = z + dir */ cblas_daxpy(n , norm , sdir_descent , 1 , szaux , 1); /* Computes the jacobian of the merit function, jacobian_psi_zaux = transpose(jacobianPhi_zaux).phi_zaux */ (*sFphi)(n, szaux, sphi_zaux, 0); (*sFjacobianPhi)(n, szaux, sjacobianPhi_zaux, 1); /* FOR DEBUG ONLY*/ if (sZsol) { aux = cblas_dnrm2(n, sphi_zaux, 1); printf("Norm of the sol is now %e.\n", aux); for (ii = 0; ii < n; ii++) printf("zsol %e zaux %e \n", sZsol[ii], szaux[ii]); } cblas_dgemv(CblasColMajor, CblasTrans, n, n, 1.0, sjacobianPhi_zaux, n, sphi_zaux, incx, 0.0, sgrad_psi_zaux, incx); cblas_dcopy(n, szaux, 1, szzaux, 1); cblas_daxpy(n , -1 , z , incx , szzaux , incx); /*zzaux must be a descente direction.*/ /*ie jacobian_psi_zaux.zzaux <0 printf("jacobian_psi_zaux : \n");*/ /*cblas_dcopy(n,sdir,incx,sdir_descent,incx); plotMerit(z, phi);*/ aux = cblas_ddot(n, sgrad_psi_zaux, 1, szzaux, 1); /* aux1 = cblas_dnrm2(n,szzaux,1); aux1 = cblas_dnrm2(n,sgrad_psi_zaux,1);*/ aux = aux / (cblas_dnrm2(n, szzaux, 1) * cblas_dnrm2(n, sgrad_psi_zaux, 1)); /* printf("aux: %e\n",aux);*/ if (aux < 0.1 * (j + 1)) { //zaux is the new point. findNewZ = 1; cblas_dcopy(n, szaux, incx, z, incx); break; } } if (findNewZ) break; if (j == 10) { norm = n / 2; } else if (j > 10) norm = -2 * norm; else norm = -norm / 2.0; } if (! findNewZ) { printf("failed to find a new z\n"); /* exit(1);*/ continue; } else continue; } } /* Stops if the termination criterion is satisfied */ terminationCriterion = norm_jacobian_psi_z; /* if(terminationCriterion < tolerance){ break; }*/ /* Search direction calculation Find a solution dk of jacobianPhiMatrix.d = -phiVector. dk is saved in phiVector. */ cblas_dscal(n , -1.0 , sphi_z, incx); DGESV(n, 1, sjacobianPhi_z, n, sipiv, sphi_z, n, &infoDGESV); if (infoDGESV) { printf("DGEV error %d.\n", infoDGESV); } cblas_dcopy(n, sphi_z, 1, sdir_descent, 1); criterion = cblas_dnrm2(n, sdir_descent, 1); /* printf("norm dir descent %e\n",criterion);*/ /*printf("begin plot descent dir\n"); plotMerit(z, phi); printf("end\n"); gets(&c);*/ /*printf("begin plot zzsol dir\n"); plotMeritToZsol(z,phi); printf("end\n"); gets(&c);*/ /* norm = cblas_dnrm2(n,sdir_descent,1); printf("norm desc %e \n",norm); cblas_dscal( n , 1/norm , sdir_descent, 1); */ /* descentCondition = jacobian_psi.dk */ descentCondition = cblas_ddot(n, sgrad_psi_z, 1, sdir_descent, 1); /* Criterion to be satisfied: error < -rho*norm(dk)^p */ criterion = -rho * pow(criterion, p); /* printf("ddddddd %d\n",scmp); if (scmp>100){ displayMat(sjacobianPhi_z,n,n,n); exit(1); }*/ // if ((infoDGESV != 0 || descentCondition > criterion) && 0) // { // printf("no a desc dir, get grad psy\n"); /* dk = - jacobian_psi (remind that dk is saved in phi_z) */ // cblas_dcopy(n, sgrad_psi_z, 1, sdir_descent, 1); // cblas_dscal(n , -1.0 , sdir_descent, incx); /*DEBUG ONLY*/ /*printf("begin plot new descent dir\n"); plotMerit(z); printf("end\n"); gets(&c);*/ // } /* coef=fabs(norm_jacobian_psi_z*norm_jacobian_psi_z/descentCondition); if (coef <1){ cblas_dscal(n,coef,sdir_descent,incx); printf("coef %e norm dir descent is now %e\n",coef,cblas_dnrm2(n,sdir_descent,1)); }*/ /* Step-3 Line search: computes z_k+1 */ /*linesearch_Armijo(n,z,sdir_descent,psi_z, descentCondition, phi);*/ /* if (niter == 10){ printf("begin plot new descent dir\n"); plotMerit(z); printf("end\n"); gets(&c); }*/ /* memcpy(oldz,z,n*sizeof(double));*/ resls = linesearch2_Armijo(n, z, psi_z, descentCondition); if (!resls && niter > 1) { /* displayMat(sjacobianPhi_z,n,n,n); printf("begin plot new descent dir\n"); plotMerit(oldz,psi_z, descentCondition); printf("end\n"); gets(&c);*/ } /* lineSearch_Wolfe(z, descentCondition, phi,jacobianPhi);*/ /* if (niter>3){ printf("angle between prev dir %e.\n",acos(cblas_ddot(n, sdir_descent, 1, sPrevDirDescent, 1)/(cblas_dnrm2(n,sdir_descent,1)*cblas_dnrm2(n,sPrevDirDescent,1)))); }*/ cblas_dcopy(n, sdir_descent, 1, sPrevDirDescent, 1); /* for (j=20;j<32;j++){ if (z[j]<0) z[j]=0; }*/ /* if( 1 || verbose>0) { printf("Non Smooth Newton, iteration number %i, error grad equal to %14.7e , psi value is %14.7e .\n",niter, terminationCriterion,psi_z); printf(" -----------------------------------------------------------------------\n"); }*/ } /* Total number of iterations */ iparam[1] = niter; /* Final error */ dparam[1] = terminationCriterion; /** Free memory*/ if (verbose > 0) { if (dparam[1] > tolerance) printf("Non Smooth Newton warning: no convergence after %i iterations\n" , niter); else printf("Non Smooth Newton: convergence after %i iterations\n" , niter); printf(" The residue is : %e \n", dparam[1]); } /* free(oldz);*/ if (dparam[1] > tolerance) return 1; else return 0; }
void My_daxpy(gsl_vector* y, const gsl_vector* x, double alpha) { cblas_daxpy(y->size, alpha, x->data, x->stride, y->data, y->stride); }
/* Ref: Weiss, Algorithm 12 BiCGSTAB * INPUT * n : dimension of the problem * b [n] : r-h-s vector * atimes (int n, static double *x, double *b, void *param) : * calc matrix-vector product A.x = b. * atimes_param : parameters for atimes(). * it : struct iter. following entries are used * it->max = kend : max of iteration * it->eps = eps : criteria for |r^2|/|b^2| * OUTPUT * returned value : 0 == success, otherwise (-1) == failed * x [n] : solution * it->niter : # of iteration * it->res2 : |r^2| / |b^2| */ int bicgstab (int n, const double *b, double *x, void (*atimes) (int, const double *, double *, void *), void *atimes_param, struct iter *it) { #ifndef HAVE_CBLAS_H # ifdef HAVE_BLAS_H /* use Fortran BLAS routines */ int i_1 = 1; double d_1 = 1.0; double d_m1 = -1.0; # endif // !HAVE_BLAS_H #endif // !HAVE_CBLAS_H int ret = -1; double eps2 = it->eps * it->eps; int itmax = it->max; double *r = (double *)malloc (sizeof (double) * n); double *rs = (double *)malloc (sizeof (double) * n); double *p = (double *)malloc (sizeof (double) * n); double *ap = (double *)malloc (sizeof (double) * n); double *s = (double *)malloc (sizeof (double) * n); double *t = (double *)malloc (sizeof (double) * n); CHECK_MALLOC (r, "bicgstab"); CHECK_MALLOC (rs, "bicgstab"); CHECK_MALLOC (p, "bicgstab"); CHECK_MALLOC (ap, "bicgstab"); CHECK_MALLOC (s, "bicgstab"); CHECK_MALLOC (t, "bicgstab"); double rsap; // (r*, A.p) double st; double t2; double rho, rho1; double delta; double gamma; double beta; double res2 = 0.0; #ifdef HAVE_CBLAS_H /** * ATLAS version */ double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b) eps2 *= b2; atimes (n, x, r, atimes_param); // r = A.x ... cblas_daxpy (n, -1.0, b, 1, r, 1); // - b cblas_dcopy (n, r, 1, rs, 1); // r* = r cblas_dcopy (n, r, 1, p, 1); // p = r rho = cblas_ddot (n, rs, 1, r, 1); // rho = (r*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p rsap = cblas_ddot (n, rs, 1, ap, 1); // rsap = (r*, A.p) delta = - rho / rsap; cblas_dcopy (n, r, 1, s, 1); // s = r ... cblas_daxpy (n, delta, ap, 1, s, 1); // + delta A.p atimes (n, s, t, atimes_param); // t = A.s st = cblas_ddot (n, s, 1, t, 1); // st = (s, t) t2 = cblas_ddot (n, t, 1, t, 1); // t2 = (t, t) gamma = - st / t2; cblas_dcopy (n, s, 1, r, 1); // r = s ... cblas_daxpy (n, gamma, t, 1, r, 1); // + gamma t cblas_daxpy (n, delta, p, 1, x, 1); // x = x + delta p... cblas_daxpy (n, gamma, s, 1, x, 1); // + gamma s res2 = cblas_ddot (n, r, 1, r, 1); if (it->debug == 2) { fprintf (it->out, "libiter-bicgstab(cblas) %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } rho1 = cblas_ddot (n, rs, 1, r, 1); // rho = (r*, r) beta = rho1 / rho * delta / gamma; rho = rho1; cblas_daxpy (n, gamma, ap, 1, p, 1); // p = p + gamma A.p cblas_dscal (n, beta, p, 1); // p = beta (p + gamma A.p) cblas_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta(p + gamma A.p) } #else // !HAVE_CBLAS_H # ifdef HAVE_BLAS_H /** * BLAS version */ double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b) eps2 *= b2; atimes (n, x, r, atimes_param); // r = A.x ... daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // - b dcopy_ (&n, r, &i_1, rs, &i_1); // r* = r dcopy_ (&n, r, &i_1, p, &i_1); // p = r rho = ddot_ (&n, rs, &i_1, r, &i_1); // rho = (r*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p rsap = ddot_ (&n, rs, &i_1, ap, &i_1); // rsap = (r*, A.p) delta = - rho / rsap; dcopy_ (&n, r, &i_1, s, &i_1); // s = r ... daxpy_ (&n, &delta, ap, &i_1, s, &i_1); // + delta A.p atimes (n, s, t, atimes_param); // t = A.s st = ddot_ (&n, s, &i_1, t, &i_1); // st = (s, t) t2 = ddot_ (&n, t, &i_1, t, &i_1); // t2 = (t, t) gamma = - st / t2; dcopy_ (&n, s, &i_1, r, &i_1); // r = s ... daxpy_ (&n, &gamma, t, &i_1, r, &i_1); // + gamma t daxpy_ (&n, &delta, p, &i_1, x, &i_1); // x = x + delta p... daxpy_ (&n, &gamma, s, &i_1, x, &i_1); // + gamma s res2 = ddot_ (&n, r, &i_1, r, &i_1); if (it->debug == 2) { fprintf (it->out, "libiter-bicgstab(blas) %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } if (res2 > 1.0e20) { // already too big residual break; } rho1 = ddot_ (&n, rs, &i_1, r, &i_1); // rho = (r*, r) beta = rho1 / rho * delta / gamma; rho = rho1; daxpy_ (&n, &gamma, ap, &i_1, p, &i_1); // p = p + gamma A.p dscal_ (&n, &beta, p, &i_1); // p = beta (p + gamma A.p) daxpy_ (&n, &d_1, r, &i_1, p, &i_1); // p = r + beta(p + gamma A.p) } # else // !HAVE_BLAS_H /** * local BLAS version */ double b2 = my_ddot (n, b, 1, b, 1); // (b,b) eps2 *= b2; atimes (n, x, r, atimes_param); // r = A.x ... my_daxpy (n, -1.0, b, 1, r, 1); // - b my_dcopy (n, r, 1, rs, 1); // r* = r my_dcopy (n, r, 1, p, 1); // p = r rho = my_ddot (n, rs, 1, r, 1); // rho = (r*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p rsap = my_ddot (n, rs, 1, ap, 1); // rsap = (r*, A.p) delta = - rho / rsap; my_dcopy (n, r, 1, s, 1); // s = r ... my_daxpy (n, delta, ap, 1, s, 1); // + delta A.p atimes (n, s, t, atimes_param); // t = A.s st = my_ddot (n, s, 1, t, 1); // st = (s, t) t2 = my_ddot (n, t, 1, t, 1); // t2 = (t, t) gamma = - st / t2; my_dcopy (n, s, 1, r, 1); // r = s ... my_daxpy (n, gamma, t, 1, r, 1); // + gamma t my_daxpy (n, delta, p, 1, x, 1); // x = x + delta p... my_daxpy (n, gamma, s, 1, x, 1); // + gamma s res2 = my_ddot (n, r, 1, r, 1); if (it->debug == 2) { fprintf (it->out, "libiter-bicgstab(myblas) %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } rho1 = my_ddot (n, rs, 1, r, 1); // rho = (r*, r) beta = rho1 / rho * delta / gamma; rho = rho1; my_daxpy (n, gamma, ap, 1, p, 1); // p = p + gamma A.p my_dscal (n, beta, p, 1); // p = beta (p + gamma A.p) my_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta(p + gamma A.p) } # endif // !HAVE_BLAS_H #endif // !HAVE_CBLAS_H free (r); free (rs); free (p); free (ap); free (s); free (t); if (it->debug == 1) { fprintf (it->out, "libiter-bicgstab %d %e\n", i, res2 / b2); } it->niter = i; it->res2 = res2 / b2; return (ret); }
void variationalInequality_FixedPointProjection(VariationalInequality* problem, double *x, double *w, int* info, SolverOptions* options) { /* /\* int and double parameters *\/ */ int* iparam = options->iparam; double* dparam = options->dparam; /* Number of contacts */ int n = problem->size; /* Maximum number of iterations */ int itermax = iparam[0]; /* Tolerance */ double tolerance = dparam[0]; /***** Fixed point iterations *****/ int iter = 0; /* Current iteration number */ double error = 1.; /* Current error */ int hasNotConverged = 1; double * xtmp = (double *)malloc(n * sizeof(double)); double * wtmp = (double *)malloc(n * sizeof(double)); double rho = 0.0, rho_k =0.0; int isVariable = 0; if (dparam[3] > 0.0) { rho = dparam[3]; if (verbose > 0) { printf("----------------------------------- VI - Fixed Point Projection (FPP) - Fixed stepsize with rho = %14.7e \n", rho); } } else { /* Variable step in iterations*/ isVariable = 1; rho = -dparam[3]; if (verbose > 0) { printf("----------------------------------- VI - Fixed Point Projection (FPP) - Variable stepsize with starting rho = %14.7e \n", rho); } } /* Variable for Line_search */ int success =0; double error_k; int ls_iter = 0; int ls_itermax = 10; double tau=dparam[4], tauinv=dparam[5], L= dparam[6], Lmin = dparam[7]; DEBUG_PRINTF("tau=%g, tauinv=%g, L= %g, Lmin = %g",dparam[4], dparam[5], dparam[6], dparam[7] ) ; double a1=0.0, a2=0.0; double * x_k = NULL; double * w_k = NULL; if (isVariable) { x_k = (double *)malloc(n * sizeof(double)); w_k = (double *)malloc(n * sizeof(double)); } //isVariable=0; if (!isVariable) { /* double minusrho = -1.0*rho; */ while ((iter < itermax) && (hasNotConverged > 0)) { ++iter; problem->F(problem,n,x,w); cblas_daxpy(n, -1.0, w , 1, x , 1) ; cblas_dcopy(n , x , 1 , xtmp, 1); problem->ProjectionOnX(problem,xtmp,x); /* **** Criterium convergence **** */ variationalInequality_computeError(problem, x , w, tolerance, options, &error); if (options->callback) { options->callback->collectStatsIteration(options->callback->env, n, x, w, error, NULL); } if (verbose > 0) { printf("----------------------------------- VI - Fixed Point Projection (FPP) - Iteration %i rho = %14.7e \tError = %14.7e\n", iter, rho, error); } if (error < tolerance) hasNotConverged = 0; *info = hasNotConverged; } } else if (isVariable) { if (iparam[1]==0) /* Armijo rule with Khotbotov ratio (default) */ { DEBUG_PRINT("Variable step size method with Armijo rule with Khotbotov ratio (default) \n"); while ((iter < itermax) && (hasNotConverged > 0)) { ++iter; /* Store the error */ error_k = error; /* x_k <-- x store the x at the beginning of the iteration */ cblas_dcopy(n , x , 1 , x_k, 1); /* compute w_k =F(x_k) */ problem->F(problem,n,x,w_k); ls_iter = 0 ; success =0; rho_k=rho / tau; while (!success && (ls_iter < ls_itermax)) { /* if (iparam[3] && ls_iter !=0) rho_k = rho_k * tau * min(1.0,a2/(rho_k*a1)); */ /* else */ rho_k = rho_k * tau ; /* x <- x_k for the std approach*/ if (iparam[2]==0) cblas_dcopy(n, x_k, 1, x , 1) ; /* x <- x - rho_k* w_k */ cblas_daxpy(n, -rho_k, w_k , 1, x , 1) ; /* xtmp <- ProjectionOnX(x) */ problem->ProjectionOnX(problem,x,xtmp); problem->F(problem,n,xtmp,w); DEBUG_EXPR_WE( for (int i =0; i< 5 ; i++) { printf("xtmp[%i]=%12.8e\t",i,xtmp[i]); printf("w[%i]=F[%i]=%12.8e\n",i,i,w[i]);}); /* velocitytmp <- velocity */ /* cblas_dcopy(n, w, 1, wtmp , 1) ; */ /* velocity <- velocity - velocity_k */ cblas_daxpy(n, -1.0, w_k , 1, w , 1) ; /* a1 = ||w - w_k|| */ a1 = cblas_dnrm2(n, w, 1); DEBUG_PRINTF("a1 = %12.8e\n", a1); /* xtmp <- x */ cblas_dcopy(n, xtmp, 1,x , 1) ; /* xtmp <- x - x_k */ cblas_daxpy(n, -1.0, x_k , 1, xtmp , 1) ; /* a2 = || x - x_k || */ a2 = cblas_dnrm2(n, xtmp, 1) ; DEBUG_PRINTF("a2 = %12.8e\n", a2); DEBUG_PRINTF("test rho_k*a1 < L * a2 = %e < %e\n", rho_k*a1 , L * a2 ) ; success = (rho_k*a1 < L * a2)?1:0; /* printf("rho_k = %12.8e\t", rho_k); */ /* printf("a1 = %12.8e\t", a1); */ /* printf("a2 = %12.8e\t", a2); */ /* printf("norm x = %12.8e\t",cblas_dnrm2(n, x, 1) ); */ /* printf("success = %i\n", success); */ ls_iter++; } /* problem->F(problem,x,w); */ DEBUG_EXPR_WE( for (int i =0; i< 5 ; i++) { printf("x[%i]=%12.8e\t",i,x[i]); printf("w[%i]=F[%i]=%12.8e\n",i,i,w[i]);}); /* **** Criterium convergence **** */ variationalInequality_computeError(problem, x , w, tolerance, options, &error); DEBUG_EXPR_WE( if ((error < error_k)) { printf("(error < error_k) is satisfied\n"); }; );
void lanczos(double *F, double *Es, double *L, int n_eigs, int n_patch, int LANCZOS_ITR) { double *b; double b_norm; double *z; double *alpha, *beta; double *q; int i; double *eigvec; // eigenvectors // generate random b with norm 1. srand((unsigned int)time(NULL)); b = (double *)malloc(n_patch * sizeof(double)); for (i = 0; i < n_patch; i++) b[i] = rand(); b_norm = norm2(b, n_patch); for (i = 0; i < n_patch; i++) b[i] /= b_norm; alpha = (double *)malloc( (LANCZOS_ITR + 1) * sizeof(double) ); beta = (double *)malloc( (LANCZOS_ITR + 1) * sizeof(double) ); beta[0] = 0.0; // beta_0 <- 0 z = (double *)malloc( n_patch * sizeof(double)); q = (double *)malloc( n_patch * (LANCZOS_ITR + 2) * sizeof(double) ); memset(&q[0], 0, n_patch * sizeof(double)); // q_0 <- 0 memcpy(&q[n_patch], b, n_patch * sizeof(double)); // q_1 <- b for (i = 1; i <= LANCZOS_ITR; i++) { // z = L * Q(:, i) cblas_dsymv(CblasColMajor, CblasLower, n_patch, 1.0, L, n_patch, &q[i * n_patch], 1, 0.0, z, 1); // alpha(i) = Q(:, i)' * z; alpha[i] = cblas_ddot(n_patch, &q[i * n_patch], 1, z, 1); // z = z - alpha(i) * Q(:, i) cblas_daxpy(n_patch, -alpha[i], &q[i * n_patch], 1, z, 1); // z = z - beta(i - 1) * Q(:, i - 1); cblas_daxpy(n_patch, -beta[i - 1], &q[(i - 1) * n_patch], 1, z, 1); // beta(i) = norm(z, 2); beta[i] = cblas_dnrm2(n_patch, z, 1); // Q(:, i + 1) = z / beta(i); divide_copy(&q[(i + 1) * n_patch], z, n_patch, beta[i]); } // compute approximate eigensystem eigvec = (double *)malloc(LANCZOS_ITR * LANCZOS_ITR * sizeof(double)); LAPACKE_dstedc(LAPACK_COL_MAJOR, 'I', LANCZOS_ITR, &alpha[1], &beta[1], eigvec, LANCZOS_ITR); // copy specified number of eigenvalues memcpy(Es, &alpha[1], n_eigs * sizeof(double)); // V = Q(:, 1:k) * U cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, n_patch, LANCZOS_ITR, LANCZOS_ITR, 1.0, &q[n_patch], n_patch, eigvec, LANCZOS_ITR, 0.0, L, n_patch); // copy the corresponding eigenvectors memcpy(F, L, n_patch * n_eigs * sizeof(double)); free(b); free(z); free(alpha); free(beta); free(q); free(eigvec); }
void conjugate_gradient_sparse(cs *A, double *b, double* x, int n, double itol) { int i,j; int iter; double rho,rho1,alpha,beta,omega; double r[n]; double z[n]; double q[n], temp_q[n]; double p[n], temp_p[n]; double res[n]; double precond[n]; //Preconditioner memset(precond, 0, n*sizeof(double)); memset(r, 0, n*sizeof(double)); memset(z, 0, n*sizeof(double)); memset(q, 0, n*sizeof(double)); memset(temp_q, 0, n*sizeof(double)); memset(p, 0, n*sizeof(double)); memset(temp_p, 0, n*sizeof(double)); /* Preconditioner */ double max; int pp; for(j = 0; j < n; ++j){ for(pp = A->p[j], max = fabs(A->x[pp]); pp < A->p[j+1]; pp++) if(fabs(A->x[pp]) > max) //vriskei to diagonio stoixeio max = fabs(A->x[pp]); precond[j] = 1/max; } cblas_dcopy (n, x, 1, res, 1); //r=b-Ax cblas_dcopy (n, b, 1, r, 1); memset(p, 0, n*sizeof(double)); cs_gaxpy (A, x, p); for(i=0;i<n;i++){ r[i]=r[i]-p[i]; } double r_norm = cblas_dnrm2 (n, r, 1); double b_norm = cblas_dnrm2 (n, b, 1); if(!b_norm) b_norm = 1; iter = 0; while( r_norm/b_norm > itol && iter < n ) { iter++; cblas_dcopy (n, r, 1, z, 1); //gia na min allaksei o r for(i=0;i<n;i++){ z[i]=precond[i]*z[i]; } rho = cblas_ddot (n, z, 1, r, 1); if (fpclassify(fabs(rho)) == FP_ZERO){ printf("RHO aborting CG due to EPS...\n"); exit(42); } if (iter == 1){ cblas_dcopy (n, z, 1, p, 1); } else{ beta = rho/rho1; //p = z + beta*p; cblas_dscal (n, beta, p, 1); //rescale cblas_daxpy (n, 1, z, 1, p, 1); //p = 1*z + p } rho1 = rho; //q = Ap memset(q, 0, n*sizeof(double)); cs_gaxpy (A, p, q); omega = cblas_ddot (n, p, 1, q, 1); if (fpclassify(fabs(omega)) == FP_ZERO){ printf("OMEGA aborting CG due to EPS...\n"); exit(42); } alpha = rho/omega; //x = x + aplha*p; cblas_dcopy (n, p, 1, temp_p, 1); cblas_dscal (n, alpha, temp_p, 1);//rescale by alpha cblas_daxpy (n, 1, temp_p, 1, res, 1);// sum x = 1*x + temp_p //r = r - aplha*q; cblas_dcopy (n, q, 1, temp_q, 1); cblas_dscal (n, -alpha, temp_q, 1);//rescale by alpha cblas_daxpy (n, 1, temp_q, 1, r, 1);// sum r = 1*r - temp_p //next step r_norm = cblas_dnrm2 (n, r, 1); } cblas_dcopy (n, res, 1, x, 1); }