void Poisson1D::runPoisson1D(double vTolerance, double _chargeTolerance, double magicNumber, bool Equilibrum) { // Relatively quicker to solve potential, because potential is continuous. // potential == vacuum level double chargeTolerance = _chargeTolerance / ( pow(cmNL(1), 3) ); mat potential= phin + dev1D.getEAArray() + fLnArray; bCArrayFunct(); mat oldcDA = zeros(dev1D.getSumPoint()); mat errV = ones(1)*LARGE; mat errCDA = ones(1)*LARGE; // superlu_opts settings; // optional int numConvergenceStep=0; do { cDA = dev1D.chargeDensityArrayFunct(phin, setPhip(phin, Equilibrum), Equilibrum); errCDA = abs(cDA - oldcDA).max(); oldcDA = cDA; // std::cout << cDA(51) << " " << cDA(87) << std::endl; mat error = - (dev1D.getMatrixC() * potential - ChargeQ/E0 * cDA - bCArray); sp_mat qCMat = ChargeQ/E0 * dev1D.qCMatFunct(phin, setPhip(phin, Equilibrum), Equilibrum); sp_mat matrixC_plusCq = dev1D.getMatrixC() - qCMat; mat deltaPotential = spsolve(matrixC_plusCq, error, "superlu"); /** spsolve was not recognized. solve by using full path in #include, * then follow http://stackoverflow.com/questions/30494610/how-to-link-armadillo-with-eclipse * then rebuild (in index) */ potential += magicNumber * deltaPotential; errV = abs(deltaPotential).max(); numConvergenceStep++; phin = potential - (dev1D.getEAArray() + fLnArray ); } while ((errV(0) > vTolerance) || (errCDA(0) > chargeTolerance) || numConvergenceStep > 1E4); if ( numConvergenceStep > 1E4) std::cerr << "Solution not found." << std::endl; phip = setPhip(phin, Equilibrum); condBand = potential - dev1D.getEAArray(); valeBand = condBand - dev1D.getBGArray(); }
int sfgmr(int n, void (*smatvec) (float, float[], float, float[]), void (*spsolve) (int, float[], float[]), float *rhs, float *sol, double tol, int im, int *itmax, FILE * fits) {
/*----------------------------------------------------------------------
|                 *** Preconditioned FGMRES ***
+-----------------------------------------------------------------------
| This is a simple version of the ARMS preconditioned FGMRES algorithm.
| Single-precision restarted flexible GMRES: the outer do-loop is one
| restart cycle, the inner for-loop builds at most im Arnoldi vectors.
+-----------------------------------------------------------------------
| Y. S. Dec. 2000. -- Apr. 2008
+-----------------------------------------------------------------------
| on entry:
|----------
| n        = length of the vectors rhs and sol.
| smatvec  = matrix-vector product callback; called here as
|            smatvec(one, x, zero, y) to obtain y = A x.
| spsolve  = (right) preconditioning callback, called as
|            spsolve(n, z, v); may be NULL (plain copy is used instead).
| rhs      = real vector of length n containing the right hand side.
| sol      = real vector of length n containing an initial guess to the
|            solution on input.
| tol      = tolerance for stopping iteration
| im       = Krylov subspace dimension
| (itmax)  = max number of iterations allowed.
| fits     = NULL: no output
|         != NULL: file handle to which "iteration  residual" lines
|            are written
|
| on return:
|----------
| fgmr      int =  0 --> successful return.
|           int =  1 --> convergence not achieved in itmax iterations
|                        (also returned when the divergence guard at the
|                        end of a restart cycle fires).
| sol     = contains an approximate solution (upon successful return).
| itmax   = has changed. It now contains the number of steps required
|           to converge -- or maxits + 10 when the divergence guard
|           fired.
+-----------------------------------------------------------------------
| internal work arrays:
|----------
| vv      = work array of length [im+1][n] (used to store the Arnoldi
|           basis)
| hh      = im columns of the Hessenberg matrix; column i is allocated
|           with i+2 entries
| z       = work array of length [im][n] to store preconditioned vectors
| c, s    = cosines / sines of the plane rotations (length im)
| rs      = right hand side of the Hessenberg system (length im+1)
+-----------------------------------------------------------------------
| subroutines called :
| matvec - matrix-vector multiplication operation
| psolve - (right) preconditioning operation
|          psolve can be a NULL pointer (GMRES without preconditioner)
| snrm2_, sdot_, saxpy_, scopy_ - vector kernels with increment i_1
+---------------------------------------------------------------------*/
    int maxits = *itmax;
    /* NOTE: i_2 is initialised but not used anywhere in this routine. */
    int i, i1, ii, j, k, k1, its, retval, i_1 = 1, i_2 = 2;
    float beta, eps1 = 0.0, t, t0, gam;
    float **hh, *c, *s, *rs;
    float **vv, **z, tt;
    float zero = 0.0;
    float one = 1.0;

    its = 0;
    /* Work-space allocation; the returned pointers are not checked here. */
    vv = (float **)SUPERLU_MALLOC((im + 1) * sizeof(float *));
    for (i = 0; i <= im; i++) vv[i] = floatMalloc(n);
    z = (float **)SUPERLU_MALLOC(im * sizeof(float *));
    hh = (float **)SUPERLU_MALLOC(im * sizeof(float *));
    for (i = 0; i < im; i++) {
        hh[i] = floatMalloc(i + 2);
        z[i] = floatMalloc(n);
    }
    c = floatMalloc(im);
    s = floatMalloc(im);
    rs = floatMalloc(im + 1);

    /*---- outer loop starts here ----*/
    do {
        /*---- compute initial residual vector ----*/
        smatvec(one, sol, zero, vv[0]);
        for (j = 0; j < n; j++) vv[0][j] = rhs[j] - vv[0][j]; /* vv[0]= initial residual */
        beta = snrm2_(&n, vv[0], &i_1);
        /*---- print info if fits != null ----*/
        if (fits != NULL && its == 0) fprintf(fits, "%8d %10.2e\n", its, beta);
        /*if ( beta <= tol * dnrm2_(&n, rhs, &i_1) )*/
        /* Written as a negated '>' so that a NaN residual also leaves the loop. */
        if ( !(beta > tol * snrm2_(&n, rhs, &i_1)) ) break;
        t = 1.0 / beta;
        /*---- normalize: vv[0] = vv[0] / beta ----*/
        for (j = 0; j < n; j++) vv[0][j] = vv[0][j] * t;
        /* eps1 is fixed once, from the very first residual norm. */
        if (its == 0) eps1 = tol * beta;
        /*---- initialize 1-st term of rhs of hessenberg system ----*/
        rs[0] = beta;

        for (i = 0; i < im; i++) {
            its++;
            i1 = i + 1;
            /*------------------------------------------------------------
            |  (Right) Preconditioning Operation   z_{j} = M^{-1} v_{j}
            +-----------------------------------------------------------*/
            if (spsolve) spsolve(n, z[i], vv[i]);
            else scopy_(&n, vv[i], &i_1, z[i], &i_1);
            /*---- matvec operation w = A z_{j} = A M^{-1} v_{j} ----*/
            smatvec(one, z[i], zero, vv[i1]);
            /*------------------------------------------------------------
            |     modified gram - schmidt...
            |     h_{i,j} = (w,v_{i})
            |     w  = w - h_{i,j} v_{i}
            +------------------------------------------------------------*/
            t0 = snrm2_(&n, vv[i1], &i_1);
            for (j = 0; j <= i; j++) {
                float negt;
                tt = sdot_(&n, vv[j], &i_1, vv[i1], &i_1);
                hh[i][j] = tt;
                negt = -tt;
                saxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
            }
            /*---- h_{j+1,j} = ||w||_{2} ----*/
            t = snrm2_(&n, vv[i1], &i_1);
            /* Re-orthogonalise while a pass more than halves the norm of w;
               the extra projections are accumulated into hh[i][j]. */
            while (t < 0.5 * t0) {
                t0 = t;
                for (j = 0; j <= i; j++) {
                    float negt;
                    tt = sdot_(&n, vv[j], &i_1, vv[i1], &i_1);
                    hh[i][j] += tt;
                    negt = -tt;
                    saxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
                }
                t = snrm2_(&n, vv[i1], &i_1);
            }
            hh[i][i1] = t;
            if (t != 0.0) {
                /*---- v_{j+1} = w / h_{j+1,j} ----*/
                t = 1.0 / t;
                for (k = 0; k < n; k++) vv[i1][k] = vv[i1][k] * t;
            }
            /*---------------------------------------------------
            |     done with modified gram schmidt and arnoldi step
            |     now update factorization of hh
            +--------------------------------------------------*/
            /*--------------------------------------------------------
            |   perform previous transformations on i-th column of h
            +-------------------------------------------------------*/
            for (k = 1; k <= i; k++) {
                k1 = k - 1;
                tt = hh[i][k1];
                hh[i][k1] = c[k1] * tt + s[k1] * hh[i][k];
                hh[i][k] = -s[k1] * tt + c[k1] * hh[i][k];
            }
            gam = sqrt(pow(hh[i][i], 2) + pow(hh[i][i1], 2));
            /*---------------------------------------------------
            |     if gamma is zero then any small value will do
            |     affect only residual estimate
            +--------------------------------------------------*/
            /* if (gam == 0.0) gam = epsmac; */
            /*---- get next plane rotation ---*/
            if (gam == 0.0) {
                c[i] = one;
                s[i] = zero;
            } else {
                c[i] = hh[i][i] / gam;
                s[i] = hh[i][i1] / gam;
            }
            rs[i1] = -s[i] * rs[i];
            rs[i] = c[i] * rs[i];
            /*----------------------------------------------------
            |   determine residual norm and test for convergence
            +---------------------------------------------------*/
            hh[i][i] = c[i] * hh[i][i] + s[i] * hh[i][i1];
            /* |rs[i+1]| is the residual estimate used for the inner test. */
            beta = fabs(rs[i1]);
            if (fits != NULL) fprintf(fits, "%8d %10.2e\n", its, beta);
            if (beta <= eps1 || its >= maxits) break;
        }

        /* The for-loop ran to completion: step back to the last valid column. */
        if (i == im) i--;
        /*---- now compute solution. 1st, solve upper triangular system ----*/
        rs[i] = rs[i] / hh[i][i];
        for (ii = 1; ii <= i; ii++) {
            k = i - ii;
            k1 = k + 1;
            tt = rs[k];
            for (j = k1; j <= i; j++) tt = tt - hh[j][k] * rs[j];
            rs[k] = tt / hh[k][k];
        }
        /*---- linear combination of the preconditioned vectors z[j] to get sol. ----*/
        for (j = 0; j <= i; j++) {
            tt = rs[j];
            for (k = 0; k < n; k++) sol[k] += tt * z[j][k];
        }
        /* calculate the residual and output */
        smatvec(one, sol, zero, vv[0]);
        for (j = 0; j < n; j++) vv[0][j] = rhs[j] - vv[0][j]; /* vv[0]= residual of the updated sol */
        /*---- true residual norm after this restart cycle ----*/
        beta = snrm2_(&n, vv[0], &i_1);
        /*---- restart outer loop if needed ----*/
        /*if (beta >= eps1 / tol)*/
        /* Divergence guard: eps1 / tol is the first residual norm. If the true
           residual is not below it (or is NaN), give up; its is pushed past
           maxits so that the routine returns 1. */
        if ( !(beta < eps1 / tol) ) {
            its = maxits + 10;
            break;
        }
        if (beta <= eps1) break;
    } while(its < maxits);

    /* 1 when the iteration budget was exhausted (or the guard fired), else 0. */
    retval = (its >= maxits);
    for (i = 0; i <= im; i++) SUPERLU_FREE(vv[i]);
    SUPERLU_FREE(vv);
    for (i = 0; i < im; i++) {
        SUPERLU_FREE(hh[i]);
        SUPERLU_FREE(z[i]);
    }
    SUPERLU_FREE(hh);
    SUPERLU_FREE(z);
    SUPERLU_FREE(c);
    SUPERLU_FREE(s);
    SUPERLU_FREE(rs);

    *itmax = its;

    return retval;
} /*----end of fgmr ----*/
/**
 * Interior-point style Newton iteration for an LCP with a mix of free and
 * non-negative ("bound") variables.
 *
 * @param lcp  Problem data; this routine reads lcp.M, lcp.q and lcp.free_vars.
 * @param x    Primal iterate (length N); used as the starting point and
 *             overwritten with the final iterate.
 * @param y    Dual iterate (length N); entries at free indices must be 0 on
 *             entry and are reset to 0 on exit.
 * @return     SolverResult built from (x, y, number of iterations run).
 *
 * Iteration stops when the mean complementarity dot(b,s)/NB drops below
 * comp_thresh or after max_iter passes.
 *
 * NOTE(review): NB == 0 (no bound variables) makes the mean complementarity
 * 0/0; that case is not handled here -- confirm callers never pass it.
 */
SolverResult KojimaSolver::solve(const LCP & lcp, vec & x, vec & y) const{
  superlu_opts opts;
  opts.equilibrate = true;
  opts.permutation = superlu_opts::COLAMD;
  opts.refine = superlu_opts::REF_SINGLE;

  vec q = lcp.q;
  sp_mat M = lcp.M + regularizer * speye(size(lcp.M));
  uint N = q.n_elem;

  assert(N == x.n_elem);
  assert(N == y.n_elem);

  // Figure out what is free (-inf,inf)
  // and what is bound to be non-negative [0,inf)
  bvec free_vars = lcp.free_vars;
  uvec bound_idx = find(0 == free_vars);
  uvec free_idx = find(1 == free_vars);
  assert(N == bound_idx.n_elem + free_idx.n_elem);
  uint NB = bound_idx.n_elem; // number of bound vars
  uint NF = free_idx.n_elem;  // number of free vars

  /*
    In what follows, the primal variables x are partitioned into
    free variables and bound variables x = [f;b]'
    Likewise, the dual variables are partitioned into y = [0,s]'
  */

  /*
    The Newton system, in the block order actually assembled below
    (unknowns [df; db; ds]):

      [  0      S      B ] [df]   [ sigma * mu - b .* s  ]
      [ -M_ff  -M_fb   0 ] [db] = [ M_f x + q_f          ]
      [ -M_bf  -M_bb   I ] [ds]   [ M_b x + q_b - s      ]

    Where "M_ff" is the free-free block, S = diag(s), B = diag(b) and mu is
    the mean complementarity.
    Overwrite the S,B diagonals every iteration
  */

  // Split M matrix into blocks based on free and bound indices
  block_sp_mat M_part = sp_partition(M,free_idx,bound_idx);
  sp_mat M_recon = block_mat(M_part);
  assert(PRETTY_SMALL > norm(M_recon - M));

  vec qf = q(free_idx);
  vec qb = q(bound_idx);
  vec b = x(bound_idx);
  vec f = x(free_idx);
  vec s = y(bound_idx);

  // Build the Newton matrix
  vector<vector<sp_mat>> block_G;
  block_G.push_back(block_sp_vec{sp_mat(),sp_mat(),sp_mat(NB,NB)});
  block_G.push_back(block_sp_vec{-M_part[0][0],-M_part[0][1],sp_mat()});
  block_G.push_back(block_sp_vec{-M_part[1][0],-M_part[1][1],speye(NB,NB)});

  // Start iteration
  // Initialised from the starting iterate so the final report below never
  // reads an indeterminate value when the loop body does not run
  // (max_iter == 0).
  double mean_comp = dot(b,s) / (double) NB;
  double sigma = initial_sigma;
  uint iter;
  for(iter = 0; iter < max_iter; iter++){
    if(verbose or iter_verbose)
      cout << "---Iteration " << iter << "---" << endl;
    assert(all(0 == y(free_idx)));

    // Mean complementarity
    mean_comp = dot(b,s) / (double) NB;
    if(mean_comp < comp_thresh)
      break;

    block_G[0][1] = spdiag(s);
    block_G[0][2] = spdiag(b);
    sp_mat G = block_mat(block_G);
    assert(size(N + NB,N + NB) == size(G));

    // Form RHS from residual and complementarity
    vec h = vec(N + NB);
    vec res_f = M_part[0][0]*f + M_part[0][1]*b + qf;
    vec res_b = M_part[1][0]*f + M_part[1][1]*b + qb - s;
    h.head(NB) = sigma * mean_comp - b % s;
    h.subvec(NB,size(res_f)) = res_f;
    h.tail(NB) = res_b;

    //Archiver arch;
    //arch.add_sp_mat("G",G);
    //arch.add_vec("h",h);
    //arch.write("test.sys");

    // Solve and extract directions
    vec dir = spsolve(G,h,"superlu",opts);
    assert((N+NB) == dir.n_elem);
    vec df = dir.head(NF);
    vec db = dir.subvec(NF,N-1);
    assert(NB == db.n_elem);
    vec ds = dir.tail(NB);
    vec dir_recon = join_vert(df,join_vert(db,ds));
    assert(PRETTY_SMALL > norm(dir_recon-dir));

    // Step length is chosen from the bound/slack pair only; the centering
    // parameter is then adapted from the step that was accepted.
    const double steplen = steplen_heuristic(b,s,db,ds,0.9);
    sigma = sigma_heuristic(sigma,steplen);

    f += steplen * df;
    b += steplen * db;
    s += steplen * ds;

    if(verbose){
      double res = norm(join_vert(res_f,res_b));
      cout <<"\t Mean complementarity: " << mean_comp
           <<"\n\t Residual norm: " << res
           <<"\n\t |df|: " << norm(df)
           <<"\n\t |db|: " << norm(db)
           <<"\n\t |ds|: " << norm(ds)
           <<"\n\t Step length: " << steplen
           <<"\n\t Centering sigma: " << sigma << endl;
    }
  }
  if(verbose){
    cout << "Finished"
         <<"\n\t Final mean complementarity: " << mean_comp << endl;
  }

  // Scatter the partitioned iterates back into the caller's vectors.
  x(free_idx) = f;
  x(bound_idx) = b;
  y(free_idx).fill(0);
  y(bound_idx) = s;
  return SolverResult(x,y,iter);
}
/**
 * Build an augmented projective LCP with one extra variable, together with a
 * starting point (x, y, w) for it.
 *
 * @param original  PLCP to augment; reads P (N x K), U (K x N), q and
 *                  free_vars. P must satisfy P' P = I to within PRETTY_SMALL,
 *                  and q must be <= 0 at every free index.
 * @param x         Output: primal starting vector, length N + 1.
 * @param y         Output: dual starting vector, length N + 1 (0 at free
 *                  indices).
 * @param w         Output: reduced vector, length K + 1.
 * @param scale     Value placed in the new corner entry U(K,N) and in y(N);
 *                  w(K) is set to 1 - scale.
 * @return          PLCP with P grown to (N+1) x (K+1), U grown to
 *                  (K+1) x (N+1), q extended by a 0 and the new variable
 *                  marked as bound (free_vars(N) = 0).
 *
 * Violated preconditions are reported on cerr and then trip an assert.
 */
PLCP augment_plcp(const PLCP & original, vec & x, vec & y, vec & w, double scale){
  uint N = original.P.n_rows;
  uint K = original.P.n_cols;
  assert(size(K,N) == size(original.U));

  sp_mat P = sp_mat(original.P);
  // I_norm is already a non-negative scalar; compare it directly.
  double I_norm = norm(speye(K,K) - P.t() * P);
  if(I_norm >= PRETTY_SMALL){
    cerr << "Error: P does not look orthogonal ("
         << I_norm << ")..." << endl;
  }
  assert(I_norm < PRETTY_SMALL);

  sp_mat U = sp_mat(original.U);
  vec q = vec(original.q);

  assert(all(q(find(1 == original.free_vars)) <= 0));

  // Starting point: x = 1 - min(0,q), y = 1 + max(0,q), with y forced to 0
  // on the free variables.
  vec q_neg = min(zeros<vec>(N),q);
  vec q_pos = max(zeros<vec>(N),q);
  x = ones<vec>(N) - q_neg;
  y = ones<vec>(N) + q_pos;
  y(find(1 == original.free_vars)).fill(0);
  assert(norm(q_pos(find(1 == original.free_vars))) < ALMOST_ZERO);

  assert(N == x.n_elem);
  assert(N == y.n_elem);
  assert(all(x >= 0));
  assert(all(y >= 0));

  // Least-squares fit of w so that P w matches x - y + q; the tiny diagonal
  // shift is added to the normal-equations matrix before the sparse solve.
  vec res = x - y + q;
  w = spsolve(P.t()*P + 1e-15*speye(K,K),P.t()*res);
  vec w_res = P * w - res;
  const double w_res_norm = norm(w_res);
  if(w_res_norm >= PRETTY_SMALL){
    // Report the scalar residual norm (the quantity being tested), matching
    // the orthogonality message above, instead of dumping the whole vector.
    cerr << "Error: Reduced vector w residual large ("
         << w_res_norm << ")..." << endl;
  }
  assert(w_res_norm < PRETTY_SMALL);

  vec b = P.t()*x - U*x - w;
  assert(K == b.n_elem);

  // Grow every operand by one row/column for the new variable.
  P.resize(N+1,K+1);
  U.resize(K+1,N+1);
  q.resize(N+1);

  P(N,K) = 1.0;
  U(span(0,K-1),N) = b;
  U(K,N) = scale;
  q(N) = 0;

  x.resize(N+1);
  y.resize(N+1);
  w.resize(K+1);

  x(N) = 1;
  y(N) = scale;
  w(K) = 1.0 - scale;

  bvec free_vars = bvec(N+1);
  free_vars.head(N) = original.free_vars;
  free_vars(N) = 0;

  return PLCP(P,U,q,free_vars);
}