/* PCG_F77 - Fortran interface to PCG */
void F77(pcg_f77)(int *n, double *x, double *b, double *tol, int *maxit,
                  int *clvl, int *iter, double *relres, int *flag, double *work,
                  void (*matvec)(double *, double *),
                  void (*precon)(double *, double *))
{
    pcg(*n, x, b, *tol, *maxit, *clvl, iter, relres, flag, work, matvec, precon);
}
void pitsol(matrix_t *mat, precon_t *prec, options_t *opts, double *d_x, double *d_b)
{
  /*---------------------------------------*/
  double t1, t2;
  /*---------------------------------------*/
  t1 = wall_timer();
  switch (opts->solver) {
  case GMRES:
    fgmres(mat, prec, opts, d_x, d_b);
    break;
  case CG:
    pcg(mat, prec, opts, d_x, d_b);
    break;
  }
  t2 = wall_timer();
  opts->result.tm_iter = t2 - t1;
}
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

int main(void)
{
  double *x, *b, *work;
  int i;
  double relres;
  int iter, flag;

  /* n_s, va_s, da_s, ja_s, ia_s are file-scope globals filled in by read_MTX_SSS */
  read_MTX_SSS("matrices/poi2d_100.mtx", &n_s, &va_s, &da_s, &ja_s, &ia_s);

  x    = (double *) malloc(n_s * sizeof(double));
  b    = (double *) malloc(n_s * sizeof(double));
  work = (double *) malloc(4 * n_s * sizeof(double));
  assert(x != NULL && b != NULL && work != NULL);

  for (i = 0; i < n_s; i++) {
    x[i] = 0.0;
    b[i] = 1.0;
  }

  printf("Starting PCG solver...\n");
  pcg(n_s, x, b, 1e-12, 2000, 1, &iter, &relres, &flag, work, matvec, NULL);

  return 0;
}
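/*
 * Sketch (assumption): one way the matvec callback handed to pcg() above could
 * look for the SSS data loaded by read_MTX_SSS. It assumes matvec(x, y)
 * computes y = A*x, that da_s holds the diagonal, and that va_s/ja_s/ia_s store
 * the strictly lower triangle with 0-based indices in CSR-like fashion; the
 * actual storage layout and argument order are fixed by the surrounding
 * project, not shown here. It relies on the same file-scope globals as main().
 */
static void matvec(double *x, double *y)
{
    int i, k, j;
    for (i = 0; i < n_s; i++)
        y[i] = da_s[i] * x[i];                /* diagonal part */
    for (i = 0; i < n_s; i++) {
        for (k = ia_s[i]; k < ia_s[i + 1]; k++) {
            j = ja_s[k];
            y[i] += va_s[k] * x[j];           /* lower-triangle entry a_ij */
            y[j] += va_s[k] * x[i];           /* symmetric counterpart a_ji */
        }
    }
}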
scs_int solveLinSys(const AMatrix * A, const Settings * stgs, Priv * p, scs_float * b,
                    const scs_float * s, scs_int iter)
{
    scs_int cgIts;
    scs_float cgTol = calcNorm(b, A->n) *
        (iter < 0 ? CG_BEST_TOL : CG_MIN_TOL / POWF((scs_float) iter + 1, stgs->cg_rate));

    tic(&linsysTimer);
    /* solves Mx = b for x, but stores the result in b */
    /* s contains warm-start (if available) */
    accumByAtrans(A, p, &(b[A->n]), b);
    /* solves (I+A'A)x = b, s warm start, solution stored in b */
    cgIts = pcg(A, stgs, p, s, b, A->n, MAX(cgTol, CG_BEST_TOL));
    scaleArray(&(b[A->n]), -1, A->m);
    accumByA(A, p, b, &(b[A->n]));
    if (iter >= 0) {
        totCgIts += cgIts;
    }
    totalSolveTime += tocq(&linsysTimer);
#if EXTRAVERBOSE > 0
    scs_printf("linsys solve time: %1.2es\n", tocq(&linsysTimer) / 1e3);
#endif
    return 0;
}
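/*
 * Sketch (assumption): the system handed to pcg() above is the normal-equations
 * operator y = (I + A'A) x. Spelled out with a generic column-major dense matrix
 * for illustration (the real code applies the sparse AMatrix through
 * accumByAtrans / accumByA), the operator application would look roughly like:
 */
static void apply_normal_eq(int m, int n, const double *A, /* m x n, column-major */
                            const double *x, double *y, double *tmp /* length m */)
{
    int i, j;
    for (i = 0; i < m; i++) {                 /* tmp = A * x */
        tmp[i] = 0.0;
        for (j = 0; j < n; j++)
            tmp[i] += A[i + j * m] * x[j];
    }
    for (j = 0; j < n; j++) {                 /* y = x + A' * tmp */
        y[j] = x[j];
        for (i = 0; i < m; i++)
            y[j] += A[i + j * m] * tmp[i];
    }
}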
int main(int argc, char* argv[])
{
  // Initialization of the network, the communicator and the allocation of
  // the GPU is done as in the previous tutorials.
  agile::NetworkEnvironment environment(argc, argv);
  typedef agile::GPUCommunicator<unsigned, float, float> communicator_type;
  communicator_type com;
  com.allocateGPU();
  agile::GPUEnvironment::printInformation(std::cout);
  std::cout << std::endl;

  // We are interested in solving the linear problem \f$ Ax = y \f$ with a
  // given matrix \f$ A \f$ and a right-hand side vector \f$ y \f$. The unknown
  // is the vector \f$ x \f$.

  // Now we can generate a matrix that shall be inverted (actually we do not
  // invert the matrix but use the CG algorithm). Note that CG requires a
  // symmetric positive definite (SPD) matrix and it is not entirely trivial to
  // write down an SPD matrix. If you fail to provide an SPD matrix to the CG
  // algorithm, there is no guarantee that it will converge. You might be lucky,
  // you might not...
  const unsigned SIZE = 20;
  float A_host[SIZE][SIZE];
  for (unsigned row = 0; row < SIZE; ++row)
    for (unsigned column = 0; column <= row; ++column)
    {
      A_host[row][column] = (float(SIZE) - float(row) + float(SIZE) / 2.0)
                            * (float(column) + 1.0);
      A_host[column][row] = A_host[row][column];
      if (row == column)
        A_host[row][column] = 2.0 * float(SIZE) + float(row) + float(column);
    }

  // The matrix is still in the host's memory and has to be transferred to the
  // GPU. This is done automatically by the constructor of \p GPUMatrixPitched.
  agile::GPUMatrixPitched<float> A(SIZE, SIZE, (float*)A_host);

  // Next we need a reference solution. We can create any vector we like at
  // this place.
  std::vector<float> x_reference_host(SIZE);
  for (unsigned counter = 0; counter < SIZE; ++counter)
    x_reference_host[counter] = float(SIZE) - float(counter) + float(SIZE / 3);

  // This vector has to be transferred to the GPU memory, too. For vectors, this
  // can be achieved by the member function \p assignFromHost.
  agile::GPUVector<float> x_reference;
  x_reference.assignFromHost(x_reference_host.begin(), x_reference_host.end());

  // We wrap the GPU matrix from above into a forward operator called
  // \p ForwardMatrix. Forward operators are simply objects that implement
  // the parenthesis-operator \p operator(), which takes an \p accumulated
  // vector and returns a \p distributed one. In all other respects the
  // operator is a black box for us.
  // The \p ForwardMatrix operator requires a reference to the communicator
  // when constructing the object so that it has access to the network.
  typedef agile::ForwardMatrix<communicator_type,
                               agile::GPUMatrixPitched<float> > forward_type;
  forward_type forward(com, A);

  // We also want to use a preconditioner, which means that we change from
  // the original problem \f$ Ax = y \f$ to the equivalent one
  // \f$ PAx = Py \f$, where \f$ P \f$ is a preconditioner. The rationale is
  // that most often the matrix \f$ A \f$ is ill-conditioned and the CG
  // algorithm does not converge at all or needs many iterations. The use of
  // a preconditioner makes the whole system better conditioned. The simplest
  // choice is the identity \f$ P = I \f$ (which means no preconditioning at
  // all). The best choice would be \f$ P = A^{-1} \f$, as we would have the
  // solution for \f$ x \f$ in the first step already (but then we would again
  // need the inverse of \f$ A \f$, which we wanted to avoid). An
  // 'intermediate' possibility is to take \f$ P = diag(A)^{-1} \f$, which is
  // easy and fast to invert and gives better results than the identity.
  // A preconditioner belongs to the class of inverse operators. All inverse
  // operators implement a parenthesis-operator which takes a \p distributed
  // vector as input and returns an \p accumulated one (opposite to the forward
  // operators, thus).
#if JACOBI_PRECONDITIONER
  typedef agile::JacobiPreconditioner<communicator_type, float> preconditioner_type;
  std::vector<float> diagonal(SIZE);
  for (unsigned row = 0; row < SIZE; ++row)
    diagonal[row] = A_host[row][row];
  preconditioner_type preconditioner(com, diagonal);
#else
  typedef agile::InverseIdentity<communicator_type> preconditioner_type;
  preconditioner_type preconditioner(com);
#endif

  // The last operator needed is a measure. A measure operator again has
  // a parenthesis-operator. This time it takes an \p accumulated vector as
  // first input and a \p distributed one as second input and returns a scalar
  // measuring somehow the size of the vectors. An example is the scalar
  // product operator.
  typedef agile::ScalarProductMeasure<communicator_type> measure_type;
  measure_type scalar_product(com);

  // Finally, generate the PCG solver. It needs the absolute and relative
  // tolerances as input so that it knows when the solution is good enough for
  // our purposes. Furthermore, it requires the maximum number of iterations
  // after which it simply capitulates without having found a solution.
  const double REL_TOLERANCE = 1e-12;
  const double ABS_TOLERANCE = 1e-6;
  const unsigned MAX_ITERATIONS = 100;
  agile::PreconditionedConjugateGradient<communicator_type, forward_type,
                                         preconditioner_type, measure_type>
    pcg(com, forward, preconditioner, scalar_product,
        REL_TOLERANCE, ABS_TOLERANCE, MAX_ITERATIONS);

  // What we have not generated yet is the right-hand side \f$ y \f$. This is
  // simply one call to our forward operator.
  agile::GPUVector<float> y(SIZE);
  forward(x_reference, y);

  // We need one more vector to hold the result of the CG algorithm. Note that
  // we also supply the initial guess for the solution via this vector.
  agile::GPUVector<float> x(SIZE);

  // Finally, we have constructed, initialized, wrapped... everything. The only
  // thing left to do is to call the CG operator.
  pcg(y, x);

  // Print some statistics (and hope that the operator actually converged).
  if (pcg.convergence())
    std::cout << "CG converged in ";
  else
    std::cout << "Error: CG did not converge in ";
  std::cout << pcg.getIteration() + 1 << " iterations." << std::endl;
  std::cout << "Initial residual = " << pcg.getRho0() << std::endl;
  std::cout << "Final residual = " << pcg.getRho() << std::endl;
  std::cout << "Ratio rho_k / rho_0 = " << pcg.getRho() / pcg.getRho0() << std::endl;

  // As the vectors in this example were quite small, we can even print them to
  // standard output.
  std::cout << "Reference: " << std::endl << " ";
  for (unsigned counter = 0; counter < x_reference_host.size(); ++counter)
    std::cout << x_reference_host[counter] << " ";
  std::cout << std::endl;

  // The solution is still on the GPU and has to be transferred to the CPU
  // memory. This is accomplished using \p copyToHost.
  std::vector<float> x_host;
  x.copyToHost(x_host);

  // Output the solution, too.
  std::cout << "CG solution: " << std::endl << " ";
  for (unsigned counter = 0; counter < x_host.size(); ++counter)
    std::cout << x_host[counter] << " ";
  std::cout << std::endl;

  // Finally, we also compute the difference between the reference solution and
  // the CG solution (of course, we do this on the GPU).
  agile::GPUVector<float> difference(SIZE);
  subVector(x_reference, x, difference);
  // To measure the distance, we use the scalar product measure we have
  // introduced above. Note that this operator wants the first vector in
  // accumulated format and the second one in distributed format. The solution
  // we got from the CG algorithm is accumulated (because CG is an inverse
  // operator). This means we have to distribute the solution to have mixed
  // formats.
  agile::GPUVector<float> difference_dist(difference);
  com.distribute(difference_dist);
  std::cout << "L2 of difference: "
            << std::sqrt(std::abs(scalar_product(difference, difference_dist)))
            << std::endl;

  // So, that's it.
  return 0;
}
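/*
 * Sketch (assumption): the Jacobi choice P = diag(A)^{-1} discussed in the
 * comments above amounts to a component-wise division of the residual by the
 * matrix diagonal. A minimal plain-C illustration, independent of the AGILE
 * types used in this tutorial (function and parameter names are hypothetical):
 */
static void jacobi_apply(unsigned n, const float *diag, const float *r, float *z)
{
    for (unsigned i = 0; i < n; ++i)
        z[i] = r[i] / diag[i];   /* z = diag(A)^{-1} * r */
}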
/** \brief Compute search direction using pcg method.
 *
 */
void compute_searchdir_pcg(problem_data_t * pdat, variables_t * vars,
                           double t, double s, double gap,
                           pcg_status_t * pcgstat, adata_t * adata, mdata_t * mdata,
                           double *precond, double *tmp_m1, double *A2h, double *tmp_x1)
{
    int i, m, n, nz;
    double *p0, *p1, *p2, *p3;
    double normg, pcgtol, pcgmaxi, multfact;
    dmatrix *matX1, *matX2;
    double lambda, tinv;
    double *g, *h, *z, *expz, *expmz, *ac, *ar, *b, *d1, *d2, *Aw;
    double *x, *v, *w, *u, *dx, *dv, *dw, *du, *gv, *gw, *gu, *gx;
    static double pcgtol_factor = 1.0;

    get_problem_data(pdat, &matX1, &matX2, &ac, &ar, &b, &lambda);
    get_variables(vars, &x, &v, &w, &u, &dx, &dv, &dw, &du,
                  &gx, &gv, &gw, &gu, &g, &h, &z, &expz, &expmz, &d1, &d2, &Aw);

    m = matX1->m;
    n = matX1->n;
    nz = matX1->nz;
    tinv = 1.0 / t;

    p0 = &precond[0];
    p1 = &precond[1];
    p2 = &precond[1 + n];
    p3 = &precond[1 + n + n];

    /* dmat_vset(n+n+1, 0, dx); */

    dmat_yATx(matX2, h, A2h);   /* A2h = A2'*h */

    multfact = 0.0;
    if (ac != NULL) {
        /* h.*ac */
        dmat_elemprod(m, h, ac, tmp_m1);
        dmat_vset(n, 0, tmp_x1);
        dmat_yAmpqTx(matX1, NULL, NULL, tmp_m1, tmp_x1);
        dmat_elemprod(n, ar, tmp_x1, tmp_x1);
        for (i = 0; i < m; i++) {
            multfact += h[i] * ac[i] * ac[i];
        }
    }
    p0[0] = 0;
    for (i = 0; i < m; i++) {
        p0[0] += b[i] * b[i] * h[i];
    }

    /* complete forming gradient and d1, d2, precond */
    for (i = 0; i < n; i++) {
        double q1, q2, d3, div;
        q1 = 1.0 / (u[i] + w[i]);
        q2 = 1.0 / (u[i] - w[i]);
        gw[i] -= (q1 - q2) * tinv;          /* A'*g - (q1-q2) */
        gu[i] = lambda - (q1 + q2) * tinv;  /* lambda - (q1+q2) */
        d1[i] = (q1 * q1 + q2 * q2) * tinv;
        d2[i] = (q1 * q1 - q2 * q2) * tinv;

        if (ac != NULL) {
            d3 = A2h[i] + d1[i] + multfact * ar[i] * ar[i] - 2 * tmp_x1[i];
        } else {
            d3 = A2h[i] + d1[i];
        }
        div = 1 / (d3 * d1[i] - d2[i] * d2[i]);
        p1[i] = d1[i] * div;
        p2[i] = d2[i] * div;
        p3[i] = d3 * div;
    }

    normg = dmat_norm2(n + n + 1, gx);
    pcgtol = min(1e-1, 0.3 * gap / min(1.0, normg));
    /* pcgtol = min(1e-1, 0.3*gap/min(1.0,sqrt(normg))); */
    pcgmaxi = MAX_PCG_ITER;

    if (s < 1e-5) {
        pcgtol_factor *= 0.5;
    } else {
        pcgtol_factor = 1.0;
    }
    pcgtol = pcgtol * pcgtol_factor;

    dmat_waxpby(n + n + 1, -1, gx, 0, NULL, tmp_x1);
    pcg(dx, pcgstat, afun, adata, mfun, mdata, tmp_x1, pcgtol, pcgmaxi, n + n + 1);
}
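/*
 * Sketch (assumption): the loop above stores, for each coordinate i, the entries
 * p1, p2, p3 of the inverse of the 2x2 block [[d3, d2], [d2, d1]], since
 * inv([[d3,d2],[d2,d1]]) = 1/(d3*d1 - d2*d2) * [[d1,-d2],[-d2,d3]], plus the
 * scalar p0 for the intercept variable. The preconditioner callback mfun passed
 * to pcg() is defined elsewhere; assuming the unknown ordering [intercept, w, u],
 * one plausible apply step using this data would be (names are hypothetical):
 */
static void precond_apply_sketch(int n, const double *p0, const double *p1,
                                 const double *p2, const double *p3,
                                 const double *r, double *out)
{
    int i;
    out[0] = r[0] / p0[0];                         /* 1x1 intercept block */
    for (i = 0; i < n; i++) {                      /* 2x2 block for (w_i, u_i) */
        out[1 + i]     =  p1[i] * r[1 + i] - p2[i] * r[1 + n + i];
        out[1 + n + i] = -p2[i] * r[1 + i] + p3[i] * r[1 + n + i];
    }
}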