Example no. 1
/* PCG_F77 - Fortran interface to PCG
 */
void F77(pcg_f77)(int *n, 
		  double *x, 
		  double *b,
		  double *tol, 
		  int *maxit,
		  int *clvl,
		  int *iter, 
		  double *relres, 
		  int *flag,
		  double *work,
		  void (*matvec)(double *, double *),
		  void (*precon)(double *, double *)) {
  pcg(*n,
      x,
      b,
      *tol,
      *maxit,
      *clvl,
      iter,
      relres,
      flag,
      work,
      matvec,
      precon);
}
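
Both entry points take matvec (and optionally precon) as plain callbacks of type void (*)(double *, double *). As a minimal sketch of what such a callback might look like, here is a hypothetical matrix-free operator for the 1D Laplacian; the y = A*x in/out convention and the file-scope dimension n_s mirror Example no. 3 but are assumptions about this particular pcg interface, not something the wrapper above guarantees.

#include <stdio.h>

static int n_s = 5;  /* problem size, assumed to be shared with the solver */

/* Hypothetical matvec callback: y = A*x for the 1D Laplacian
 * (2 on the diagonal, -1 on the off-diagonals). */
static void matvec(double *x, double *y)
{
  int i;
  for (i = 0; i < n_s; i++) {
    y[i] = 2.0 * x[i];
    if (i > 0)       y[i] -= x[i - 1];
    if (i < n_s - 1) y[i] -= x[i + 1];
  }
}

int main(void)
{
  double x[5] = {1.0, 1.0, 1.0, 1.0, 1.0}, y[5];
  int i;
  matvec(x, y);  /* apply the operator once */
  for (i = 0; i < n_s; i++)
    printf("%g ", y[i]);  /* prints: 1 0 0 0 1 */
  printf("\n");
  return 0;
}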
Example no. 2
void pitsol(matrix_t *mat, precon_t *prec,
            options_t *opts, double *d_x, double *d_b) {
/*---------------------------------------*/
  double t1, t2;
/*---------------------------------------*/
  t1 = wall_timer();
  switch (opts->solver) {
    case GMRES:
      fgmres(mat, prec, opts, d_x, d_b);
      break;
    case CG:
      pcg(mat, prec, opts, d_x, d_b);
      break;
  }
  t2 = wall_timer();
  opts->result.tm_iter = t2 - t1;
}
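
A hedged usage sketch follows. Only the solver field, the GMRES/CG enumerators, and result.tm_iter are taken from the snippet above; the setup of mat, prec, d_x, and d_b is elided, and reading tm_iter as seconds is an inference from the wall_timer() name.

/* Sketch only: mat, prec, d_x, d_b are assumed to be built elsewhere. */
opts->solver = CG;  /* or GMRES */
pitsol(mat, prec, opts, d_x, d_b);
printf("iterative solve took %.3f s\n", opts->result.tm_iter);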
Example no. 3
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/* n_s and the matrix arrays va_s, da_s, ja_s, ia_s are assumed to be
   file-scope variables shared with read_MTX_SSS and matvec. */

int main(void) {
  double *x, *b, *work;
  int i;
  double relres;
  int iter, flag;

  read_MTX_SSS("matrices/poi2d_100.mtx", &n_s, &va_s, &da_s, &ja_s, &ia_s);

  x = (double *) malloc(n_s * sizeof(double));
  b = (double *) malloc(n_s * sizeof(double));
  work = (double *) malloc(4 * n_s * sizeof(double));
  assert(x != NULL && b != NULL && work != NULL);

  /* zero initial guess, all-ones right-hand side */
  for (i = 0; i < n_s; i++) {
    x[i] = 0.0;
    b[i] = 1.0;
  }

  printf("Starting PCG solver...\n");
  pcg(n_s, x, b, 1e-12, 2000, 1, &iter, &relres, &flag, work, matvec, NULL);

  free(x);
  free(b);
  free(work);
  return 0;
}
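
The snippet declares iter, relres, and flag but never inspects them. A hedged continuation might look like the fragment below; the convention that flag == 0 signals convergence is an assumption about this pcg implementation, not something the example guarantees.

/* Hypothetical post-solve check: the meaning of flag is assumed. */
if (flag == 0)
  printf("PCG converged in %d iterations (relres = %.2e)\n", iter, relres);
else
  printf("PCG stopped with flag %d after %d iterations\n", flag, iter);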
Example no. 4
scs_int solveLinSys(const AMatrix * A, const Settings * stgs, Priv * p, scs_float * b, const scs_float * s, scs_int iter) {
	scs_int cgIts;
	scs_float cgTol = calcNorm(b, A->n)
			* (iter < 0 ? CG_BEST_TOL : CG_MIN_TOL / POWF((scs_float) iter + 1, stgs->cg_rate));

	tic(&linsysTimer);
	/* solves Mx = b for x, but stores the result in b */
	/* s contains warm-start (if available) */
	accumByAtrans(A, p, &(b[A->n]), b);
	/* solves (I+A'A)x = b, s warm start, solution stored in b */
	cgIts = pcg(A, stgs, p, s, b, A->n, MAX(cgTol, CG_BEST_TOL));
	scaleArray(&(b[A->n]), -1, A->m);
	accumByA(A, p, b, &(b[A->n]));

	if (iter >= 0) {
		totCgIts += cgIts;
	}

	totalSolveTime += tocq(&linsysTimer);
#if EXTRAVERBOSE > 0
	scs_printf("linsys solve time: %1.2es\n", tocq(&linsysTimer) / 1e3);
#endif
	return 0;
}
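
To see why these three calls amount to one solve with M, note that the comments describe the symmetric system with M = [[I, A'], [A, -I]] (this block form is inferred from the comments, not stated in the snippet). Writing the right-hand side as (b_1, b_2), block elimination gives

\[
\begin{pmatrix} I & A^\top \\ A & -I \end{pmatrix}
\begin{pmatrix} x \\ y \end{pmatrix}
=
\begin{pmatrix} b_1 \\ b_2 \end{pmatrix}
\;\Longrightarrow\;
y = Ax - b_2,
\qquad
(I + A^\top A)\,x = b_1 + A^\top b_2.
\]

That is exactly the sequence above: accumByAtrans forms b_1 + A'b_2, pcg solves (I + A'A)x = b, and scaleArray followed by accumByA recovers y = Ax - b_2 in the second block of b.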
Example no. 5
int main(int argc, char* argv[])
{
  // Initialization of the network, the communicator and the allocation of
  // the GPU is done as in previous tutorials.
  agile::NetworkEnvironment environment(argc, argv);
  typedef agile::GPUCommunicator<unsigned, float, float> communicator_type;
  communicator_type com;
  com.allocateGPU();
  agile::GPUEnvironment::printInformation(std::cout);
  std::cout << std::endl;

  // We are interested in solving the linear problem \f$ Ax = y \f$, with a
  // given matrix \f$ A \f$ and a right-hand side vector \f$ y \f$. The unknown
  // is the vector \f$ x \f$.
  // Now, we can generate a matrix that shall be inverted (actually we do not
  // invert the matrix but use the CG algorithm). Note that CG requires a
  // symmetric positive definite (SPD) matrix and it is not entirely trivial
  // to write down an SPD matrix. If you fail to provide an SPD matrix to the
  // CG algorithm, there is no guarantee that it will converge. You might be
  // lucky, or you might not be...
  const unsigned SIZE = 20;
  float A_host[SIZE][SIZE];
  for (unsigned row = 0; row < SIZE; ++row)
    for (unsigned column = 0; column <= row; ++column)
    {
      A_host[row][column] = (float(SIZE) - float(row) + float(SIZE) / 2.0)
                            * (float(column) + 1.0);
      A_host[column][row] = A_host[row][column];
      if (row == column)
        A_host[row][column] = 2.0 * float(SIZE) + float(row) + float(column);
    }

  // The matrix is still in the host's memory and has to be transferred to the
  // GPU. This is done automatically by the constructor of \p GPUMatrixPitched.
  agile::GPUMatrixPitched<float> A(SIZE, SIZE, (float*)A_host);

  // Next we need a reference solution. We can create any vector we like at
  // this place.
  std::vector<float> x_reference_host(SIZE);
  for (unsigned counter = 0; counter < SIZE; ++counter)
    x_reference_host[counter] = float(SIZE) - float(counter) + float(SIZE/3);

  // This vector has to be transferred to the GPU memory too. For vectors, this
  // can be achieved by the member function \p assignFromHost.
  agile::GPUVector<float> x_reference;
  x_reference.assignFromHost(x_reference_host.begin(), x_reference_host.end());

  // We wrap the GPU matrix from above into a forward operator called
  // \p ForwardMatrix. Forward operators are simply objects that implement
  // the parenthesis-operator \p operator() which takes an
  // \p accumulated vector and returns a \p distributed one. In all other
  // respects the operator is a black box for us.
  // The \p ForwardMatrix operator requires a reference to the communicator
  // when constructing the object so that it has access to the network.
  typedef agile::ForwardMatrix<communicator_type, agile::GPUMatrixPitched<float> >
    forward_type;
  forward_type forward(com, A);

  // We also want to use a preconditioner, which means that we change from
  // the original problem \f$ Ax = y \f$ to the equivalent one
  // \f$ PAx = Py \f$, where \f$ P \f$ is a preconditioner. The rationale is
  // that the matrix \f$ A \f$ is most often ill-conditioned, so the CG
  // algorithm either does not converge at all or needs many iterations. A
  // preconditioner makes the whole system better conditioned. The simplest
  // choice is the identity \f$ P = I \f$ (which means no preconditioning at
  // all). The best choice would be \f$ P = A^{-1} \f$, as we would then have
  // the solution for \f$ x \f$ in the very first step (but this requires the
  // inverse of \f$ A \f$, which is what we wanted to avoid in the first
  // place). An 'intermediate' possibility is \f$ P = diag(A)^{-1} \f$, which
  // is easy and fast to invert and gives better results than the identity.
  // A preconditioner belongs to the inverse operators. All inverse operators
  // implement a parenthesis-operator which takes a \p distributed vector as
  // input and returns an \p accumulated one (thus the opposite of the
  // forward operators).
#if JACOBI_PRECONDITIONER
  typedef agile::JacobiPreconditioner<communicator_type, float>
    preconditioner_type;
  std::vector<float> diagonal(SIZE);
  for (unsigned row = 0; row < SIZE; ++row)
    diagonal[row] = A_host[row][row];
  preconditioner_type preconditioner(com, diagonal);
#else
  typedef agile::InverseIdentity<communicator_type> preconditioner_type;
  preconditioner_type preconditioner(com);
#endif
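  // (Applying the Jacobi preconditioner is just a componentwise division,
  // \f$ (Pr)_i = r_i / A_{ii} \f$, which is why extracting the diagonal is
  // all the setup it needs.)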

  // The last operator needed is a measure. A measure operator again has a
  // parenthesis-operator. This time it takes an \p accumulated vector as its
  // first input and a \p distributed one as its second input and returns a
  // scalar that somehow measures the size of the vectors. An example is the
  // scalar product operator.
  typedef agile::ScalarProductMeasure<communicator_type> measure_type;
  measure_type scalar_product(com);

  // Finally, generate the PCG solver. It needs the absolute and relative
  // tolerances as input so that it knows when the solution is good enough for
  // our purposes. Furthermore, it requires the maximum number of iterations
  // after which it simply capitulates without having found a solution.
  const double REL_TOLERANCE = 1e-12;
  const double ABS_TOLERANCE = 1e-6;
  const unsigned MAX_ITERATIONS = 100;
  agile::PreconditionedConjugateGradient<communicator_type, forward_type,
                                           preconditioner_type, measure_type>
    pcg(com, forward, preconditioner, scalar_product,
        REL_TOLERANCE, ABS_TOLERANCE, MAX_ITERATIONS);

  // What we have not yet generated is the right-hand side \f$ y \f$. This is
  // simply one call to our forward operator.
  agile::GPUVector<float> y(SIZE);
  forward(x_reference, y);

  // We need one more vector to hold the result of the CG algorithm. Note that
  // we also supply the initial guess for the solution via this vector.
  agile::GPUVector<float> x(SIZE);

  // Finally, we have constructed, initialized, wrapped... everything. The only
  // thing left to do is to call the CG operator.
  pcg(y, x);

  // Print some statistics (and hope that the operator actually converged).
  if (pcg.convergence())
    std::cout << "CG converged in ";
  else
    std::cout << "Error: CG did not converge in ";
  std::cout << pcg.getIteration() + 1 << " iterations." << std::endl;
  std::cout << "Initial residual    = " << pcg.getRho0() << std::endl;
  std::cout << "Final residual      = " << pcg.getRho() << std::endl;
  std::cout << "Ratio rho_k / rho_0 = " << pcg.getRho() / pcg.getRho0()
            << std::endl;

  // As the vectors in this example were quite small we can even print them to
  // standard output.
  std::cout << "Reference: " << std::endl << "  ";
  for (unsigned counter = 0; counter < x_reference_host.size(); ++counter)
    std::cout << x_reference_host[counter] << " ";
  std::cout << std::endl;

  // The solution is still on the GPU and has to be transferred to the CPU memory.
  // This is accomplished using \p copyToHost.
  std::vector<float> x_host;
  x.copyToHost(x_host);

  // Output the solution, too.
  std::cout << "CG solution: " << std::endl << "  ";
  for (unsigned counter = 0; counter < x_host.size(); ++counter)
    std::cout << x_host[counter] << " ";
  std::cout << std::endl;

  // Finally, we also compute the difference between the reference solution and
  // the CG solution (of course, we do this on the GPU).
  agile::GPUVector<float> difference(SIZE);
  subVector(x_reference, x, difference);

  // To measure the distance, we use the scalar product measure we have
  // introduced above. Note that this operator wants the first vector in
  // accumulated format and the second one in distributed format. The solution
  // we got from the CG algorithm is accumulated (because CG is an inverse
  // operator). This means, we have to distribute the solution to have mixed
  // formats.
  agile::GPUVector<float> difference_dist(difference);
  com.distribute(difference_dist);
  std::cout << "L2 of difference: "
            << std::sqrt(std::abs(scalar_product(difference, difference_dist)))
            << std::endl;

  // So, that's it.
  return 0;
}
Example no. 6
/** \brief  Compute the search direction using the PCG method. */
void compute_searchdir_pcg(problem_data_t * pdat, variables_t * vars,
                           double t, double s, double gap, pcg_status_t * pcgstat,
                           adata_t * adata, mdata_t * mdata, double *precond,
                           double *tmp_m1, double *A2h, double *tmp_x1)
{
    int i, m, n, nz;
    double *p0, *p1, *p2, *p3;
    double normg, pcgtol, pcgmaxi, multfact;

    dmatrix *matX1, *matX2;
    double lambda, tinv;
    double *g, *h, *z, *expz, *expmz, *ac, *ar, *b, *d1, *d2, *Aw;
    double *x, *v, *w, *u, *dx, *dv, *dw, *du, *gv, *gw, *gu, *gx;

    static double pcgtol_factor = 1.0;


    get_problem_data(pdat, &matX1, &matX2, &ac, &ar, &b, &lambda);
    get_variables(vars, &x, &v, &w, &u, &dx, &dv, &dw, &du, &gx, &gv,
                  &gw, &gu, &g, &h, &z, &expz, &expmz, &d1, &d2, &Aw);
    m  = matX1->m;
    n  = matX1->n;
    nz = matX1->nz;
    tinv = 1.0 / t;

    p0 = &precond[0];
    p1 = &precond[1];
    p2 = &precond[1+n];
    p3 = &precond[1+n+n];

    /* dmat_vset(n+n+1, 0, dx); */

    dmat_yATx(matX2, h, A2h);        /* A2h = A2'*h */

    multfact = 0.0;
    if (ac != NULL)
    {
        /* h.*ac */
        dmat_elemprod(m, h, ac, tmp_m1);

        dmat_vset(n, 0, tmp_x1);
        dmat_yAmpqTx(matX1, NULL, NULL, tmp_m1, tmp_x1);
        dmat_elemprod(n, ar, tmp_x1, tmp_x1);

        for (i = 0; i < m; i++)
        {
            multfact += h[i] * ac[i] * ac[i];
        }
    }

    p0[0] = 0;
    for (i = 0; i < m; i++)
    {
        p0[0] += b[i] * b[i] * h[i];
    }

    /* complete forming gradient and d1, d2, precond */
    for (i = 0; i < n; i++)
    {
        double q1, q2, d3, div;

        q1 = 1.0 / (u[i] + w[i]);
        q2 = 1.0 / (u[i] - w[i]);

        gw[i] -= (q1 - q2) * tinv;        /* A'*g   - (q1-q2) */
        gu[i] = lambda - (q1 + q2) * tinv;        /* lambda - (q1+q2) */

        d1[i] = (q1 * q1 + q2 * q2) * tinv;
        d2[i] = (q1 * q1 - q2 * q2) * tinv;

        if (ac != NULL)
        {
            d3 = A2h[i] + d1[i] + multfact*ar[i]*ar[i] - 2*tmp_x1[i];
        }
        else
        {
            d3 = A2h[i] + d1[i];
        }
        div = 1 / (d3 * d1[i] - d2[i] * d2[i]);

        p1[i] = d1[i] * div;
        p2[i] = d2[i] * div;
        p3[i] = d3 * div;
    }
    normg = dmat_norm2(n+n+1, gx);

    pcgtol = min(1e-1, 0.3*gap/min(1.0,normg));
    /*
    pcgtol = min(1e-1, 0.3*gap/min(1.0,sqrt(normg)));
    */
    pcgmaxi = MAX_PCG_ITER;
    if (s < 1e-5)
    {
        pcgtol_factor *= 0.5;
    }
    else
    {
        pcgtol_factor = 1.0;
    }
    pcgtol = pcgtol * pcgtol_factor;

    dmat_waxpby(n+n+1, -1, gx, 0, NULL, tmp_x1);

    pcg(dx, pcgstat, afun, adata, mfun, mdata, tmp_x1, pcgtol, pcgmaxi, n+n+1);
}
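
The div expression in the loop above is the standard 2-by-2 inverse. Assuming, as the p1/p2/p3 layout suggests, that the preconditioner applies per index i the inverse of the symmetric block built from d3, d2, and d1, we have

\[
\begin{pmatrix} d_3 & d_2 \\ d_2 & d_1 \end{pmatrix}^{-1}
= \frac{1}{d_3 d_1 - d_2^2}
\begin{pmatrix} d_1 & -d_2 \\ -d_2 & d_3 \end{pmatrix},
\]

so storing p1 = d1*div, p2 = d2*div, and p3 = d3*div precomputes everything mfun needs; the sign handling inside mfun is not shown, so this reading is an inference from the snippet.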