Ejemplo n.º 1
0
/**
 * Solve A * x = b with restarted flexible GMRES in real double precision.
 *
 * One outer pass ("cycle") forms V(0) = -(A*x - b)/beta with
 * beta = ||A*x - b||, then performs up to solver_par->restart inner steps.
 * In inner step i the left and then the right preconditioner are applied to
 * V(i), the result is kept in W(i), A*W(i) is orthogonalized against
 * V(0..i), normalized and stored in V(i+1), and column i of H is updated
 * with plane rotations. When the cycle ends, the small upper triangular
 * system is solved on the host and x is updated from the stored W(j).
 *
 * The inner iteration stops when |s[i+1]| / resid0 <= rtol or
 * |s[i+1]| <= atol, where resid0 is the residual norm measured in the first
 * cycle. solver_par->numiter counts outer cycles (it is incremented once per
 * outer pass).
 *
 * V(i), W(i), H(k,i), CHECK, ATOLERANCE, ApplyPlaneRotation and
 * GeneratePlaneRotation are not defined in this block and must be provided
 * by the enclosing file.
 * NOTE(review): CHECK is presumably the usual error macro that stores a
 * nonzero status in `info` and jumps to `cleanup` -- confirm.
 *
 * @param[in]     A            system matrix; A.num_rows is the problem size
 * @param[in]     b            right-hand side (b.dval is read)
 * @param[in,out] x            initial guess on entry, updated in place
 * @param[in,out] solver_par   reads restart, rtol, atol, maxiter, verbose;
 *                             receives solver, numiter, spmv_count, init_res,
 *                             iter_res, final_res, runtime, info and, when
 *                             verbose > 0, res_vec / timing entries
 * @param[in]     precond_par  forwarded to the left/right preconditioner calls
 * @param[in]     queue        queue handed to every MAGMA call
 *
 * @return the status also stored in solver_par->info: MAGMA_SUCCESS,
 *         MAGMA_SLOW_CONVERGENCE, MAGMA_DIVERGENCE, or whatever CHECK
 *         recorded for a failing call.
 */
extern "C" magma_int_t
magma_dfgmres(
    magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
    magma_d_solver_par *solver_par,
    magma_d_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    magma_int_t dofs = A.num_rows;

    // prepare solver feedback
    solver_par->solver = Magma_PGMRES;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    
    //Chronometry
    real_Double_t tempo1, tempo2;

    magma_int_t dim = solver_par->restart;
    magma_int_t m1 = dim+1; // used inside H macro
    magma_int_t i, j, k;
    double beta;
    
    // resid0 is the reference norm for the relative criterion; r0 is the
    // threshold used only for the "initial guess already good enough" test.
    double rel_resid, resid0=1, r0=0.0, betanom = 0.0, nom;
    
    // v_t / w_t are lightweight views: their dval is pointed at V(i) / W(i)
    // so those columns can be passed to routines taking a magma_d_matrix.
    // They own no memory and are therefore not freed in cleanup.
    magma_d_matrix v_t={Magma_CSR}, w_t={Magma_CSR}, t={Magma_CSR}, t2={Magma_CSR}, V={Magma_CSR}, W={Magma_CSR};
    v_t.memory_location = Magma_DEV;
    v_t.num_rows = dofs;
    v_t.num_cols = 1;
    v_t.dval = NULL;
    v_t.storage_type = Magma_DENSE;

    w_t.memory_location = Magma_DEV;
    w_t.num_rows = dofs;
    w_t.num_cols = 1;
    w_t.dval = NULL;
    w_t.storage_type = Magma_DENSE;
    
    double temp;
    
    // host-side work arrays: H (dim+1 x dim), s, and the rotation
    // coefficients cs / sn
    double *H={0}, *s={0}, *cs={0}, *sn={0};

    CHECK( magma_dvinit( &t, Magma_DEV, dofs, 1, MAGMA_D_ZERO, queue ));
    CHECK( magma_dvinit( &t2, Magma_DEV, dofs, 1, MAGMA_D_ZERO, queue ));
    
    CHECK( magma_dmalloc_pinned( &H, (dim+1)*dim ));
    CHECK( magma_dmalloc_pinned( &s,  dim+1 ));
    CHECK( magma_dmalloc_pinned( &cs, dim ));
    CHECK( magma_dmalloc_pinned( &sn, dim ));
    
    // V holds dim+1 basis vectors, W holds dim preconditioned vectors
    CHECK( magma_dvinit( &V, Magma_DEV, dofs*(dim+1), 1, MAGMA_D_ZERO, queue ));
    CHECK( magma_dvinit( &W, Magma_DEV, dofs*dim, 1, MAGMA_D_ZERO, queue ));
    
    CHECK(  magma_dresidual( A, b, *x, &nom, queue));

    // provisional value; overwritten by the norm measured in the first
    // cycle, kept only if that cycle aborts before measuring it
    solver_par->init_res = nom;

    tempo1 = magma_sync_wtime( queue );
    do
    {
        solver_par->numiter++;
        // compute initial residual and its norm
        // A.mult(n, 1, x, n, V(0), n);                        // V(0) = A*x
        CHECK( magma_d_spmv( MAGMA_D_ONE, A, *x, MAGMA_D_ZERO, t, queue ));
        solver_par->spmv_count++;
        magma_dcopy( dofs, t.dval, 1, V(0), 1, queue );
        
        temp = MAGMA_D_MAKE(-1.0, 0.0);
        magma_daxpy( dofs,temp, b.dval, 1, V(0), 1, queue );           // V(0) = V(0) - b
        beta = MAGMA_D_MAKE( magma_dnrm2( dofs, V(0), 1, queue ), 0.0 ); // beta = norm(V(0))
        if( magma_d_isnan_inf( beta ) ){
            info = MAGMA_DIVERGENCE;
            break;      // leaves the outer loop directly
        }
        
        // First cycle only. numiter was already incremented above, so the
        // first pass is numiter == 1; the previous test against 0 could
        // never be true, which left resid0 at 1 (turning the "relative"
        // residual into an absolute one) and disabled this early exit.
        if (solver_par->numiter == 1){
            solver_par->init_res = MAGMA_D_REAL( beta );
            resid0 = MAGMA_D_REAL( beta );
        
            r0 = resid0 * solver_par->rtol;
            if ( r0 < ATOLERANCE )
                r0 = ATOLERANCE;
            // The initial guess is already accurate enough. This also keeps
            // beta == 0 away from the 1/beta scaling below.
            if ( resid0 < r0 ) {
                tempo2 = magma_sync_wtime( queue );
                solver_par->runtime = (real_Double_t) tempo2-tempo1;
                solver_par->final_res = solver_par->init_res;
                solver_par->iter_res = solver_par->init_res;
                info = MAGMA_SUCCESS;
                goto cleanup;
            }
        }
        if ( solver_par->verbose > 0 ) {
            solver_par->res_vec[0] = resid0;
            solver_par->timing[0] = 0.0;
        }
        temp = -1.0/beta;
        magma_dscal( dofs, temp, V(0), 1, queue );                 // V(0) = -V(0)/beta

        // right-hand side of the small least-squares problem: beta * e_1
        for (i = 1; i < dim+1; i++)
            s[i] = MAGMA_D_ZERO;
        s[0] = beta;

        i = -1;
        do {
            i++;
            
            // M.apply(n, 1, V(i), n, W(i), n);
            v_t.dval = V(i);
            CHECK( magma_d_applyprecond_left( MagmaNoTrans, A, v_t, &t, precond_par, queue ));
            CHECK( magma_d_applyprecond_right( MagmaNoTrans, A, t, &t2, precond_par, queue ));
            magma_dcopy( dofs, t2.dval, 1, W(i), 1, queue );

            // A.mult(n, 1, W(i), n, V(i+1), n);
            w_t.dval = W(i);
            CHECK( magma_d_spmv( MAGMA_D_ONE, A, w_t, MAGMA_D_ZERO, t, queue ));
            solver_par->spmv_count++;
            magma_dcopy( dofs, t.dval, 1, V(i+1), 1, queue );
            
            // orthogonalize V(i+1) against V(0..i), one vector at a time
            for (k = 0; k <= i; k++)
            {
                H(k, i) = magma_ddot( dofs, V(k), 1, V(i+1), 1, queue );
                // V(i+1) -= H(k, i) * V(k);
                magma_daxpy( dofs,-H(k,i), V(k), 1, V(i+1), 1, queue );
            }

            H(i+1, i) = MAGMA_D_MAKE( magma_dnrm2( dofs, V(i+1), 1, queue), 0. ); // H(i+1,i) = ||r||
            // NOTE(review): H(i+1,i) == 0 (exact breakdown) is not guarded
            // here and would make this scaling factor infinite.
            temp = 1.0 / H(i+1, i);
            // V(i+1) = V(i+1) / H(i+1, i)
            magma_dscal( dofs, temp, V(i+1), 1, queue );    //  (to be fused)
    
            // apply the rotations of the previous steps to the new column,
            // then generate and apply the rotation for this step
            for (k = 0; k < i; k++)
                ApplyPlaneRotation(&H(k,i), &H(k+1,i), cs[k], sn[k]);
          
            GeneratePlaneRotation(H(i,i), H(i+1,i), &cs[i], &sn[i]);
            ApplyPlaneRotation(&H(i,i), &H(i+1,i), cs[i], sn[i]);
            ApplyPlaneRotation(&s[i], &s[i+1], cs[i], sn[i]);
            
            // |s[i+1]| is used as the current residual estimate
            betanom = MAGMA_D_ABS( s[i+1] );
            rel_resid = betanom / resid0;
            if ( solver_par->verbose > 0 ) {
                tempo2 = magma_sync_wtime( queue );
                if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                    solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                            = (real_Double_t) betanom;
                    solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                            = (real_Double_t) tempo2-tempo1;
                }
            }
            if (rel_resid <= solver_par->rtol || betanom <= solver_par->atol ){
                info = MAGMA_SUCCESS;
                break;
            }
        }
        while (i+1 < dim && solver_par->numiter+1 <= solver_par->maxiter);

        // solve upper triangular system in place
        for (j = i; j >= 0; j--)
        {
            s[j] /= H(j,j);
            for (k = j-1; k >= 0; k--)
                s[k] -= H(k,j) * s[j];
        }

        // update the solution
        for (j = 0; j <= i; j++)
        {
            // x = x + s[j] * W(j)
            magma_daxpy( dofs, s[j], W(j), 1, x->dval, 1, queue );
        }
    }
    // Stop on either tolerance, mirroring the inner-loop test. Previously
    // only rtol was checked here, so a run that had converged by atol kept
    // restarting until maxiter.
    while (rel_resid > solver_par->rtol
                && betanom > solver_par->atol
                && solver_par->numiter+1 <= solver_par->maxiter);

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    CHECK( magma_dresidual( A, b, *x, &residual, queue ));
    solver_par->iter_res = betanom;
    solver_par->final_res = residual;

    // classify the outcome
    if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        // the residual went down but the cycle budget was exhausted
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    // free pinned memory
    magma_free_pinned(s);
    magma_free_pinned(cs);
    magma_free_pinned(sn);
    magma_free_pinned(H);

    //free DEV memory
    magma_dmfree( &V, queue);
    magma_dmfree( &W, queue);
    magma_dmfree( &t, queue);
    magma_dmfree( &t2, queue);

    solver_par->info = info;
    return info;
} /* magma_dfgmres */
Ejemplo n.º 2
0
// Restarted GMRES(m) for A*x = b with the preconditioner M applied from the
// left (every residual and every Krylov vector is multiplied by M).
//
// In/out parameters:
//   x        - initial guess on entry, updated approximation on return
//   H        - caller-provided work matrix; entries H(0..m, 0..m-1) are used
//   m        - restart length (only read here)
//   max_iter - iteration budget on entry; on a successful return, the number
//              of iterations performed (left unchanged on failure)
//   tol      - requested relative residual on entry; the residual actually
//              reached on return
//
// Returns 0 when the preconditioned relative residual dropped below tol,
// 1 when max_iter was exhausted first.
//
// The expression (u,v) is used as the inner product of two Vectors; Update,
// ApplyPlaneRotation and GeneratePlaneRotation are defined outside this
// block.
int 
GMRES(const Operator &A, Vector &x, const Vector &b,
      const Preconditioner &M, Matrix &H, int &m, int &max_iter,
      Real &tol)
{
  Real resid;
  int i, j = 1, k;
  Vector s(m+1), cs(m+1), sn(m+1), w,r,Ax;
  r=M*b;
  Real normb = sqrt((r,r));
  
  Ax=b;
  Ax -=A * x;
  r = M*(Ax);
  Real beta = sqrt((r,r));
  
  // fabs rather than abs: abs may resolve to the integer overload and
  // truncate a floating-point argument.
  if ( fabs(normb) < 1.e-30)
    normb = 1;
  
  // The initial guess is already good enough.
  if ((resid = beta / normb) <= tol) {
    tol = resid;
    max_iter = 0;
    return 0;
  }

  Vector *v = new Vector[m+1];

  while (j <= max_iter) {
    v[0] = r / beta;    
    s = 0.0;
    s(0) = beta;
    
    for (i = 0; i < m && j <= max_iter; i++, j++) {
      w = M*(Ax=A * v[i]);
      // orthogonalize w against v[0..i]
      for (k = 0; k <= i; k++) {
        H(k, i) = (w, v[k]);
        w -= H(k, i) * v[k];
      }
      H(i+1, i) = sqrt((w,w));
      v[i+1] = w  / H(i+1, i) ; 

      // bring column i of H to upper triangular form
      for (k = 0; k < i; k++)
        ApplyPlaneRotation(H(k,i), H(k+1,i), cs(k), sn(k));
      
      GeneratePlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
      ApplyPlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
      ApplyPlaneRotation(s(i), s(i+1), cs(i), sn(i));
      
      if ((resid = fabs(s(i+1)) / normb) < tol) {
        Update(x, i, H, s, v);
        tol = resid;
        max_iter = j;
        delete [] v;
        return 0;
      }
    }
    // i is the number of steps actually completed in this cycle. It equals
    // m after a full cycle but is smaller when max_iter ran out first; using
    // m - 1 unconditionally would back-substitute over columns of H and
    // vectors v[] that were never computed in this cycle.
    Update(x, i - 1, H, s, v);
    Ax = b;
    Ax -= A*x;
    r = M*(Ax);
    beta = sqrt((r,r));
    if ((resid = beta / normb) < tol) {
      tol = resid;
      max_iter = j;
      delete [] v;
      return 0;
    }
  }
  
  tol = resid;
  delete [] v;
  return 1;
}
Ejemplo n.º 3
0
bool IterativeSolvers::gmres(const IRCMatrix &A,
                             Vector &x,
                             const Vector &b,
                             const Preconditioner &M) {
    const idx N = x.getLength();
    idx i, j = 1, k;
    Vector s(maxInnerIter + 1);
    Vector cs(maxInnerIter + 1);
    Vector sn(maxInnerIter + 1);
    Vector w(N);
    real normb = norm(M.solve(b));
    Vector r = M.solve(b - A * x);
    real beta = norm(r);
    if (normb == 0.0)
        normb = 1;
    real res(norm(r) / normb);
    if (res <= toler) {
        toler = res;
        maxIter = 0;
        return true;
    }
    Vector *v = new Vector[maxInnerIter + 1];
    for (idx id = 0; id < maxInnerIter + 1; ++id)
        v[id] = Vector(N);
    // CREATE HESSENBERG MATRIX NEEDED TO STORE INTERMEDIATES
    DenseMatrix H(maxInnerIter + 1, maxInnerIter);
    Vector temp(N);
    Vector temp2(maxInnerIter + 1);
    // MAIN LOOP
    while (j <= maxIter) {
        v[0] = r * (1.0 / beta);
        s = 0.0;
        s(0) = beta;
        // INNER ITERATIONS
        for (i = 0; i < maxInnerIter && j <= maxIter; i++, j++) {
            // CALCULATE w = M^{-1}(A*v[i])
            multiply(A, v[i], temp);
            M.solveMxb(w, temp);
            // PRE-CALCULATE DOT PRODUCTS IN PARALLEL
            // H(k,i) = dot( v[k], w)
#ifdef USES_OPENMP
            #pragma omp parallel for
#endif
            for (k = 0; k <= i ; ++k) {
                register real dp(0);
                for (idx id = 0 ; id < N ; ++id)
                    dp += w[id] * v[k][id];
                H(k, i) = dp; //dot(w,v[k]);
            }
            for (k = 0; k <= i; ++k) {
                // w -= v[k]*H(k,i) without temporaries
                register real tempr = H(k, i);
#ifdef USES_OPENMP
                #pragma omp parallel for // why is this loop so critical??
#endif
                for (idx id = 0 ; id < N ; ++id)
                    w[id] -= v[k][id] * tempr;
            }
            // BELOW PARALLEL REGION CALCULATES:
            // H(i+1,i) = norm(w);
            // v[i+1] = w * (1.0 / H(i+1, i));
            H(i + 1, i) = 0;
            real tempr(0);
#ifdef USES_OPENMP
            #pragma omp parallel shared(tempr)
#endif
            {
#ifdef USES_OPENMP
                #pragma omp for reduction(+:tempr)
#endif
                for (idx id = 0 ; id < N ; ++id)
                    tempr += w[id] * w[id]; //norm(w);
#ifdef USES_OPENMP
                #pragma omp single
#endif
                {
                    H(i + 1, i) = sqrt(tempr);
                    tempr = (1.0 / H(i + 1, i));
                }
#ifdef USES_OPENMP
                #pragma omp for
#endif
                for (idx id = 0 ; id < N ; ++id)
                    v[i + 1][id] = w[id] * tempr;
            }// end for omp parallel
            for (k = 0; k < i; k++)
                ApplyPlaneRotation(H(k, i), H(k + 1, i), cs(k), sn(k));
            GeneratePlaneRotation(H(i, i), H(i + 1, i), cs(i), sn(i));
            ApplyPlaneRotation(H(i, i), H(i + 1, i), cs(i), sn(i));
            ApplyPlaneRotation(s(i), s(i + 1), cs(i), sn(i));
            res = fabs(s(i + 1)) / normb;
            if (res < toler) {
                // COPY S INTO temp WITHOUT RESIZING
                for (idx id = 0 ; id < maxInnerIter + 1 ; ++id)
                    temp2[id] = s[id];
                Update(x, i, H, temp2, v);
                toler = res;
                maxIter = j;
                delete [] v;
                return true;
            }
        }// end for i IINNER ITERATIONS
        // COPY S INTO temp WITHOUT RESIZING
        for (idx id = 0 ; id < maxInnerIter + 1 ; ++id)
            temp2[id] = s[id];
        Update(x, maxInnerIter - 1, H, temp2, v);
        //multiply(A, x, temp);     //r = M.solve(b - A * x);
        M.solveMxb(r, b - A * x);
        beta = norm(r);
        res = beta / normb;
        if (res < toler) {
            toler = res;
            maxIter = j;
            delete [] v;
            return true;
        }
    }
    toler = res;
    delete [] v;
    return false;
}