bool SystemSolver_FASTCG::solve(const SparseMatrix& A_in, Vector& x_in, const Vector& b_in) {

        typedef double CoeffType ;
        typedef Array1d<CoeffType> VectorType ;
        typedef SparseMatrixBCRS<CoeffType, 2, 2> MatrixType ;

        unsigned int N0 = A_in.n() ;
        std::cerr << "N0 = " << N0 << std::endl ;
        Permutation permutation; 
        compute_permutation(A_in, permutation) ;

        MatrixType A ;
        ::OGF::convert_matrix(A_in, A, permutation) ;

//      ::OGF::compress_indices(A) ;

        std::cerr << "filling ratio:" << (A.filling_ratio() * 100.0) << "%" << std::endl ;
        if(false) {
            std::cerr << "Saving matrix to disk (matrix.dat)" << std::endl ;
            std::ofstream out("matrix.dat") ;
//            A.print(out) ;
            ::OGF::output_matrix(A, out) ;
        }

        unsigned int N = A.n() ; // Can be greater than N0 due to blocking
        N = QuickBLAS::aligned_size(N, sizeof(CoeffType)) ;

        std::cerr <<"N = " << N << std::endl ;

        int max_iter = (nb_iters_ == 0) ? 5 * N : nb_iters_ ;
        double eps = threshold_ ;

        std::cerr << "nb iters = " << max_iter << "  threshold = " << eps << std::endl ;

        VectorType b(N, alignment_for_SSE2) ;
        VectorType x(N, alignment_for_SSE2) ;

        permutation.invert_permute_vector(b_in, b) ;
        permutation.invert_permute_vector(x_in, x) ;

        solve_cg(A, x, b, eps, max_iter) ;

        permutation.permute_vector(x, x_in) ;

        return true ;
    }