/**
 * Multiplies this matrix by a dense vector using the cached CSR equivalent.
 *
 * The product is delegated to MKL's mkl_dcsrgemv, which operates on a square
 * matrix of order numRows = mBlockHeight * mNumBlocksHeight: it reads numRows
 * entries of the input and writes numRows entries of the output.
 *
 * @param rhs  Dense vector to multiply; must hold at least numRows entries.
 * @return     A vector of the same length as rhs. The first numRows entries
 *             hold the product; any remaining entries are zero.
 *
 * Aborts via CHECK if the CSR cache is stale, if rhs is too short, or if the
 * build was made without _MKL_IMPLEMENT.
 */
Eigen::VectorXd BlockSparseMatrix::operator* (const Eigen::VectorXd& rhs) const
{
    CHECK(!mbCSRDirty);

    const int vectorLength = static_cast<int>(rhs.size());
    int numRows = mBlockHeight * mNumBlocksHeight;

    // MKL touches numRows entries of both vectors; a shorter rhs would make it
    // read and write past the end of the buffers.
    CHECK(vectorLength >= numRows) << "rhs is shorter than the matrix dimension!";

    // Let MKL write straight into the result instead of going through
    // temporary heap buffers. Zero-filling keeps any tail beyond numRows
    // well defined.
    Eigen::VectorXd ret = Eigen::VectorXd::Zero(vectorLength);

#ifdef _MKL_IMPLEMENT
    char trans = 'n';
    // The const_casts only satisfy MKL's (historically) non-const signature;
    // with trans == 'n' the matrix arrays and rhs are inputs.
    mkl_dcsrgemv(&trans, &numRows,
                 const_cast<double*>(mCSREquivalent.GetValueData()),
                 const_cast<int*>(mCSREquivalent.GetRowId()),
                 const_cast<int*>(mCSREquivalent.GetColumnId()),
                 const_cast<double*>(rhs.data()),
                 ret.data());
#else
    CHECK(0) << "_MKL_IMPLEMENT not defined!";
#endif

    return ret;
}
void cMKLSolver::step(MatrixX1C &solvec, MatrixX1C &rhsvec) { // variables required by gmres MKL_INT RCI_request; MKL_INT itercount; MKL_INT ivar = size; MKL_INT ipar[128]; double dpar[128]; // pointers to vectors double *solution = solvec.data(); double *rhs = rhsvec.data(); // zero initial guess for (int i = 0; i < size; i++) { solution[i] = 0.0; } // initialise gmres dfgmres_init(&ivar, solution, rhs, &RCI_request, ipar, dpar, gmres_tmp); if (RCI_request != 0) { fatal_error("initialising gmres failed!"); } // set desired parameters ipar[7] = 1; // perform maximum iterations test ipar[8] = 1; // perform residual stopping test ipar[9] = 0; // do not perform the user defined stopping test ipar[10] = 1; // run preconditioned gmres ipar[14] = gmres_restarts; // how often to restart gmres dpar[0] = gmres_relative_tol; // relative tolerance dpar[1] = gmres_absolute_tol; // absolute tolerance // check correctness of parameters dfgmres_check(&ivar, solution, rhs, &RCI_request, ipar, dpar, gmres_tmp); if (RCI_request != 0) { fatal_error("param check failed!"); } // start gmres reverse communication dfgmres(&ivar, solution, rhs, &RCI_request, ipar, dpar, gmres_tmp); bool complete = false; while (not complete) { // success if (RCI_request == 0) { complete = true; } // compute matrix vector multiplication else if (RCI_request == 1) { // compute gmres_tmp[ipar[22]-1] = A*gmres_tmp[ipar[21]-1] // note: ipar[21] and ipar[22] contain fortran style addresses so we must subtract 1 char cvar = 'N'; mkl_dcsrgemv(&cvar, &ivar, Acsr, Ai, Aj, &gmres_tmp[ipar[21] - 1], &gmres_tmp[ipar[22] - 1]); } // apply the preconditioner else if (RCI_request == 3) { char cvar = 'N'; char cvar1 = 'L'; char cvar2 = 'U'; mkl_dcsrtrsv(&cvar1, &cvar, &cvar2, &ivar, bilut, ibilut, jbilut, &gmres_tmp[ipar[21] - 1], gmres_trvec); cvar1='U'; cvar='N'; cvar2='N'; mkl_dcsrtrsv(&cvar1, &cvar, &cvar2, &ivar, bilut, ibilut, jbilut, gmres_trvec, &gmres_tmp[ipar[22] - 1]); } // check norm of generated vector else 
if (RCI_request == 4) { if (dpar[6] < 1.e-12) { complete = true; } } // failed else { std::ostringstream msgstream; msgstream << "fgmres failed: RCI_request " << RCI_request << "!"; fatal_error(msgstream.str()); } // next gmres call if (not complete) { dfgmres(&ivar, solution, rhs, &RCI_request, ipar, dpar, gmres_tmp); } } // get the result and print iteration count ipar[12] = 0; dfgmres_get(&ivar, solution, rhs, &RCI_request, ipar, dpar, gmres_tmp, &itercount); std::cout << itercount << " " << dpar[4] << std::endl; }
int solve_bicgstab(csr_t* mat, csr_t* ilu, double* b, double* x) { double tol = 1e-6, floatone = 1.0; const int max_iter = 200; int n = mat->n; int nnz = mat->nnz; double *r, *p, *y, *zm1, *zm2, *rm2, *rm1, *rm3, nrm0, nrm; r = (double*) calloc (n, sizeof(double)); p = (double*) calloc (n, sizeof(double)); y = (double*) calloc (n, sizeof(double)); rm1 = (double*) calloc (n, sizeof(double)); rm2 = (double*) calloc (n, sizeof(double)); rm3 = (double*) calloc (n, sizeof(double)); zm1 = (double*) calloc (n, sizeof(double)); zm2 = (double*) calloc (n, sizeof(double)); double rho = 1.0, rho1, beta = 0.0, alpha = 0.0, omega, temp, temp1; char lower1 = 'L', lower = 'N', lower2 = 'U'; char upper1 = 'U', upper = 'N', upper2 = 'N'; #ifdef TIMER double timerLUSol = 0, timerLUSol1, timerSpMV = 0, timerSpMV1, timerVector = 0, timerVector1; double timerTotal = omp_get_wtime(); #endif cblas_dcopy (n, b, 1, r, 1); cblas_dcopy (n, r, 1, p, 1); cblas_dcopy (n, r, 1, zm1, 1); nrm0 = cblas_dnrm2 (n, r, 1); for (int k = 0; k < max_iter; k++) { rho1 = rho; #ifdef TIMER timerVector1 = omp_get_wtime(); #endif rho = cblas_ddot(n, zm1, 1, r, 1); if ( k > 0 ) { beta = (rho / rho1) * (alpha / omega); cblas_daxpy (n, -omega, zm2, 1, p, 1); cblas_dscal (n, beta, p, 1); cblas_daxpy (n, floatone, r, 1, p, 1); } #ifdef TIMER timerVector += omp_get_wtime() - timerVector1; timerLUSol1 = omp_get_wtime(); #endif mkl_dcsrtrsv (&lower1, &lower, &lower2, &n, ilu->val, ilu->ia, ilu->ja, p, y); mkl_dcsrtrsv (&upper1, &upper, &upper2, &n, ilu->val, ilu->ia, ilu->ja, y, rm2); #ifdef TIMER timerLUSol += omp_get_wtime() - timerLUSol1; timerSpMV1 = omp_get_wtime(); #endif mkl_dcsrgemv (&lower, &n, mat->val, mat->ia, mat->ja, rm2, zm2); #ifdef TIMER timerSpMV += omp_get_wtime() - timerSpMV1; timerVector1 = omp_get_wtime(); #endif temp = cblas_ddot(n, zm1, 1, zm2, 1); alpha = rho / temp; cblas_daxpy (n, -alpha, zm2, 1, r, 1); cblas_daxpy (n, alpha, rm2, 1, x, 1); nrm = cblas_dnrm2 (n, r, 1); #ifdef TIMER 
timerVector += omp_get_wtime() - timerVector1; #endif if ((nrm < tol) && ( nrm / nrm0 < tol )) {printf(" iteration = %3d, residual = %le \n", k+1, nrm / nrm0); break; } #ifdef TIMER timerLUSol1 = omp_get_wtime(); #endif mkl_dcsrtrsv (&lower1, &lower, &lower2, &n, ilu->val, ilu->ia, ilu->ja, r, y); mkl_dcsrtrsv (&upper1, &upper, &upper2, &n, ilu->val, ilu->ia, ilu->ja, y, rm3); #ifdef TIMER timerLUSol += omp_get_wtime() - timerLUSol1; timerSpMV1 = omp_get_wtime(); #endif mkl_dcsrgemv (&lower, &n, mat->val, mat->ia, mat->ja, rm3, y); #ifdef TIMER timerSpMV += omp_get_wtime() - timerSpMV1; timerVector1 = omp_get_wtime(); #endif temp = cblas_ddot(n, y, 1, r, 1); temp1 = cblas_ddot(n, y, 1, y, 1); omega = temp / temp1; cblas_daxpy (n, omega, rm3, 1, x, 1); cblas_daxpy (n, -omega, y, 1, r, 1); nrm = cblas_dnrm2 (n, r, 1); #ifdef TIMER timerVector += omp_get_wtime() - timerVector1; #endif if ((nrm < tol) && ( nrm / nrm0 < tol )) {printf(" iteration = %3d, residual = %le \n", k+1, nrm / nrm0); break; } printf(" iteration = %3d, residual = %le \n", k+1, nrm / nrm0); } #ifdef TIMER printf("time LUSol\t%lf\ntime SpMV\t%lf\ntime v-v oper\t%lf\n",timerLUSol,timerSpMV,timerVector); printf("time total\t%lf\n",omp_get_wtime()-timerTotal); #endif free (r); free (rm1); free (rm2); free (rm3); free (zm1); free (zm2); free (p); free (y); return 0; }
int solve_bicgstab_block(csr_t* mat, csr_t** ilu, int nb, double* b, double* x) { int n = mat->n; int nnz = mat->nnz; int *offset_ilu = (int*) calloc(nb, sizeof(int)); for ( int i = 1; i < nb; i++ ) offset_ilu[i] = offset_ilu[i-1] + ilu[i-1]->n; double tol = 1e-6, floatone = 1.0; const int max_iter = 200; double *r, *p, *y, *zm1, *zm2, *rm2, *rm1, *rm3, nrm0, nrm; r = (double*) malloc (sizeof(double) * n); p = (double*) malloc (sizeof(double) * n); y = (double*) malloc (sizeof(double) * n); rm1 = (double*) malloc (sizeof(double) * n); rm2 = (double*) malloc (sizeof(double) * n); rm3 = (double*) malloc (sizeof(double) * n); zm1 = (double*) malloc (sizeof(double) * n); zm2 = (double*) malloc (sizeof(double) * n); double rho = 1.0, rho1, beta = 0.0, alpha = 0.0, omega, temp, temp1; char lower1 = 'L', lower = 'N', lower2 = 'U'; char upper1 = 'U', upper = 'N', upper2 = 'N'; #ifdef TIMER double timerLUSol = 0, timerLUSol1, timerSpMV = 0, timerSpMV1; double timerTotal = omp_get_wtime(); #endif cblas_dcopy (n, b, 1, r, 1); cblas_dcopy (n, r, 1, p, 1); cblas_dcopy (n, r, 1, zm1, 1); nrm0 = cblas_dnrm2 (n, r, 1); for (int k = 0; k < max_iter; k++) { rho1 = rho; rho = cblas_ddot(n, zm1, 1, r, 1); if ( k > 0 ) { beta = (rho / rho1) * (alpha / omega); cblas_daxpy (n, -omega, zm2, 1, p, 1); cblas_dscal (n, beta, p, 1); cblas_daxpy (n, floatone, r, 1, p, 1); } #ifdef TIMER timerLUSol1 = omp_get_wtime(); #endif #pragma omp parallel { #pragma omp for for (int i = 0; i < nb; i++) mkl_dcsrtrsv (&lower1, &lower, &lower2, &ilu[i]->n, ilu[i]->val, ilu[i]->ia, ilu[i]->ja, &p[offset_ilu[i]], &y[offset_ilu[i]]); #pragma omp for for (int i = 0; i < nb; i++) mkl_dcsrtrsv (&upper1, &upper, &upper2, &ilu[i]->n, ilu[i]->val, ilu[i]->ia, ilu[i]->ja, &y[offset_ilu[i]], &rm2[offset_ilu[i]]); } #ifdef TIMER timerLUSol += omp_get_wtime() - timerLUSol1; timerSpMV1 = omp_get_wtime(); #endif mkl_dcsrgemv (&lower, &n, mat->val, mat->ia, mat->ja, rm2, zm2); #ifdef TIMER timerSpMV += omp_get_wtime() - 
timerSpMV1; #endif temp = cblas_ddot(n, zm1, 1, zm2, 1); alpha = rho / temp; cblas_daxpy (n, -alpha, zm2, 1, r, 1); cblas_daxpy (n, alpha, rm2, 1, x, 1); nrm = cblas_dnrm2 (n, x, 1); if ((nrm < tol) && ( nrm / nrm0 < tol )) {printf(" iteration = %3d, residual = %le \n", k+1, nrm / nrm0); break; } #ifdef TIMER timerLUSol1 = omp_get_wtime(); #endif #pragma omp parallel { #pragma omp for for (int i = 0; i < nb; i++) mkl_dcsrtrsv (&lower1, &lower, &lower2, &ilu[i]->n, ilu[i]->val, ilu[i]->ia, ilu[i]->ja, &r[offset_ilu[i]], &y[offset_ilu[i]]); #pragma omp for for (int i = 0; i < nb; i++) mkl_dcsrtrsv (&upper1, &upper, &upper2, &ilu[i]->n, ilu[i]->val, ilu[i]->ia, ilu[i]->ja, &y[offset_ilu[i]], &rm3[offset_ilu[i]]); } #ifdef TIMER timerLUSol += omp_get_wtime() - timerLUSol1; timerSpMV1 = omp_get_wtime(); #endif mkl_dcsrgemv (&lower, &n, mat->val, mat->ia, mat->ja, rm3, y); #ifdef TIMER timerSpMV += omp_get_wtime() - timerSpMV1; #endif temp = cblas_ddot(n, y, 1, r, 1); temp1 = cblas_ddot(n, y, 1, y, 1); omega = temp / temp1; cblas_daxpy (n, omega, rm3, 1, x, 1); cblas_daxpy (n, -omega, y, 1, r, 1); nrm = cblas_dnrm2 (n, r, 1); if ((nrm < tol) && ( nrm / nrm0 < tol )) {printf(" iteration = %3d, residual = %le \n", k+1, nrm / nrm0); break; } printf(" iteration = %3d, residual = %le \n", k+1, nrm / nrm0); } #ifdef TIMER printf("time LUSol\t%lf\ntime SpMV\t%lf\n",timerLUSol,timerSpMV); printf("time total\t%lf\n",omp_get_wtime()-timerTotal); #endif free (r); free (rm1); free (rm2); free (rm3); free (zm1); free (zm2); free (p); free (y); free (offset_ilu); return 0; }
int main( int argc, char **argv ) { /* Matrix data. */ double *aa; int N; int *ia, *ja; read_mtx_and_return_csr(argc, argv, &N, &N, &ia, &ja, &aa); printf("\nDone with reading mtx file.\n"); printf("m = %d, n = %d, nz = %d\n\n", N, N, ia[N]); /*--------------------------------------------------------------------------- /* Allocate storage for the ?par parameters and the solution/rhs vectors /*---------------------------------------------------------------------------*/ MKL_INT ipar[size]; double dpar[size]; double *tmp = (double *)malloc((N * (2 * N + 1) + (N * (N + 9)) / 2 + 1) * sizeof(double)); double rhs[N]; double computed_solution[N]; /*--------------------------------------------------------------------------- /* Some additional variables to use with the RCI (P)FGMRES solver /*---------------------------------------------------------------------------*/ MKL_INT itercount; MKL_INT RCI_request, i, ivar; double dvar; char cvar; /*--------------------------------------------------------------------------- /* Initialize variables and the right hand side through matrix-vector product /*---------------------------------------------------------------------------*/ ivar = N; cvar = 'N'; /*--------------------------------------------------------------------------- /* Initialize the initial guess /*---------------------------------------------------------------------------*/ for (i = 0; i < N; i++) { computed_solution[i] = 1.0; } /*--------------------------------------------------------------------------- /* Initialize the solver /*---------------------------------------------------------------------------*/ dfgmres_init (&ivar, computed_solution, rhs, &RCI_request, ipar, dpar, tmp); if (RCI_request != 0) { printf("Going to FAILED\n"); goto FAILED; } /*--------------------------------------------------------------------------- /* Set the desired parameters: /* LOGICAL parameters: /* do residual stopping test /* do not request for the user defined stopping test /* do 
the check of the norm of the next generated vector automatically /* DOUBLE PRECISION parameters /* set the relative tolerance to 1.0D-3 instead of default value 1.0D-6 /*---------------------------------------------------------------------------*/ ipar[7] = 0; ipar[8] = 1; ipar[9] = 0; ipar[11] = 1; dpar[0] = 1.0E-3; /*--------------------------------------------------------------------------- /* Check the correctness and consistency of the newly set parameters /*---------------------------------------------------------------------------*/ dfgmres_check (&ivar, computed_solution, rhs, &RCI_request, ipar, dpar, tmp); if (RCI_request != 0) { printf("Going to FAILED\n"); goto FAILED; } /*--------------------------------------------------------------------------- /* Print the info about the RCI FGMRES method /*---------------------------------------------------------------------------*/ if (INFO == 1) { printf ("Some info about the current run of RCI FGMRES method:\n\n"); if (ipar[7]) { printf ("As ipar[7]=%d, the automatic test for the maximal number of ", ipar[7]); printf ("iterations will be\nperformed\n"); } else { printf ("As ipar[7]=%d, the automatic test for the maximal number of ", ipar[7]); printf ("iterations will be\nskipped\n"); } printf ("+++\n"); if (ipar[8]) { printf ("As ipar[8]=%d, the automatic residual test will be performed\n", ipar[8]); } else { printf ("As ipar[8]=%d, the automatic residual test will be skipped\n", ipar[8]); } printf ("+++\n"); if (ipar[9]) { printf ("As ipar[9]=%d, the user-defined stopping test will be ", ipar[9]); printf ("requested via\nRCI_request=2\n"); } else { printf ("As ipar[9]=%d, the user-defined stopping test will not be ", ipar[9]); printf ("requested, thus,\nRCI_request will not take the value 2\n"); } printf ("+++\n"); if (ipar[10]) { printf ("As ipar[10]=%d, the Preconditioned FGMRES iterations will be ", ipar[10]); printf ("performed, thus,\nthe preconditioner action will be requested via"); printf 
("RCI_request=3\n"); } else { printf ("As ipar[10]=%d, the Preconditioned FGMRES iterations will not ", ipar[10]); printf ("be performed,\nthus, RCI_request will not take the value 3\n"); } printf ("+++\n"); if (ipar[11]) { printf ("As ipar[11]=%d, the automatic test for the norm of the next ", ipar[11]); printf ("generated vector is\nnot equal to zero up to rounding and "); printf ("computational errors will be performed,\nthus, RCI_request will not "); printf ("take the value 4\n"); } else { printf ("As ipar[11]=%d, the automatic test for the norm of the next ", ipar[11]); printf ("generated vector is\nnot equal to zero up to rounding and "); printf ("computational errors will be skipped,\nthus, the user-defined test "); printf ("will be requested via RCI_request=4\n"); } printf ("+++\n\n"); } /*--------------------------------------------------------------------------- /* Compute the solution by RCI (P)FGMRES solver without preconditioning /* Reverse Communication starts here /*---------------------------------------------------------------------------*/ ONE:dfgmres (&ivar, computed_solution, rhs, &RCI_request, ipar, dpar, tmp); /*--------------------------------------------------------------------------- /* If RCI_request=0, then the solution was found with the required precision /*---------------------------------------------------------------------------*/ if (RCI_request == 0) { printf("RCI_request = %d\n", RCI_request); printf("Going to COMPLETE\n"); goto COMPLETE; } /*--------------------------------------------------------------------------- /* If RCI_request=1, then compute the vector A*tmp[ipar[21]-1] /* and put the result in vector tmp[ipar[22]-1] /*--------------------------------------------------------------------------- /* NOTE that ipar[21] and ipar[22] contain FORTRAN style addresses, /* therefore, in C code it is required to subtract 1 from them to get C style /* addresses 
/*---------------------------------------------------------------------------*/ if (RCI_request == 1) { printf("RCI_request = %d\n", RCI_request); mkl_dcsrgemv (&cvar, &ivar, aa, ia, ja, &tmp[ipar[21] - 1], &tmp[ipar[22] - 1]); printf("Going to ONE\n"); goto ONE; } /*--------------------------------------------------------------------------- /* If RCI_request=anything else, then dfgmres subroutine failed /* to compute the solution vector: computed_solution[N] /*---------------------------------------------------------------------------*/ else { printf("Going to FAILED\n"); goto FAILED; } /*--------------------------------------------------------------------------- /* Reverse Communication ends here /* Get the current iteration number and the FGMRES solution (DO NOT FORGET to /* call dfgmres_get routine as computed_solution is still containing /* the initial guess!) /*---------------------------------------------------------------------------*/ COMPLETE:dfgmres_get (&ivar, computed_solution, rhs, &RCI_request, ipar, dpar, tmp, &itercount); /* /*--------------------------------------------------------------------------- /* Print solution vector: computed_solution[N] and the number of iterations: itercount /*--------------------------------------------------------------------------- */ printf (" The system has been solved \n"); printf ("\n The following solution has been obtained: \n"); /* for (i = 0; i < N; i++) { printf ("computed_solution[%d]=", i); printf ("%e\n", computed_solution[i]); } */ printf ("\n Number of iterations: %d\n", itercount); i = 1; /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ MKL_Free_Buffers (); return 0; /*if (itercount == expected_itercount && 
dvar < 1.0e-14) { printf ("\nThis example has successfully PASSED through all steps of "); printf ("computation!\n"); return 0; } else { printf ("\nThis example may have FAILED as either the number of iterations "); printf ("differs\nfrom the expected number of iterations %d, ", expected_itercount); printf ("or the computed solution\ndiffers much from the expected solution "); printf ("(Euclidean norm is %e), or both.\n", dvar); return 1; }*/ /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ FAILED:printf ("\nThis example FAILED as the solver has returned the ERROR "); printf ("code %d", RCI_request); MKL_Free_Buffers (); free (ia); free (ja); free (aa); free (tmp); printf("\nMemory deallocated...\n"); return 0; }