int main (int argc, char *argv[]) { int taskid; // a task identifier int numtasks; // number of tasks in partition MPI_Comm comm; int m; // size of the matrix int local_m; // rows of matrix A sent to each worker double *A, *b,*exact_x, *x; double *temp_1, *temp_2; double *local_A, *local_v,*local_u; double *local_M; // M is the preconditioner in this example, which is the diagonal element of A; int i,j,k; MPI_Init(&argc,&argv); comm=MPI_COMM_WORLD; MPI_Comm_rank(comm,&taskid); MPI_Comm_size(comm,&numtasks); if(taskid==MASTER){ // initilization: A and b /* start modification 1: read A and b from mtx files in node 0 */ ////m=64; // size of the matrix ////A=(double *)malloc(sizeof(double)*(m*m)); ////// !!! A is in col-major ////for(j=0;j<m;j++) //// for (i=0;i<m;i++){ //// if(i==j) //// *(A+j*m+i)=m*100.0; //// else //// *(A+j*m+i)=i+1.0; //// } ////exact_x=(double *)malloc(sizeof(double)*m); ////for (i=0;i<m;i++) //// *(exact_x+i)=1.0; ////b=(double *)malloc(sizeof(double)*m); ////// b=A*ones(n,1) ////cblas_dgemv(CblasColMajor, CblasNoTrans, m, m, 1.0, A, m, exact_x, 1, 0.0, b, 1); /////* end modification 1*/ mtxBLU mtxA(argv[1], NPROC); mtxBLU mtxb(argv[2]); m=mtxA.m; A = (double*)malloc(sizeof(double)*(m*m)); b = (double*)malloc(sizeof(double)*m); for(i=0; i<m*m; i++) A[i]=mtxA.arr[i]; for(i=0; i<m; i++) b[i]=mtxb.arr[i]; } MPI_Bcast(&m,1,MPI_INT, MASTER, comm); // send m from node MASTER to all other nodes. local_m=m/numtasks; local_A=(double *)malloc(sizeof(double)*(local_m*m)); local_u=(double *)malloc(sizeof(double)*(local_m)); local_v=(double *)malloc(sizeof(double)*m); // partition A and send A_i to local_A on node i MPI_Scatter(A,local_m*m, MPI_DOUBLE, local_A, local_m*m, MPI_DOUBLE, MASTER, comm); if(taskid==MASTER){ free(A); //free(exact_x); // do not free b, it wil be used for GMRES } /* start modification 2: generate preconditioner M * In this example, TA choose the diagonal elements of A as the preconditioner. * In HW3 part b, you should generate L and U here. */ ////local_M=(double *)malloc(sizeof(double)*local_m); ////for(i=0;i<local_m;i++) //// *(local_M+i)=*(local_A+taskid*local_m+i*m+i); int mm=int( m/numtasks); double *U,*L,*UU,*LL; U = new double[numtasks*mm*mm]; //block col domain L = new double[numtasks*mm*mm]; //block col domain UU= new double[numtasks*mm*mm]; //full col domain LL= new double[numtasks*mm*mm]; //full col domain std::fill(L, L+numtasks*mm*mm, 0); std::fill(U, U+numtasks*mm*mm, 0); for(i=0; i<mm; i++) L[taskid*mm*mm+i+i*mm]=1; //make Lii = eyes(mm) for(i=0; i<numtasks*mm*mm; i++) //backup A into U U[i]=local_A[i]; blockLU(L, U, mm, taskid); for(k=0; k<numtasks; k++) for(j=0; j<mm; j++) for(i=0; i<mm; i++){ UU[k*mm+i+j*mm*numtasks]= U[k*mm*mm+j*mm+i]; LL[k*mm+i+j*mm*numtasks]= L[k*mm*mm+j*mm+i]; } /* end modification 2*/ /*--------------------------------------------------------------------------- * GMRES: Allocate storage for the ?par parameters and the solution vectors *---------------------------------------------------------------------------*/ MKL_INT RCI_request; int RCI_flag; double dvar; int flag=0; MKL_INT ipar[128]; //specifies the integer set of data for the RCI FGMRES computations double dpar[128]; // specifies the double precision set of data double *tmp; //used to supply the double precision temporary space for theRCI FGMRES computations, specifically: double *computed_solution; double *residual; double *f; MKL_INT itercount, ierr=0;; MKL_INT ivar; double b_2norm; char cvar='N'; MKL_INT incx=1; if (taskid==MASTER){ ipar[14]=RESTART; // restart iteration number int n_tmp = (2 * ipar[14] + 1) * m + ipar[14] * (ipar[14] + 9) / 2 + 1; tmp=(double *)malloc(sizeof(double)*n_tmp); computed_solution=(double *)malloc(sizeof(double)*m); residual=(double *)malloc(sizeof(double)*m); f=(double *)malloc(sizeof(double)*m); ivar=m; /*--------------------------------------------------------------------------- * Initialize the initial guess *---------------------------------------------------------------------------*/ for (i = 0; i < m; i++) { computed_solution[i] = 0.5; } b_2norm = cblas_dnrm2 (ivar, b, incx); // printf("b_2norm=%f\n",b_2norm); /*--------------------------------------------------------------------------- * Initialize the solver *---------------------------------------------------------------------------*/ dfgmres_init (&ivar, computed_solution,b, &RCI_request, ipar, dpar, tmp); RCI_flag=RCI_request; } MPI_Bcast(&RCI_flag,1,MPI_INT, MASTER, comm); if (RCI_flag != 0) goto FAILED; if(taskid==MASTER){ /*--------------------------------------------------------------------------- * GMRES: Set the desired parameters: *---------------------------------------------------------------------------*/ ipar[14] = RESTART; // restart iteration number ipar[7] = 1; //do the stopping test ipar[10] = 1; // use preconditioner dpar[0] = TOL; /*--------------------------------------------------------------------------- * Check the correctness and consistency of the newly set parameters *---------------------------------------------------------------------------*/ dfgmres_check (&ivar, computed_solution, b, &RCI_request, ipar, dpar, tmp); RCI_flag=RCI_request; } MPI_Bcast(&RCI_flag,1,MPI_INT, MASTER, comm); if (RCI_flag != 0) goto FAILED; if (taskid==MASTER){ /*--------------------------------------------------------------------------- * Print the info about the RCI FGMRES method *---------------------------------------------------------------------------*/ printf ("Some info about the current run of RCI FGMRES method:\n\n"); if (ipar[7]) { printf ("As ipar[7]=%d, the automatic test for the maximal number of ", ipar[7]); printf ("iterations will be\nperformed\n"); } else { printf ("As ipar[7]=%d, the automatic test for the maximal number of ", ipar[7]); printf ("iterations will be\nskipped\n"); } printf ("+++\n"); if (ipar[8]) { printf ("As ipar[8]=%d, the automatic residual test will be performed\n", ipar[8]); } else { printf ("As ipar[8]=%d, the automatic residual test will be skipped\n", ipar[8]); } printf ("+++\n"); if (ipar[9]) { printf ("As ipar[9]=%d, the user-defined stopping test will be ", ipar[9]); printf ("requested via\nRCI_request=2\n"); } else { printf ("As ipar[9]=%d, the user-defined stopping test will not be ", ipar[9]); printf ("requested, thus,\nRCI_request will not take the value 2\n"); } printf ("+++\n"); if (ipar[10]) { printf ("As ipar[10]=%d, the Preconditioned FGMRES iterations will be ", ipar[10]); printf ("performed, thus,\nthe preconditioner action will be requested via "); printf ("RCI_request=3\n"); } else { printf ("As ipar[10]=%d, the Preconditioned FGMRES iterations will not ", ipar[10]); printf ("be performed,\nthus, RCI_request will not take the value 3\n"); } printf ("+++\n"); if (ipar[11]) { printf ("As ipar[11]=%d, the automatic test for the norm of the next ", ipar[11]); printf ("generated vector is\nnot equal to zero up to rounding and "); printf ("computational errors will be performed,\nthus, RCI_request will not "); printf ("take the value 4\n"); } else { printf ("As ipar[11]=%d, the automatic test for the norm of the next ", ipar[11]); printf ("generated vector is\nnot equal to zero up to rounding and "); printf ("computational errors will be skipped,\nthus, the user-defined test "); printf ("will be requested via RCI_request=4\n"); } printf ("+++\n\n"); } /*--------------------------------------------------------------------------- * Compute the solution by RCI (P)FGMRES solver with preconditioning * Reverse Communication starts here *---------------------------------------------------------------------------*/ ONE: if(taskid==MASTER){ dfgmres (&ivar, computed_solution,b, &RCI_request, ipar, dpar, tmp); RCI_flag=RCI_request; } MPI_Bcast(&RCI_flag,1,MPI_INT, MASTER, comm); // send RCI_request from node MASTER to all other nodes. /*--------------------------------------------------------------------------- * If RCI_request=0, then the solution was found with the required precision *---------------------------------------------------------------------------*/ if (RCI_flag == 0) goto COMPLETE; /*--------------------------------------------------------------------------- * If RCI_request=1, then compute the vector A*tmp[ipar[21]-1] * and put the result in vector tmp[ipar[22]-1] *--------------------------------------------------------------------------- * NOTE that ipar[21] and ipar[22] contain FORTRAN style addresses, * therefore, in C code it is required to subtract 1 from them to get C style * addresses *---------------------------------------------------------------------------*/ if (RCI_flag == 1) { if (taskid==MASTER){ temp_1=&tmp[ipar[21] - 1]; temp_2=&tmp[ipar[22] - 1]; } mpi_dgemv(m,local_m,local_A,temp_1, temp_2,local_u,local_v,taskid, comm); goto ONE; } /*--------------------------------------------------------------------------- * If RCI_request=2, then do the user-defined stopping test * The residual stopping test for the computed solution is performed here *--------------------------------------------------------------------------- */ if (RCI_flag == 2) { /* Request to the dfgmres_get routine to put the solution into b[N] via ipar[12] -------------------------------------------------------------------------------- WARNING: beware that the call to dfgmres_get routine with ipar[12]=0 at this stage may destroy the convergence of the FGMRES method, therefore, only advanced users should exploit this option with care */ if (taskid==MASTER){ ipar[12] = 1; /* Get the current FGMRES solution in the vector f */ dfgmres_get (&ivar, computed_solution, f, &RCI_request, ipar, dpar, tmp, &itercount); temp_1=f; temp_2=residual; } /* Compute the current true residual via mpi mat_vec multiplication */ mpi_dgemv(m,local_m,local_A,temp_1,temp_2,local_u,local_v,taskid, comm); if(taskid==MASTER){ dvar = -1.0E0; cblas_daxpy (ivar, dvar, b, incx, residual, incx); dvar = cblas_dnrm2 (ivar, residual, incx); printf("iteration %d, relative residual:%e\n",itercount, dvar); } MPI_Bcast(&dvar,1,MPI_DOUBLE, MASTER, comm); if (dvar < TOL){ goto COMPLETE; } else goto ONE; } /*--------------------------------------------------------------------------- * If RCI_request=3, then apply the preconditioner on the vector * tmp[ipar[21]-1] and put the result in vector tmp[ipar[22]-1] *--------------------------------------------------------------------------- * NOTE that ipar[21] and ipar[22] contain FORTRAN style addresses, * therefore, in C code it is required to subtract 1 from them to get C style * addresses *---------------------------------------------------------------------------*/ if (RCI_flag == 3) { if (taskid==MASTER){ temp_1=&tmp[ipar[21] - 1]; temp_2=&tmp[ipar[22] - 1]; } /* start modification 3: solve L U temp_2 = temp_1 */ ////mpi_preconditioner_solver(m,local_m,local_M,temp_1, temp_2,local_u,taskid,comm); //method1: //double *rhs = (double*) malloc( sizeof(double)*m); //for(i=0; i<m; i++) // rhs[i]=temp_1[i]; //method2: for(i=0; i<m; i++) temp_2[i]=temp_1[i]; LUSolver(LL,UU, temp_2, m, mm, taskid, NPROC); //free(rhs); /* end modification 3 */ goto ONE; } /*--------------------------------------------------------------------------- * If RCI_request=4, then check if the norm of the next generated vector is * not zero up to rounding and computational errors. The norm is contained * in dpar[6] parameter *---------------------------------------------------------------------------*/ if (RCI_flag == 4) { if(taskid==MASTER) dvar=dpar[6]; MPI_Bcast(&dvar,1,MPI_DOUBLE, MASTER, comm); if (dvar <1.0E-12 ){ goto COMPLETE; } else goto ONE; } /*--------------------------------------------------------------------------- * If RCI_request=anything else, then dfgmres subroutine failed * to compute the solution vector: computed_solution[N] *---------------------------------------------------------------------------*/ else { goto FAILED; } /*--------------------------------------------------------------------------- * Reverse Communication ends here * Get the current iteration number and the FGMRES solution (DO NOT FORGET to * call dfgmres_get routine as computed_solution is still containing * the initial guess!). Request to dfgmres_get to put the solution * into vector computed_solution[N] via ipar[12] *---------------------------------------------------------------------------*/ COMPLETE:if(taskid==MASTER){ ipar[12] = 0; dfgmres_get (&ivar, computed_solution,b, &RCI_request, ipar, dpar, tmp, &itercount); /*--------------------------------------------------------------------------- * Print solution vector: computed_solution[N] and the number of iterations: itercount *---------------------------------------------------------------------------*/ printf ("The system has been solved in %d iterations \n", itercount); printf ("The following solution has been obtained (first 4 elements): \n"); for (i = 0; i < 4; i++) { printf ("computed_solution[%d]=", i); printf ("%e\n", computed_solution[i]); } /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ MKL_Free_Buffers (); temp_1=computed_solution; temp_2=residual; } // compute the relative residual mpi_dgemv(m,local_m,local_A,temp_1,temp_2,local_u,local_v,taskid, comm); if(taskid==MASTER){ dvar = -1.0E0; cblas_daxpy (ivar, dvar, b, incx, residual, incx); dvar = cblas_dnrm2 (ivar, residual, incx); printf("relative residual:%e\n",dvar/b_2norm); if(itercount<MAXIT && dvar<TOL) flag=0; //success else flag=1; //fail } MPI_Bcast(&flag,1,MPI_INT, MASTER, comm); free(local_A); free(local_M); free(local_u); free(local_v); if(taskid==MASTER){ free(tmp); free(b); free(computed_solution); free(residual); } if(flag==0){ MPI_Finalize(); return 0; } else{ MPI_Finalize(); return 1; } /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ FAILED: if(taskid==MASTER){ printf ("\nThis example FAILED as the solver has returned the ERROR code %d", RCI_request); MKL_Free_Buffers (); } free(local_A); free(local_M); free(local_u); free(local_v); if(taskid==MASTER){ free(tmp); free(b); free(computed_solution); free(residual); } MPI_Finalize(); return 1; }
/*---------------------------------------------------------------------------*/ int CGFMMmrhs(MKL_INT *solution, double *ShellSphs, double *rhs, double *nRhs, MKL_INT n) { /*---------------------------------------------------------------------------*/ /* Define arrays for the upper triangle of the coefficient matrix and rhs vector */ /* Compressed sparse row storage is used for sparse representation */ /*---------------------------------------------------------------------------*/ MKL_INT rci_request, expected_itercount = 20, i, j; MKL_INT itercount[nRhs]; /* Fill all arrays containing matrix data. */ /*---------------------------------------------------------------------------*/ /* Allocate storage for the solver ?par and temporary storage tmp */ /*---------------------------------------------------------------------------*/ MKL_INT length = 128, method = 1; /*---------------------------------------------------------------------------*/ /* Some additional variables to use with the RCI (P)CG solver */ /*---------------------------------------------------------------------------*/ MKL_INT ipar[128 + 2 * nRhs]; double euclidean_norm, dpar[128 + 2 * nRhs]; double *tmp; tmp = (double*)calloc(n * (3 + nRhs),sizeof(double)); double eone = -1.E0; MKL_INT ione = 1; /*---------------------------------------------------------------------------*/ /* Initialize the initial guess */ /*---------------------------------------------------------------------------*/ for (i = 0; i < n*nRhs; i++) solution[i] = 1.E0; /*---------------------------------------------------------------------------*/ /* Initialize the solver */ /*---------------------------------------------------------------------------*/ for (i = 0; i < (length + 2 * nRhs); i++) ipar[i] = 0; for (i = 0; i < (length + 2 * nRhs); i++) dpar[i] = 0.E0; dcgmrhs_init (&n, solution, &nRhs, rhs, &method, &rci_request, ipar, dpar, tmp); if (rci_request != 0) goto failure; /*---------------------------------------------------------------------------*/ /* Set the desired parameters: */ /* LOGICAL parameters: */ /* do residual stopping test */ /* do not request for the user defined stopping test */ /* DOUBLE parameters */ /* set the relative tolerance to 1.0D-5 instead of default value 1.0D-6 */ /*---------------------------------------------------------------------------*/ ipar[8] = 1; ipar[9] = 0; dpar[0] = 1.E-5; /*---------------------------------------------------------------------------*/ /* Compute the solution by RCI (P)CG solver without preconditioning */ /* Reverse Communications starts here */ /*---------------------------------------------------------------------------*/ rci:dcgmrhs (&n, solution, &nRhs, rhs, &rci_request, ipar, dpar, tmp); /*---------------------------------------------------------------------------*/ /* If rci_request=0, then the solution was found with the required precision */ /*---------------------------------------------------------------------------*/ if (rci_request == 0) goto getsln; /*---------------------------------------------------------------------------*/ /* If rci_request=1, then compute the vector A*tmp[0] */ /* and put the result in vector tmp[n] */ /*---------------------------------------------------------------------------*/ if (rci_request == 1) { //mkl_dcsrsymv (&tr, &n, a, ia, ja, tmp, &tmp[n]); //debug RPY(n, ShellSphs,tmp); // SPMV by 4 calls of FMM goto rci; } /*---------------------------------------------------------------------------*/ /* If rci_request=anything else, then dcg subroutine failed */ /* to compute the solution vector: solution[n] */ /*---------------------------------------------------------------------------*/ goto failure; /*---------------------------------------------------------------------------*/ /* Reverse Communication ends here */ /* Get the current iteration number into itercount */ /*---------------------------------------------------------------------------*/ getsln:dcgmrhs_get (&n, solution, &nRhs, rhs, &rci_request, ipar, dpar, tmp, itercount); /*---------------------------------------------------------------------------*/ /* Print solution vector: solution[n] and number of iterations: itercount */ /*---------------------------------------------------------------------------*/ printf ("The system has been solved\n"); printf ("The following solution obtained\n"); for (i = 0; i < n / 2; i++) printf ("%6.3f ", solution[i]); printf ("\n"); for (i = n / 2; i < n; i++) printf ("%6.3f ", solution[i]); printf ("\n"); for (i = 0; i < n / 2; i++) printf ("%6.3f ", solution[n + i]); printf ("\n"); for (i = n / 2; i < n; i++) printf ("%6.3f ", solution[n + i]); printf ("\nExpected solution is\n"); for (i = 0; i < n / 2; i++) { printf ("%6.3f ", expected_sol[i]); expected_sol[i] -= solution[i]; } printf ("\n"); for (i = n / 2; i < n; i++) { printf ("%6.3f ", expected_sol[i]); expected_sol[i] -= solution[i]; } printf ("\n"); for (i = 0; i < n / 2; i++) { printf ("%6.3f ", expected_sol[n + i]); expected_sol[n + i] -= solution[n + i]; } printf ("\n"); for (i = n / 2; i < n; i++) { printf ("%6.3f ", expected_sol[n + i]); expected_sol[n + i] -= solution[n + i]; } printf ("\n"); i = 1; j = n * nRhs; euclidean_norm = dnrm2 (&j, expected_sol, &i); /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ MKL_Free_Buffers (); if (euclidean_norm < 1.0e-12) { printf ("This example has successfully PASSED through all steps of computation!\n"); return 0; } else { printf ("This example may have FAILED as the computed solution differs\n"); printf ("much from the expected solution (Euclidean norm is %e).\n", euclidean_norm); return 1; } /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ failure:printf ("This example FAILED as the solver has returned the ERROR code %d", rci_request); MKL_Free_Buffers (); return 1; }
int main( int argc, char **argv ) { /* Matrix data. */ double *aa; int N; int *ia, *ja; read_mtx_and_return_csr(argc, argv, &N, &N, &ia, &ja, &aa); printf("\nDone with reading mtx file.\n"); printf("m = %d, n = %d, nz = %d\n\n", N, N, ia[N]); /*--------------------------------------------------------------------------- /* Allocate storage for the ?par parameters and the solution/rhs vectors /*---------------------------------------------------------------------------*/ MKL_INT ipar[size]; double dpar[size]; double *tmp = (double *)malloc((N * (2 * N + 1) + (N * (N + 9)) / 2 + 1) * sizeof(double)); double rhs[N]; double computed_solution[N]; /*--------------------------------------------------------------------------- /* Some additional variables to use with the RCI (P)FGMRES solver /*---------------------------------------------------------------------------*/ MKL_INT itercount; MKL_INT RCI_request, i, ivar; double dvar; char cvar; /*--------------------------------------------------------------------------- /* Initialize variables and the right hand side through matrix-vector product /*---------------------------------------------------------------------------*/ ivar = N; cvar = 'N'; /*--------------------------------------------------------------------------- /* Initialize the initial guess /*---------------------------------------------------------------------------*/ for (i = 0; i < N; i++) { computed_solution[i] = 1.0; } /*--------------------------------------------------------------------------- /* Initialize the solver /*---------------------------------------------------------------------------*/ dfgmres_init (&ivar, computed_solution, rhs, &RCI_request, ipar, dpar, tmp); if (RCI_request != 0) { printf("Going to FAILED\n"); goto FAILED; } /*--------------------------------------------------------------------------- /* Set the desired parameters: /* LOGICAL parameters: /* do residual stopping test /* do not request for the user defined stopping test /* do the check of the norm of the next generated vector automatically /* DOUBLE PRECISION parameters /* set the relative tolerance to 1.0D-3 instead of default value 1.0D-6 /*---------------------------------------------------------------------------*/ ipar[7] = 0; ipar[8] = 1; ipar[9] = 0; ipar[11] = 1; dpar[0] = 1.0E-3; /*--------------------------------------------------------------------------- /* Check the correctness and consistency of the newly set parameters /*---------------------------------------------------------------------------*/ dfgmres_check (&ivar, computed_solution, rhs, &RCI_request, ipar, dpar, tmp); if (RCI_request != 0) { printf("Going to FAILED\n"); goto FAILED; } /*--------------------------------------------------------------------------- /* Print the info about the RCI FGMRES method /*---------------------------------------------------------------------------*/ if (INFO == 1) { printf ("Some info about the current run of RCI FGMRES method:\n\n"); if (ipar[7]) { printf ("As ipar[7]=%d, the automatic test for the maximal number of ", ipar[7]); printf ("iterations will be\nperformed\n"); } else { printf ("As ipar[7]=%d, the automatic test for the maximal number of ", ipar[7]); printf ("iterations will be\nskipped\n"); } printf ("+++\n"); if (ipar[8]) { printf ("As ipar[8]=%d, the automatic residual test will be performed\n", ipar[8]); } else { printf ("As ipar[8]=%d, the automatic residual test will be skipped\n", ipar[8]); } printf ("+++\n"); if (ipar[9]) { printf ("As ipar[9]=%d, the user-defined stopping test will be ", ipar[9]); printf ("requested via\nRCI_request=2\n"); } else { printf ("As ipar[9]=%d, the user-defined stopping test will not be ", ipar[9]); printf ("requested, thus,\nRCI_request will not take the value 2\n"); } printf ("+++\n"); if (ipar[10]) { printf ("As ipar[10]=%d, the Preconditioned FGMRES iterations will be ", ipar[10]); printf ("performed, thus,\nthe preconditioner action will be requested via"); printf ("RCI_request=3\n"); } else { printf ("As ipar[10]=%d, the Preconditioned FGMRES iterations will not ", ipar[10]); printf ("be performed,\nthus, RCI_request will not take the value 3\n"); } printf ("+++\n"); if (ipar[11]) { printf ("As ipar[11]=%d, the automatic test for the norm of the next ", ipar[11]); printf ("generated vector is\nnot equal to zero up to rounding and "); printf ("computational errors will be performed,\nthus, RCI_request will not "); printf ("take the value 4\n"); } else { printf ("As ipar[11]=%d, the automatic test for the norm of the next ", ipar[11]); printf ("generated vector is\nnot equal to zero up to rounding and "); printf ("computational errors will be skipped,\nthus, the user-defined test "); printf ("will be requested via RCI_request=4\n"); } printf ("+++\n\n"); } /*--------------------------------------------------------------------------- /* Compute the solution by RCI (P)FGMRES solver without preconditioning /* Reverse Communication starts here /*---------------------------------------------------------------------------*/ ONE:dfgmres (&ivar, computed_solution, rhs, &RCI_request, ipar, dpar, tmp); /*--------------------------------------------------------------------------- /* If RCI_request=0, then the solution was found with the required precision /*---------------------------------------------------------------------------*/ if (RCI_request == 0) { printf("RCI_request = %d\n", RCI_request); printf("Going to COMPLETE\n"); goto COMPLETE; } /*--------------------------------------------------------------------------- /* If RCI_request=1, then compute the vector A*tmp[ipar[21]-1] /* and put the result in vector tmp[ipar[22]-1] /*--------------------------------------------------------------------------- /* NOTE that ipar[21] and ipar[22] contain FORTRAN style addresses, /* therefore, in C code it is required to subtract 1 from them to get C style /* addresses /*---------------------------------------------------------------------------*/ if (RCI_request == 1) { printf("RCI_request = %d\n", RCI_request); mkl_dcsrgemv (&cvar, &ivar, aa, ia, ja, &tmp[ipar[21] - 1], &tmp[ipar[22] - 1]); printf("Going to ONE\n"); goto ONE; } /*--------------------------------------------------------------------------- /* If RCI_request=anything else, then dfgmres subroutine failed /* to compute the solution vector: computed_solution[N] /*---------------------------------------------------------------------------*/ else { printf("Going to FAILED\n"); goto FAILED; } /*--------------------------------------------------------------------------- /* Reverse Communication ends here /* Get the current iteration number and the FGMRES solution (DO NOT FORGET to /* call dfgmres_get routine as computed_solution is still containing /* the initial guess!) /*---------------------------------------------------------------------------*/ COMPLETE:dfgmres_get (&ivar, computed_solution, rhs, &RCI_request, ipar, dpar, tmp, &itercount); /* /*--------------------------------------------------------------------------- /* Print solution vector: computed_solution[N] and the number of iterations: itercount /*--------------------------------------------------------------------------- */ printf (" The system has been solved \n"); printf ("\n The following solution has been obtained: \n"); /* for (i = 0; i < N; i++) { printf ("computed_solution[%d]=", i); printf ("%e\n", computed_solution[i]); } */ printf ("\n Number of iterations: %d\n", itercount); i = 1; /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ MKL_Free_Buffers (); return 0; /*if (itercount == expected_itercount && dvar < 1.0e-14) { printf ("\nThis example has successfully PASSED through all steps of "); printf ("computation!\n"); return 0; } else { printf ("\nThis example may have FAILED as either the number of iterations "); printf ("differs\nfrom the expected number of iterations %d, ", expected_itercount); printf ("or the computed solution\ndiffers much from the expected solution "); printf ("(Euclidean norm is %e), or both.\n", dvar); return 1; }*/ /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ FAILED:printf ("\nThis example FAILED as the solver has returned the ERROR "); printf ("code %d", RCI_request); MKL_Free_Buffers (); free (ia); free (ja); free (aa); free (tmp); printf("\nMemory deallocated...\n"); return 0; }
/*---------------------------------------------------------------------------*/ int main (void) { /*---------------------------------------------------------------------------*/ /* Define arrays for the upper triangle of the coefficient matrix and */ /* preconditioner as well as an array for rhs vector */ /* Compressed sparse row storage is used for sparse representation */ /*---------------------------------------------------------------------------*/ MKL_INT n = 100, rci_request, itercount, lexpected_itercount = 15, uexpected_itercount = 19, i; double rhs[100]; MKL_INT ia[100 + 1]; MKL_INT ja[100 - 1]; double a[100 - 1], a1[100 - 1]; /*---------------------------------------------------------------------------*/ /* Allocate storage for the solver ?par and temporary storage tmp */ /*---------------------------------------------------------------------------*/ MKL_INT length = 128; MKL_INT ipar[128]; double dpar[128], tmp[4 * 100]; /*---------------------------------------------------------------------------*/ /* Some additional variables to use with the RCI (P)CG solver */ /* OMEGA is the relaxation parameter, NITER_SSOR is the maximum number of */ /* iterations for the SSOR preconditioner */ /*---------------------------------------------------------------------------*/ double solution[100]; double expected_sol[100]; double omega = 0.5E0, one = 1.E0, zero = 0.E0, om = 1.E0 - omega; double euclidean_norm, temp[100]; MKL_INT niter_ssor = 20; char matdes[6]; char tr = 'n'; double eone = -1.E0; MKL_INT ione = 1; /*---------------------------------------------------------------------------*/ /* Initialize the coefficient matrix and expected solution */ /*---------------------------------------------------------------------------*/ for (i = 0; i < n; i++) expected_sol[i] = 1.E0; for (i = 0; i < n - 1; i++) { ja[i] = i + 2; ia[i] = i + 1; a[i] = 0.5E0; a1[i] = omega * a[i]; } ia[n - 1] = n; ia[n] = ia[n - 1]; matdes[0] = 's'; matdes[1] = 'u'; matdes[2] = 'u'; matdes[3] = 'f'; /*---------------------------------------------------------------------------*/ /* Initialize vectors rhs, temp, and tmp[n:2*n-1] with zeros as mkl_dcsrmv */ /* routine does not set NAN to zero. Thus, if any of the values in the */ /* vectors above accidentally happens to be NAN, the example will fail */ /* to complete. */ /* Initialize the right hand side through matrix-vector product */ /*---------------------------------------------------------------------------*/ for (i = 0; i < n; i++) { rhs[i] = zero; temp[i] = zero; tmp[n + i] = zero; } mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], expected_sol, &zero, rhs); /*---------------------------------------------------------------------------*/ /* Initialize the initial guess */ /*---------------------------------------------------------------------------*/ for (i = 0; i < n; i++) solution[i] = zero; /*---------------------------------------------------------------------------*/ /* Initialize the solver */ /*---------------------------------------------------------------------------*/ dcg_init (&n, solution, rhs, &rci_request, ipar, dpar, tmp); if (rci_request != 0) goto failure; /*---------------------------------------------------------------------------*/ /* Set the desired parameters: */ /* INTEGER parameters: */ /* set the maximal number of iterations to 100 */ /* LOGICAL parameters: */ /* run the Preconditioned version of RCI (P)CG with preconditioner C_inverse */ /* DOUBLE parameters */ /* - */ /*---------------------------------------------------------------------------*/ ipar[4] = 100; ipar[10] = 1; /*---------------------------------------------------------------------------*/ /* Check the correctness and consistency of the newly set parameters */ /*---------------------------------------------------------------------------*/ dcg_check (&n, solution, rhs, &rci_request, ipar, dpar, tmp); if (rci_request != 0) goto failure; /*---------------------------------------------------------------------------*/ /* Compute the solution by RCI (P)CG solver */ /* Reverse Communications starts here */ /*---------------------------------------------------------------------------*/ rci:dcg (&n, solution, rhs, &rci_request, ipar, dpar, tmp); /*---------------------------------------------------------------------------*/ /* If rci_request=0, then the solution was found according to the requested */ /* stopping tests. In this case, this means that it was found after 100 */ /* iterations. */ /*---------------------------------------------------------------------------*/ if (rci_request == 0) goto getsln; /*---------------------------------------------------------------------------*/ /* If rci_request=1, then compute the vector A*tmp[0] */ /* and put the result in vector tmp[n] */ /*---------------------------------------------------------------------------*/ if (rci_request == 1) { matdes[0] = 's'; mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], tmp, &zero, &tmp[n]); goto rci; } /*---------------------------------------------------------------------------*/ /* If rci_request=2, then do the user-defined stopping test: compute the */ /* Euclidean norm of the actual residual using MKL routines and check if */ /* it is less than 1.E-8 */ /*---------------------------------------------------------------------------*/ if (rci_request == 2) { matdes[0] = 's'; mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], solution, &zero, temp); daxpy (&n, &eone, rhs, &ione, temp, &ione); euclidean_norm = dnrm2 (&n, temp, &ione); /*---------------------------------------------------------------------------*/ /* The solution has not been found yet according to the user-defined stopping */ /* test. Continue RCI (P)CG iterations. */ /*---------------------------------------------------------------------------*/ if (euclidean_norm > 1.E-6) goto rci; /*---------------------------------------------------------------------------*/ /* The solution has been found according to the user-defined stopping test */ /*---------------------------------------------------------------------------*/ else goto getsln; } /*---------------------------------------------------------------------------*/ /* If rci_request=3, then apply the simplest SSOR preconditioning */ /* on vector tmp[2*n] and put the result in vector tmp[3*n] */ /*---------------------------------------------------------------------------*/ if (rci_request == 3) { dcopy (&n, &tmp[2 * n], &ione, &tmp[3 * n], &ione); matdes[0] = 't'; for (i = 1; i <= niter_ssor; i++) { dcopy (&n, &tmp[2 * n], &ione, temp, &ione); matdes[2] = 'n'; tr = 'n'; mkl_dcsrmv (&tr, &n, &n, &eone, matdes, a1, ja, ia, &ia[1], &tmp[3 * n], &omega, temp); daxpy (&n, &om, &tmp[3 * n], &ione, temp, &ione); matdes[2] = 'u'; tr = 't'; mkl_dcsrsv (&tr, &n, &one, matdes, a1, ja, ia, &ia[1], temp, &tmp[3 * n]); } goto rci; } /*---------------------------------------------------------------------------*/ /* If rci_request=anything else, then dcg subroutine failed */ /* to compute the solution vector: solution[n] */ /*---------------------------------------------------------------------------*/ goto failure; /*---------------------------------------------------------------------------*/ /* Reverse Communication ends here */ /* Get the current iteration number into itercount */ /*---------------------------------------------------------------------------*/ getsln:dcg_get (&n, solution, rhs, &rci_request, ipar, dpar, tmp, &itercount); /*---------------------------------------------------------------------------*/ /* Print solution vector: solution[n] and number of iterations: itercount */ /*---------------------------------------------------------------------------*/ printf ("The system has been solved\n"); printf ("The following solution obtained\n"); for (i = 0; i < n / 4; i++) { printf ("%6.3f %6.3f %6.3f %6.3f", solution[4 * i], solution[4 * i + 1], solution[4 * i + 2], solution[4 * i + 3]); printf ("\n"); } printf ("\nExpected solution is\n"); for (i = 0; i < n / 4; i++) { printf ("%6.3f %6.3f %6.3f %6.3f", expected_sol[4 * i], expected_sol[4 * i + 1], expected_sol[4 * i + 2], expected_sol[4 * i + 3]); expected_sol[4 * i] -= solution[4 * i]; printf ("\n"); } printf ("\nNumber of iterations: %d\n", itercount); i = 4; n /= 4; euclidean_norm = dnrm2 (&n, expected_sol, &i); /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ MKL_Free_Buffers (); if (lexpected_itercount <= itercount <= uexpected_itercount && euclidean_norm < 1.0e-4) { printf ("This example has successfully PASSED through all steps of computation!"); printf ("\n"); return 0; } else { printf ("This example may have FAILED as either the number of iterations differs"); printf ("\nfrom the expected number of iterations %d-", lexpected_itercount); printf ("-%d, or the computed solution\ndiffers much from ", uexpected_itercount); printf ("the expected solution (Euclidean norm is %e), or both.\n", euclidean_norm); return 1; } /*-------------------------------------------------------------------------*/ /* Release internal MKL memory that might be used for computations */ /* NOTE: It is important to call the routine below to avoid memory leaks */ /* unless you disable MKL Memory Manager */ /*-------------------------------------------------------------------------*/ failure:printf ("This example FAILED as the solver has returned the ERROR "); printf ("code %d", rci_request); MKL_Free_Buffers (); return 1; }