C++ (Cpp) mkl_dcsrmv Exemples

Exemple #1

0

Afficher le fichier

Fichier : rhs.cpp Projet : andyras/dynamix

// Right-hand-side callback: fills ydot with the time derivative of the
// wavefunction stored in y, using two sparse matrix-vector products.
//
//   t     current time
//   y     state vector; entries [0, N) are used as Re(psi) and entries
//         [N, 2N) as Im(psi), where N = p->NEQ
//   ydot  output vector with the same split layout
//   data  opaque pointer to the Params struct
//
// Always returns 0.
int RHS_WFN_SPARSE(realtype t, N_Vector y, N_Vector ydot, void * data) {
  Params * p = static_cast<Params *>(data);

  // raw views of the state and derivative arrays
  realtype * state = N_VGetArrayPointer(y);
  realtype * deriv = N_VGetArrayPointer(ydot);

  // A time-dependent Hamiltonian is refreshed once per new, positive time
  // point. This must happen before the array pointers below are taken.
  const bool timeDependent = (p->torsion || p->laser_on);
  if (timeDependent && (t > 0.0) && (t != p->lastTime)) {
    updateHamiltonian(p, t);
    p->lastTime = t;
  }

  // sparse Hamiltonian arrays as handed to MKL: values, column indices and
  // row pointers
  realtype * hVals = &(p->H_sp)[0];
  int * hCols = &(p->H_cols)[0];
  int * hRowPtr = &(p->H_rowind)[0];
  int dim = p->NEQ;

  // MKL call parameters
  char op = 'n';            // no transpose
  double plusOne = 1.0;     // scale for the real part of the derivative
  double minusOne = -1.0;   // scale for the imaginary part of the derivative
  double zero = 0.0;        // beta = 0: output is overwritten, not accumulated
  char descr [6] = {
      'S',   // symmetric matrix
      'L',   // lower triangle
      'N',   // non-unit on diagonal
      'C',   // zero-based indexing (C-style)
      '*', '*'   // extra characters
  };

  // Re(d psi/dt) = H * Im(psi)
  mkl_dcsrmv(&op, &dim, &dim, &plusOne, descr, hVals, hCols,
             hRowPtr, hRowPtr + 1, state + dim, &zero, deriv);

  // Im(d psi/dt) = -H * Re(psi)
  mkl_dcsrmv(&op, &dim, &dim, &minusOne, descr, hVals, hCols,
             hRowPtr, hRowPtr + 1, state, &zero, deriv + dim);

  return 0;
}

Exemple #2

0

Afficher le fichier

Fichier : chols_warm.c Projet : zhuangsc/cholesky

/*
 * Warm up the MKL sparse kernels by running them once on a throw-away
 * DIM x DIM matrix with FILL random entries per row (columns 0..FILL-1).
 *
 * The numerical results are discarded; only the side effect of having
 * called mkl_dcsrmv, mkl_dcsrsv and mkl_cspblas_dcsrgemv matters.
 * If any allocation fails the function frees what it got and returns
 * without calling MKL.
 */
void mkl_warmup(){
	srand48(time(0));
	hbmat_t *t = malloc(sizeof(hbmat_t));
	if ( t == NULL )
		return;
	t->m = DIM; t->n = DIM;
	t->vdiag = NULL;
	int m = t->m;
	/* The d-prefixed MKL routines read alpha/beta through double pointers;
	 * declaring them as int would hand MKL garbage scalars. */
	double alpha = 1.0; double beta = 1.0;
	/* MKL documents matdescra as a 6-element array; spell out all six. */
	char descr_mv[6] = {'G', 'L', 'N', 'C', ' ', ' '};
	char descr_sv[6] = {'T', 'L', 'N', 'C', ' ', ' '};
	int *vptr = t->vptr = malloc((DIM+1) * sizeof(int));
	/* NOTE(review): only DIM*FILL entries are written below; this assumes
	 * FILL <= DIM -- confirm against the definitions of DIM and FILL. */
	int *vpos = t->vpos = malloc((DIM * DIM) *sizeof(int));
	double *vval = t->vval = malloc((DIM*DIM)*sizeof(double));
	double *x = malloc(DIM*sizeof(double));
	/* y is read by mkl_dcsrmv because beta != 0, so it must start defined. */
	double *y = calloc(DIM, sizeof(double));

	if ( vptr != NULL && vpos != NULL && vval != NULL && x != NULL && y != NULL ) {
		vptr[0] = 0;
		int vpos_p = 0;
		puts("warm-up");
		/* Build the CSR structure: every row gets FILL consecutive columns. */
		for ( int i = 1; i <= DIM; ++i ) {
			vptr[i] = vptr[i-1] +  FILL;
			int vp = 0;
			for ( int j = vptr[i-1]; j < vptr[i]; ++j ) {
				vpos[vpos_p] = vp;
				vval[vpos_p] = drand48();
				vp++; vpos_p++;
			}
		}

		for(int i = 0; i < DIM; ++i)
			x[i] = drand48();

		mkl_dcsrmv("N", &m, &m, &alpha, descr_mv, vval, vpos, vptr, vptr+1, x, &beta, y);
		mkl_dcsrsv("N", &m, &alpha, descr_sv, vval, vpos, vptr, vptr+1, x, y);
		mkl_cspblas_dcsrgemv("N", &m, vval, vptr, vpos, x, y);
	}

	free(x); free(y);
	free(vptr); free(vpos); free(vval);
	free(t);
}

Exemple #3

0

Afficher le fichier

Fichier : Stokhos_OpenMP_MKL_CrsMatrix.hpp Projet : 00liujj/trilinos

void multiply(const CrsMatrix< double , Kokkos::OpenMP >& A,
              const Kokkos::View< double* , Kokkos::OpenMP >& x,
              Kokkos::View< double* , Kokkos::OpenMP >& y,
              MKLMultiply tag)
{
  MKL_INT n = A.graph.row_map.dimension_0() - 1 ;
  double *A_values = A.values.ptr_on_device() ;
  MKL_INT *col_indices = A.graph.entries.ptr_on_device() ;
  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.ptr_on_device()) ;
  MKL_INT *row_end = row_beg+1;
  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
  char trans = 'N';
  double alpha = 1.0;
  double beta = 0.0;

  double *x_values = x.ptr_on_device() ;
  double *y_values = y.ptr_on_device() ;

  mkl_dcsrmv(&trans, &n, &n, &alpha, matdescra, A_values, col_indices,
             row_beg, row_end, x_values, &beta, y_values);
}

Exemple #4

0

Afficher le fichier

/* ////////////////////////////////////////////////////////////////////////////
   -- testing sparse matrix vector product
*/
// Benchmark driver for sparse matrix-vector products.
//
// Command line: [ --blocksize B ] [ --alignment A ] matrices...
// where each matrix is either "LAPLACE2D <size>" or a matrix file name.
// For every matrix the SpMV is timed (10 repetitions each) with:
//   - MKL CSR (only when MAGMA_WITH_MKL is defined),
//   - MAGMA CSR (used as the reference result),
//   - MAGMA ELL and SELL-P,
//   - cuSPARSE CSR and HYB,
// and each GPU result is compared against the MAGMA CSR reference.
// Always returns 0.
int main(  int argc, char** argv )
{
    TESTING_INIT();
    magma_queue_t queue;
    magma_queue_create( /*devices[ opts->device ],*/ &queue );

    magma_d_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL;
    hA_SELLP.blocksize = 8;
    hA_SELLP.alignment = 8;
    real_Double_t start, end, res;
    magma_int_t *pntre;

    double c_one  = MAGMA_D_MAKE(1.0, 0.0);
    double c_zero = MAGMA_D_MAKE(0.0, 0.0);
    
    magma_int_t i, j;
    // Parse leading options. An option is only consumed when its value is
    // actually present, so a trailing "--blocksize" cannot read past argv.
    for( i = 1; i < argc; ++i ) {
        if ( strcmp("--blocksize", argv[i]) == 0 && i+1 < argc ) {
            hA_SELLP.blocksize = atoi( argv[++i] );
        } else if ( strcmp("--alignment", argv[i]) == 0 && i+1 < argc ) {
            hA_SELLP.alignment = atoi( argv[++i] );
        } else
            break;
    }
    printf( "\n#    usage: ./run_dspmv"
        " [ --blocksize %d --alignment %d (for SELLP) ]"
        " matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment );

    // Every remaining argument names one test matrix.
    while(  i < argc ) {

        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            magma_dm_5stencil(  laplace_size, &hA, queue );
        } else {                        // file-matrix test
            magma_d_csr_mtx( &hA,  argv[i], queue );
        }

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                            (int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz );

        // one multiply and one add per nonzero, in units of 1e9 operations
        real_Double_t FLOPS = 2.0*hA.nnz/1e9;

        magma_d_vector hx, hy, dx, dy, hrefvec, hcheck;

        // init CPU vectors
        magma_d_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue );
        magma_d_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue );

        // init DEV vectors
        magma_d_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue );
        magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );

        #ifdef MAGMA_WITH_MKL
            // calling MKL with CSR
            // pntre[j] is the end of row j, i.e. the start of row j+1
            pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) );
            pntre[0] = 0;
            for (j=0; j<hA.num_rows; j++ ) {
                pntre[j] = hA.row[j+1];
            }
             MKL_INT num_rows = hA.num_rows;
             MKL_INT num_cols = hA.num_cols;
             MKL_INT nnz = hA.nnz;

            MKL_INT *col;
            TESTING_MALLOC_CPU( col, MKL_INT, nnz );
            for( magma_int_t t=0; t < hA.nnz; ++t ) {
                col[ t ] = hA.col[ t ];
            }
            // row[] is MKL's pntrb (start of each row): it must be copied
            // from the row pointers, not from the column indices.
            MKL_INT *row;
            TESTING_MALLOC_CPU( row, MKL_INT, num_rows );
            for( magma_int_t t=0; t < hA.num_rows; ++t ) {
                row[ t ] = hA.row[ t ];
            }
    
            start = magma_wtime();
            for (j=0; j<10; j++ ) {
                mkl_dcsrmv( "N", &num_rows, &num_cols, 
                            MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), 
                            col, row, pntre, 
                                                    MKL_ADDR(hx.val), 
                            MKL_ADDR(&c_zero),        MKL_ADDR(hy.val) );
            }
            end = magma_wtime();
            printf( "\n > MKL  : %.2e seconds %.2e GFLOP/s    (CSR).\n",
                                            (end-start)/10, FLOPS*10/(end-start) );

            TESTING_FREE_CPU( row );
            TESTING_FREE_CPU( col );
            free(pntre);
        #endif // MAGMA_WITH_MKL

        // copy matrix to GPU
        magma_d_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue );        
        // SpMV on GPU (CSR) -- this is the reference!
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_d_spmv( c_one, dA, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (standard CSR).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        magma_d_mfree(&dA, queue );
        magma_d_vtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue );

        // convert to ELL and copy to GPU
        magma_d_mconvert(  hA, &hA_ELL, Magma_CSR, Magma_ELL, queue );
        magma_d_mtransfer( hA_ELL, &dA_ELL, Magma_CPU, Magma_DEV, queue );
        magma_d_mfree(&hA_ELL, queue );
        magma_d_vfree( &dy, queue );
        magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
        // SpMV on GPU (ELL)
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_d_spmv( c_one, dA_ELL, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (standard ELL).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        magma_d_mfree(&dA_ELL, queue );
        magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        // NOTE(review): the residual below (and in the later checks) is a
        // signed sum, so differences of opposite sign can cancel and a large
        // negative sum passes -- consider accumulating absolute values.
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]);
        if ( res < .000001 )
            printf("# tester spmv ELL:  ok\n");
        else
            printf("# tester spmv ELL:  failed\n");
        magma_d_vfree( &hcheck, queue );

        // convert to SELLP and copy to GPU
        magma_d_mconvert(  hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue );
        magma_d_mtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue );
        magma_d_mfree(&hA_SELLP, queue );
        magma_d_vfree( &dy, queue );
        magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
        // SpMV on GPU (SELLP)
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_d_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (SELLP).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );

        magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv SELL-P:  ok\n");
        else
            printf("# tester spmv SELL-P:  failed\n");
        magma_d_vfree( &hcheck, queue );

        magma_d_mfree(&dA_SELLP, queue );


        // SpMV on GPU (CUSPARSE - CSR)
        // CUSPARSE context //

        cusparseHandle_t cusparseHandle = 0;
        cusparseStatus_t cusparseStatus;
        cusparseStatus = cusparseCreate(&cusparseHandle);
        cusparseSetStream( cusparseHandle, queue );

        cusparseMatDescr_t descr = 0;
        cusparseStatus = cusparseCreateMatDescr(&descr);

        cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
        cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);
        double alpha = c_one;
        double beta = c_zero;
        magma_d_vfree( &dy, queue );
        magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );

        // copy matrix to GPU
        magma_d_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue );

        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            cusparseStatus =
            cusparseDcsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, 
                        hA.num_rows, hA.num_cols, hA.nnz, &alpha, descr, 
                        dA.dval, dA.drow, dA.dcol, dx.dval, &beta, dy.dval);
        end = magma_sync_wtime( queue );
        if (cusparseStatus != 0)    printf("error in cuSPARSE CSR\n");
        printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s    (CSR).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        cusparseMatDescr_t descrA;
        cusparseStatus = cusparseCreateMatDescr(&descrA);
         if (cusparseStatus != 0)    printf("error\n");
        cusparseHybMat_t hybA;
        cusparseStatus = cusparseCreateHybMat( &hybA );
         if (cusparseStatus != 0)    printf("error\n");

        magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv cuSPARSE CSR:  ok\n");
        else
            printf("# tester spmv cuSPARSE CSR:  failed\n");
        magma_d_vfree( &hcheck, queue );
        magma_d_vfree( &dy, queue );
        magma_d_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
       
        cusparseDcsr2hyb(cusparseHandle,  hA.num_rows, hA.num_cols,
                        descrA, dA.dval, dA.drow, dA.dcol,
                        hybA, 0, CUSPARSE_HYB_PARTITION_AUTO);

        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            cusparseStatus =
            cusparseDhybmv( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, 
               &alpha, descrA, hybA,
               dx.dval, &beta, dy.dval);
        end = magma_sync_wtime( queue );
        if (cusparseStatus != 0)    printf("error in cuSPARSE HYB\n");
        printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s    (HYB).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );

        magma_d_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_D_REAL(hcheck.val[k]) - MAGMA_D_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv cuSPARSE HYB:  ok\n");
        else
            printf("# tester spmv cuSPARSE HYB:  failed\n");
        magma_d_vfree( &hcheck, queue );

        // release both descriptors; descr is created anew for every matrix
        // and would otherwise leak on each loop iteration
        cusparseDestroyMatDescr( descrA );
        cusparseDestroyMatDescr( descr );
        cusparseDestroyHybMat( hybA );
        cusparseDestroy( cusparseHandle );

        magma_d_mfree(&dA, queue );



        printf("\n\n");


        // free CPU memory
        magma_d_mfree(&hA, queue );
        magma_d_vfree(&hx, queue );
        magma_d_vfree(&hy, queue );
        magma_d_vfree(&hrefvec, queue );
        // free GPU memory
        magma_d_vfree(&dx, queue );
        magma_d_vfree(&dy, queue );

        i++;

    }
    
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return 0;
}

Exemple #5

-1

Afficher le fichier

Fichier : cg_ssor_precon_c.c Projet : sd-omkar/ece999

/*---------------------------------------------------------------------------*/
/*
 * Example driver: solves a 100x100 sparse symmetric system with the MKL
 * RCI (P)CG solver using an SSOR preconditioner.
 *
 * The right-hand side is built as A * expected_sol (expected_sol is all ones),
 * the solver is driven through its reverse-communication interface, and the
 * result is accepted when the iteration count lies in
 * [lexpected_itercount, uexpected_itercount] and the error norm is < 1e-4.
 *
 * Returns 0 on success, 1 on a solver error or a failed acceptance check.
 */
int
main (void)
{
	/*---------------------------------------------------------------------------*/
  /* Define arrays for the upper triangle of the coefficient matrix and        */
  /* preconditioner as well as an array for rhs vector                         */
  /* Compressed sparse row storage is used for sparse representation           */
	/*---------------------------------------------------------------------------*/
  MKL_INT n = 100, rci_request, itercount, lexpected_itercount = 15,
    uexpected_itercount = 19, i;
  double rhs[100];
  MKL_INT ia[100 + 1];
  MKL_INT ja[100 - 1];
  double a[100 - 1], a1[100 - 1];
	/*---------------------------------------------------------------------------*/
  /* Allocate storage for the solver ?par and temporary storage tmp            */
	/*---------------------------------------------------------------------------*/
  MKL_INT length = 128;
  MKL_INT ipar[128];
  double dpar[128], tmp[4 * 100];
	/*---------------------------------------------------------------------------*/
  /* Some additional variables to use with the RCI (P)CG solver                */
  /* OMEGA is the relaxation parameter, NITER_SSOR is the maximum number of    */
  /* iterations for the SSOR preconditioner                                    */
	/*---------------------------------------------------------------------------*/
  double solution[100];
  double expected_sol[100];
  double omega = 0.5E0, one = 1.E0, zero = 0.E0, om = 1.E0 - omega;
  double euclidean_norm, temp[100];
  MKL_INT niter_ssor = 20;
  char matdes[6];
  char tr = 'n';
  double eone = -1.E0;
  MKL_INT ione = 1;

	/*---------------------------------------------------------------------------*/
  /* Initialize the coefficient matrix and expected solution                     */
	/*---------------------------------------------------------------------------*/
  for (i = 0; i < n; i++)
    expected_sol[i] = 1.E0;

  /* One stored entry per row i (one-based column i+2), value 0.5; the last    */
  /* row stores nothing, hence ia[n] == ia[n-1].                               */
  for (i = 0; i < n - 1; i++)
    {
      ja[i] = i + 2;
      ia[i] = i + 1;
      a[i] = 0.5E0;
      a1[i] = omega * a[i];
    }
  ia[n - 1] = n;
  ia[n] = ia[n - 1];
  /* symmetric, upper triangle, unit diagonal, one-based (Fortran) indexing    */
  matdes[0] = 's';
  matdes[1] = 'u';
  matdes[2] = 'u';
  matdes[3] = 'f';

	/*---------------------------------------------------------------------------*/
  /* Initialize vectors rhs, temp, and tmp[n:2*n-1] with zeros as mkl_dcsrmv   */
  /* routine does not set NAN to zero. Thus, if any of the values in the       */
  /* vectors above accidentally happens to be NAN, the example will fail       */
  /* to complete.                                                              */
  /* Initialize the right hand side through matrix-vector product              */
	/*---------------------------------------------------------------------------*/
  for (i = 0; i < n; i++)
    {
      rhs[i] = zero;
      temp[i] = zero;
      tmp[n + i] = zero;
    }
  mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], expected_sol,
	      &zero, rhs);
	/*---------------------------------------------------------------------------*/
  /* Initialize the initial guess                                              */
	/*---------------------------------------------------------------------------*/
  for (i = 0; i < n; i++)
    solution[i] = zero;
	/*---------------------------------------------------------------------------*/
  /* Initialize the solver                                                     */
	/*---------------------------------------------------------------------------*/
  dcg_init (&n, solution, rhs, &rci_request, ipar, dpar, tmp);
  if (rci_request != 0)
    goto failure;
	/*---------------------------------------------------------------------------*/
  /* Set the desired parameters:                                               */
  /* INTEGER parameters:                                                       */
  /* set the maximal number of iterations to 100                               */
  /* LOGICAL parameters:                                                       */
  /* run the Preconditioned version of RCI (P)CG with preconditioner C_inverse */
  /* DOUBLE parameters                                                         */
  /* -                                                                         */
	/*---------------------------------------------------------------------------*/
  ipar[4] = 100;
  ipar[10] = 1;
	/*---------------------------------------------------------------------------*/
  /* Check the correctness and consistency of the newly set parameters         */
	/*---------------------------------------------------------------------------*/
  dcg_check (&n, solution, rhs, &rci_request, ipar, dpar, tmp);
  if (rci_request != 0)
    goto failure;
	/*---------------------------------------------------------------------------*/
  /* Compute the solution by RCI (P)CG solver                                  */
  /* Reverse Communications starts here                                        */
	/*---------------------------------------------------------------------------*/
rci:dcg (&n, solution, rhs, &rci_request, ipar, dpar, tmp);
	/*---------------------------------------------------------------------------*/
  /* If rci_request=0, then the solution was found according to the requested  */
  /* stopping tests. In this case, this means that it was found after 100      */
  /* iterations.                                                               */
	/*---------------------------------------------------------------------------*/
  if (rci_request == 0)
    goto getsln;
	/*---------------------------------------------------------------------------*/
  /* If rci_request=1, then compute the vector A*tmp[0]                        */
  /* and put the result in vector tmp[n]                                       */
	/*---------------------------------------------------------------------------*/
  if (rci_request == 1)
    {
      matdes[0] = 's';
      mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], tmp, &zero,
		  &tmp[n]);
      goto rci;
    }
	/*---------------------------------------------------------------------------*/
  /* If rci_request=2, then do the user-defined stopping test: compute the     */
  /* Euclidean norm of the actual residual using MKL routines and check if     */
  /* it is less than 1.E-6                                                     */
	/*---------------------------------------------------------------------------*/
  if (rci_request == 2)
    {
      matdes[0] = 's';
      mkl_dcsrmv (&tr, &n, &n, &one, matdes, a, ja, ia, &ia[1], solution,
		  &zero, temp);
      daxpy (&n, &eone, rhs, &ione, temp, &ione);
      euclidean_norm = dnrm2 (&n, temp, &ione);
		/*---------------------------------------------------------------------------*/
      /* The solution has not been found yet according to the user-defined stopping */
      /* test. Continue RCI (P)CG iterations.                                      */
		/*---------------------------------------------------------------------------*/
      if (euclidean_norm > 1.E-6)
	goto rci;
		/*---------------------------------------------------------------------------*/
      /* The solution has been found according to the user-defined stopping test   */
		/*---------------------------------------------------------------------------*/
      else
	goto getsln;
    }
	/*---------------------------------------------------------------------------*/
  /* If rci_request=3, then  apply the simplest SSOR preconditioning           */
  /* on vector tmp[2*n] and put the result in vector tmp[3*n]                  */
	/*---------------------------------------------------------------------------*/
  if (rci_request == 3)
    {
      dcopy (&n, &tmp[2 * n], &ione, &tmp[3 * n], &ione);
      matdes[0] = 't';
      for (i = 1; i <= niter_ssor; i++)
	{
	  dcopy (&n, &tmp[2 * n], &ione, temp, &ione);
	  matdes[2] = 'n';
	  tr = 'n';
	  mkl_dcsrmv (&tr, &n, &n, &eone, matdes, a1, ja, ia, &ia[1],
		      &tmp[3 * n], &omega, temp);
	  daxpy (&n, &om, &tmp[3 * n], &ione, temp, &ione);
	  matdes[2] = 'u';
	  tr = 't';
	  mkl_dcsrsv (&tr, &n, &one, matdes, a1, ja, ia, &ia[1], temp,
		      &tmp[3 * n]);
	}
      goto rci;
    }
	/*---------------------------------------------------------------------------*/
  /* If rci_request=anything else, then dcg subroutine failed                  */
  /* to compute the solution vector: solution[n]                               */
	/*---------------------------------------------------------------------------*/
  goto failure;
	/*---------------------------------------------------------------------------*/
  /* Reverse Communication ends here                                           */
  /* Get the current iteration number into itercount                           */
	/*---------------------------------------------------------------------------*/
getsln:dcg_get (&n, solution, rhs, &rci_request, ipar, dpar, tmp,
	   &itercount);
	/*---------------------------------------------------------------------------*/
  /* Print solution vector: solution[n] and number of iterations: itercount    */
	/*---------------------------------------------------------------------------*/
  printf ("The system has been solved\n");
  printf ("The following solution obtained\n");
  for (i = 0; i < n / 4; i++)
    {
      printf ("%6.3f  %6.3f  %6.3f  %6.3f", solution[4 * i],
	      solution[4 * i + 1], solution[4 * i + 2], solution[4 * i + 3]);
      printf ("\n");
    }
  printf ("\nExpected solution is\n");
  for (i = 0; i < n / 4; i++)
    {
      printf ("%6.3f  %6.3f  %6.3f  %6.3f", expected_sol[4 * i],
	      expected_sol[4 * i + 1], expected_sol[4 * i + 2],
	      expected_sol[4 * i + 3]);
      /* only every 4th entry is turned into an error value; the strided     */
      /* dnrm2 call below reads exactly those entries                        */
      expected_sol[4 * i] -= solution[4 * i];
      printf ("\n");
    }

  /* MKL_INT may be wider than int, so cast before printing with %d          */
  printf ("\nNumber of iterations: %d\n", (int) itercount);
  i = 4;
  n /= 4;
  euclidean_norm = dnrm2 (&n, expected_sol, &i);

	/*-------------------------------------------------------------------------*/
  /* Release internal MKL memory that might be used for computations         */
  /* NOTE: It is important to call the routine below to avoid memory leaks   */
  /* unless you disable MKL Memory Manager                                   */
	/*-------------------------------------------------------------------------*/
  MKL_Free_Buffers ();

  /* C does not chain comparisons: "a <= b <= c" compares the 0/1 result of  */
  /* "a <= b" with c, so both bounds must be tested explicitly.              */
  if (lexpected_itercount <= itercount && itercount <= uexpected_itercount
      && euclidean_norm < 1.0e-4)
    {
      printf
	("This example has successfully PASSED through all steps of computation!");
      printf ("\n");
      return 0;
    }
  else
    {
      printf
	("This example may have FAILED as either the number of iterations differs");
      printf ("\nfrom the expected number of iterations %d-",
	      (int) lexpected_itercount);
      printf ("-%d, or the computed solution\ndiffers much from ",
	      (int) uexpected_itercount);
      printf ("the expected solution (Euclidean norm is %e), or both.\n",
	      euclidean_norm);
      return 1;
    }
	/*-------------------------------------------------------------------------*/
  /* Release internal MKL memory that might be used for computations         */
  /* NOTE: It is important to call the routine below to avoid memory leaks   */
  /* unless you disable MKL Memory Manager                                   */
	/*-------------------------------------------------------------------------*/
failure:printf
    ("This example FAILED as the solver has returned the ERROR ");
  printf ("code %d", (int) rci_request);
  MKL_Free_Buffers ();
  return 1;
}