/*! \brief Check the inf-norm of the error vector */
void pzinf_norm_error(int iam, int_t n, int_t nrhs, doublecomplex x[],
                      int_t ldx, doublecomplex xtrue[], int_t ldxtrue,
                      gridinfo_t *grid)
{
    double err, xnorm, temperr, tempxnorm;
    doublecomplex *x_work, *xtrue_work;
    doublecomplex temp;
    int i, j;

    for (j = 0; j < nrhs; j++) {
        x_work = &x[j*ldx];
        xtrue_work = &xtrue[j*ldxtrue];
        err = xnorm = 0.0;
        for (i = 0; i < n; i++) {
            z_sub(&temp, &x_work[i], &xtrue_work[i]);
            err = SUPERLU_MAX(err, slud_z_abs(&temp));
            xnorm = SUPERLU_MAX(xnorm, slud_z_abs(&x_work[i]));
        }

        /* get the global max err & xnorm */
        temperr = err;
        tempxnorm = xnorm;
        MPI_Allreduce(&temperr, &err, 1, MPI_DOUBLE, MPI_MAX, grid->comm);
        MPI_Allreduce(&tempxnorm, &xnorm, 1, MPI_DOUBLE, MPI_MAX, grid->comm);

        err = err / xnorm;
        if ( !iam ) printf("\tSol %2d: ||X-Xtrue||/||X|| = %e\n", j, err);
    }
}
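/*
 * Illustrative sketch (not part of the library): how a driver might call
 * pzinf_norm_error() after a solve.  The names check_solution_sketch, b,
 * and m_loc are assumptions for illustration; b is taken to hold the local
 * rows of the computed solution X, and xtrue the matching rows of the known
 * true solution.
 */
#include "superlu_zdefs.h"

static void check_solution_sketch(int iam, int_t m_loc, int_t nrhs,
                                  doublecomplex *b, int_t ldb,
                                  doublecomplex *xtrue, int_t ldx,
                                  gridinfo_t *grid)
{
    /* Prints, for each right-hand side j, the globally reduced ratio
       max_i |X(i,j) - Xtrue(i,j)| / max_i |X(i,j)| (process 0 only). */
    pzinf_norm_error(iam, m_loc, nrhs, b, ldb, xtrue, ldx, grid);
}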
/*! \brief Check the inf-norm of the error vector */
void zinf_norm_error_dist(int_t n, int_t nrhs, doublecomplex *x, int_t ldx,
                          doublecomplex *xtrue, int_t ldxtrue,
                          gridinfo_t *grid)
{
    double err, xnorm;
    doublecomplex *x_work, *xtrue_work;
    doublecomplex temp;
    int i, j;

    for (j = 0; j < nrhs; j++) {
        x_work = &x[j*ldx];
        xtrue_work = &xtrue[j*ldxtrue];
        err = xnorm = 0.0;
        for (i = 0; i < n; i++) {
            z_sub(&temp, &x_work[i], &xtrue_work[i]);
            err = SUPERLU_MAX(err, slud_z_abs(&temp));
            xnorm = SUPERLU_MAX(xnorm, slud_z_abs(&x_work[i]));
        }
        err = err / xnorm;
        printf("\tRHS %2d: ||X-Xtrue||/||X|| = %e\n", j, err);
    }
}
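/*
 * Sketch (assumption, not library code): the difference between the two
 * checks above.  pzinf_norm_error() reduces the local numerator and
 * denominator with MPI_MAX over grid->comm before dividing, so the printed
 * value is
 *
 *     max_p max_i |X_p(i,j) - Xtrue_p(i,j)|  /  max_p max_i |X_p(i,j)|
 *
 * with p ranging over all processes, whereas zinf_norm_error_dist() divides
 * the purely local maxima and every process prints its own ratio.  The
 * reduction step in isolation looks like the helper below (global_max is a
 * hypothetical name).
 */
#include <mpi.h>

static double global_max(double local, MPI_Comm comm)
{
    double global;
    /* Every rank contributes its local maximum; all ranks receive the
       largest value over the communicator. */
    MPI_Allreduce(&local, &global, 1, MPI_DOUBLE, MPI_MAX, comm);
    return global;
}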
/*
 * Performs sparse matrix-vector multiplication.
 */
void pzgsmv
(
 int_t  abs,               /* Input. Do abs(A)*abs(x). */
 SuperMatrix *A_internal,  /* Input. Matrix A permuted by columns.
                              The column indices are translated into
                              the relative positions in the gathered x-vector.
                              The type of A can be:
                              Stype = NR_loc; Dtype = SLU_Z; Mtype = GE. */
 gridinfo_t *grid,         /* Input */
 pzgsmv_comm_t *gsmv_comm, /* Input. The data structure for communication. */
 doublecomplex x[],        /* Input. The distributed source vector */
 doublecomplex ax[]        /* Output. The distributed destination vector */
)
{
    NRformat_loc *Astore;
    int iam, procs;
    int_t i, j, p, m, m_loc, n, fst_row, jcol;
    int_t *colind, *rowptr;
    int *SendCounts, *RecvCounts;
    int_t *ind_tosend, *ind_torecv, *ptr_ind_tosend, *ptr_ind_torecv;
    int_t *extern_start, TotalValSend;
    doublecomplex *nzval, *val_tosend, *val_torecv;
    doublecomplex zero = {0.0, 0.0}, temp;
    double *ax_abs = (double *) ax;
    MPI_Request *send_req, *recv_req;
    MPI_Status status;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pzgsmv()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    iam = grid->iam;
    procs = grid->nprow * grid->npcol;
    Astore = (NRformat_loc *) A_internal->Store;
    m = A_internal->nrow;
    n = A_internal->ncol;
    m_loc = Astore->m_loc;
    fst_row = Astore->fst_row;
    colind = Astore->colind;
    rowptr = Astore->rowptr;
    nzval = (doublecomplex *) Astore->nzval;
    extern_start = gsmv_comm->extern_start;
    ind_torecv = gsmv_comm->ind_torecv;
    ptr_ind_tosend = gsmv_comm->ptr_ind_tosend;
    ptr_ind_torecv = gsmv_comm->ptr_ind_torecv;
    SendCounts = gsmv_comm->SendCounts;
    RecvCounts = gsmv_comm->RecvCounts;
    val_tosend = (doublecomplex *) gsmv_comm->val_tosend;
    val_torecv = (doublecomplex *) gsmv_comm->val_torecv;
    TotalValSend = gsmv_comm->TotalValSend;

    /* ------------------------------------------------------------
       COPY THE X VALUES INTO THE SEND BUFFER.
       ------------------------------------------------------------*/
    for (i = 0; i < TotalValSend; ++i) {
        j = ind_torecv[i] - fst_row; /* Relative index in x[] */
        val_tosend[i] = x[j];
    }

    /* ------------------------------------------------------------
       COMMUNICATE THE X VALUES.
       ------------------------------------------------------------*/
    if ( !(send_req = (MPI_Request *)
           SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))))
        ABORT("Malloc fails for send_req[].");
    recv_req = send_req + procs;
    for (p = 0; p < procs; ++p) {
        if ( RecvCounts[p] ) {
            MPI_Isend(&val_tosend[ptr_ind_torecv[p]], RecvCounts[p],
                      SuperLU_MPI_DOUBLE_COMPLEX, p, iam,
                      grid->comm, &send_req[p]);
        }
        if ( SendCounts[p] ) {
            MPI_Irecv(&val_torecv[ptr_ind_tosend[p]], SendCounts[p],
                      SuperLU_MPI_DOUBLE_COMPLEX, p, p,
                      grid->comm, &recv_req[p]);
        }
    }

    /* ------------------------------------------------------------
       PERFORM THE ACTUAL MULTIPLICATION.
       ------------------------------------------------------------*/
    if ( abs ) { /* Perform abs(A)*abs(x) */
        /* Multiply the local part. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
            ax_abs[i] = 0.0;
            for (j = rowptr[i]; j < extern_start[i]; ++j) {
                jcol = colind[j];
                ax_abs[i] += slud_z_abs1(&nzval[j]) * slud_z_abs1(&x[jcol]);
            }
        }

        for (p = 0; p < procs; ++p) {
            if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status);
            if ( SendCounts[p] ) MPI_Wait(&recv_req[p], &status);
        }

        /* Multiply the external part. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
            for (j = extern_start[i]; j < rowptr[i+1]; ++j) {
                jcol = colind[j];
                ax_abs[i] += slud_z_abs1(&nzval[j]) * slud_z_abs(&val_torecv[jcol]);
            }
        }
    } else {
        /* Multiply the local part. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
            ax[i] = zero;
            for (j = rowptr[i]; j < extern_start[i]; ++j) {
                jcol = colind[j];
                zz_mult(&temp, &nzval[j], &x[jcol]);
                z_add(&ax[i], &ax[i], &temp);
            }
        }

        for (p = 0; p < procs; ++p) {
            if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status);
            if ( SendCounts[p] ) MPI_Wait(&recv_req[p], &status);
        }

        /* Multiply the external part. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
            for (j = extern_start[i]; j < rowptr[i+1]; ++j) {
                jcol = colind[j];
                zz_mult(&temp, &nzval[j], &val_torecv[jcol]);
                z_add(&ax[i], &ax[i], &temp);
            }
        }
    }

    SUPERLU_FREE(send_req);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit pzgsmv()");
#endif
} /* PZGSMV */
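/*
 * Illustrative driver sketch (assumption, not library code): typical use of
 * pzgsmv().  It relies on the companion routines pzgsmv_init() and
 * pzgsmv_finalize(), assumed to be available from the same library; they set
 * up and free the gsmv_comm communication structure, and pzgsmv_init() also
 * translates the column indices of A into the gathered-x positions that
 * pzgsmv() expects.  The names spmv_sketch and row_to_proc are assumptions
 * for illustration.
 */
#include "superlu_zdefs.h"

static void spmv_sketch(SuperMatrix *A,     /* NR_loc, SLU_Z, GE; modified by init */
                        int_t *row_to_proc, /* global row -> owning process        */
                        gridinfo_t *grid,
                        doublecomplex *x,   /* local part of the source vector     */
                        doublecomplex *ax)  /* local part of the product A*x       */
{
    pzgsmv_comm_t gsmv_comm;

    pzgsmv_init(A, row_to_proc, grid, &gsmv_comm); /* build send/recv index lists  */
    pzgsmv(0, A, grid, &gsmv_comm, x, ax);         /* ax := A*x; abs = 1 would
                                                      compute abs(A)*abs(x) and
                                                      store doubles in ax          */
    pzgsmv_finalize(&gsmv_comm);                   /* free the communication data  */
}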