示例#1
0
/*! \brief Check the inf-norm of the error vector
 *
 * For every right-hand side j in [0, nrhs) this routine scans the n entries
 * of column j of x and xtrue (columns start at x[j*ldx] and
 * xtrue[j*ldxtrue]) and records
 *   - the largest slud_z_abs() of x[i] - xtrue[i], and
 *   - the largest slud_z_abs() of x[i].
 * Both maxima are then reduced with MPI_MAX over grid->comm and the quotient
 * is printed by the process whose iam is 0.
 *
 * MPI_Allreduce is collective, so every process in grid->comm has to call
 * this routine with the same nrhs.
 *
 * No guard is applied when the reduced norm of x is zero; the quotient is
 * printed exactly as the floating-point division produces it.
 */
void pzinf_norm_error(int iam, int_t n, int_t nrhs, doublecomplex x[], int_t ldx,
		      doublecomplex xtrue[], int_t ldxtrue, gridinfo_t *grid) 
{
    double local_max[2];   /* [0] = local error max, [1] = local max of |x| */
    double global_max[2];  /* same layout, after the MPI_MAX reduction      */
    double err, xnorm;
    doublecomplex *x_work, *xtrue_work;
    doublecomplex temp;
    int_t i, j;  /* same type as n / nrhs so the counters cannot overflow
                    before reaching the bound when int_t is wider than int */

    for (j = 0; j < nrhs; j++) {
        x_work = &x[j*ldx];
        xtrue_work = &xtrue[j*ldxtrue];
        err = xnorm = 0.0;
        for (i = 0; i < n; i++) {
            z_sub(&temp, &x_work[i], &xtrue_work[i]);
            err = SUPERLU_MAX(err, slud_z_abs(&temp));
            xnorm = SUPERLU_MAX(xnorm, slud_z_abs(&x_work[i]));
        }

        /* Get the global max err & xnorm with a single element-wise
           reduction instead of two separate collectives. */
        local_max[0] = err;
        local_max[1] = xnorm;
        MPI_Allreduce( local_max, global_max, 2, MPI_DOUBLE, MPI_MAX, grid->comm);

        err = global_max[0] / global_max[1];
        if ( !iam ) printf("\tSol %2d: ||X-Xtrue||/||X|| = %e\n", (int) j, err);
    }
}
示例#2
0
/*! \brief Check the inf-norm of the error vector
 *
 * For every right-hand side j in [0, nrhs) this routine scans the n entries
 * of column j of x and xtrue (columns start at x[j*ldx] and
 * xtrue[j*ldxtrue]), takes the largest slud_z_abs() of x[i] - xtrue[i] and
 * the largest slud_z_abs() of x[i], and prints their quotient.
 *
 * The grid argument is accepted but not referenced: no communication is
 * performed, and every process that calls the routine prints its own line.
 *
 * No guard is applied when the norm of x is zero; the quotient is printed
 * exactly as the floating-point division produces it.
 */
void zinf_norm_error_dist(int_t n, int_t nrhs, doublecomplex *x, int_t ldx,
                          doublecomplex *xtrue, int_t ldxtrue,
                          gridinfo_t *grid)
{
    double err, xnorm;
    doublecomplex *x_work, *xtrue_work;
    doublecomplex temp;
    int_t i, j;  /* same type as n / nrhs so the counters cannot overflow
                    before reaching the bound when int_t is wider than int */

    for (j = 0; j < nrhs; j++) {
        x_work = &x[j*ldx];
        xtrue_work = &xtrue[j*ldxtrue];
        err = xnorm = 0.0;
        for (i = 0; i < n; i++) {
            z_sub(&temp, &x_work[i], &xtrue_work[i]);
            err = SUPERLU_MAX(err, slud_z_abs(&temp));
            xnorm = SUPERLU_MAX(xnorm, slud_z_abs(&x_work[i]));
        }
        err = err / xnorm;
        printf("\tRHS %2d: ||X-Xtrue||/||X|| = %e\n", (int) j, err);
    }
}
示例#3
0
/*
 * Performs sparse matrix-vector multiplication.
 *
 * Outline of what the routine does:
 *   1. Copies x[ind_torecv[i] - fst_row], i = 0 .. TotalValSend-1, into the
 *      send buffer gsmv_comm->val_tosend.
 *   2. For every process p of the grid, posts a non-blocking send of
 *      RecvCounts[p] values (tagged with this process's iam) and a
 *      non-blocking receive of SendCounts[p] values (tagged with p) into
 *      gsmv_comm->val_torecv. Zero-sized messages are skipped.
 *   3. While the messages are in flight, accumulates for each local row i
 *      the entries j in [rowptr[i], extern_start[i]), reading the operand
 *      from x[colind[j]].
 *   4. Waits for every request that was posted.
 *   5. Accumulates the remaining entries j in [extern_start[i], rowptr[i+1]),
 *      reading the operand from val_torecv[colind[j]].
 *
 * When abs is non-zero the products are formed from slud_z_abs1() of both
 * factors and the m_loc results are stored as doubles through a
 * (double *) view of ax, i.e. ax is NOT filled with doublecomplex values in
 * that mode. Otherwise ax[i] receives the complex sum for row i.
 */
void
pzgsmv
(
 int_t  abs,               /* Input. Do abs(A)*abs(x). */
 SuperMatrix *A_internal,  /* Input. Matrix A permuted by columns.
			      The column indices are translated into
			      the relative positions in the gathered x-vector.
			      The type of A can be:
			      Stype = NR_loc; Dtype = SLU_Z; Mtype = GE. */
 gridinfo_t *grid,         /* Input */
 pzgsmv_comm_t *gsmv_comm, /* Input. The data structure for communication. */
 doublecomplex x[],       /* Input. The distributed source vector */
 doublecomplex ax[]       /* Output. The distributed destination vector */
)
{
    NRformat_loc *Astore;
    int iam, procs;
    int_t i, j, p, m_loc, fst_row, jcol;
    int_t *colind, *rowptr;
    int   *SendCounts, *RecvCounts;
    int_t *ind_torecv, *ptr_ind_tosend, *ptr_ind_torecv;
    int_t *extern_start, TotalValSend;
    doublecomplex *nzval, *val_tosend, *val_torecv;
    doublecomplex zero = {0.0, 0.0}, temp;
    double *ax_abs = (double *) ax; /* real-valued view used when abs != 0 */
    MPI_Request *send_req, *recv_req;
    MPI_Status status;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pzgsmv()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    iam = grid->iam;
    procs = grid->nprow * grid->npcol;
    Astore = (NRformat_loc *) A_internal->Store;
    m_loc = Astore->m_loc;
    fst_row = Astore->fst_row;
    colind = Astore->colind;
    rowptr = Astore->rowptr;
    nzval = (doublecomplex *) Astore->nzval;
    extern_start = gsmv_comm->extern_start;
    ind_torecv = gsmv_comm->ind_torecv;
    ptr_ind_tosend = gsmv_comm->ptr_ind_tosend;
    ptr_ind_torecv = gsmv_comm->ptr_ind_torecv;
    SendCounts = gsmv_comm->SendCounts;
    RecvCounts = gsmv_comm->RecvCounts;
    val_tosend = (doublecomplex *) gsmv_comm->val_tosend;
    val_torecv = (doublecomplex *) gsmv_comm->val_torecv;
    TotalValSend = gsmv_comm->TotalValSend;

    /* ------------------------------------------------------------
       COPY THE X VALUES INTO THE SEND BUFFER.
       ------------------------------------------------------------*/
    for (i = 0; i < TotalValSend; ++i) {
        j = ind_torecv[i] - fst_row; /* Relative index in x[] */
        val_tosend[i] = x[j];
    }

    /* ------------------------------------------------------------
       COMMUNICATE THE X VALUES.
       ------------------------------------------------------------*/
    /* One allocation holds both request arrays: send_req occupies the
       first procs slots and recv_req the following procs slots. */
    if ( !(send_req = (MPI_Request *)
           SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))))
        ABORT("Malloc fails for recv_req[].");
    recv_req = send_req + procs;
    for (p = 0; p < procs; ++p) {
        /* Only the slots with a non-zero count are initialized here; the
           wait loops below test the same counts before touching a slot. */
        if ( RecvCounts[p] ) {
            MPI_Isend(&val_tosend[ptr_ind_torecv[p]], RecvCounts[p],
                      SuperLU_MPI_DOUBLE_COMPLEX, p, iam,
                      grid->comm, &send_req[p]);
        }
        if ( SendCounts[p] ) {
            MPI_Irecv(&val_torecv[ptr_ind_tosend[p]], SendCounts[p],
                      SuperLU_MPI_DOUBLE_COMPLEX, p, p,
                      grid->comm, &recv_req[p]);
        }
    }

    /* ------------------------------------------------------------
       PERFORM THE ACTUAL MULTIPLICATION.
       ------------------------------------------------------------*/
    if ( abs ) { /* Perform abs(A)*abs(x) */
        /* Multiply the local part. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
            ax_abs[i] = 0.0;
            for (j = rowptr[i]; j < extern_start[i]; ++j) {
                jcol = colind[j];
                ax_abs[i] += slud_z_abs1(&nzval[j]) * slud_z_abs1(&x[jcol]);
            }
        }

        /* The received values are needed from here on. */
        for (p = 0; p < procs; ++p) {
            if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status);
            if ( SendCounts[p] ) MPI_Wait(&recv_req[p], &status);
        }

        /* Multiply the external part.
           The x operand is measured with slud_z_abs1(), exactly as in the
           local part, so that the result for a given A and x does not
           depend on which x entries happen to be local and which are
           received from other processes. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
            for (j = extern_start[i]; j < rowptr[i+1]; ++j) {
                jcol = colind[j];
                ax_abs[i] += slud_z_abs1(&nzval[j]) * slud_z_abs1(&val_torecv[jcol]);
            }
        }
    } else {
        /* Multiply the local part. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
            ax[i] = zero;
            for (j = rowptr[i]; j < extern_start[i]; ++j) {
                jcol = colind[j];
                zz_mult(&temp, &nzval[j], &x[jcol]);
                z_add(&ax[i], &ax[i], &temp);
            }
        }

        /* The received values are needed from here on. */
        for (p = 0; p < procs; ++p) {
            if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status);
            if ( SendCounts[p] ) MPI_Wait(&recv_req[p], &status);
        }

        /* Multiply the external part. */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
            for (j = extern_start[i]; j < rowptr[i+1]; ++j) {
                jcol = colind[j];
                zz_mult(&temp, &nzval[j], &val_torecv[jcol]);
                z_add(&ax[i], &ax[i], &temp);
            }
        }
    }

    /* Frees recv_req as well: it points into the same allocation. */
    SUPERLU_FREE(send_req);
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit pzgsmv()");
#endif

} /* PZGSMV */