/*
 * f
 *
 * Right-hand side evaluation: fills udot from u at time t.
 *
 *   t         - current value of the independent variable
 *   u         - state vector (hypre-backed N_Vector)
 *   udot      - output vector receiving the right-hand sides
 *   user_data - opaque pointer, interpreted as UserData
 *
 * The work is done in two steps: ucomm() performs the inter-processor
 * communication for u, then fcalc() computes all right-hand sides from the
 * local data arrays.  Always returns 0.
 */
static int f(realtype t, N_Vector u, N_Vector udot, void *user_data)
{
  UserData data = (UserData) user_data;

  /* Underlying hypre vectors and their local data arrays */
  HYPRE_ParVector u_par    = N_VGetVector_ParHyp(u);
  HYPRE_ParVector udot_par = N_VGetVector_ParHyp(udot);
  realtype *u_vals    = hypre_VectorData(hypre_ParVectorLocalVector(u_par));
  realtype *udot_vals = hypre_VectorData(hypre_ParVectorLocalVector(udot_par));

  /* Inter-processor communication first, then the local evaluation */
  ucomm(t, u, data);
  fcalc(t, u_vals, udot_vals, data);

  return(0);
}
/*
 * HYPRE_ParCSRMLSolve
 *
 * Passes the local data of b (right-hand side) and x (solution) to
 * ML_Solve_AMGV together with the ML object stored in the solver handle.
 *
 *   solver - handle that is really an MH_Link; its ml_ptr member is used
 *   A      - not referenced by this routine
 *   b      - right-hand side vector (local data is read)
 *   x      - solution vector (local data is passed as the output array)
 *
 * Returns 0.
 */
HYPRE_Int HYPRE_ParCSRMLSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A,
                               HYPRE_ParVector b, HYPRE_ParVector x )
{
   MH_Link *link = (MH_Link *) solver;
   ML      *ml   = link->ml_ptr;
   double  *rhs  = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) b));
   double  *sol  = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) x));

   ML_Solve_AMGV(ml, rhs, sol);

   return 0;
}
/*
 * HYPRE_ParCSR_SuperLUSolve
 *
 * Solves with the L and U factors stored in the solver object and leaves
 * the result in x.
 *
 *   solver - handle that is really a HYPRE_SuperLU; must be non-NULL and
 *            already factorized
 *   A      - not referenced by this routine
 *   b      - right-hand side vector (read only)
 *   x      - solution vector (overwritten)
 *
 * Returns 0 on success, -1 if the factorization has not been computed, the
 * triangular solve reports an error, or SuperLU support is not compiled in.
 */
int HYPRE_ParCSR_SuperLUSolve(HYPRE_Solver solver, HYPRE_ParCSRMatrix A,
                              HYPRE_ParVector b, HYPRE_ParVector x )
{
#ifdef HAVE_SUPERLU
   int            nrows, i, info;
   double         *bData, *xData;
   SuperMatrix    B;
   SuperLUStat_t  slu_stat;
   trans_t        trans;
   HYPRE_SuperLU  *sluPtr = (HYPRE_SuperLU *) solver;

   /* ---------------------------------------------------------------- */
   /* make sure setup has been called                                  */
   /* ---------------------------------------------------------------- */

   assert ( sluPtr != NULL );
   if ( ! (sluPtr->factorized_) )
   {
      printf("HYPRE_ParCSR_SuperLUSolve ERROR - not factorized yet.\n");
      return -1;
   }

   /* ---------------------------------------------------------------- */
   /* fetch right hand side and solution vector, and seed x with b     */
   /* ---------------------------------------------------------------- */

   xData = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *)x));
   bData = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *)b));
   /* NOTE(review): the global size is used as the local length, which
      presumes the whole vector lives on this process - confirm. */
   nrows = hypre_ParVectorGlobalSize((hypre_ParVector *)x);
   for (i = 0; i < nrows; i++) xData[i] = bData[i];

   /* ---------------------------------------------------------------- */
   /* solve                                                            */
   /* ---------------------------------------------------------------- */

   /* dgstrs overwrites the dense matrix with the solution, so wrap the
      copy held in x: the answer lands in x and b is left untouched. */
   dCreate_Dense_Matrix(&B, nrows, 1, xData, nrows, SLU_DN, SLU_D, SLU_GE);

   trans = NOTRANS;
   StatInit(&slu_stat);
   dgstrs (trans, &(sluPtr->SLU_Lmat), &(sluPtr->SLU_Umat),
           sluPtr->permC_, sluPtr->permR_, &B, &slu_stat, &info);

   /* Releases only the wrapper storage; xData still belongs to x. */
   Destroy_SuperMatrix_Store(&B);
   StatFree(&slu_stat);

   if ( info != 0 )
   {
      printf("HYPRE_ParCSR_SuperLUSolve ERROR - dgstrs returned %d.\n", info);
      return -1;
   }
   return 0;
#else
   /* The handle is passed by value, so there is nothing to reset here. */
   printf("HYPRE_ParCSR_SuperLUSolve ERROR - SuperLU not enabled.\n");
   return -1;
#endif
}
/*
 * ucomm
 *
 * Inter-processor exchange of boundary data for the vector u.
 *
 *   t    - current time (not used here)
 *   u    - hypre-backed N_Vector whose local data is sent
 *   data - problem data: communicator, process id, subgrid indices,
 *          row length nvmxsub and the extended array uext
 *
 * Receives are posted first (BRecvPost), the local boundary data is then
 * sent (BSend), and finally the receives are completed (BRecvWait).
 */
static void ucomm(realtype t, N_Vector u, UserData data)
{
  realtype    recv_buffer[2*NVARS*MYSUB];
  MPI_Request requests[4];

  /* Local data array of the hypre vector behind u */
  HYPRE_ParVector u_par  = N_VGetVector_ParHyp(u);
  realtype       *u_vals = hypre_VectorData(hypre_ParVectorLocalVector(u_par));

  /* Communicator, process id, subgrid indices, sizes, extended array */
  MPI_Comm  comm    = data->comm;
  int       my_pe   = data->my_pe;
  int       isubx   = data->isubx;
  int       isuby   = data->isuby;
  long int  nvmxsub = data->nvmxsub;
  long int  nvmysub = NVARS*MYSUB;
  realtype *uext    = data->uext;

  /* Post receives for boundary data coming from neighboring PEs */
  BRecvPost(comm, requests, my_pe, isubx, isuby, nvmxsub, nvmysub,
            uext, recv_buffer);

  /* Send the boundary of the local grid to neighboring PEs */
  BSend(comm, my_pe, isubx, isuby, nvmxsub, nvmysub, u_vals);

  /* Complete the receives */
  BRecvWait(requests, isubx, isuby, nvmxsub, uext, recv_buffer);
}
/*
 * HYPRE_ParaSailsSolve
 *
 * Calls hypre_ParaSailsApply on the ParaSails object stored in the solver
 * handle, passing the local data of b as input and the local data of x as
 * output.
 *
 *   solver - handle that is really a Secret; its obj member is used
 *   A      - not referenced by this routine
 *   b      - input vector
 *   x      - output vector
 *
 * Returns the global hypre_error_flag.
 */
HYPRE_Int HYPRE_ParaSailsSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A,
                                HYPRE_ParVector b, HYPRE_ParVector x )
{
   Secret     *handle = (Secret *) solver;
   HYPRE_Real *input;
   HYPRE_Real *output;

   input  = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) b));
   output = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) x));

   hypre_ParaSailsApply(handle->obj, input, output);

   return hypre_error_flag;
}
/* ----------------------------------------------------------------------
 * has_data
 *
 * Reports whether the local data block of the hypre vector wrapped by X
 * is allocated.  Returns FALSE when X has no hypre vector attached or the
 * data pointer is NULL, TRUE otherwise.  The size of the block is not
 * verified.
 * --------------------------------------------------------------------*/
booleantype has_data(N_Vector X)
{
  hypre_ParVector *par = N_VGetVector_ParHyp(X);
  realtype *block;

  /* No hypre vector at all: nothing to drill into */
  if (par == NULL)
    return FALSE;

  block = hypre_VectorData(hypre_ParVectorLocalVector(par));
  return (block != NULL) ? TRUE : FALSE;
}
/*
 * PrintOutput
 *
 * Prints the current t, step count, last step size, and the c1,c2 values
 * sampled at the bottom-left and top-right mesh points.
 *
 *   arkode_mem - integrator memory queried for the statistics
 *   my_pe      - id of the calling process
 *   comm       - communicator used to ship the top-right sample
 *   u          - hypre-backed solution vector
 *   t          - current time
 *
 * The last process (NPEX*NPEY - 1) owns the top-right point and sends its
 * two values to process 0, which does all the printing.  With a single
 * process the values are copied locally instead.
 */
static void PrintOutput(void *arkode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t)
{
  int flag;
  int last_pe = NPEX*NPEY - 1;
  long int top0, top1, nst;
  realtype hu, corner[2];
  MPI_Status status;
  HYPRE_ParVector u_par = N_VGetVector_ParHyp(u);
  realtype *udata = hypre_VectorData(hypre_ParVectorLocalVector(u_par));

  /* Owner of the top right mesh point: hand c1,c2 over to PE 0 */
  if (my_pe == last_pe) {
    top0 = NVARS*MXSUB*MYSUB - 2;
    top1 = top0 + 1;
    if (last_pe == 0) {
      corner[0] = udata[top0];
      corner[1] = udata[top1];
    } else {
      MPI_Send(&udata[top0], 2, PVEC_REAL_MPI_TYPE, 0, 0, comm);
    }
  }

  /* Everything below happens on PE 0 only */
  if (my_pe != 0)
    return;

  if (last_pe != 0)
    MPI_Recv(&corner[0], 2, PVEC_REAL_MPI_TYPE, last_pe, 0, comm, &status);

  flag = ARKodeGetNumSteps(arkode_mem, &nst);
  check_flag(&flag, "ARKodeGetNumSteps", 1, my_pe);
  flag = ARKodeGetLastStep(arkode_mem, &hu);
  check_flag(&flag, "ARKodeGetLastStep", 1, my_pe);

#if defined(SUNDIALS_EXTENDED_PRECISION)
  printf("t = %.2Le no. steps = %ld stepsize = %.2Le\n", t, nst, hu);
  printf("At bottom left: c1, c2 = %12.3Le %12.3Le \n", udata[0], udata[1]);
  printf("At top right: c1, c2 = %12.3Le %12.3Le \n\n", corner[0], corner[1]);
#else
  /* Double precision and the fallback case use the same formats */
  printf("t = %.2e no. steps = %ld stepsize = %.2e\n", t, nst, hu);
  printf("At bottom left: c1, c2 = %12.3e %12.3e \n", udata[0], udata[1]);
  printf("At top right: c1, c2 = %12.3e %12.3e \n\n", corner[0], corner[1]);
#endif
}
/*
 * hypre_BlockTridiagSolve
 *
 * Two-stage block solve using the sub-blocks and work vectors stored in
 * the hypre_BlockTridiagData object:
 *
 *   1. gather the entries of b listed in index_set1 into F1, zero U1 and
 *      call BoomerAMG with (precon1, A11);
 *   2. gather the entries of b listed in index_set2 into F2, zero U2,
 *      update F2 <- F2 - A21*U1 and call BoomerAMG with (precon2, A22);
 *   3. scatter U1 and U2 back into x at the same index positions.
 *
 * Each index set stores its length in element 0 followed by the indices.
 * A is not referenced.  Returns 0.
 */
HYPRE_Int hypre_BlockTridiagSolve(void *data, hypre_ParCSRMatrix *A,
                                  hypre_ParVector *b, hypre_ParVector *x)
{
   hypre_BlockTridiagData *tri = (hypre_BlockTridiagData *) data;

   /* element 0 is the count, the indices follow */
   HYPRE_Int *set1 = tri->index_set1;
   HYPRE_Int *set2 = tri->index_set2;
   HYPRE_Int  n1   = set1[0];
   HYPRE_Int  n2   = set2[0];
   HYPRE_Int  k, pos;

   HYPRE_Solver       amg1 = tri->precon1;
   HYPRE_Solver       amg2 = tri->precon2;
   HYPRE_ParCSRMatrix A11  = (HYPRE_ParCSRMatrix) tri->A11;
   HYPRE_ParCSRMatrix A22  = (HYPRE_ParCSRMatrix) tri->A22;
   HYPRE_ParCSRMatrix A21  = (HYPRE_ParCSRMatrix) tri->A21;

   hypre_ParVector *F1 = tri->F1;
   hypre_ParVector *U1 = tri->U1;
   hypre_ParVector *F2 = tri->F2;
   hypre_ParVector *U2 = tri->U2;

   double *b_vals  = hypre_VectorData(hypre_ParVectorLocalVector(b));
   double *x_vals  = hypre_VectorData(hypre_ParVectorLocalVector(x));
   double *f1_vals = hypre_VectorData(hypre_ParVectorLocalVector(F1));
   double *u1_vals = hypre_VectorData(hypre_ParVectorLocalVector(U1));
   double *f2_vals = hypre_VectorData(hypre_ParVectorLocalVector(F2));
   double *u2_vals = hypre_VectorData(hypre_ParVectorLocalVector(U2));

   /* first block: gather, zero the guess, solve */
   for (k = 0; k < n1; k++)
   {
      pos        = set1[k+1];
      f1_vals[k] = b_vals[pos];
      u1_vals[k] = 0.0;
   }
   HYPRE_BoomerAMGSolve(amg1, A11, (HYPRE_ParVector) F1, (HYPRE_ParVector) U1);

   /* second block: gather, zero the guess, couple in U1, solve */
   for (k = 0; k < n2; k++)
   {
      pos        = set2[k+1];
      f2_vals[k] = b_vals[pos];
      u2_vals[k] = 0.0;
   }
   HYPRE_ParCSRMatrixMatvec(-1.0,A21,(HYPRE_ParVector) U1,1.0,
                            (HYPRE_ParVector) F2);
   HYPRE_BoomerAMGSolve(amg2, A22, (HYPRE_ParVector) F2, (HYPRE_ParVector) U2);

   /* scatter both partial solutions back into x */
   for (k = 0; k < n1; k++)
   {
      pos         = set1[k+1];
      x_vals[pos] = u1_vals[k];
   }
   for (k = 0; k < n2; k++)
   {
      pos         = set2[k+1];
      x_vals[pos] = u2_vals[k];
   }

   return (0);
}
/*
 * HYPRE_ParCSRDiagScale
 *
 * For every local row i computes  x[i] = y[i] / a,  where a is the first
 * stored entry of row i in the diag block of HA.
 * NOTE(review): this presumes the diagonal coefficient is stored first in
 * each row and is nonzero - confirm against how the matrix is assembled.
 *
 *   solver - not referenced
 *   HA     - matrix supplying the scaling entries
 *   Hy     - input vector
 *   Hx     - output vector; its local size sets the loop length
 *
 * Returns 0.
 */
HYPRE_Int HYPRE_ParCSRDiagScale( HYPRE_Solver solver, HYPRE_ParCSRMatrix HA,
                                 HYPRE_ParVector Hy, HYPRE_ParVector Hx )
{
   hypre_ParCSRMatrix *A = (hypre_ParCSRMatrix *) HA;
   hypre_ParVector    *y = (hypre_ParVector *) Hy;
   hypre_ParVector    *x = (hypre_ParVector *) Hx;

   double    *out       = hypre_VectorData(hypre_ParVectorLocalVector(x));
   double    *in        = hypre_VectorData(hypre_ParVectorLocalVector(y));
   double    *coeffs    = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(A));
   HYPRE_Int *row_start = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(A));
   HYPRE_Int  n_local   = hypre_VectorSize(hypre_ParVectorLocalVector(x));
   HYPRE_Int  row;

   for (row = 0; row < n_local; row++)
   {
      out[row] = in[row] / coeffs[row_start[row]];
   }

   return 0;
}
/*
 * HYPRE_LSI_PolySolve
 *
 * Applies the matrix polynomial
 *
 *     x = ( coefs[0] + coefs[1]*A + ... + coefs[order]*A^order ) * b
 *
 * using Horner's scheme.  Each step calls HYPRE_ParCSRMatrixMatvec to form
 * A*x in b, so b is used as scratch space; its first Nrows local entries
 * are saved up front and restored before returning.
 *
 *   solver - handle that is really a HYPRE_LSI_Poly (order, Nrows,
 *            coefficients are read)
 *   A      - matrix used in the matvecs
 *   b      - right-hand side (temporarily overwritten, then restored)
 *   x      - result vector
 *
 * Returns 0.  Terminates the program with exit(1) if the coefficients have
 * not been set up or the scratch copy of b cannot be allocated.
 */
int HYPRE_LSI_PolySolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A,
                         HYPRE_ParVector b, HYPRE_ParVector x )
{
   int            i, j, order, Nrows;
   double         *rhs, *soln, *orig_rhs, mult, *coefs;
   HYPRE_LSI_Poly *poly_ptr = (HYPRE_LSI_Poly *) solver;

   rhs  = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) b));
   soln = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) x));

   order = poly_ptr->order;
   Nrows = poly_ptr->Nrows;
   coefs = poly_ptr->coefficients;
   if ( coefs == NULL )
   {
      printf("HYPRE_LSI_PolySolve ERROR : PolySetup not called.\n");
      exit(1);
   }

   /* Scratch copy of b.  Nothing is allocated for an empty local part, but
      the matvecs below are still executed in that case. */
   orig_rhs = NULL;
   if ( Nrows > 0 )
   {
      orig_rhs = (double *) malloc( Nrows * sizeof(double) );
      if ( orig_rhs == NULL )
      {
         printf("HYPRE_LSI_PolySolve ERROR : memory allocation failed.\n");
         exit(1);
      }
   }

   /* Horner start: x = coefs[order] * b */
   for ( i = 0; i < Nrows; i++ )
   {
      orig_rhs[i] = rhs[i];
      soln[i] = rhs[i] * coefs[order];
   }

   /* Horner step: x = coefs[i] * b_original + A * x  (A*x lands in b) */
   for ( i = order - 1; i >= 0; i-- )
   {
      HYPRE_ParCSRMatrixMatvec(1.0, A, x, 0.0, b);
      mult = coefs[i];
      for ( j = 0; j < Nrows; j++ )
         soln[j] = mult * orig_rhs[j] + rhs[j];
   }

   /* Put the caller's right-hand side back */
   for ( i = 0; i < Nrows; i++ ) rhs[i] = orig_rhs[i];
   free( orig_rhs );

   return 0;
}
/* ----------------------------------------------------------------------
 * Check vector
 *
 * Checks if the first local_length local elements of vector X are all
 * equal to ans (as judged by FNEQ).
 *
 * Returns 0 when every element matches and 1 otherwise.  A vector with no
 * hypre vector attached, or with no local data block, counts as a failure
 * whenever there is at least one element to check; with nothing to check
 * the result is 0.
 * --------------------------------------------------------------------*/
int check_ans(realtype ans, N_Vector X, long int local_length)
{
  int      failure = 0;
  long int i;
  hypre_ParVector *Xvec  = N_VGetVector_ParHyp(X);
  realtype        *Xdata = Xvec == NULL ? NULL : hypre_VectorData(hypre_ParVectorLocalVector(Xvec));

  /* No data block: never index it */
  if (Xdata == NULL)
    return (local_length > 0) ? 1 : 0;

  /* check vector data */
  for (i = 0; i < local_length; i++) {
    failure += FNEQ(Xdata[i], ans);
  }

  /* failure is an integer count, so compare against an integer zero */
  return (failure > 0) ? 1 : 0;
}
/*
 * PSolve
 *
 * Preconditioner solve: solves the block-diagonal system P z = r.
 *
 * The LU factors of the blocks are taken from data->P and the matching
 * pivot information from data->pivot.  r is first copied into z, then each
 * (lx, ly) block is solved in place inside z's local data with
 * denseGETRS.  Only r, z and user_data are used; the remaining arguments
 * are not referenced.  Always returns 0.
 */
static int PSolve(realtype tn, N_Vector u, N_Vector fu,
                  N_Vector r, N_Vector z,
                  realtype gamma, realtype delta,
                  int lr, void *user_data, N_Vector vtemp)
{
  UserData data = (UserData) user_data;
  realtype **(*P)[MYSUB]    = data->P;
  long int *(*pivot)[MYSUB] = data->pivot;
  HYPRE_ParVector z_par;
  realtype *zdata, *block_rhs;
  int nvmxsub;
  int lx, ly;

  /* z starts out as a copy of r and is overwritten block by block */
  N_VScale(RCONST(1.0), r, z);

  nvmxsub = data->nvmxsub;
  z_par   = N_VGetVector_ParHyp(z);
  zdata   = hypre_VectorData(hypre_ParVectorLocalVector(z_par));

  for (lx = 0; lx < MXSUB; lx++) {
    for (ly = 0; ly < MYSUB; ly++) {
      block_rhs = zdata + (lx*NVARS + ly*nvmxsub);
      denseGETRS(P[lx][ly], NVARS, pivot[lx][ly], block_rhs);
    }
  }

  return(0);
}
/*
 * hypre_ParCSRMatrixMatvec
 *
 * Computes  y <- alpha*A*x + beta*y  for a ParCSR matrix.
 *
 * The entries of x listed in the send map of A's CommPkg are packed into
 * send buffers and a communication handle is created per vector.  The diag
 * block product is then computed, the handles are destroyed, and the offd
 * block product (using the received values) is added into y.
 *
 * Return value (informational only, processing is never cut short):
 *    0  sizes agree
 *   11  global length of x differs from the number of columns of A
 *   12  global length of y differs from the number of rows of A
 *   13  both of the above
 */
HYPRE_Int
hypre_ParCSRMatrixMatvec( HYPRE_Complex       alpha,
                          hypre_ParCSRMatrix *A,
                          hypre_ParVector    *x,
                          HYPRE_Complex       beta,
                          hypre_ParVector    *y )
{
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix     *A_diag   = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix     *A_offd   = hypre_ParCSRMatrixOffd(A);
   hypre_Vector        *x_local  = hypre_ParVectorLocalVector(x);
   hypre_Vector        *y_local  = hypre_ParVectorLocalVector(y);
   HYPRE_Complex       *x_vals   = hypre_VectorData(x_local);

   HYPRE_Int global_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int global_cols = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int x_global    = hypre_ParVectorGlobalSize(x);
   HYPRE_Int y_global    = hypre_ParVectorGlobalSize(y);
   HYPRE_Int num_vectors = hypre_VectorNumVectors(x_local);
   HYPRE_Int offd_cols   = hypre_CSRMatrixNumCols(A_offd);
   HYPRE_Int vecstride   = hypre_VectorVectorStride( x_local );
   HYPRE_Int idxstride   = hypre_VectorIndexStride( x_local );
   HYPRE_Int x_mismatch, y_mismatch;
   HYPRE_Int ierr = 0;
   HYPRE_Int num_sends, send, k, k_end, jv, fill;

   hypre_ParCSRCommHandle **handles;
   hypre_Vector            *ghost;       /* off-processor part of x */
   HYPRE_Complex           *ghost_vals;
   HYPRE_Complex          **send_buf;

   hypre_assert( idxstride>0 );

   /* Size compatibility (see the return codes above) */
   x_mismatch = (global_cols != x_global);
   y_mismatch = (global_rows != y_global);
   if (x_mismatch && y_mismatch)
      ierr = 13;
   else if (y_mismatch)
      ierr = 12;
   else if (x_mismatch)
      ierr = 11;

   hypre_assert( hypre_VectorNumVectors(y_local)==num_vectors );

   /* Temporary vector with one slot per offd column */
   if ( num_vectors==1 )
   {
      ghost = hypre_SeqVectorCreate( offd_cols );
   }
   else
   {
      hypre_assert( num_vectors>1 );
      ghost = hypre_SeqMultiVectorCreate( offd_cols, num_vectors );
   }
   hypre_SeqVectorInitialize(ghost);
   ghost_vals = hypre_VectorData(ghost);

   handles = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors);

   /* If A has no CommPkg yet, generate one */
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);

   /* One send buffer per vector, sized by the end of the send map */
   send_buf = hypre_CTAlloc( HYPRE_Complex*, num_vectors );
   for ( jv=0; jv<num_vectors; ++jv )
   {
      send_buf[jv] = hypre_CTAlloc(HYPRE_Complex,
                                   hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
   }

   /* Pack the entries of x named by the send map */
   if ( num_vectors==1 )
   {
      fill = 0;
      for (send = 0; send < num_sends; send++)
      {
         k_end = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send+1);
         for (k = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send); k < k_end; k++)
         {
            send_buf[0][fill++] =
               x_vals[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,k)];
         }
      }
   }
   else
   {
      for ( jv=0; jv<num_vectors; ++jv )
      {
         fill = 0;
         for (send = 0; send < num_sends; send++)
         {
            k_end = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send+1);
            for (k = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send); k < k_end; k++)
            {
               send_buf[jv][fill++] =
                  x_vals[ jv*vecstride +
                          idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,k) ];
            }
         }
      }
   }

   hypre_assert( idxstride==1 );
   /* The assert is there because the loop below only works for 'column'
      storage of a multivector.  Working more generally (at least for
      'row' storage) would mean either changing CommPkg so that num_sends
      is no.zones*no.vectors, or, less dangerously, putting a stride in
      the logic of CommHandleCreate (from a new argument or a new CommPkg
      member), or moving the vector loop inside CommHandleCreate (perhaps
      as a new multivector variant of it). */
   for ( jv=0; jv<num_vectors; ++jv )
   {
      handles[jv] = hypre_ParCSRCommHandleCreate
         ( 1, comm_pkg, send_buf[jv], ghost_vals + jv*offd_cols );
   }

   /* Diag block product, issued between handle creation and destruction */
   hypre_CSRMatrixMatvec( alpha, A_diag, x_local, beta, y_local);

   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_ParCSRCommHandleDestroy(handles[jv]);
      handles[jv] = NULL;
   }
   hypre_TFree(handles);

   /* Offd block product is accumulated into y (beta = 1) */
   if (offd_cols)
   {
      hypre_CSRMatrixMatvec( alpha, A_offd, ghost, 1.0, y_local);
   }

   hypre_SeqVectorDestroy(ghost);
   ghost = NULL;
   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_TFree(send_buf[jv]);
   }
   hypre_TFree(send_buf);

   return ierr;
}
/*
 * hypre_ParCSRMatrixMatvecT
 *
 * Computes  y <- alpha*A^T*x + beta*y  for a ParCSR matrix.
 *
 * The transposed offd block product is formed in a temporary vector and
 * handed to the communication handles (job 2).  The transposed diag block
 * product is computed into y, the handles are destroyed, and the received
 * buffer entries are then added into y at the positions given by the send
 * map of A's CommPkg.
 *
 * Return value (informational only, processing is never cut short):
 *    0  sizes agree
 *    1  global length of x differs from the number of rows of A
 *    2  global length of y differs from the number of columns of A
 *    3  both of the above
 */
HYPRE_Int
hypre_ParCSRMatrixMatvecT( HYPRE_Complex       alpha,
                           hypre_ParCSRMatrix *A,
                           hypre_ParVector    *x,
                           HYPRE_Complex       beta,
                           hypre_ParVector    *y )
{
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix     *A_diag   = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix     *A_offd   = hypre_ParCSRMatrixOffd(A);
   hypre_Vector        *x_local  = hypre_ParVectorLocalVector(x);
   hypre_Vector        *y_local  = hypre_ParVectorLocalVector(y);

   HYPRE_Int vecstride   = hypre_VectorVectorStride( y_local );
   HYPRE_Int idxstride   = hypre_VectorIndexStride( y_local );
   HYPRE_Int global_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int global_cols = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int offd_cols   = hypre_CSRMatrixNumCols(A_offd);
   HYPRE_Int x_global    = hypre_ParVectorGlobalSize(x);
   HYPRE_Int y_global    = hypre_ParVectorGlobalSize(y);
   HYPRE_Int num_vectors = hypre_VectorNumVectors(y_local);
   HYPRE_Int x_mismatch, y_mismatch;
   HYPRE_Int ierr = 0;
   HYPRE_Int num_sends, send, k, k_end, jv, drain;

   hypre_ParCSRCommHandle **handles;
   hypre_Vector            *contrib;      /* contributions for other procs */
   HYPRE_Complex           *contrib_vals;
   HYPRE_Complex          **recv_buf;
   HYPRE_Complex           *y_vals;

   /* Size compatibility (see the return codes above) */
   x_mismatch = (global_rows != x_global);
   y_mismatch = (global_cols != y_global);
   if (x_mismatch && y_mismatch)
      ierr = 3;
   else if (y_mismatch)
      ierr = 2;
   else if (x_mismatch)
      ierr = 1;

   handles = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors);

   /* Temporary vector with one slot per offd column */
   if ( num_vectors==1 )
      contrib = hypre_SeqVectorCreate(offd_cols);
   else
      contrib = hypre_SeqMultiVectorCreate(offd_cols,num_vectors);
   hypre_SeqVectorInitialize(contrib);

   /* If A has no CommPkg yet, generate one */
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);

   /* One buffer per vector, sized by the end of the send map */
   recv_buf = hypre_CTAlloc( HYPRE_Complex*, num_vectors );
   for ( jv=0; jv<num_vectors; ++jv )
   {
      recv_buf[jv] = hypre_CTAlloc(HYPRE_Complex,
                                   hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
   }

   contrib_vals = hypre_VectorData(contrib);
   y_vals       = hypre_VectorData(y_local);

   hypre_assert( idxstride==1 );
   /* only 'column' storage of multivectors implemented so far */

   /* Transposed offd product goes into the temporary vector */
   if (offd_cols)
   {
      hypre_CSRMatrixMatvecT(alpha, A_offd, x_local, 0.0, contrib);
   }

   for ( jv=0; jv<num_vectors; ++jv )
   {
      /* this is where we assume multivectors are 'column' storage */
      handles[jv] = hypre_ParCSRCommHandleCreate
         ( 2, comm_pkg, contrib_vals + jv*offd_cols, recv_buf[jv] );
   }

   /* Transposed diag product, issued between handle creation and
      destruction */
   hypre_CSRMatrixMatvecT(alpha, A_diag, x_local, beta, y_local);

   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_ParCSRCommHandleDestroy(handles[jv]);
      handles[jv] = NULL;
   }
   hypre_TFree(handles);

   /* Add the buffered values into y at the send-map positions */
   if ( num_vectors==1 )
   {
      drain = 0;
      for (send = 0; send < num_sends; send++)
      {
         k_end = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send+1);
         for (k = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send); k < k_end; k++)
         {
            y_vals[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,k)]
               += recv_buf[0][drain++];
         }
      }
   }
   else
   {
      for ( jv=0; jv<num_vectors; ++jv )
      {
         drain = 0;
         for (send = 0; send < num_sends; send++)
         {
            k_end = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send+1);
            for (k = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send); k < k_end; k++)
            {
               y_vals[ jv*vecstride +
                       idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,k) ]
                  += recv_buf[jv][drain++];
            }
         }
      }
   }

   hypre_SeqVectorDestroy(contrib);
   contrib = NULL;
   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_TFree(recv_buf[jv]);
   }
   hypre_TFree(recv_buf);

   return ierr;
}
/*
 * HYPRE_LSI_SchwarzSolve
 *
 * Applies the block solver stored in the HYPRE_LSI_Schwarz object to the
 * right-hand side b and writes the result into x.
 *
 *   solver - handle that is really a HYPRE_LSI_Schwarz
 *   Amat   - not referenced by this routine
 *   b      - right-hand side (local data is read)
 *   x      - solution (local data is zeroed, then overwritten)
 *
 * Outline:
 *   - b is copied into dbuffer (length extNrows) and, when extNrows >
 *     Nrows, MH_ExchBdry fills in the entries beyond Nrows;
 *   - one sweep over all blocks is performed, followed by ntimes-1 further
 *     sweeps; before each further sweep the current solution is copied
 *     into xbuffer and exchanged;
 *   - within a sweep each block gathers its right-hand side, subtracts
 *     the aux-matrix coupling to the current solution when nblocks > 1,
 *     and runs a forward then a backward substitution over the block's
 *     CSR data (bmat_ia/ja/aa).
 *     NOTE(review): this looks like applying stored LU-type factors with
 *     a unit lower part - confirm against the setup routine.
 *   - results with index < Nrows go to x, the others to xbuffer.
 *
 * NOTE(review): malloc results are not checked; idiag[i] is only set when
 * row i contains a diagonal entry; and the first sweep's scatter uses
 * indptr whenever nblocks > 1 although it is only assigned when
 * blk_indices != NULL.  These presumably rely on invariants established
 * at setup - verify.
 *
 * Returns 0.
 */
int HYPRE_LSI_SchwarzSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix Amat,
                            HYPRE_ParVector b, HYPRE_ParVector x )
{
   int i, j, cnt, blk, index, max_blk_size, nrows;
   int ntimes, Nrows, extNrows, nblocks, *indptr, column;
   int *aux_mat_ia, *aux_mat_ja, *mat_ia, *mat_ja, *idiag;
   double *dbuffer, *aux_mat_aa, *solbuf, *xbuffer;
   double *rhs, *soln, *mat_aa, ddata;
   MH_Context *context;
   HYPRE_LSI_Schwarz *sch_ptr = (HYPRE_LSI_Schwarz *) solver;

   /* ---------------------------------------------------------
    * fetch vectors
    * ---------------------------------------------------------*/

   rhs = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector*) b));
   soln = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector*) x));

   /* ---------------------------------------------------------
    * fetch solver parameters and find the largest block size
    * ---------------------------------------------------------*/

   ntimes = sch_ptr->ntimes;
   Nrows = sch_ptr->Nrows;
   extNrows = sch_ptr->extNrows;
   nblocks = sch_ptr->nblocks;
   max_blk_size = 0;
   for ( i = 0; i < nblocks; i++ )
      if (sch_ptr->blk_sizes[i] > max_blk_size)
         max_blk_size = sch_ptr->blk_sizes[i];

   /* ---------------------------------------------------------
    * initialize memory for interprocessor communication
    * ---------------------------------------------------------*/

   dbuffer = (double *) malloc(extNrows * sizeof(double));
   for ( i = 0; i < Nrows; i++ ) dbuffer[i] = rhs[i];
   for ( i = 0; i < Nrows; i++ ) soln[i] = 0.0;
   context = (MH_Context *) malloc(sizeof(MH_Context));
   context->comm = sch_ptr->comm;
   context->Amat = sch_ptr->mh_mat;

   /* ---------------------------------------------------------
    * communicate the rhs and put into dbuffer
    * ---------------------------------------------------------*/

   if ( extNrows > Nrows ) MH_ExchBdry(dbuffer, context);

   /* work arrays: per-block solution, per-row diagonal positions, and
      the extended solution (only entries >= Nrows are zeroed here) */
   solbuf = (double *) malloc(max_blk_size * sizeof(double));
   idiag = (int *) malloc(max_blk_size * sizeof(int));
   xbuffer = (double *) malloc(extNrows * sizeof(double));
   for ( i = Nrows; i < extNrows; i++ ) xbuffer[i] = 0.0;

   /* ---------------------------------------------------------
    * the first pass
    * ---------------------------------------------------------*/

   for ( blk = 0; blk < nblocks; blk++ )
   {
      nrows = sch_ptr->blk_sizes[blk];

      /* gather this block's right-hand side */
      if ( sch_ptr->blk_indices != NULL )
      {
         indptr = sch_ptr->blk_indices[blk];
         for ( i = 0; i < nrows; i++ ) solbuf[i] = dbuffer[indptr[i]];
      }
      else
      {
         for ( i = 0; i < nrows; i++ ) solbuf[i] = dbuffer[i];
      }
      mat_ia = sch_ptr->bmat_ia[blk];
      mat_ja = sch_ptr->bmat_ja[blk];
      mat_aa = sch_ptr->bmat_aa[blk];
      if ( nblocks > 1 )
      {
         aux_mat_ia = sch_ptr->aux_bmat_ia[blk];
         aux_mat_ja = sch_ptr->aux_bmat_ja[blk];
         aux_mat_aa = sch_ptr->aux_bmat_aa[blk];
      }

      /* subtract the aux-matrix coupling to the current solution */
      if ( nblocks > 1 )
      {
         for ( i = 0; i < nrows; i++ )
         {
            ddata = solbuf[i];
            for ( j = aux_mat_ia[i]; j < aux_mat_ia[i+1]; j++ )
            {
               index = aux_mat_ja[j];
               if (index<Nrows) ddata -= (aux_mat_aa[j]*soln[index]);
               else ddata -= (aux_mat_aa[j]*xbuffer[index]);
            }
            solbuf[i] = ddata;
         }
      }

      /* forward sweep over the entries stored before the diagonal;
         the diagonal position of each row is remembered in idiag */
      for ( i = 0; i < nrows; i++ )
      {
         ddata = 0.0;
         for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
         {
            column = mat_ja[j];
            if ( column == i ) { idiag[i] = j; break;}
            ddata += mat_aa[j] * solbuf[column];
         }
         solbuf[i] -= ddata;
      }

      /* backward sweep over the entries after the diagonal, then
         division by the diagonal entry */
      for ( i = nrows-1; i >= 0; i-- )
      {
         ddata = 0.0;
         for ( j = idiag[i]+1; j < mat_ia[i+1]; j++ )
         {
            column = mat_ja[j];
            ddata += mat_aa[j] * solbuf[column];
         }
         solbuf[i] -= ddata;
         solbuf[i] /= mat_aa[idiag[i]];
      }

      /* scatter: local rows go to soln, the others to xbuffer */
      if ( nblocks > 1 )
      {
         for ( i = 0; i < nrows; i++ )
         {
            if ( indptr[i] < Nrows ) soln[indptr[i]] = solbuf[i];
            else xbuffer[indptr[i]] = solbuf[i];
         }
      }
      else
      {
         for ( i = 0; i < nrows; i++ )
         {
            if ( i < Nrows ) soln[i] = solbuf[i];
            else xbuffer[i] = solbuf[i];
         }
      }
   }

   /* ---------------------------------------------------------
    * the remaining ntimes-1 passes
    * ---------------------------------------------------------*/

   for ( cnt = 1; cnt < ntimes; cnt++ )
   {
      /* refresh the extended solution and exchange it */
      for ( i = 0; i < Nrows; i++ ) xbuffer[i] = soln[i];
      if ( extNrows > Nrows ) MH_ExchBdry(xbuffer, context);

      for ( blk = 0; blk < nblocks; blk++ )
      {
         nrows = sch_ptr->blk_sizes[blk];
         mat_ia = sch_ptr->bmat_ia[blk];
         mat_ja = sch_ptr->bmat_ja[blk];
         mat_aa = sch_ptr->bmat_aa[blk];

         /* gather the right-hand side, minus the aux-matrix coupling
            when there is more than one block */
         if ( nblocks > 1 )
         {
            indptr = sch_ptr->blk_indices[blk];
            aux_mat_ia = sch_ptr->aux_bmat_ia[blk];
            aux_mat_ja = sch_ptr->aux_bmat_ja[blk];
            aux_mat_aa = sch_ptr->aux_bmat_aa[blk];
            for ( i = 0; i < nrows; i++ )
            {
               ddata = dbuffer[indptr[i]];
               for ( j = aux_mat_ia[i]; j < aux_mat_ia[i+1]; j++ )
               {
                  index = aux_mat_ja[j];
                  if (index<Nrows) ddata -= (aux_mat_aa[j]*soln[index]);
                  else ddata -= (aux_mat_aa[j]*xbuffer[index]);
               }
               solbuf[i] = ddata;
            }
         }
         else
            for ( i = 0; i < nrows; i++ ) solbuf[i] = dbuffer[i];

         /* forward sweep (same as in the first pass) */
         for ( i = 0; i < nrows; i++ )
         {
            ddata = 0.0;
            for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
            {
               column = mat_ja[j];
               if ( column == i ) { idiag[i] = j; break;}
               ddata += mat_aa[j] * solbuf[column];
            }
            solbuf[i] -= ddata;
         }

         /* backward sweep (same as in the first pass) */
         for ( i = nrows-1; i >= 0; i-- )
         {
            ddata = 0.0;
            for ( j = idiag[i]+1; j < mat_ia[i+1]; j++ )
            {
               column = mat_ja[j];
               ddata += mat_aa[j] * solbuf[column];
            }
            solbuf[i] -= ddata;
            solbuf[i] /= mat_aa[idiag[i]];
         }

         /* scatter: local rows go to soln, the others to xbuffer */
         if ( nblocks > 1 )
         {
            for ( i = 0; i < nrows; i++ )
               if ( indptr[i] < Nrows ) soln[indptr[i]] = solbuf[i];
               else xbuffer[indptr[i]] = solbuf[i];
         }
         else
         {
            for ( i = 0; i < nrows; i++ )
               if ( i < Nrows ) soln[i] = solbuf[i];
               else xbuffer[i] = solbuf[i];
         }
      }
   }

   /* --------------------------------------------------------- */
   /* clean up                                                  */
   /* --------------------------------------------------------- */

   free(xbuffer);
   free( idiag );
   free( solbuf );
   free( dbuffer );
   free( context );

   return 0;
}
HYPRE_Int hypre_BoomerAMGRelaxT( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_type, HYPRE_Int relax_points, HYPRE_Real relax_weight, hypre_ParVector *u, hypre_ParVector *Vtemp ) { hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); HYPRE_Int n_global= hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int n = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int first_index = hypre_ParVectorFirstIndex(u); hypre_Vector *u_local = hypre_ParVectorLocalVector(u); HYPRE_Real *u_data = hypre_VectorData(u_local); hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp); HYPRE_Real *Vtemp_data = hypre_VectorData(Vtemp_local); hypre_CSRMatrix *A_CSR; HYPRE_Int *A_CSR_i; HYPRE_Int *A_CSR_j; HYPRE_Real *A_CSR_data; hypre_Vector *f_vector; HYPRE_Real *f_vector_data; HYPRE_Int i; HYPRE_Int jj; HYPRE_Int column; HYPRE_Int relax_error = 0; HYPRE_Real *A_mat; HYPRE_Real *b_vec; HYPRE_Real zero = 0.0; /*----------------------------------------------------------------------- * Switch statement to direct control based on relax_type: * relax_type = 7 -> Jacobi (uses ParMatvec) * relax_type = 9 -> Direct Solve *-----------------------------------------------------------------------*/ switch (relax_type) { case 7: /* Jacobi (uses ParMatvec) */ { /*----------------------------------------------------------------- * Copy f into temporary vector. *-----------------------------------------------------------------*/ hypre_ParVectorCopy(f,Vtemp); /*----------------------------------------------------------------- * Perform MatvecT Vtemp=f-A^Tu *-----------------------------------------------------------------*/ hypre_ParCSRMatrixMatvecT(-1.0,A, u, 1.0, Vtemp); for (i = 0; i < n; i++) { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. 
*-----------------------------------------------------------*/ if (A_diag_data[A_diag_i[i]] != zero) { u_data[i] += relax_weight * Vtemp_data[i] / A_diag_data[A_diag_i[i]]; } } } break; case 9: /* Direct solve: use gaussian elimination */ { /*----------------------------------------------------------------- * Generate CSR matrix from ParCSRMatrix A *-----------------------------------------------------------------*/ if (n) { A_CSR = hypre_ParCSRMatrixToCSRMatrixAll(A); f_vector = hypre_ParVectorToVectorAll(f); A_CSR_i = hypre_CSRMatrixI(A_CSR); A_CSR_j = hypre_CSRMatrixJ(A_CSR); A_CSR_data = hypre_CSRMatrixData(A_CSR); f_vector_data = hypre_VectorData(f_vector); A_mat = hypre_CTAlloc(HYPRE_Real, n_global*n_global); b_vec = hypre_CTAlloc(HYPRE_Real, n_global); /*--------------------------------------------------------------- * Load transpose of CSR matrix into A_mat. *---------------------------------------------------------------*/ for (i = 0; i < n_global; i++) { for (jj = A_CSR_i[i]; jj < A_CSR_i[i+1]; jj++) { column = A_CSR_j[jj]; A_mat[column*n_global+i] = A_CSR_data[jj]; } b_vec[i] = f_vector_data[i]; } relax_error = gselim(A_mat,b_vec,n_global); for (i = 0; i < n; i++) { u_data[i] = b_vec[first_index+i]; } hypre_TFree(A_mat); hypre_TFree(b_vec); hypre_CSRMatrixDestroy(A_CSR); A_CSR = NULL; hypre_SeqVectorDestroy(f_vector); f_vector = NULL; } } break; } return(relax_error); }
/****************************************************************************** * * hypre_IJVectorAddToValuesPar * * Adds to a potentially noncontiguous set of IJVectorPar components. * * vector: the IJ vector to update; num_values: number of entries in values * (and in indices when it is non-NULL); indices: indices that are compared * against this process's partition range; values: the amounts to add. * * NOTE(review): the definition is cut off in this part of the file -- only * the argument checks and the handling of off-processor indices are visible. * *****************************************************************************/ HYPRE_Int hypre_IJVectorAddToValuesPar(hypre_IJVector *vector, HYPRE_Int num_values, const HYPRE_Int *indices, const double *values ) { HYPRE_Int my_id; HYPRE_Int i, j, vec_start, vec_stop; double *data; HYPRE_Int print_level = hypre_IJVectorPrintLevel(vector); HYPRE_Int *IJpartitioning = hypre_IJVectorPartitioning(vector); hypre_ParVector *par_vector = hypre_IJVectorObject(vector); hypre_AuxParVector *aux_vector = hypre_IJVectorTranslator(vector); MPI_Comm comm = hypre_IJVectorComm(vector); /* NOTE(review): local_vector is fetched through par_vector here, i.e. before the par_vector NULL check below -- confirm this is safe */ hypre_Vector *local_vector = hypre_ParVectorLocalVector(par_vector); /* If no components are to be added, perform no checking and return */ if (num_values < 1) return 0; hypre_MPI_Comm_rank(comm, &my_id); /* If par_vector == NULL or partitioning == NULL or local_vector == NULL, print a message (only when print_level is nonzero) and flag the error with hypre_error_in_arg(1). NOTE(review): no return statement follows any of these checks in the code visible here -- confirm that continuing is intended */ if (!par_vector) { if (print_level) { hypre_printf("par_vector == NULL -- "); hypre_printf("hypre_IJVectorAddToValuesPar\n"); hypre_printf("**** Vector storage is either unallocated or orphaned ****\n"); } hypre_error_in_arg(1); } if (!IJpartitioning) { if (print_level) { hypre_printf("IJpartitioning == NULL -- "); hypre_printf("hypre_IJVectorAddToValuesPar\n"); hypre_printf("**** IJVector partitioning is either unallocated or orphaned ****\n"); } hypre_error_in_arg(1); } if (!local_vector) { if (print_level) { hypre_printf("local_vector == NULL -- "); hypre_printf("hypre_IJVectorAddToValuesPar\n"); hypre_printf("**** Vector local data is either unallocated or orphaned ****\n"); } hypre_error_in_arg(1); } /* vec_start and vec_stop are the first and last index of this process's range, read from the partitioning array (vec_stop is inclusive) */ #ifdef HYPRE_NO_GLOBAL_PARTITION vec_start = IJpartitioning[0]; vec_stop = IJpartitioning[1]-1; #else vec_start = IJpartitioning[my_id]; vec_stop = IJpartitioning[my_id+1]-1; #endif if (vec_start > 
vec_stop) { if (print_level) { hypre_printf("vec_start > vec_stop -- "); hypre_printf("hypre_IJVectorAddToValuesPar\n"); hypre_printf("**** This vector partitioning should not occur ****\n"); } hypre_error_in_arg(1); } /* Determine whether indices points to local indices only; entries that are not local are stored (index and value) in the auxiliary vector structure. If indices == NULL, assume that num_values components are to be set in a block starting at vec_start. NOTE: If indices == NULL off processor values are ignored!!! NOTE(review): the indices == NULL branch is not visible in this part of the file */ /* Disabled range check, kept for reference: if (indices) { for (i = 0; i < num_values; i++) { ierr += (indices[i] < vec_start); ierr += (indices[i] >= vec_stop); } } if (ierr) { hypre_printf("indices beyond local range -- "); hypre_printf("hypre_IJVectorAddToValuesPar\n"); hypre_printf("**** Indices specified are unusable ****\n"); exit(1); } */ data = hypre_VectorData(local_vector); if (indices) { HYPRE_Int current_num_elmts = hypre_AuxParVectorCurrentNumElmts(aux_vector); HYPRE_Int max_off_proc_elmts = hypre_AuxParVectorMaxOffProcElmts(aux_vector); HYPRE_Int *off_proc_i = hypre_AuxParVectorOffProcI(aux_vector); double *off_proc_data = hypre_AuxParVectorOffProcData(aux_vector); for (j = 0; j < num_values; j++) { i = indices[j]; if (i < vec_start || i > vec_stop) { /* The index lies outside [vec_start, vec_stop]: append the index/value pair to the off-processor stash. The stash is created with room for 100 entries on first use and grown by 10 entries whenever it is full */ if (!max_off_proc_elmts) { max_off_proc_elmts = 100; hypre_AuxParVectorMaxOffProcElmts(aux_vector) = max_off_proc_elmts; hypre_AuxParVectorOffProcI(aux_vector) = hypre_CTAlloc(HYPRE_Int,max_off_proc_elmts); hypre_AuxParVectorOffProcData(aux_vector) = hypre_CTAlloc(double,max_off_proc_elmts); off_proc_i = hypre_AuxParVectorOffProcI(aux_vector); off_proc_data = hypre_AuxParVectorOffProcData(aux_vector); } else if (current_num_elmts + 1 > max_off_proc_elmts) { max_off_proc_elmts += 10; off_proc_i = hypre_TReAlloc(off_proc_i,HYPRE_Int,max_off_proc_elmts); off_proc_data = hypre_TReAlloc(off_proc_data,double, max_off_proc_elmts); 
hypre_AuxParVectorMaxOffProcElmts(aux_vector) = max_off_proc_elmts; hypre_AuxParVectorOffProcI(aux_vector) = off_proc_i; hypre_AuxParVectorOffProcData(aux_vector) = off_proc_data; } /* record the pair and publish the new stash length */ off_proc_i[current_num_elmts] = i; off_proc_data[current_num_elmts++] = values[j]; hypre_AuxParVectorCurrentNumElmts(aux_vector)=current_num_elmts; }
/******************************************************************************
 *
 * hypre_IJVectorSetValuesPar
 *
 * Sets a potentially noncontiguous set of components of an IJVectorPar.
 *
 * Parameters:
 *   vector     - IJ vector; the local data of its ParVector object is written
 *   num_values - number of entries to set; if < 1 the routine returns 0
 *                without performing any checks
 *   indices    - indices of the entries to set, or NULL to set a contiguous
 *                block starting at the first locally owned entry
 *   values     - values to store, one per entry
 *
 * Behavior:
 *   - An index inside [vec_start, vec_stop] overwrites the corresponding
 *     local entry.
 *   - An index outside that range stores nothing; instead every entry with
 *     the same index already held in the off-processor stash of the
 *     auxiliary vector is cancelled (its stashed index is replaced by -1 and
 *     the cancel counter is incremented).
 *   - With indices == NULL, values beyond the local range are ignored.
 *
 * Returns hypre_error_flag.  A missing ParVector, partitioning or local
 * vector is flagged with hypre_error_in_arg(1) and the routine returns at
 * once, so that none of the missing objects is dereferenced.
 *
 *****************************************************************************/

HYPRE_Int
hypre_IJVectorSetValuesPar(hypre_IJVector  *vector,
                           HYPRE_Int        num_values,
                           const HYPRE_Int *indices,
                           const double    *values )
{
   HYPRE_Int my_id;
   HYPRE_Int i, j, vec_start, vec_stop;
   double *data;

   HYPRE_Int print_level = hypre_IJVectorPrintLevel(vector);
   HYPRE_Int *IJpartitioning = hypre_IJVectorPartitioning(vector);
   hypre_ParVector *par_vector = hypre_IJVectorObject(vector);
   hypre_AuxParVector *aux_vector = hypre_IJVectorTranslator(vector);
   MPI_Comm comm = hypre_IJVectorComm(vector);
   hypre_Vector *local_vector;

   /* If no components are to be set, perform no checking and return */
   if (num_values < 1) return 0;

   hypre_MPI_Comm_rank(comm, &my_id);

   /* If par_vector == NULL or partitioning == NULL or local_vector == NULL
      let the user know and return; continuing would dereference NULL */
   if (!par_vector)
   {
      if (print_level)
      {
         hypre_printf("par_vector == NULL -- ");
         hypre_printf("hypre_IJVectorSetValuesPar\n");
         hypre_printf("**** Vector storage is either unallocated or orphaned ****\n");
      }
      hypre_error_in_arg(1);
      return hypre_error_flag;
   }

   /* Only read through par_vector once it is known to be non-NULL */
   local_vector = hypre_ParVectorLocalVector(par_vector);

   if (!IJpartitioning)
   {
      if (print_level)
      {
         hypre_printf("IJpartitioning == NULL -- ");
         hypre_printf("hypre_IJVectorSetValuesPar\n");
         hypre_printf("**** IJVector partitioning is either unallocated or orphaned ****\n");
      }
      hypre_error_in_arg(1);
      return hypre_error_flag;
   }

   if (!local_vector)
   {
      if (print_level)
      {
         hypre_printf("local_vector == NULL -- ");
         hypre_printf("hypre_IJVectorSetValuesPar\n");
         hypre_printf("**** Vector local data is either unallocated or orphaned ****\n");
      }
      hypre_error_in_arg(1);
      return hypre_error_flag;
   }

   /* First and last (inclusive) index owned by this process */
#ifdef HYPRE_NO_GLOBAL_PARTITION
   vec_start = IJpartitioning[0];
   vec_stop  = IJpartitioning[1]-1;
#else
   vec_start = IJpartitioning[my_id];
   vec_stop  = IJpartitioning[my_id+1]-1;
#endif

   /* An empty or inverted local range is flagged, but processing continues
      as before: no local entry can match, so nothing below writes to data */
   if (vec_start > vec_stop)
   {
      if (print_level)
      {
         hypre_printf("vec_start > vec_stop -- ");
         hypre_printf("hypre_IJVectorSetValuesPar\n");
         hypre_printf("**** This vector partitioning should not occur ****\n");
      }
      hypre_error_in_arg(1);
   }

   /* Determine whether indices points to local indices only.
      If indices == NULL, assume that num_values components are to be
      set in a block starting at vec_start.
      NOTE: If indices == NULL off processor values are ignored!!! */

   data = hypre_VectorData(local_vector);

   if (indices)
   {
      HYPRE_Int current_num_elmts
         = hypre_AuxParVectorCurrentNumElmts(aux_vector);
      HYPRE_Int *off_proc_i = hypre_AuxParVectorOffProcI(aux_vector);
      HYPRE_Int cancel_indx = hypre_AuxParVectorCancelIndx(aux_vector);
      HYPRE_Int ii;

      for (j = 0; j < num_values; j++)
      {
         i = indices[j];
         if (i < vec_start || i > vec_stop)
         {
            /* Element outside processor boundaries: search the stash for
               previous occurrences and cancel them.  The new value itself
               is not stashed. */
            for (ii = 0; ii < current_num_elmts; ii++)
            {
               if (i == off_proc_i[ii])
               {
                  off_proc_i[ii] = -1;
                  cancel_indx++;
               }
            }
            hypre_AuxParVectorCancelIndx(aux_vector) = cancel_indx;
         }
         else /* local values are inserted into the vector */
         {
            i -= vec_start;
            data[i] = values[j];
         }
      }
   }
   else
   {
      if (num_values > vec_stop - vec_start + 1)
      {
         if (print_level)
         {
            hypre_printf("Warning! Indices beyond local range not identified!\n ");
            hypre_printf("Off processor values have been ignored!\n");
         }
         /* Clamp to the number of locally owned entries */
         num_values = vec_stop - vec_start +1;
      }

      for (j = 0; j < num_values; j++)
         data[j] = values[j];
   }

   return hypre_error_flag;
}
HYPRE_Int hypre_SchwarzSolve(hypre_CSRMatrix *A, hypre_Vector *rhs_vector, HYPRE_Int num_domains, HYPRE_Int *i_domain_dof, HYPRE_Int *j_domain_dof, HYPRE_Real *domain_matrixinverse, hypre_Vector *x_vector, hypre_Vector *aux_vector) { HYPRE_Int ierr = 0; /* HYPRE_Int num_dofs; */ HYPRE_Int *i_dof_dof; HYPRE_Int *j_dof_dof; HYPRE_Real *a_dof_dof; HYPRE_Real *x; HYPRE_Real *rhs; HYPRE_Real *aux; HYPRE_Int i,j,k, j_loc, k_loc; HYPRE_Int matrix_size, matrix_size_counter = 0; /* initiate: ----------------------------------------------- */ /* num_dofs = hypre_CSRMatrixNumRows(A); */ i_dof_dof = hypre_CSRMatrixI(A); j_dof_dof = hypre_CSRMatrixJ(A); a_dof_dof = hypre_CSRMatrixData(A); x = hypre_VectorData(x_vector); rhs = hypre_VectorData(rhs_vector); aux = hypre_VectorData(aux_vector); /* for (i=0; i < num_dofs; i++) x[i] = 0.e0; */ /* forward solve: ----------------------------------------------- */ matrix_size_counter = 0; for (i=0; i < num_domains; i++) { matrix_size = i_domain_dof[i+1] - i_domain_dof[i]; /* compute residual: ---------------------------------------- */ for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { aux[j_domain_dof[j]] = rhs[j_domain_dof[j]]; for (k=i_dof_dof[j_domain_dof[j]]; k<i_dof_dof[j_domain_dof[j]+1]; k++) aux[j_domain_dof[j]] -= a_dof_dof[k] * x[j_dof_dof[k]]; } /* solve for correction: ------------------------------------- */ for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { j_loc = j-i_domain_dof[i]; for (k=i_domain_dof[i]; k < i_domain_dof[i+1]; k++) { k_loc = k-i_domain_dof[i]; x[j_domain_dof[j]]+= domain_matrixinverse[matrix_size_counter + j_loc + k_loc * matrix_size] * aux[j_domain_dof[k]]; } } matrix_size_counter += matrix_size * matrix_size; } /* backward solve: ------------------------------------------------ */ for (i=num_domains-1; i > -1; i--) { matrix_size = i_domain_dof[i+1] - i_domain_dof[i]; matrix_size_counter -= matrix_size * matrix_size; /* compute residual: ---------------------------------------- */ for 
(j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { aux[j_domain_dof[j]] = rhs[j_domain_dof[j]]; for (k=i_dof_dof[j_domain_dof[j]]; k<i_dof_dof[j_domain_dof[j]+1]; k++) aux[j_domain_dof[j]] -= a_dof_dof[k] * x[j_dof_dof[k]]; } /* solve for correction: ------------------------------------- */ for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { j_loc = j-i_domain_dof[i]; for (k=i_domain_dof[i]; k < i_domain_dof[i+1]; k++) { k_loc = k-i_domain_dof[i]; x[j_domain_dof[j]]+= domain_matrixinverse[matrix_size_counter + j_loc + k_loc * matrix_size] * aux[j_domain_dof[k]]; } } } return ierr; }
/*--------------------------------------------------------------------------
 * hypre_seqAMGCycle
 *
 * If the AMG data holds a coarse matrix (A_coarse), the local parts of
 * Par_F_array[p_level] and Par_U_array[p_level] are gathered from all
 * processes of the new communicator into F_coarse and U_coarse,
 * hypre_BoomerAMGSolve is run on that system with the stored coarse solver,
 * and this process's slice of the solution (starting at the first index of
 * the level's U vector) is copied back into the level's U vector.
 * Without a coarse matrix nothing is done.  Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData *amg_data,
                   HYPRE_Int p_level,
                   hypre_ParVector **Par_F_array,
                   hypre_ParVector **Par_U_array )
{
   HYPRE_Int Solve_err_flag = 0;

   /* Acquire seq data */
   MPI_Comm new_comm = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix *A_coarse = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector *F_coarse = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector *U_coarse = hypre_ParAMGDataUCoarse(amg_data);

   /* This level's vectors and the local piece of U */
   hypre_ParVector *level_u = Par_U_array[p_level];
   hypre_ParVector *level_f = Par_F_array[p_level];
   HYPRE_Int my_offset = hypre_ParVectorFirstIndex(level_u);
   hypre_Vector *u_seq = hypre_ParVectorLocalVector(level_u);
   double *u_vals = hypre_VectorData(u_seq);
   HYPRE_Int u_len = hypre_VectorSize(u_seq);

   hypre_Vector *f_seq;
   double *f_vals;
   double *gathered;
   double *solution;
   HYPRE_Int f_len;
   HYPRE_Int nprocs;
   HYPRE_Int *counts, *offsets;
   HYPRE_Int p, k;

   if (!A_coarse)
   {
      return(Solve_err_flag);
   }

   hypre_MPI_Comm_size(new_comm, &nprocs);

   f_seq = hypre_ParVectorLocalVector(level_f);
   f_vals = hypre_VectorData(f_seq);
   f_len = hypre_VectorSize(f_seq);

   /* Every process learns the local length of every other process */
   counts = hypre_CTAlloc(HYPRE_Int, nprocs);
   hypre_MPI_Allgather(&f_len, 1, HYPRE_MPI_INT, counts, 1, HYPRE_MPI_INT, new_comm);

   /* Running sum of the lengths gives each process's displacement */
   offsets = hypre_CTAlloc(HYPRE_Int, nprocs+1);
   offsets[0] = 0;
   for (p = 0; p < nprocs; p++)
   {
      offsets[p+1] = offsets[p] + counts[p];
   }

   /* first f */
   gathered = hypre_VectorData(hypre_ParVectorLocalVector(F_coarse));
   hypre_MPI_Allgatherv ( f_vals, f_len, hypre_MPI_DOUBLE,
                          gathered, counts, offsets,
                          hypre_MPI_DOUBLE, new_comm );

   /* then u (same counts and displacements as for f) */
   gathered = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
   hypre_MPI_Allgatherv ( u_vals, u_len, hypre_MPI_DOUBLE,
                          gathered, counts, offsets,
                          hypre_MPI_DOUBLE, new_comm );

   /* clean up */
   hypre_TFree(offsets);
   hypre_TFree(counts);

   hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);

   /* copy my part of U to parallel vector */
   solution = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
   for (k = 0; k < u_len; k++)
   {
      u_vals[k] = solution[my_offset+k];
   }

   return(Solve_err_flag);
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRBlockMatrixMatvecT
 *
 * Transposed block matvec.  The offd part is applied into a temporary
 * vector (with beta = 0), the diag part is applied into the local part of y
 * (with the caller's beta), and the buffer filled by the communication
 * handle is then added onto the entries of y named by the send map.
 *
 * Return value (informational only, processing is never stopped):
 *   0 - sizes agree
 *   1 - length of x differs from (global rows of A) * block size
 *   2 - length of y differs from (global cols of A) * block size
 *   3 - both of the above
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParCSRBlockMatrixMatvecT( HYPRE_Complex alpha,
                                hypre_ParCSRBlockMatrix *A,
                                hypre_ParVector *x,
                                HYPRE_Complex beta,
                                hypre_ParVector *y )
{
   hypre_ParCSRCommHandle *handle;
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
   hypre_CSRBlockMatrix *diag_part = hypre_ParCSRBlockMatrixDiag(A);
   hypre_CSRBlockMatrix *offd_part = hypre_ParCSRBlockMatrixOffd(A);
   hypre_Vector *x_seq = hypre_ParVectorLocalVector(x);
   hypre_Vector *y_seq = hypre_ParVectorLocalVector(y);
   hypre_Vector *offd_result;

   HYPRE_Int bs = hypre_ParCSRBlockMatrixBlockSize(A);
   HYPRE_Int x_len = hypre_ParVectorGlobalSize(x);
   HYPRE_Int y_len = hypre_ParVectorGlobalSize(y);
   HYPRE_Int n_rows = hypre_ParCSRBlockMatrixGlobalNumRows(A);
   HYPRE_Int n_cols = hypre_ParCSRBlockMatrixGlobalNumCols(A);
   HYPRE_Int n_cols_offd = hypre_CSRBlockMatrixNumCols(offd_part);

   HYPRE_Complex *y_vals, *offd_vals, *recv_buf;
   HYPRE_Int n_sends, buf_len;
   HYPRE_Int s, m, c, dst, src, map_begin, map_end;
   HYPRE_Int ierr;

   /* Size compatibility check; the result is informational only */
   if (n_rows*bs != x_len)
   {
      ierr = (n_cols*bs != y_len) ? 3 : 1;
   }
   else
   {
      ierr = (n_cols*bs != y_len) ? 2 : 0;
   }

   /* Temporary vector that receives the offd contribution */
   offd_result = hypre_SeqVectorCreate(n_cols_offd*bs);
   hypre_SeqVectorInitialize(offd_result);

   /* If there exists no CommPkg for A, a CommPkg is generated */
   if (!comm_pkg)
   {
      hypre_BlockMatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
   }

   n_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   buf_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, n_sends)*bs;
   recv_buf = hypre_CTAlloc(HYPRE_Complex, buf_len);

   offd_vals = hypre_VectorData(offd_result);
   y_vals = hypre_VectorData(y_seq);

   if (n_cols_offd)
   {
      hypre_CSRBlockMatrixMatvecT(alpha, offd_part, x_seq, 0.0, offd_result);
   }

   /* The diag product is computed between creating and destroying the
      communication handle */
   handle = hypre_ParCSRBlockCommHandleCreate
      ( 2, bs, comm_pkg, offd_vals, recv_buf);

   hypre_CSRBlockMatrixMatvecT(alpha, diag_part, x_seq, beta, y_seq);

   hypre_ParCSRCommHandleDestroy(handle);
   handle = NULL;

   /* Add the buffer, block by block, onto the mapped entries of y */
   src = 0;
   for (s = 0; s < n_sends; s++)
   {
      map_begin = hypre_ParCSRCommPkgSendMapStart(comm_pkg, s);
      map_end   = hypre_ParCSRCommPkgSendMapStart(comm_pkg, s+1);
      for (m = map_begin; m < map_end; m++)
      {
         dst = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, m)*bs;
         for (c = 0; c < bs; c++)
         {
            y_vals[dst + c] += recv_buf[src + c];
         }
         src += bs;
      }
   }

   hypre_TFree(recv_buf);

   hypre_SeqVectorDestroy(offd_result);
   offd_result = NULL;

   return ierr;
}
/* Test driver. NOTE(review): the definition is cut off in this part of the file; this comment covers only the visible portion. It initializes MPI, obtains a ParCSR matrix from MyBuildParLaplacian9pt, multiplies it with a vector of ones and prints the inner product of the result on rank 0, then copies the matrix structure into a ParCSR block matrix with block_size = 2 (bnnz = 4 values per block). */ HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { hypre_ParCSRMatrix *par_matrix, *g_matrix, **submatrices; hypre_CSRMatrix *A_diag, *A_offd; hypre_CSRBlockMatrix *diag; hypre_CSRBlockMatrix *offd; hypre_ParCSRBlockMatrix *par_blk_matrix, *par_blk_matrixT, *rap_matrix; hypre_Vector *x_local; hypre_Vector *y_local; hypre_ParVector *x; hypre_ParVector *y; HYPRE_Solver gmres_solver, precon; HYPRE_Int *diag_i, *diag_j, *offd_i, *offd_j; HYPRE_Int *diag_i2, *diag_j2, *offd_i2, *offd_j2; double *diag_d, *diag_d2, *offd_d, *offd_d2; HYPRE_Int mypid, local_size, nprocs; HYPRE_Int global_num_rows, global_num_cols, num_cols_offd; HYPRE_Int num_nonzeros_diag, num_nonzeros_offd, *colMap; HYPRE_Int ii, jj, kk, row, col, nnz, *indices, *colMap2; double *data, ddata, *y_data; HYPRE_Int *row_starts, *col_starts, *rstarts, *cstarts; HYPRE_Int *row_starts2, *col_starts2; HYPRE_Int block_size=2, bnnz=4, *index_set; FILE *fp; /* --------------------------------------------- */ /* Initialize MPI */ /* --------------------------------------------- */ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mypid); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs); /* build the scalar matrix and fetch its sizes, partitionings and diag/offd parts */ MyBuildParLaplacian9pt((HYPRE_ParCSRMatrix *) &par_matrix); global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix); row_starts = hypre_ParCSRMatrixRowStarts(par_matrix); col_starts = hypre_ParCSRMatrixColStarts(par_matrix); A_diag = hypre_ParCSRMatrixDiag(par_matrix); A_offd = hypre_ParCSRMatrixOffd(par_matrix); num_cols_offd = hypre_CSRMatrixNumCols(A_offd); num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(A_diag); num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(A_offd); /* --------------------------------------------- */ /* build vector and apply matvec */ /* --------------------------------------------- */ x = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_cols,col_starts); 
hypre_ParVectorSetPartitioningOwner(x,0); hypre_ParVectorInitialize(x); x_local = hypre_ParVectorLocalVector(x); data = hypre_VectorData(x_local); /* local_size is this process's span in col_starts; x is filled with ones */ local_size = col_starts[mypid+1] - col_starts[mypid]; for (ii = 0; ii < local_size; ii++) data[ii] = 1.0; y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_rows,row_starts); hypre_ParVectorSetPartitioningOwner(y,0); hypre_ParVectorInitialize(y); /* y = par_matrix * x; print the inner product of y with itself on rank 0 */ hypre_ParCSRMatrixMatvec (1.0, par_matrix, x, 0.0, y); ddata = hypre_ParVectorInnerProd(y, y); if (mypid == 0) hypre_printf("y inner product = %e\n", ddata); hypre_ParVectorDestroy(x); hypre_ParVectorDestroy(y); /* --------------------------------------------- */ /* build block matrix */ /* --------------------------------------------- */ /* the block matrix is given its own copies of the row and column partitionings */ rstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) rstarts[ii] = row_starts[ii]; cstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) cstarts[ii] = col_starts[ii]; par_blk_matrix = hypre_ParCSRBlockMatrixCreate(hypre_MPI_COMM_WORLD,block_size, global_num_rows, global_num_cols, rstarts, cstarts, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); /* copy the off-diagonal column map (left NULL when there are no offd columns) */ colMap = hypre_ParCSRMatrixColMapOffd(par_matrix); if (num_cols_offd > 0) colMap2 = hypre_CTAlloc(HYPRE_Int, num_cols_offd); else colMap2 = NULL; for (ii = 0; ii < num_cols_offd; ii++) colMap2[ii] = colMap[ii]; hypre_ParCSRBlockMatrixColMapOffd(par_blk_matrix) = colMap2; /* diag part: copy the row pointers and column indices; the values are expanded to bnnz entries per nonzero below */ diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix)); diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix)); diag_d = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix)); diag = hypre_ParCSRBlockMatrixDiag(par_blk_matrix); diag_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); diag_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag); diag_d2 = hypre_CTAlloc(double, num_nonzeros_diag*bnnz); for (ii = 0; ii <= local_size; ii++) diag_i2[ii] = diag_i[ii]; for (ii = 0; ii < num_nonzeros_diag; ii++) diag_j2[ii] = diag_j[ii]; hypre_CSRBlockMatrixI(diag) = diag_i2; 
hypre_CSRBlockMatrixJ(diag) = diag_j2; /* each scalar entry becomes one block: positions with jj <= kk receive the scalar value, the remaining positions are set to zero */ for (ii = 0; ii < num_nonzeros_diag; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) diag_d2[ii*bnnz+jj*block_size+kk] = diag_d[ii]; else diag_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(diag) = diag_d2; /* offd part: same expansion as for diag; column indices and values are copied only when there are offd columns */ offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix)); offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix)); offd_d = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix)); offd = hypre_ParCSRBlockMatrixOffd(par_blk_matrix); offd_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); for (ii = 0; ii <= local_size; ii++) offd_i2[ii] = offd_i[ii]; hypre_CSRBlockMatrixI(offd) = offd_i2; if (num_cols_offd) { offd_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd); for (ii = 0; ii < num_nonzeros_offd; ii++) offd_j2[ii] = offd_j[ii]; hypre_CSRBlockMatrixJ(offd) = offd_j2; offd_d2 = hypre_CTAlloc(double, num_nonzeros_offd*bnnz); for (ii = 0; ii < num_nonzeros_offd; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) offd_d2[ii*bnnz+jj*block_size+kk] = offd_d[ii]; else offd_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(offd) = offd_d2; } else {
/*--------------------------------------------------------------------------
 * hypre_CSRMatrixMatvec_FF
 *
 * Computes y = alpha*A*x + beta*y restricted by the markers: only rows i
 * with CF_marker_x[i] == fpt are updated, and within such a row only
 * entries whose column j satisfies CF_marker_y[j] == fpt contribute.
 * All other entries of y are left untouched.
 *
 * Return value (informational only, processing is never stopped):
 *   0 - sizes agree
 *   1 - length of x differs from the number of columns of A
 *   2 - length of y differs from the number of rows of A
 *   3 - both of the above
 *--------------------------------------------------------------------------*/
int
hypre_CSRMatrixMatvec_FF( double alpha,
                          hypre_CSRMatrix *A,
                          hypre_Vector *x,
                          double beta,
                          hypre_Vector *y,
                          int *CF_marker_x,
                          int *CF_marker_y,
                          int fpt )
{
   double *coef    = hypre_CSRMatrixData(A);
   int    *row_ptr = hypre_CSRMatrixI(A);
   int    *col_idx = hypre_CSRMatrixJ(A);
   int     n_rows  = hypre_CSRMatrixNumRows(A);
   int     n_cols  = hypre_CSRMatrixNumCols(A);

   double *xv    = hypre_VectorData(x);
   double *yv    = hypre_VectorData(y);
   int     x_len = hypre_VectorSize(x);
   int     y_len = hypre_VectorSize(y);

   double scale, acc;
   int    row, nz;
   int    ierr;

   /* Size compatibility check; the result is informational only */
   if (n_cols != x_len)
   {
      ierr = (n_rows != y_len) ? 3 : 1;
   }
   else
   {
      ierr = (n_rows != y_len) ? 2 : 0;
   }

   /* alpha == 0: the product vanishes, only scale the marked rows by beta */
   if (alpha == 0.0)
   {
      for (row = 0; row < n_rows; row++)
      {
         if (CF_marker_x[row] != fpt) continue;
         yv[row] *= beta;
      }
      return ierr;
   }

   /* y = (beta/alpha)*y on the marked rows; nothing to do for a factor of 1 */
   scale = beta / alpha;
   if (scale == 0.0)
   {
      for (row = 0; row < n_rows; row++)
      {
         if (CF_marker_x[row] != fpt) continue;
         yv[row] = 0.0;
      }
   }
   else if (scale != 1.0)
   {
      for (row = 0; row < n_rows; row++)
      {
         if (CF_marker_x[row] != fpt) continue;
         yv[row] *= scale;
      }
   }

   /* y += A*x, marked rows and marked columns only */
   for (row = 0; row < n_rows; row++)
   {
      if (CF_marker_x[row] != fpt) continue;

      acc = yv[row];
      for (nz = row_ptr[row]; nz < row_ptr[row+1]; nz++)
      {
         if (CF_marker_y[col_idx[nz]] != fpt) continue;
         acc += coef[nz] * xv[col_idx[nz]];
      }
      yv[row] = acc;
   }

   /* y = alpha*y on the marked rows */
   if (alpha != 1.0)
   {
      for (row = 0; row < n_rows; row++)
      {
         if (CF_marker_x[row] != fpt) continue;
         yv[row] *= alpha;
      }
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_CSRMatrixMatvecT
 *
 * Computes y = alpha*A^T*x + beta*y.  x and y may hold several vectors;
 * their counts must agree (checked with hypre_assert).
 *
 * When hypre_NumThreads() reports more than one thread, the columns are
 * split into that many consecutive chunks and the matrix is traversed once
 * per chunk, adding only the entries whose column falls into the chunk.
 *
 * Return value (informational only, processing is never stopped):
 *   0 - sizes agree
 *   1 - length of x differs from the number of rows of A
 *   2 - length of y differs from the number of columns of A
 *   3 - both of the above
 *--------------------------------------------------------------------------*/
int
hypre_CSRMatrixMatvecT( double alpha,
                        hypre_CSRMatrix *A,
                        hypre_Vector *x,
                        double beta,
                        hypre_Vector *y )
{
   double *coef    = hypre_CSRMatrixData(A);
   int    *row_ptr = hypre_CSRMatrixI(A);
   int    *col_idx = hypre_CSRMatrixJ(A);
   int     n_rows  = hypre_CSRMatrixNumRows(A);
   int     n_cols  = hypre_CSRMatrixNumCols(A);

   double *xv    = hypre_VectorData(x);
   double *yv    = hypre_VectorData(y);
   int     x_len = hypre_VectorSize(x);
   int     y_len = hypre_VectorSize(y);
   int     nvec  = hypre_VectorNumVectors(x);
   int     y_istride = hypre_VectorIndexStride(y);
   int     y_vstride = hypre_VectorVectorStride(y);
   int     x_istride = hypre_VectorIndexStride(x);
   int     x_vstride = hypre_VectorVectorStride(x);

   double scale;
   int    row, col, v, nz, k;
   int    t, n_threads, chunk, extra, lo, hi;
   int    x_off;
   int    ierr;

   hypre_assert( nvec == hypre_VectorNumVectors(y) );

   /* Size compatibility check; the result is informational only */
   if (n_rows != x_len)
   {
      ierr = (n_cols != y_len) ? 3 : 1;
   }
   else
   {
      ierr = (n_cols != y_len) ? 2 : 0;
   }

   /* alpha == 0: the product vanishes, only scale y by beta */
   if (alpha == 0.0)
   {
      for (k = 0; k < n_cols*nvec; k++)
         yv[k] *= beta;

      return ierr;
   }

   /* y = (beta/alpha)*y; nothing to do for a factor of 1 */
   scale = beta / alpha;
   if (scale == 0.0)
   {
      for (k = 0; k < n_cols*nvec; k++)
         yv[k] = 0.0;
   }
   else if (scale != 1.0)
   {
      for (k = 0; k < n_cols*nvec; k++)
         yv[k] *= scale;
   }

   /* y += A^T*x */
   n_threads = hypre_NumThreads();
   if (n_threads > 1)
   {
      /* chunk columns each; the first 'extra' chunks take one more */
      chunk = n_cols/n_threads;
      extra = n_cols - chunk*n_threads;

      for (t = 0; t < n_threads; t++)
      {
         /* chunk t covers the columns strictly between lo and hi */
         if (t < extra)
         {
            lo = t*chunk+t-1;
            hi = (t+1)*chunk+t+1;
         }
         else
         {
            lo = t*chunk+extra-1;
            hi = (t+1)*chunk+extra;
         }

         if (nvec == 1)
         {
            for (row = 0; row < n_rows; row++)
            {
               for (nz = row_ptr[row]; nz < row_ptr[row+1]; nz++)
               {
                  col = col_idx[nz];
                  if (col > lo && col < hi)
                     yv[col] += coef[nz] * xv[row];
               }
            }
         }
         else
         {
            for (row = 0; row < n_rows; row++)
            {
               for (v = 0; v < nvec; ++v)
               {
                  x_off = row*x_istride + v*x_vstride;
                  for (nz = row_ptr[row]; nz < row_ptr[row+1]; nz++)
                  {
                     col = col_idx[nz];
                     if (col > lo && col < hi)
                        yv[ col*y_istride + v*y_vstride ] +=
                           coef[nz] * xv[x_off];
                  }
               }
            }
         }
      }
   }
   else if (nvec == 1)
   {
      for (row = 0; row < n_rows; row++)
      {
         for (nz = row_ptr[row]; nz < row_ptr[row+1]; nz++)
         {
            col = col_idx[nz];
            yv[col] += coef[nz] * xv[row];
         }
      }
   }
   else
   {
      for (row = 0; row < n_rows; row++)
      {
         for (v = 0; v < nvec; ++v)
         {
            x_off = row*x_istride + v*x_vstride;
            for (nz = row_ptr[row]; nz < row_ptr[row+1]; nz++)
            {
               col = col_idx[nz];
               yv[ col*y_istride + v*y_vstride ] +=
                  coef[nz] * xv[x_off];
            }
         }
      }
   }

   /* y = alpha*y */
   if (alpha != 1.0)
   {
      for (k = 0; k < n_cols*nvec; k++)
         yv[k] *= alpha;
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvec_FF
 *
 * Parallel marker-restricted matvec: applies hypre_CSRMatrixMatvec_FF to the
 * diag part of A with the local part of x, and, when more than one process
 * is involved, to the offd part with the communicated values of x and the
 * communicated markers (CF_marker_offd).
 *
 * Parameters:
 *   alpha, beta - scalars passed to the sequential kernels (the offd
 *                 contribution is added with beta = 1.0)
 *   A           - the ParCSR matrix; a CommPkg is created if it has none
 *   x, y        - input and output ParVectors
 *   CF_marker   - marker array for the local points
 *   fpt         - marker value selecting the points that take part
 *
 * Return value (informational only, processing is never stopped):
 *   0  - sizes agree
 *   11 - length of x differs from the global number of columns of A
 *   12 - length of y differs from the global number of rows of A
 *   13 - both of the above
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_ParCSRMatrixMatvec_FF( HYPRE_Complex alpha,
                             hypre_ParCSRMatrix *A,
                             hypre_ParVector *x,
                             HYPRE_Complex beta,
                             hypre_ParVector *y,
                             HYPRE_Int *CF_marker,
                             HYPRE_Int fpt )
{
   MPI_Comm comm = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommHandle *comm_handle = NULL;
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A);
   hypre_Vector *x_local = hypre_ParVectorLocalVector(x);
   hypre_Vector *y_local = hypre_ParVectorLocalVector(y);
   HYPRE_Int num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   /* x_tmp is only created when there are offd columns; it starts out NULL
      so that the cleanup below never sees an uninitialized pointer */
   hypre_Vector *x_tmp = NULL;
   HYPRE_Int x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int ierr = 0;
   HYPRE_Int num_sends = 0;
   HYPRE_Int i, j, index, start, num_procs;
   HYPRE_Int *int_buf_data = NULL;
   HYPRE_Int *CF_marker_offd = NULL;

   HYPRE_Complex *x_tmp_data = NULL;
   HYPRE_Complex *x_buf_data = NULL;
   HYPRE_Complex *x_local_data = hypre_VectorData(x_local);

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  The ierr flag is informational only;
    *  none of these conditions terminates processing.
    *--------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm,&num_procs);

   if (num_cols != x_size)
      ierr = 11;

   if (num_rows != y_size)
      ierr = 12;

   if (num_cols != x_size && num_rows != y_size)
      ierr = 13;

   if (num_procs > 1)
   {
      if (num_cols_offd)
      {
         x_tmp = hypre_SeqVectorCreate( num_cols_offd );
         hypre_SeqVectorInitialize(x_tmp);
         x_tmp_data = hypre_VectorData(x_tmp);
      }

      /*------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated
       *-----------------------------------------------------------------*/
      if (!comm_pkg)
      {
         hypre_MatvecCommPkgCreate(A);
         comm_pkg = hypre_ParCSRMatrixCommPkg(A);
      }

      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      if (num_sends)
         x_buf_data = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart
                                    (comm_pkg, num_sends));

      /* Pack the entries of x named by the send map */
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            x_buf_data[index++]
               = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data, x_tmp_data );
   }

   /* The diag product is computed before the communication handle for x
      is destroyed */
   hypre_CSRMatrixMatvec_FF( alpha, diag, x_local, beta, y_local, CF_marker,
                             CF_marker, fpt);

   if (num_procs > 1)
   {
      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      if (num_sends)
         int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart
                                      (comm_pkg, num_sends));
      if (num_cols_offd) CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

      /* Pack and exchange the markers of the entries named by the send map */
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            int_buf_data[index++]
               = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate(11,comm_pkg,int_buf_data,CF_marker_offd );

      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      /* Add the offd contribution on top of the diag result (beta = 1.0) */
      if (num_cols_offd) hypre_CSRMatrixMatvec_FF( alpha, offd, x_tmp, 1.0, y_local,
                                                   CF_marker, CF_marker_offd, fpt);

      /* x_tmp exists only if there were offd columns */
      if (x_tmp)
      {
         hypre_SeqVectorDestroy(x_tmp);
         x_tmp = NULL;
      }
      hypre_TFree(x_buf_data);
      hypre_TFree(int_buf_data);
      hypre_TFree(CF_marker_offd);
   }

   return ierr;
}
/******************************************************************************
 *
 * hypre_IJVectorZeroValuesPar
 *
 * Zeroes all local components of an IJVectorPar.
 *
 * The number of local components is taken from the partitioning of the
 * vector's ParVector object (partitioning[my_id+1] - partitioning[my_id], or
 * partitioning[1] - partitioning[0] with HYPRE_NO_GLOBAL_PARTITION).
 *
 * Returns hypre_error_flag.  A missing ParVector, partitioning or local
 * vector, or a partitioning with vec_start > vec_stop, is flagged with
 * hypre_error_in_arg(1) and the routine returns at once, so that none of the
 * missing objects is dereferenced.
 *
 *****************************************************************************/

HYPRE_Int
hypre_IJVectorZeroValuesPar(hypre_IJVector *vector)
{
   HYPRE_Int my_id;
   HYPRE_Int i, vec_start, vec_stop;
   double *data;

   hypre_ParVector *par_vector = hypre_IJVectorObject(vector);
   MPI_Comm comm = hypre_IJVectorComm(vector);
   HYPRE_Int *partitioning;
   hypre_Vector *local_vector;
   HYPRE_Int print_level = hypre_IJVectorPrintLevel(vector);

   hypre_MPI_Comm_rank(comm, &my_id);

   /* If par_vector == NULL or partitioning == NULL or local_vector == NULL
      let the user know and return; continuing would dereference NULL */
   if (!par_vector)
   {
      if (print_level)
      {
         hypre_printf("par_vector == NULL -- ");
         hypre_printf("hypre_IJVectorZeroValuesPar\n");
         hypre_printf("**** Vector storage is either unallocated or orphaned ****\n");
      }
      hypre_error_in_arg(1);
      return hypre_error_flag;
   }

   /* Only read through par_vector once it is known to be non-NULL */
   partitioning = hypre_ParVectorPartitioning(par_vector);
   local_vector = hypre_ParVectorLocalVector(par_vector);

   if (!partitioning)
   {
      if (print_level)
      {
         hypre_printf("partitioning == NULL -- ");
         hypre_printf("hypre_IJVectorZeroValuesPar\n");
         hypre_printf("**** Vector partitioning is either unallocated or orphaned ****\n");
      }
      hypre_error_in_arg(1);
      return hypre_error_flag;
   }

   if (!local_vector)
   {
      if (print_level)
      {
         hypre_printf("local_vector == NULL -- ");
         hypre_printf("hypre_IJVectorZeroValuesPar\n");
         hypre_printf("**** Vector local data is either unallocated or orphaned ****\n");
      }
      hypre_error_in_arg(1);
      return hypre_error_flag;
   }

   /* Here vec_stop is one past the last local index (exclusive bound) */
#ifdef HYPRE_NO_GLOBAL_PARTITION
   vec_start = partitioning[0];
   vec_stop  = partitioning[1];
#else
   vec_start = partitioning[my_id];
   vec_stop  = partitioning[my_id+1];
#endif

   if (vec_start > vec_stop)
   {
      if (print_level)
      {
         hypre_printf("vec_start > vec_stop -- ");
         hypre_printf("hypre_IJVectorZeroValuesPar\n");
         hypre_printf("**** This vector partitioning should not occur ****\n");
      }
      hypre_error_in_arg(1);
      /* The local range is empty, so there is nothing to zero */
      return hypre_error_flag;
   }

   data = hypre_VectorData( local_vector );
   for (i = 0; i < vec_stop - vec_start; i++)
      data[i] = 0.;

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_CSRMatrixMatvec
 *
 * Computes y = alpha*A*x + beta*y for a CSR matrix A.  x and y may hold
 * several vectors (num_vectors > 1); in that case the entry for vector j
 * at index i is addressed as data[ j*vecstride + i*idxstride ].
 *
 * alpha, beta - scalar multipliers
 * A           - CSR matrix
 * x           - input vector(s)
 * y           - input/output vector(s), overwritten with the result
 *
 * Returns ierr:
 *    0  sizes are consistent
 *    1  length of x differs from the number of columns of A
 *    2  length of y differs from the number of rows of A
 *    3  both of the above
 * The flag is informational only; the product is still computed.
 *--------------------------------------------------------------------------*/

int
hypre_CSRMatrixMatvec( double           alpha,
                       hypre_CSRMatrix *A,
                       hypre_Vector    *x,
                       double           beta,
                       hypre_Vector    *y     )
{
   double     *A_data   = hypre_CSRMatrixData(A);
   int        *A_i      = hypre_CSRMatrixI(A);
   int        *A_j      = hypre_CSRMatrixJ(A);
   int         num_rows = hypre_CSRMatrixNumRows(A);
   int         num_cols = hypre_CSRMatrixNumCols(A);

   int        *A_rownnz   = hypre_CSRMatrixRownnz(A);
   int         num_rownnz = hypre_CSRMatrixNumRownnz(A);

   double     *x_data = hypre_VectorData(x);
   double     *y_data = hypre_VectorData(y);
   int         x_size = hypre_VectorSize(x);
   int         y_size = hypre_VectorSize(y);
   int         num_vectors = hypre_VectorNumVectors(x);
   int         idxstride_y = hypre_VectorIndexStride(y);
   int         vecstride_y = hypre_VectorVectorStride(y);
   int         idxstride_x = hypre_VectorIndexStride(x);
   int         vecstride_x = hypre_VectorVectorStride(x);

   double      temp, tempx;

   int         i, j, jj;

   int         m;

   /* The rownnz path is taken when num_rownnz < xpar * num_rows. */
   double      xpar = 0.7;

   int         ierr = 0;

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  Matvec returns ierr = 1 if
    *  length of X doesn't equal the number of columns of A,
    *  ierr = 2 if the length of Y doesn't equal the number of rows
    *  of A, and ierr = 3 if both are true.
    *
    *  Because temporary vectors are often used in Matvec, none of
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/

   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );

   if (num_cols != x_size)
      ierr = 1;

   if (num_rows != y_size)
      ierr = 2;

   if (num_cols != x_size && num_rows != y_size)
      ierr = 3;

   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/

   if (alpha == 0.0)
   {
      for (i = 0; i < num_rows*num_vectors; i++)
         y_data[i] *= beta;

      return ierr;
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *
    * Scaling by beta/alpha now and by alpha at the end yields
    * alpha*A*x + beta*y with a single multiplication pass over A.
    *-----------------------------------------------------------------------*/

   temp = beta / alpha;

   if (temp != 1.0)
   {
      if (temp == 0.0)
      {
         for (i = 0; i < num_rows*num_vectors; i++)
            y_data[i] = 0.0;
      }
      else
      {
         for (i = 0; i < num_rows*num_vectors; i++)
            y_data[i] *= temp;
      }
   }

   /*-----------------------------------------------------------------
    * y += A*x
    *-----------------------------------------------------------------*/

   /* use rownnz pointer to do the A*x multiplication
      when num_rownnz is smaller than num_rows */

   if (num_rownnz < xpar*(num_rows))
   {
      for (i = 0; i < num_rownnz; i++)
      {
         m = A_rownnz[i];

         if ( num_vectors==1 )
         {
            tempx = y_data[m];
            for (jj = A_i[m]; jj < A_i[m+1]; jj++)
               tempx += A_data[jj] * x_data[A_j[jj]];
            y_data[m] = tempx;
         }
         else
         {
            for ( j=0; j<num_vectors; ++j )
            {
               tempx = y_data[ j*vecstride_y + m*idxstride_y ];
               for (jj = A_i[m]; jj < A_i[m+1]; jj++)
                  tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               y_data[ j*vecstride_y + m*idxstride_y ] = tempx;
            }
         }
      }
   }
   else
   {
      /* j is the index of the inner multivector loop and must be private
         to each thread; leaving it shared makes threads race on it. */
#pragma omp parallel for private(i,j,jj,temp) schedule(static)
      for (i = 0; i < num_rows; i++)
      {
         if ( num_vectors==1 )
         {
            temp = y_data[i];
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               temp += A_data[jj] * x_data[A_j[jj]];
            y_data[i] = temp;
         }
         else
         {
            for ( j=0; j<num_vectors; ++j )
            {
               temp = y_data[ j*vecstride_y + i*idxstride_y ];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  temp += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               }
               y_data[ j*vecstride_y + i*idxstride_y ] = temp;
            }
         }
      }
   }

   /*-----------------------------------------------------------------
    * y = alpha*y
    *-----------------------------------------------------------------*/

   if (alpha != 1.0)
   {
      for (i = 0; i < num_rows*num_vectors; i++)
         y_data[i] *= alpha;
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRBlockMatrixMatvec
 *
 * Applies a parallel block CSR matrix to a parallel vector by calling
 * hypre_CSRBlockMatrixMatvec on the diag part (with alpha and beta) and,
 * when more than one process is involved and the offd part has columns,
 * on the offd part (with alpha and 1.0) using off-process entries of x.
 *
 * alpha, beta - scalars forwarded to the block matvec kernels
 * A           - parallel block CSR matrix
 * x           - input vector
 * y           - input/output vector
 *
 * Returns ierr:
 *    0   global sizes are consistent
 *    11  global cols * block size differs from the global size of x
 *    12  global rows * block size differs from the global size of y
 *    13  both of the above
 * The flag does not stop the computation.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_ParCSRBlockMatrixMatvec(HYPRE_Complex alpha,
                              hypre_ParCSRBlockMatrix *A,
                              hypre_ParVector *x,
                              HYPRE_Complex beta,
                              hypre_ParVector *y)
{
   hypre_ParCSRCommHandle *handle = NULL;
   hypre_ParCSRCommPkg    *pkg;
   hypre_CSRBlockMatrix   *diag_part, *offd_part;
   hypre_Vector           *x_loc, *y_loc, *x_ghost = NULL;
   HYPRE_Complex          *ghost_data, *send_buf = NULL, *x_loc_data;
   HYPRE_Int               nprocs, mypid;
   HYPRE_Int               glob_rows, glob_cols, bs;
   HYPRE_Int               x_glob_size, y_glob_size;
   HYPRE_Int               n_offd_cols, n_sends, buf_len;
   HYPRE_Int               s, entry, comp, pos, base, first, last;
   HYPRE_Int               x_mismatch, y_mismatch;
   HYPRE_Int               ierr = 0;

   hypre_MPI_Comm_size(hypre_ParCSRBlockMatrixComm(A), &nprocs);
   hypre_MPI_Comm_rank(hypre_ParCSRBlockMatrixComm(A), &mypid);

   /* Pull everything needed out of the matrix and the two vectors. */
   pkg         = hypre_ParCSRBlockMatrixCommPkg(A);
   glob_rows   = hypre_ParCSRBlockMatrixGlobalNumRows(A);
   glob_cols   = hypre_ParCSRBlockMatrixGlobalNumCols(A);
   bs          = hypre_ParCSRBlockMatrixBlockSize(A);
   diag_part   = hypre_ParCSRBlockMatrixDiag(A);
   offd_part   = hypre_ParCSRBlockMatrixOffd(A);
   n_offd_cols = hypre_CSRBlockMatrixNumCols(offd_part);

   x_loc       = hypre_ParVectorLocalVector(x);
   y_loc       = hypre_ParVectorLocalVector(y);
   x_glob_size = hypre_ParVectorGlobalSize(x);
   y_glob_size = hypre_ParVectorGlobalSize(y);
   x_loc_data  = hypre_VectorData(x_loc);

   /*---------------------------------------------------------------------
    * Size compatibility check (informational only).
    *--------------------------------------------------------------------*/
   x_mismatch = (glob_cols*bs != x_glob_size);
   y_mismatch = (glob_rows*bs != y_glob_size);

   if (x_mismatch && y_mismatch)
   {
      ierr = 13;
   }
   else if (y_mismatch)
   {
      ierr = 12;
   }
   else if (x_mismatch)
   {
      ierr = 11;
   }

   if (nprocs > 1)
   {
      /* Storage for the off-process entries of x used by the offd part. */
      x_ghost = hypre_SeqVectorCreate(n_offd_cols*bs);
      hypre_SeqVectorInitialize(x_ghost);
      ghost_data = hypre_VectorData(x_ghost);

      /* Build the communication package on first use. */
      if (!pkg)
      {
         hypre_BlockMatvecCommPkgCreate(A);
         pkg = hypre_ParCSRBlockMatrixCommPkg(A);
      }

      n_sends  = hypre_ParCSRCommPkgNumSends(pkg);
      buf_len  = hypre_ParCSRCommPkgSendMapStart(pkg, n_sends)*bs;
      send_buf = hypre_CTAlloc(HYPRE_Complex, buf_len);

      /* Pack, block by block, the local entries of x listed in the
         send map. */
      pos = 0;
      for (s = 0; s < n_sends; s++)
      {
         first = hypre_ParCSRCommPkgSendMapStart(pkg, s);
         last  = hypre_ParCSRCommPkgSendMapStart(pkg, s+1);
         for (entry = first; entry < last; entry++)
         {
            base = hypre_ParCSRCommPkgSendMapElmt(pkg, entry)*bs;
            for (comp = 0; comp < bs; comp++, pos++)
            {
               send_buf[pos] = x_loc_data[base + comp];
            }
         }
      }

      /* NOTE(review): presumably this starts the exchange of send_buf
         into ghost_data -- confirm against the comm-handle docs. */
      handle = hypre_ParCSRBlockCommHandleCreate(1, bs, pkg,
                                                 send_buf, ghost_data);
   }

   /* Local (diag) contribution. */
   hypre_CSRBlockMatrixMatvec(alpha, diag_part, x_loc, beta, y_loc);

   if (nprocs > 1)
   {
      /* The handle is destroyed before x_ghost is read below. */
      hypre_ParCSRBlockCommHandleDestroy(handle);
      handle = NULL;

      /* Off-process (offd) contribution, accumulated into y. */
      if (n_offd_cols)
      {
         hypre_CSRBlockMatrixMatvec(alpha, offd_part, x_ghost, 1.0, y_loc);
      }

      hypre_SeqVectorDestroy(x_ghost);
      x_ghost = NULL;
      hypre_TFree(send_buf);
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * Test driver for parallel multivectors.
 *
 * Creates a 3-vector parallel multivector of global size 20, fills and
 * prints it, converts a sequential multivector into a parallel one, and
 * then exercises copy, scale, axpy and inner product, printing the
 * intermediate vectors under the names "Vector", "Convert", "Scale"
 * and "Axpy".
 *--------------------------------------------------------------------------*/

HYPRE_Int
main( HYPRE_Int argc,
      char     *argv[] )
{
   hypre_ParVector *par_a;
   hypre_ParVector *par_b;
   hypre_ParVector *work;
   hypre_Vector    *gathered;
   hypre_Vector    *a_local;
   hypre_Vector    *seq_src;

   HYPRE_Int        nprocs, rank;
   HYPRE_Int        global_len = 20;
   HYPRE_Int        nvecs = 3;
   HYPRE_Int        local_len;
   HYPRE_Int        offset;
   HYPRE_Int        vstride, istride;
   HYPRE_Int        row, col;
   HYPRE_Int       *part = NULL;

   double           dot;
   double          *a_vals, *src_vals;

   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &rank );

   hypre_printf(" my_id: %d num_procs: %d\n", rank, nprocs);

   /*-----------------------------------------------------------
    * First multivector: created with a NULL partitioning, then
    * filled with (global index + 100 * vector number).
    *-----------------------------------------------------------*/

   par_a = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_len,
                                       part, nvecs );
   part = hypre_ParVectorPartitioning(par_a);
   hypre_ParVectorInitialize(par_a);

   a_local   = hypre_ParVectorLocalVector(par_a);
   a_vals    = hypre_VectorData(a_local);
   local_len = hypre_VectorSize(a_local);
   vstride   = hypre_VectorVectorStride(a_local);
   istride   = hypre_VectorIndexStride(a_local);
   offset    = part[rank];

   hypre_printf("vecstride=%i idxstride=%i local_size=%i num_vectors=%i",
                vstride, istride, local_len, nvecs );

   for (col = 0; col < nvecs; ++col)
   {
      for (row = 0; row < local_len; row++)
      {
         a_vals[ col*vstride + row*istride ] = offset+row + 100*col;
      }
   }

   hypre_ParVectorPrint(par_a, "Vector");

   /*-----------------------------------------------------------
    * Sequential multivector of the full global size, filled
    * with (index + 100 * vector number).
    *-----------------------------------------------------------*/

   seq_src = hypre_SeqMultiVectorCreate( global_len, nvecs );
   hypre_SeqVectorInitialize(seq_src);
   src_vals = hypre_VectorData(seq_src);
   vstride  = hypre_VectorVectorStride(seq_src);
   istride  = hypre_VectorIndexStride(seq_src);

   for (col = 0; col < nvecs; ++col)
   {
      for (row = 0; row < global_len; row++)
      {
         src_vals[ col*vstride + row*istride ] = row + 100*col;
      }
   }

   /*-----------------------------------------------------------
    * Distribute the sequential multivector.
    * NOTE(review): hypre_GeneratePartitioning receives &part and
    * may replace the array allocated just before it -- confirm,
    * since that would leak the first allocation.
    *-----------------------------------------------------------*/

   part = hypre_CTAlloc(HYPRE_Int, 1+nprocs);
   hypre_GeneratePartitioning( global_len, nprocs, &part );

   par_b = hypre_VectorToParVector(hypre_MPI_COMM_WORLD, seq_src, part);
   hypre_ParVectorSetPartitioningOwner(par_b, 0);

   hypre_ParVectorPrint(par_b, "Convert");

   gathered = hypre_ParVectorToVectorAll(par_b);

   /*-----------------------------------------------------------
    * Copy the converted vector into a work vector.  A fresh
    * multivector is created because reading one back from file
    * does not work for multivectors yet.
    *-----------------------------------------------------------*/

   work = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_len,
                                      part, nvecs );
   hypre_ParVectorInitialize( work );
   hypre_ParVectorCopy( par_b, work );

   /*-----------------------------------------------------------
    * Scale the work vector by 2
    *-----------------------------------------------------------*/

   hypre_ParVectorScale(2.0, work);
   hypre_ParVectorPrint(work, "Scale");

   /*-----------------------------------------------------------
    * Axpy: work <- work - par_a  (2*vector - vector = vector)
    *-----------------------------------------------------------*/

   hypre_ParVectorAxpy(-1.0, par_a, work);
   hypre_ParVectorPrint(work, "Axpy");

   /*-----------------------------------------------------------
    * Inner product of par_a and work
    *-----------------------------------------------------------*/

   dot = hypre_ParVectorInnerProd(par_a, work);

   hypre_printf (" prod: %8.2f \n", dot);

   /*-----------------------------------------------------------
    * Finalize things
    *-----------------------------------------------------------*/

   hypre_ParVectorDestroy(par_a);
   hypre_ParVectorDestroy(par_b);
   hypre_ParVectorDestroy(work);
   hypre_SeqVectorDestroy(seq_src);
   if (gathered)
   {
      hypre_SeqVectorDestroy(gathered);
   }

   /* Finalize MPI */
   hypre_MPI_Finalize();

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_seqAMGCycle
 *
 * Solves the coarse problem stored in amg_data with hypre_BoomerAMGSolve
 * after collecting the level-p_level right-hand side and solution over
 * the communicator hypre_ParAMGDataNewComm(amg_data).
 *
 *   - redundant mode: every participating process gathers F and U
 *     (Allgatherv), solves, and copies its own slice of U back.
 *   - otherwise: rank 0 gathers F and U (Gatherv), solves, and scatters
 *     U back (Scatterv).
 *
 * Processes for which hypre_ParAMGDataParticipate(amg_data) is zero do
 * nothing.
 *
 * amg_data    - AMG data holding the coarse solver, matrix and vectors
 * p_level     - level whose F/U vectors are used
 * Par_F_array - per-level right-hand sides
 * Par_U_array - per-level solution vectors (level p_level is updated)
 *
 * Returns Solve_err_flag, which this function never changes from 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData *amg_data,
                   HYPRE_Int         p_level,
                   hypre_ParVector **Par_F_array,
                   hypre_ParVector **Par_U_array   )
{
   /* Sequential/coarse data held by the AMG object */
   MPI_Comm            new_comm      = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver        coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix *A_coarse      = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector    *F_coarse      = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector    *U_coarse      = hypre_ParAMGDataUCoarse(amg_data);
   HYPRE_Int           redundant     = hypre_ParAMGDataRedundant(amg_data);

   /* Level vectors and local pieces */
   hypre_ParVector    *level_u = Par_U_array[p_level];
   hypre_ParVector    *level_f = Par_F_array[p_level];
   HYPRE_Int           u_offset = hypre_ParVectorFirstIndex(level_u);
   hypre_Vector       *u_seq    = hypre_ParVectorLocalVector(level_u);
   HYPRE_Real         *u_vals   = hypre_VectorData(u_seq);
   HYPRE_Int           u_len    = hypre_VectorSize(u_seq);

   hypre_Vector       *f_seq;
   HYPRE_Real         *f_vals;
   HYPRE_Int           f_len;

   hypre_Vector       *coarse_seq;
   HYPRE_Real         *gather_buf = NULL;
   HYPRE_Real         *solved     = NULL;
   HYPRE_Int          *counts     = NULL;
   HYPRE_Int          *offsets    = NULL;
   HYPRE_Int           my_count;
   HYPRE_Int           comm_size, comm_rank;
   HYPRE_Int           holds_coarse;
   HYPRE_Int           p, k;

   HYPRE_Int           Solve_err_flag = 0;

   if (!hypre_ParAMGDataParticipate(amg_data))
   {
      return(Solve_err_flag);
   }

   hypre_MPI_Comm_size(new_comm, &comm_size);
   hypre_MPI_Comm_rank(new_comm, &comm_rank);

   /* Processes that receive the gathered data and run the solve:
      everyone in redundant mode, rank 0 otherwise. */
   holds_coarse = (redundant || comm_rank == 0);

   f_seq  = hypre_ParVectorLocalVector(level_f);
   f_vals = hypre_VectorData(f_seq);
   f_len  = hypre_VectorSize(f_seq);

   /* Exchange the local lengths */
   counts   = hypre_CTAlloc(HYPRE_Int, comm_size);
   my_count = f_len;
   if (redundant)
   {
      hypre_MPI_Allgather(&my_count, 1, HYPRE_MPI_INT,
                          counts, 1, HYPRE_MPI_INT, new_comm);
   }
   else
   {
      hypre_MPI_Gather(&my_count, 1, HYPRE_MPI_INT,
                       counts, 1, HYPRE_MPI_INT, 0, new_comm);
   }

   if (holds_coarse)
   {
      /* Running sum of the counts gives each process's displacement. */
      offsets = hypre_CTAlloc(HYPRE_Int, comm_size+1);
      offsets[0] = 0;
      for (p = 0; p < comm_size; p++)
      {
         offsets[p+1] = offsets[p] + counts[p];
      }

      if (F_coarse)
      {
         coarse_seq = hypre_ParVectorLocalVector(F_coarse);
         gather_buf = hypre_VectorData(coarse_seq);
      }
   }

   /* Gather F first */
   if (redundant)
   {
      hypre_MPI_Allgatherv( f_vals, f_len, HYPRE_MPI_REAL,
                            gather_buf, counts, offsets, HYPRE_MPI_REAL,
                            new_comm );
   }
   else
   {
      hypre_MPI_Gatherv( f_vals, f_len, HYPRE_MPI_REAL,
                         gather_buf, counts, offsets, HYPRE_MPI_REAL,
                         0, new_comm );
   }

   if (holds_coarse)
   {
      coarse_seq = hypre_ParVectorLocalVector(U_coarse);
      gather_buf = hypre_VectorData(coarse_seq);
   }

   /* Then gather U; it reuses the counts and displacements of F. */
   if (redundant)
   {
      hypre_MPI_Allgatherv( u_vals, u_len, HYPRE_MPI_REAL,
                            gather_buf, counts, offsets, HYPRE_MPI_REAL,
                            new_comm );
      /* Redundant mode needs no scatter, so release these now. */
      hypre_TFree(offsets);
      hypre_TFree(counts);
   }
   else
   {
      hypre_MPI_Gatherv( u_vals, u_len, HYPRE_MPI_REAL,
                         gather_buf, counts, offsets, HYPRE_MPI_REAL,
                         0, new_comm );
   }

   /* Coarse solve */
   if (holds_coarse)
   {
      hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);
   }

   /* Copy my part of U back into the parallel vector */
   if (redundant)
   {
      solved = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
      for (k = 0; k < u_len; k++)
      {
         u_vals[k] = solved[u_offset + k];
      }
   }
   else
   {
      if (comm_rank == 0)
      {
         solved = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
      }

      hypre_MPI_Scatterv( solved, counts, offsets, HYPRE_MPI_REAL,
                          u_vals, u_len, HYPRE_MPI_REAL, 0, new_comm );

      /* offsets was only allocated on rank 0 */
      if (comm_rank == 0)
      {
         hypre_TFree(offsets);
      }
      hypre_TFree(counts);
   }

   return(Solve_err_flag);
}