HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { HYPRE_Int num_procs, myid; HYPRE_Int verbose = 0, build_matrix_type = 1; HYPRE_Int index, matrix_arg_index, commpkg_flag=3; HYPRE_Int i, k, ierr=0; HYPRE_Int row_start, row_end; HYPRE_Int col_start, col_end, global_num_rows; HYPRE_Int *row_part, *col_part; char *csrfilename; HYPRE_Int preload = 0, loop = 0, loop2 = LOOP2; HYPRE_Int bcast_rows[2], *info; hypre_ParCSRMatrix *parcsr_A, *small_A; HYPRE_ParCSRMatrix A_temp, A_temp_small; hypre_CSRMatrix *A_CSR; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int nx, ny, nz; HYPRE_Int P, Q, R; HYPRE_Int p, q, r; HYPRE_Real values[4]; hypre_ParVector *x_new; hypre_ParVector *y_new, *y; HYPRE_Int *row_starts; HYPRE_Real ans; HYPRE_Real start_time, end_time, total_time, *loop_times; HYPRE_Real T_avg, T_std; HYPRE_Int noparmprint = 0; #if mydebug HYPRE_Int j, tmp_int; #endif /*----------------------------------------------------------- * Initialize MPI *-----------------------------------------------------------*/ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs ); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid ); /*----------------------------------------------------------- * default - is 27pt laplace *-----------------------------------------------------------*/ build_matrix_type = 2; matrix_arg_index = argc; /*----------------------------------------------------------- * Parse command line *-----------------------------------------------------------*/ index = 1; while ( index < argc) { if ( strcmp(argv[index], "-verbose") == 0 ) { index++; verbose = 1; } else if ( strcmp(argv[index], "-fromonecsrfile") == 0 ) { index++; build_matrix_type = 1; matrix_arg_index = index; /*this tells where the name is*/ } else if ( strcmp(argv[index], "-commpkg") == 0 ) { index++; commpkg_flag = atoi(argv[index++]); } else if ( strcmp(argv[index], "-laplacian") == 0 ) { index++; build_matrix_type = 2; matrix_arg_index = index; } else if ( strcmp(argv[index], "-27pt") == 0 ) { index++; build_matrix_type = 4; matrix_arg_index = index; } /* else if ( strcmp(argv[index], "-nopreload") == 0 ) { index++; preload = 0; } */ else if ( strcmp(argv[index], "-loop") == 0 ) { index++; loop = atoi(argv[index++]); } else if ( strcmp(argv[index], "-noparmprint") == 0 ) { index++; noparmprint = 1; } else { index++; /*hypre_printf("Warning: Unrecogized option '%s'\n",argv[index++] );*/ } } /*----------------------------------------------------------- * Setup the Matrix problem *-----------------------------------------------------------*/ /*----------------------------------------------------------- * Get actual partitioning- * read in an actual csr matrix. *-----------------------------------------------------------*/ if (build_matrix_type ==1) /*read in a csr matrix from one file */ { if (matrix_arg_index < argc) { csrfilename = argv[matrix_arg_index]; } else { hypre_printf("Error: No filename specified \n"); exit(1); } if (myid == 0) { /*hypre_printf(" FromFile: %s\n", csrfilename);*/ A_CSR = hypre_CSRMatrixRead(csrfilename); } row_part = NULL; col_part = NULL; parcsr_A = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, A_CSR, row_part, col_part); if (myid == 0) hypre_CSRMatrixDestroy(A_CSR); } else if (build_matrix_type ==2) { myBuildParLaplacian(argc, argv, matrix_arg_index, &A_temp, !noparmprint); parcsr_A = (hypre_ParCSRMatrix *) A_temp; } else if (build_matrix_type ==4) { myBuildParLaplacian27pt(argc, argv, matrix_arg_index, &A_temp, !noparmprint); parcsr_A = (hypre_ParCSRMatrix *) A_temp; } /*----------------------------------------------------------- * create a small problem so that timings are more accurate - * code gets run twice (small laplace) *-----------------------------------------------------------*/ /*this is no longer being used - preload = 0 is set at the beginning */ if (preload == 1) { /*hypre_printf("preload!\n");*/ values[1] = -1; values[2] = -1; values[3] = -1; values[0] = - 6.0 ; nx = 2; ny = num_procs; nz = 2; P = 1; Q = num_procs; R = 1; p = myid % P; q = (( myid - p)/P) % Q; r = ( myid - p - P*q)/( P*Q ); A_temp_small = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD, nx, ny, nz, P, Q, R, p, q, r, values); small_A = (hypre_ParCSRMatrix *) A_temp_small; /*do comm packages*/ hypre_NewCommPkgCreate(small_A); hypre_NewCommPkgDestroy(small_A); hypre_MatvecCommPkgCreate(small_A); hypre_ParCSRMatrixDestroy(small_A); } /*----------------------------------------------------------- * Prepare for timing *-----------------------------------------------------------*/ /* instead of preloading, let's not time the first one if more than one*/ if (!loop) { loop = 1; /* and don't do any timings */ } else { loop +=1; if (loop < 2) loop = 2; } loop_times = hypre_CTAlloc(HYPRE_Real, loop); /******************************************************************************************/ hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); if (commpkg_flag == 1 || commpkg_flag ==3 ) { /*----------------------------------------------------------- * Create new comm package *-----------------------------------------------------------*/ if (!myid) hypre_printf("********************************************************\n" ); /*do loop times*/ for (i=0; i< loop; i++) { loop_times[i] = 0.0; for (k=0; k< loop2; k++) { hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); start_time = hypre_MPI_Wtime(); #if mpip_on if (i==(loop-1)) hypre_MPI_Pcontrol(1); #endif hypre_NewCommPkgCreate(parcsr_A); #if mpip_on if (i==(loop-1)) hypre_MPI_Pcontrol(0); #endif end_time = hypre_MPI_Wtime(); end_time = end_time - start_time; hypre_MPI_Allreduce(&end_time, &total_time, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD); loop_times[i] += total_time; if ( !((i+1)== loop && (k+1) == loop2)) hypre_NewCommPkgDestroy(parcsr_A); }/*end of loop2 */ } /*end of loop*/ /* calculate the avg and std. */ if (loop > 1) { /* calculate the avg and std. */ stats_mo(loop_times, loop, &T_avg, &T_std); if (!myid) hypre_printf(" NewCommPkgCreate: AVG. wall clock time = %f seconds\n", T_avg); if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std); if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n"); if (!myid) hypre_printf("********************************************************\n" ); for (i=0; i< loop; i++) { if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]); } if (!myid) hypre_printf("********************************************************\n" ); } else { if (!myid) hypre_printf("********************************************************\n" ); if (!myid) hypre_printf(" NewCommPkgCreate:\n"); if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]); if (!myid) hypre_printf("********************************************************\n" ); } /*----------------------------------------------------------- * Verbose printing *-----------------------------------------------------------*/ /*some verification*/ global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); if (verbose) { ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , &col_start, &col_end ); comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A); hypre_printf("myid = %i, my ACTUAL local range: [%i, %i]\n", myid, row_start, row_end); ierr = hypre_GetAssumedPartitionRowRange( myid, global_num_rows, &row_start, &row_end); hypre_printf("myid = %i, my assumed local range: [%i, %i]\n", myid, row_start, row_end); hypre_printf("myid = %d, num_recvs = %d\n", myid, hypre_ParCSRCommPkgNumRecvs(comm_pkg) ); #if mydebug for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) { hypre_printf("myid = %d, recv proc = %d, vec_starts = [%d : %d]\n", myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1); } #endif hypre_printf("myid = %d, num_sends = %d\n", myid, hypre_ParCSRCommPkgNumSends(comm_pkg) ); #if mydebug for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) { tmp_int = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] - hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; for (j=0; j< tmp_int; j++) { hypre_printf("myid = %d, send proc = %d, send element = %d\n",myid, hypre_ParCSRCommPkgSendProcs(comm_pkg)[i], hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); } } #endif } /*----------------------------------------------------------- * To verify correctness (if commpkg_flag = 3) *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { /*do a matvec - we are assuming a square matrix */ row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A); x_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts); hypre_ParVectorSetPartitioningOwner(x_new, 0); hypre_ParVectorInitialize(x_new); hypre_ParVectorSetRandomValues(x_new, 1); y_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts); hypre_ParVectorSetPartitioningOwner(y_new, 0); hypre_ParVectorInitialize(y_new); hypre_ParVectorSetConstantValues(y_new, 0.0); /*y = 1.0*A*x+1.0*y */ hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y_new); } /*----------------------------------------------------------- * Clean up after MyComm *-----------------------------------------------------------*/ hypre_NewCommPkgDestroy(parcsr_A); } /******************************************************************************************/ /******************************************************************************************/ hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); if (commpkg_flag > 1 ) { /*----------------------------------------------------------- * Set up standard comm package *-----------------------------------------------------------*/ bcast_rows[0] = 23; bcast_rows[1] = 1789; if (!myid) hypre_printf("********************************************************\n" ); /*do loop times*/ for (i=0; i< loop; i++) { loop_times[i] = 0.0; for (k=0; k< loop2; k++) { hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); start_time = hypre_MPI_Wtime(); #if time_gather info = hypre_CTAlloc(HYPRE_Int, num_procs); hypre_MPI_Allgather(bcast_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hypre_MPI_COMM_WORLD); #endif hypre_MatvecCommPkgCreate(parcsr_A); end_time = hypre_MPI_Wtime(); end_time = end_time - start_time; hypre_MPI_Allreduce(&end_time, &total_time, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD); loop_times[i] += total_time; if ( !((i+1)== loop && (k+1) == loop2)) hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(parcsr_A)); }/* end of loop 2*/ } /*end of loop*/ /* calculate the avg and std. */ if (loop > 1) { stats_mo(loop_times, loop, &T_avg, &T_std); if (!myid) hypre_printf("Current CommPkgCreate: AVG. wall clock time = %f seconds\n", T_avg); if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std); if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n"); if (!myid) hypre_printf("********************************************************\n" ); for (i=0; i< loop; i++) { if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]); } if (!myid) hypre_printf("********************************************************\n" ); } else { if (!myid) hypre_printf("********************************************************\n" ); if (!myid) hypre_printf(" Current CommPkgCreate:\n"); if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]); if (!myid) hypre_printf("********************************************************\n" ); } /*----------------------------------------------------------- * Verbose printing *-----------------------------------------------------------*/ /*some verification*/ if (verbose) { ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , &col_start, &col_end ); comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A); hypre_printf("myid = %i, std - my local range: [%i, %i]\n", myid, row_start, row_end); ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , &col_start, &col_end ); hypre_printf("myid = %d, std - num_recvs = %d\n", myid, hypre_ParCSRCommPkgNumRecvs(comm_pkg) ); #if mydebug for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) { hypre_printf("myid = %d, std - recv proc = %d, vec_starts = [%d : %d]\n", myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1); } #endif hypre_printf("myid = %d, std - num_sends = %d\n", myid, hypre_ParCSRCommPkgNumSends(comm_pkg)); #if mydebug for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) { tmp_int = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] - hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; for (j=0; j< tmp_int; j++) { hypre_printf("myid = %d, std - send proc = %d, send element = %d\n",myid, hypre_ParCSRCommPkgSendProcs(comm_pkg)[i], hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); } } #endif } /*----------------------------------------------------------- * Verify correctness *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A); y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows,row_starts); hypre_ParVectorSetPartitioningOwner(y, 0); hypre_ParVectorInitialize(y); hypre_ParVectorSetConstantValues(y, 0.0); hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y); } } /*----------------------------------------------------------- * Compare matvecs for both comm packages (3) *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { /*make sure that y and y_new are the same - now y_new should=0*/ hypre_ParVectorAxpy( -1.0, y, y_new ); hypre_ParVectorSetRandomValues(y, 1); ans = hypre_ParVectorInnerProd( y, y_new ); if (!myid) { if ( fabs(ans) > 1e-8 ) { hypre_printf("!!!!! WARNING !!!!! should be zero if correct = %6.10f\n", ans); } else { hypre_printf("Matvecs match ( should be zero = %6.10f )\n", ans); } } } /*----------------------------------------------------------- * Clean up *-----------------------------------------------------------*/ hypre_ParCSRMatrixDestroy(parcsr_A); /*this calls the standard comm package destroy - but we'll destroy ours separately until it is incorporated */ if (commpkg_flag == 3 ) { hypre_ParVectorDestroy(x_new); hypre_ParVectorDestroy(y); hypre_ParVectorDestroy(y_new); } hypre_MPI_Finalize(); return(ierr); }
int hypre_ParCSRMatrixMatvec( double alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, double beta, hypre_ParVector *y ) { hypre_ParCSRCommHandle **comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); HYPRE_BigInt num_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_BigInt num_cols = hypre_ParCSRMatrixGlobalNumCols(A); hypre_Vector *x_tmp; HYPRE_BigInt x_size = hypre_ParVectorGlobalSize(x); HYPRE_BigInt y_size = hypre_ParVectorGlobalSize(y); int num_vectors = 1; int num_cols_offd = hypre_CSRMatrixNumCols(offd); int ierr = 0; int num_sends, i, j, jv, index, start; /*int vecstride = hypre_VectorVectorStride( x_local ); int idxstride = hypre_VectorIndexStride( x_local );*/ double *x_tmp_data, **x_buf_data; double *x_local_data = hypre_VectorData(x_local); /*--------------------------------------------------------------------- * Check for size compatibility. ParMatvec returns ierr = 11 if * length of X doesn't equal the number of columns of A, * ierr = 12 if the length of Y doesn't equal the number of rows * of A, and ierr = 13 if both are true. * * Because temporary vectors are often used in ParMatvec, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ /*hypre_assert( idxstride>0 );*/ if (num_cols != x_size) ierr = 11; if (num_rows != y_size) ierr = 12; if (num_cols != x_size && num_rows != y_size) ierr = 13; x_tmp = hypre_SeqVectorCreate( num_cols_offd ); hypre_SeqVectorInitialize(x_tmp); x_tmp_data = hypre_VectorData(x_tmp); comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors); /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!comm_pkg) { #ifdef HYPRE_NO_GLOBAL_PARTITION hypre_NewCommPkgCreate(A); #else hypre_MatvecCommPkgCreate(A); #endif comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); x_buf_data = hypre_CTAlloc( double*, num_vectors ); for ( jv=0; jv<num_vectors; ++jv ) x_buf_data[jv] = hypre_CTAlloc(double, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); /*if ( num_vectors==1 )*/ { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) x_buf_data[0][index++] = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } } /*else for ( jv=0; jv<num_vectors; ++jv ) { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) x_buf_data[jv][index++] = x_local_data[ jv*vecstride + idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j) ]; } } hypre_assert( idxstride==1 );*/ /* >>> ... The assert is because the following loop only works for 'column' storage of a multivector <<< >>> This needs to be fixed to work more generally, at least for 'row' storage. <<< >>> This in turn, means either change CommPkg so num_sends is no.zones*no.vectors (not no.zones) >>> or, less dangerously, put a stride in the logic of CommHandleCreate (stride either from a >>> new arg or a new variable inside CommPkg). Or put the num_vector iteration inside >>> CommHandleCreate (perhaps a new multivector variant of it). */ for ( jv=0; jv<num_vectors; ++jv ) { comm_handle[jv] = hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data[jv], &(x_tmp_data[jv*num_cols_offd]) ); } hypre_CSRMatrixMatvec( alpha, diag, x_local, beta, y_local); for ( jv=0; jv<num_vectors; ++jv ) { hypre_ParCSRCommHandleDestroy(comm_handle[jv]); comm_handle[jv] = NULL; } hypre_TFree(comm_handle); if (num_cols_offd) hypre_CSRMatrixMatvec( alpha, offd, x_tmp, 1.0, y_local); hypre_SeqVectorDestroy(x_tmp); x_tmp = NULL; for ( jv=0; jv<num_vectors; ++jv ) hypre_TFree(x_buf_data[jv]); hypre_TFree(x_buf_data); return ierr; }
int hypre_ParCSRMatrixMatvec_FF( double alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, double beta, hypre_ParVector *y, int *CF_marker, int fpt ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); HYPRE_BigInt num_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_BigInt num_cols = hypre_ParCSRMatrixGlobalNumCols(A); hypre_Vector *x_tmp; HYPRE_BigInt x_size = hypre_ParVectorGlobalSize(x); HYPRE_BigInt y_size = hypre_ParVectorGlobalSize(y); HYPRE_BigInt num_cols_offd = hypre_CSRMatrixNumCols(offd); int ierr = 0; int num_sends, i, j, index, start, num_procs; int *int_buf_data = NULL; int *CF_marker_offd = NULL; double *x_tmp_data = NULL; double *x_buf_data = NULL; double *x_local_data = hypre_VectorData(x_local); /*--------------------------------------------------------------------- * Check for size compatibility. ParMatvec returns ierr = 11 if * length of X doesn't equal the number of columns of A, * ierr = 12 if the length of Y doesn't equal the number of rows * of A, and ierr = 13 if both are true. * * Because temporary vectors are often used in ParMatvec, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ MPI_Comm_size(comm,&num_procs); if (num_cols != x_size) ierr = 11; if (num_rows != y_size) ierr = 12; if (num_cols != x_size && num_rows != y_size) ierr = 13; if (num_procs > 1) { if (num_cols_offd) { x_tmp = hypre_SeqVectorCreate( num_cols_offd ); hypre_SeqVectorInitialize(x_tmp); x_tmp_data = hypre_VectorData(x_tmp); } /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!comm_pkg) { #ifdef HYPRE_NO_GLOBAL_PARTITION hypre_NewCommPkgCreate(A); #else hypre_MatvecCommPkgCreate(A); #endif comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); if (num_sends) x_buf_data = hypre_CTAlloc(double, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) x_buf_data[index++] = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data, x_tmp_data ); } hypre_CSRMatrixMatvec_FF( alpha, diag, x_local, beta, y_local, CF_marker, CF_marker, fpt); if (num_procs > 1) { hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; if (num_sends) int_buf_data = hypre_CTAlloc(int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); if (num_cols_offd) CF_marker_offd = hypre_CTAlloc(int, num_cols_offd); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) int_buf_data[index++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate(11,comm_pkg,int_buf_data,CF_marker_offd ); hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; if (num_cols_offd) hypre_CSRMatrixMatvec_FF( alpha, offd, x_tmp, 1.0, y_local, CF_marker, CF_marker_offd, fpt); hypre_SeqVectorDestroy(x_tmp); x_tmp = NULL; hypre_TFree(x_buf_data); hypre_TFree(int_buf_data); hypre_TFree(CF_marker_offd); }
int hypre_ParCSRMatrixMatvecT( double alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, double beta, hypre_ParVector *y ) { hypre_ParCSRCommHandle **comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); hypre_Vector *y_tmp; int vecstride = hypre_VectorVectorStride( y_local ); int idxstride = hypre_VectorIndexStride( y_local ); double *y_tmp_data, **y_buf_data; double *y_local_data = hypre_VectorData(y_local); HYPRE_BigInt num_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_BigInt num_cols = hypre_ParCSRMatrixGlobalNumCols(A); int num_cols_offd = hypre_CSRMatrixNumCols(offd); HYPRE_BigInt x_size = hypre_ParVectorGlobalSize(x); HYPRE_BigInt y_size = hypre_ParVectorGlobalSize(y); int num_vectors = hypre_VectorNumVectors(y_local); int i, j, jv, index, start, num_sends; int ierr = 0; /*--------------------------------------------------------------------- * Check for size compatibility. MatvecT returns ierr = 1 if * length of X doesn't equal the number of rows of A, * ierr = 2 if the length of Y doesn't equal the number of * columns of A, and ierr = 3 if both are true. * * Because temporary vectors are often used in MatvecT, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ if (num_rows != x_size) ierr = 1; if (num_cols != y_size) ierr = 2; if (num_rows != x_size && num_cols != y_size) ierr = 3; /*----------------------------------------------------------------------- *-----------------------------------------------------------------------*/ comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors); /*if ( num_vectors==1 ) {*/ y_tmp = hypre_SeqVectorCreate(num_cols_offd); /*} else { y_tmp = hypre_SeqMultiVectorCreate(num_cols_offd,num_vectors); }*/ hypre_SeqVectorInitialize(y_tmp); /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!comm_pkg) { #ifdef HYPRE_NO_GLOBAL_PARTITION hypre_NewCommPkgCreate(A); #else hypre_MatvecCommPkgCreate(A); #endif comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); y_buf_data = hypre_CTAlloc( double*, num_vectors ); for ( jv=0; jv<num_vectors; ++jv ) y_buf_data[jv] = hypre_CTAlloc(double, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); y_tmp_data = hypre_VectorData(y_tmp); y_local_data = hypre_VectorData(y_local); hypre_assert( idxstride==1 ); /* >>> only 'column' storage of multivectors implemented so far */ if (num_cols_offd) hypre_CSRMatrixMatvecT(alpha, offd, x_local, 0.0, y_tmp); for ( jv=0; jv<num_vectors; ++jv ) { /* >>> this is where we assume multivectors are 'column' storage */ comm_handle[jv] = hypre_ParCSRCommHandleCreate ( 2, comm_pkg, &(y_tmp_data[jv*num_cols_offd]), y_buf_data[jv] ); } hypre_CSRMatrixMatvecT(alpha, diag, x_local, beta, y_local); for ( jv=0; jv<num_vectors; ++jv ) { hypre_ParCSRCommHandleDestroy(comm_handle[jv]); comm_handle[jv] = NULL; } hypre_TFree(comm_handle); if ( num_vectors==1 ) { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) y_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)] += y_buf_data[0][index++]; } } else for ( jv=0; jv<num_vectors; ++jv ) { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) y_local_data[ jv*vecstride + idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j) ] += y_buf_data[jv][index++]; } } hypre_SeqVectorDestroy(y_tmp); y_tmp = NULL; for ( jv=0; jv<num_vectors; ++jv ) hypre_TFree(y_buf_data[jv]); hypre_TFree(y_buf_data); return ierr; }