/*--------------------------------------------------------------------------
 * hypre_MPI_Allgatherv
 *
 * Forwards the request to hypre_MPI_Allgather, using the first entry of
 * recvcounts as the receive count.  The displs argument is not consulted.
 * NOTE(review): this looks like the single-process MPI stub, where only
 * recvcounts[0] is meaningful -- confirm against the rest of the file.
 *
 * Returns whatever hypre_MPI_Allgather returns.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_MPI_Allgatherv( void               *sendbuf,
                      HYPRE_Int           sendcount,
                      hypre_MPI_Datatype  sendtype,
                      void               *recvbuf,
                      HYPRE_Int          *recvcounts,
                      HYPRE_Int          *displs,
                      hypre_MPI_Datatype  recvtype,
                      hypre_MPI_Comm      comm )
{
   HYPRE_Int first_recvcount = recvcounts[0];
   HYPRE_Int status;

   status = hypre_MPI_Allgather(sendbuf, sendcount, sendtype,
                                recvbuf, first_recvcount, recvtype, comm);

   return status;
}
/*--------------------------------------------------------------------------
 * hypre_MPI_Scatter
 *
 * Forwards the request to hypre_MPI_Allgather with the same send/receive
 * buffers, counts and datatypes.  The root argument is not consulted.
 * NOTE(review): this looks like the single-process MPI stub -- confirm
 * against the rest of the file.
 *
 * Returns whatever hypre_MPI_Allgather returns.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_MPI_Scatter( void               *sendbuf,
                   HYPRE_Int           sendcount,
                   hypre_MPI_Datatype  sendtype,
                   void               *recvbuf,
                   HYPRE_Int           recvcount,
                   hypre_MPI_Datatype  recvtype,
                   HYPRE_Int           root,
                   hypre_MPI_Comm      comm )
{
   HYPRE_Int status;

   status = hypre_MPI_Allgather(sendbuf, sendcount, sendtype,
                                recvbuf, recvcount, recvtype, comm);

   return status;
}
/*--------------------------------------------------------------------------
 * Test driver that times the creation of two ParCSR communication packages
 * (hypre_NewCommPkgCreate and hypre_MatvecCommPkgCreate) on one matrix and,
 * when both are exercised, compares the matvec results they produce.
 *
 * Command line options handled here:
 *   -verbose              print local ranges and send/recv counts per rank
 *   -fromonecsrfile <f>   read the matrix from a single CSR file
 *   -laplacian            build the matrix with myBuildParLaplacian (default)
 *   -27pt                 build the matrix with myBuildParLaplacian27pt
 *   -commpkg <n>          1 = new package only, 2 = standard package only,
 *                         3 = both plus matvec comparison (default)
 *   -loop <n>             number of timed runs (run 0 is excluded from the
 *                         statistics when n is non-zero)
 *   -noparmprint          passed (negated) to the matrix builders
 * Unrecognized options are skipped silently; the matrix builders receive
 * argc/argv and may parse further options themselves.
 *
 * Returns ierr (the result of the last range query, 0 if none was made).
 *--------------------------------------------------------------------------*/
HYPRE_Int
main( HYPRE_Int argc,
      char     *argv[] )
{
   HYPRE_Int            num_procs, myid;
   HYPRE_Int            verbose = 0, build_matrix_type = 1;
   HYPRE_Int            index, matrix_arg_index, commpkg_flag=3;
   HYPRE_Int            i, k, ierr=0;
   HYPRE_Int            row_start, row_end;
   HYPRE_Int            col_start, col_end, global_num_rows;
   HYPRE_Int           *row_part, *col_part;
   char                *csrfilename;
   HYPRE_Int            preload = 0, loop = 0, loop2 = LOOP2;
   HYPRE_Int            bcast_rows[2], *info;
   hypre_ParCSRMatrix  *parcsr_A = NULL, *small_A;
   HYPRE_ParCSRMatrix   A_temp, A_temp_small;
   /* NULL on every rank but 0, so the conversion call below never receives
      an indeterminate pointer */
   hypre_CSRMatrix     *A_CSR = NULL;
   hypre_ParCSRCommPkg *comm_pkg;
   HYPRE_Int            nx, ny, nz;
   HYPRE_Int            P, Q, R;
   HYPRE_Int            p, q, r;
   HYPRE_Real           values[4];
   hypre_ParVector     *x_new;
   hypre_ParVector     *y_new, *y;
   HYPRE_Int           *row_starts;
   HYPRE_Real           ans;
   HYPRE_Real           start_time, end_time, total_time, *loop_times;
   HYPRE_Real           T_avg, T_std;
   HYPRE_Int            noparmprint = 0;
#if mydebug
   HYPRE_Int            j, tmp_int;
#endif

   /*-----------------------------------------------------------
    * Initialize MPI
    *-----------------------------------------------------------*/

   hypre_MPI_Init(&argc, &argv);
   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );

   /*-----------------------------------------------------------
    * default - is the laplacian built by myBuildParLaplacian
    * (build_matrix_type 2); the 27pt laplacian is type 4
    *-----------------------------------------------------------*/

   build_matrix_type = 2;
   matrix_arg_index = argc;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/

   index = 1;
   while ( index < argc)
   {
      if ( strcmp(argv[index], "-verbose") == 0 )
      {
         index++;
         verbose = 1;
      }
      else if ( strcmp(argv[index], "-fromonecsrfile") == 0 )
      {
         index++;
         build_matrix_type = 1;
         matrix_arg_index = index; /*this tells where the name is*/
      }
      else if ( strcmp(argv[index], "-commpkg") == 0 )
      {
         index++;
         /* argv[argc] is a null pointer: never hand it to atoi */
         if (index >= argc)
         {
            hypre_printf("Error: -commpkg requires a value \n");
            exit(1);
         }
         commpkg_flag = atoi(argv[index++]);
      }
      else if ( strcmp(argv[index], "-laplacian") == 0 )
      {
         index++;
         build_matrix_type = 2;
         matrix_arg_index = index;
      }
      else if ( strcmp(argv[index], "-27pt") == 0 )
      {
         index++;
         build_matrix_type = 4;
         matrix_arg_index = index;
      }
      /*
      else if ( strcmp(argv[index], "-nopreload") == 0 )
      {
         index++;
         preload = 0;
      }
      */
      else if ( strcmp(argv[index], "-loop") == 0 )
      {
         index++;
         /* same guard as for -commpkg */
         if (index >= argc)
         {
            hypre_printf("Error: -loop requires a value \n");
            exit(1);
         }
         loop = atoi(argv[index++]);
      }
      else if ( strcmp(argv[index], "-noparmprint") == 0 )
      {
         index++;
         noparmprint = 1;
      }
      else
      {
         index++;
         /*hypre_printf("Warning: Unrecogized option '%s'\n",argv[index++] );*/
      }
   }

   /*-----------------------------------------------------------
    * Setup the Matrix problem
    *-----------------------------------------------------------*/

   /*-----------------------------------------------------------
    * Get actual partitioning-
    * read in an actual csr matrix.
    *-----------------------------------------------------------*/

   if (build_matrix_type ==1) /*read in a csr matrix from one file */
   {
      if (matrix_arg_index < argc)
      {
         csrfilename = argv[matrix_arg_index];
      }
      else
      {
         hypre_printf("Error: No filename specified \n");
         exit(1);
      }
      if (myid == 0)
      {
         /*hypre_printf(" FromFile: %s\n", csrfilename);*/
         A_CSR = hypre_CSRMatrixRead(csrfilename);
      }
      row_part = NULL;
      col_part = NULL;

      parcsr_A = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, A_CSR,
                                               row_part, col_part);

      if (myid == 0) hypre_CSRMatrixDestroy(A_CSR);
   }
   else if (build_matrix_type ==2)
   {
      myBuildParLaplacian(argc, argv, matrix_arg_index, &A_temp, !noparmprint);
      parcsr_A = (hypre_ParCSRMatrix *) A_temp;
   }
   else if (build_matrix_type ==4)
   {
      myBuildParLaplacian27pt(argc, argv, matrix_arg_index, &A_temp, !noparmprint);
      parcsr_A = (hypre_ParCSRMatrix *) A_temp;
   }

   /*-----------------------------------------------------------
    * create a small problem so that timings are more accurate -
    * code gets run twice (small laplace)
    *-----------------------------------------------------------*/

   /*this is no longer being used - preload = 0 is set at the beginning */
   if (preload == 1)
   {
      /*hypre_printf("preload!\n");*/

      values[1] = -1;
      values[2] = -1;
      values[3] = -1;
      values[0] = - 6.0 ;

      nx = 2;
      ny = num_procs;
      nz = 2;

      P = 1;
      Q = num_procs;
      R = 1;

      p = myid % P;
      q = (( myid - p)/P) % Q;
      r = ( myid - p - P*q)/( P*Q );

      A_temp_small = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD, nx, ny, nz,
                                                            P, Q, R, p, q, r, values);
      small_A = (hypre_ParCSRMatrix *) A_temp_small;

      /*do comm packages*/
      hypre_NewCommPkgCreate(small_A);
      hypre_NewCommPkgDestroy(small_A);

      hypre_MatvecCommPkgCreate(small_A);
      hypre_ParCSRMatrixDestroy(small_A);
   }

   /*-----------------------------------------------------------
    * Prepare for timing
    *-----------------------------------------------------------*/

   /* instead of preloading, let's not time the first one if more than one*/

   if (!loop)
   {
      loop = 1;
      /* and don't do any timings */
   }
   else
   {
      loop +=1;
      if (loop < 2) loop = 2;
   }

   loop_times = hypre_CTAlloc(HYPRE_Real, loop);

   /******************************************************************************************/

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

   if (commpkg_flag == 1 || commpkg_flag ==3 )
   {
      /*-----------------------------------------------------------
       * Create new comm package
       *-----------------------------------------------------------*/

      if (!myid) hypre_printf("********************************************************\n" );

      /*do loop times*/
      for (i=0; i< loop; i++)
      {
         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++)
         {
            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

            start_time = hypre_MPI_Wtime();

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(1);
#endif

            hypre_NewCommPkgCreate(parcsr_A);

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(0);
#endif

            end_time = hypre_MPI_Wtime();

            end_time = end_time - start_time;

            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                                HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);

            loop_times[i] += total_time;

            /* keep the package from the very last run for the checks below */
            if ( !((i+1)== loop && (k+1) == loop2)) hypre_NewCommPkgDestroy(parcsr_A);

         }/*end of loop2 */

      } /*end of loop*/

      /* calculate the avg and std. */
      if (loop > 1)
      {
         /* calculate the avg and std. */
         stats_mo(loop_times, loop, &T_avg, &T_std);

         if (!myid) hypre_printf(" NewCommPkgCreate: AVG. wall clock time = %f seconds\n", T_avg);
         if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std);
         if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );
         for (i=0; i< loop; i++)
         {
            if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]);
         }
         if (!myid) hypre_printf("********************************************************\n" );
      }
      else
      {
         if (!myid) hypre_printf("********************************************************\n" );
         if (!myid) hypre_printf(" NewCommPkgCreate:\n");
         if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]);
         if (!myid) hypre_printf("********************************************************\n" );
      }

      /*-----------------------------------------------------------
       * Verbose printing
       *-----------------------------------------------------------*/

      /*some verification*/

      global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A);

      if (verbose)
      {
         ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
                                                 &row_start, &row_end ,
                                                 &col_start, &col_end );

         comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);

         hypre_printf("myid = %i, my ACTUAL local range: [%i, %i]\n", myid,
                      row_start, row_end);

         ierr = hypre_GetAssumedPartitionRowRange( myid, global_num_rows, &row_start,
                                                   &row_end);

         hypre_printf("myid = %i, my assumed local range: [%i, %i]\n", myid,
                      row_start, row_end);

         hypre_printf("myid = %d, num_recvs = %d\n", myid,
                      hypre_ParCSRCommPkgNumRecvs(comm_pkg) );

#if mydebug
         for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++)
         {
            hypre_printf("myid = %d, recv proc = %d, vec_starts = [%d : %d]\n",
                         myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i],
                         hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
                         hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
         }
#endif

         hypre_printf("myid = %d, num_sends = %d\n", myid,
                      hypre_ParCSRCommPkgNumSends(comm_pkg) );

#if mydebug
         for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++)
         {
            tmp_int = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -
                      hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
            index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
            for (j=0; j< tmp_int; j++)
            {
               hypre_printf("myid = %d, send proc = %d, send element = %d\n",myid,
                            hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
                            hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]);
            }
         }
#endif
      }

      /*-----------------------------------------------------------
       * To verify correctness (if commpkg_flag = 3)
       *-----------------------------------------------------------*/

      if (commpkg_flag == 3 )
      {
         /*do a matvec - we are assuming a square matrix */
         row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);

         x_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
         hypre_ParVectorSetPartitioningOwner(x_new, 0);
         hypre_ParVectorInitialize(x_new);
         hypre_ParVectorSetRandomValues(x_new, 1);

         y_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
         hypre_ParVectorSetPartitioningOwner(y_new, 0);
         hypre_ParVectorInitialize(y_new);
         hypre_ParVectorSetConstantValues(y_new, 0.0);

         /*y = 1.0*A*x+1.0*y */
         hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y_new);
      }

      /*-----------------------------------------------------------
       * Clean up after MyComm
       *-----------------------------------------------------------*/

      hypre_NewCommPkgDestroy(parcsr_A);
   }

   /******************************************************************************************/
   /******************************************************************************************/

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

   if (commpkg_flag > 1 )
   {
      /*-----------------------------------------------------------
       * Set up standard comm package
       *-----------------------------------------------------------*/

      bcast_rows[0] = 23;
      bcast_rows[1] = 1789;

      if (!myid) hypre_printf("********************************************************\n" );

      /*do loop times*/
      for (i=0; i< loop; i++)
      {
         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++)
         {
            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

            start_time = hypre_MPI_Wtime();

#if time_gather
            info = hypre_CTAlloc(HYPRE_Int, num_procs);

            hypre_MPI_Allgather(bcast_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hypre_MPI_COMM_WORLD);
#endif

            hypre_MatvecCommPkgCreate(parcsr_A);

            end_time = hypre_MPI_Wtime();

#if time_gather
            /* release the per-iteration gather buffer outside the timed region */
            hypre_TFree(info);
#endif

            end_time = end_time - start_time;

            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                                HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);

            loop_times[i] += total_time;

            /* keep the package from the very last run for the checks below */
            if ( !((i+1)== loop && (k+1) == loop2)) hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(parcsr_A));

         }/* end of loop 2*/

      } /*end of loop*/

      /* calculate the avg and std. */
      if (loop > 1)
      {
         stats_mo(loop_times, loop, &T_avg, &T_std);
         if (!myid) hypre_printf("Current CommPkgCreate: AVG. wall clock time = %f seconds\n", T_avg);
         if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std);
         if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );
         for (i=0; i< loop; i++)
         {
            if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]);
         }
         if (!myid) hypre_printf("********************************************************\n" );
      }
      else
      {
         if (!myid) hypre_printf("********************************************************\n" );
         if (!myid) hypre_printf(" Current CommPkgCreate:\n");
         if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]);
         if (!myid) hypre_printf("********************************************************\n" );
      }

      /*-----------------------------------------------------------
       * Verbose printing
       *-----------------------------------------------------------*/

      /*some verification*/

      if (verbose)
      {
         ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
                                                 &row_start, &row_end ,
                                                 &col_start, &col_end );

         comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);

         hypre_printf("myid = %i, std - my local range: [%i, %i]\n", myid,
                      row_start, row_end);

         ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
                                                 &row_start, &row_end ,
                                                 &col_start, &col_end );

         hypre_printf("myid = %d, std - num_recvs = %d\n", myid,
                      hypre_ParCSRCommPkgNumRecvs(comm_pkg) );

#if mydebug
         for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++)
         {
            hypre_printf("myid = %d, std - recv proc = %d, vec_starts = [%d : %d]\n",
                         myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i],
                         hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
                         hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
         }
#endif

         hypre_printf("myid = %d, std - num_sends = %d\n", myid,
                      hypre_ParCSRCommPkgNumSends(comm_pkg));

#if mydebug
         for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++)
         {
            tmp_int = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -
                      hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
            index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
            for (j=0; j< tmp_int; j++)
            {
               hypre_printf("myid = %d, std - send proc = %d, send element = %d\n",myid,
                            hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
                            hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]);
            }
         }
#endif
      }

      /*-----------------------------------------------------------
       * Verify correctness
       *-----------------------------------------------------------*/

      if (commpkg_flag == 3 )
      {
         global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A);
         row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);

         y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows,row_starts);
         hypre_ParVectorSetPartitioningOwner(y, 0);
         hypre_ParVectorInitialize(y);
         hypre_ParVectorSetConstantValues(y, 0.0);

         hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y);
      }
   }

   /*-----------------------------------------------------------
    * Compare matvecs for both comm packages (3)
    *-----------------------------------------------------------*/

   if (commpkg_flag == 3 )
   {
      /*make sure that y and y_new are the same - now y_new should=0*/
      hypre_ParVectorAxpy( -1.0, y, y_new );

      hypre_ParVectorSetRandomValues(y, 1);

      ans = hypre_ParVectorInnerProd( y, y_new );

      if (!myid)
      {
         if ( fabs(ans) > 1e-8 )
         {
            hypre_printf("!!!!! WARNING !!!!! should be zero if correct = %6.10f\n",
                         ans);
         }
         else
         {
            hypre_printf("Matvecs match ( should be zero = %6.10f )\n",
                         ans);
         }
      }
   }

   /*-----------------------------------------------------------
    * Clean up
    *-----------------------------------------------------------*/

   hypre_ParCSRMatrixDestroy(parcsr_A); /*this calls the standard comm
                                          package destroy - but we'll destroy
                                          ours separately until it is
                                          incorporated */

   if (commpkg_flag == 3 )
   {
      hypre_ParVectorDestroy(x_new);
      hypre_ParVectorDestroy(y);
      hypre_ParVectorDestroy(y_new);
   }

   /* timing buffer allocated in "Prepare for timing" */
   hypre_TFree(loop_times);

   hypre_MPI_Finalize();

   return(ierr);
}
/*--------------------------------------------------------------------------
 * hypre_seqAMGSetup
 *
 * Gathers the level-p_level matrix of the AMG hierarchy onto every process
 * that owns at least one of its rows, wraps the gathered copy in a ParCSR
 * matrix on hypre_MPI_COMM_SELF, and sets up a BoomerAMG solver on it.
 *
 *   amg_data          AMG data; supplies the matrix array and the solver
 *                     parameters copied to the coarse solver, and receives
 *                     the coarse solver, matrix, vectors and communicator.
 *   p_level           index into the matrix array of the level to gather.
 *   coarse_threshold  not used in this routine.
 *
 * A communicator containing only the ranks with rows is created by every
 * caller; ranks without rows store nothing in amg_data.
 *
 * Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGSetup( hypre_ParAMGData *amg_data,
                   HYPRE_Int         p_level,
                   HYPRE_Int         coarse_threshold )
{
   /* Parallel input: the matrix at the requested level */
   hypre_ParCSRMatrix **level_matrices = hypre_ParAMGDataAArray(amg_data);
   MPI_Comm             comm           = hypre_ParCSRMatrixComm(level_matrices[0]);
   hypre_ParCSRMatrix  *A              = level_matrices[p_level];

   hypre_CSRMatrix *A_diag          = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *A_offd          = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int       *col_map_offd    = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int       *A_diag_i        = hypre_CSRMatrixI(A_diag);
   HYPRE_Int       *A_offd_i        = hypre_CSRMatrixI(A_offd);
   HYPRE_Int       *A_diag_j        = hypre_CSRMatrixJ(A_diag);
   HYPRE_Int       *A_offd_j        = hypre_CSRMatrixJ(A_offd);
   double          *A_diag_data     = hypre_CSRMatrixData(A_diag);
   double          *A_offd_data     = hypre_CSRMatrixData(A_offd);
   HYPRE_Int        num_rows        = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int        first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);

   /* Sequential output */
   MPI_Comm            new_comm, seq_comm;
   hypre_ParCSRMatrix *A_seq = NULL;
   hypre_CSRMatrix    *A_seq_diag;
   hypre_CSRMatrix    *A_seq_offd;
   hypre_ParVector    *F_seq = NULL;
   hypre_ParVector    *U_seq = NULL;
   HYPRE_Solver        coarse_solver;

   /* Gathered CSR arrays and the local pieces they are built from */
   double    *A_seq_data   = NULL;
   HYPRE_Int *A_seq_i      = NULL;
   HYPRE_Int *A_seq_offd_i = NULL;
   HYPRE_Int *A_seq_j      = NULL;
   double    *local_data   = NULL;
   HYPRE_Int *local_i      = NULL;
   HYPRE_Int *local_j      = NULL;

   HYPRE_Int *counts, *row_displs, *nnz_displs;
   HYPRE_Int *ranks, *row_starts;
   HYPRE_Int  num_procs, my_id, new_num_procs;
   HYPRE_Int  proc, row, pos, size, num_nonzeros, total_nnz, cnt;

   hypre_MPI_Group orig_group, new_group;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   /* How many rows does every process own? */
   counts = hypre_CTAlloc(HYPRE_Int, num_procs);
   hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, counts, 1, HYPRE_MPI_INT, comm);

   /* Keep only the processes that own rows; counts is compacted in place
      so that counts[k] belongs to ranks[k] */
   ranks = hypre_CTAlloc(HYPRE_Int, num_procs);
   new_num_procs = 0;
   for (proc = 0; proc < num_procs; proc++)
   {
      if (counts[proc])
      {
         ranks[new_num_procs]  = proc;
         counts[new_num_procs] = counts[proc];
         new_num_procs++;
      }
   }

   /* Communicator restricted to those processes (created by all callers) */
   MPI_Comm_group(comm, &orig_group);
   hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group);
   MPI_Comm_create(comm, new_group, &new_comm);
   hypre_MPI_Group_free(&new_group);
   hypre_MPI_Group_free(&orig_group);

   /* Processes without rows take no further part */
   if (!num_rows)
   {
      hypre_TFree(counts);
      hypre_TFree(ranks);
      return 0;
   }

   /* Coarse solver: a BoomerAMG instance configured from amg_data */
   HYPRE_BoomerAMGCreate(&coarse_solver);
   HYPRE_BoomerAMGSetMaxRowSum(coarse_solver,
                               hypre_ParAMGDataMaxRowSum(amg_data));
   HYPRE_BoomerAMGSetStrongThreshold(coarse_solver,
                                     hypre_ParAMGDataStrongThreshold(amg_data));
   HYPRE_BoomerAMGSetCoarsenType(coarse_solver,
                                 hypre_ParAMGDataCoarsenType(amg_data));
   HYPRE_BoomerAMGSetInterpType(coarse_solver,
                                hypre_ParAMGDataInterpType(amg_data));
   HYPRE_BoomerAMGSetTruncFactor(coarse_solver,
                                 hypre_ParAMGDataTruncFactor(amg_data));
   HYPRE_BoomerAMGSetPMaxElmts(coarse_solver,
                               hypre_ParAMGDataPMaxElmts(amg_data));
   if (hypre_ParAMGDataUserRelaxType(amg_data) > -1)
   {
      HYPRE_BoomerAMGSetRelaxType(coarse_solver,
                                  hypre_ParAMGDataUserRelaxType(amg_data));
   }
   HYPRE_BoomerAMGSetRelaxOrder(coarse_solver,
                                hypre_ParAMGDataRelaxOrder(amg_data));
   HYPRE_BoomerAMGSetRelaxWt(coarse_solver,
                             hypre_ParAMGDataUserRelaxWeight(amg_data));
   if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1)
   {
      HYPRE_BoomerAMGSetNumSweeps(coarse_solver,
                                  hypre_ParAMGDataUserNumSweeps(amg_data));
   }
   HYPRE_BoomerAMGSetNumFunctions(coarse_solver,
                                  hypre_ParAMGDataNumFunctions(amg_data));
   HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1);
   HYPRE_BoomerAMGSetTol(coarse_solver, 0);

   /* Local row lengths (diag + offd); entry 0 is a placeholder so that the
      lengths sit at positions 1..num_rows */
   local_i = hypre_CTAlloc(HYPRE_Int, num_rows+1);
   local_i[0] = 0;
   for (row = 0; row < num_rows; row++)
   {
      local_i[row+1] = A_diag_i[row+1] - A_diag_i[row]
                       + A_offd_i[row+1] - A_offd_i[row];
   }

   /* Local rows with global column indices: diag entries first, then offd */
   num_nonzeros = A_offd_i[num_rows] + A_diag_i[num_rows];
   local_j    = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
   local_data = hypre_CTAlloc(double, num_nonzeros);

   cnt = 0;
   for (row = 0; row < num_rows; row++)
   {
      for (pos = A_diag_i[row]; pos < A_diag_i[row+1]; pos++)
      {
         local_j[cnt]    = A_diag_j[pos] + first_row_index;
         local_data[cnt] = A_diag_data[pos];
         cnt++;
      }
      for (pos = A_offd_i[row]; pos < A_offd_i[row+1]; pos++)
      {
         local_j[cnt]    = col_map_offd[A_offd_j[pos]];
         local_data[cnt] = A_offd_data[pos];
         cnt++;
      }
   }

   /* Row offsets of each participating process in the gathered matrix */
   row_displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
   row_displs[0] = 0;
   for (proc = 0; proc < new_num_procs; proc++)
   {
      row_displs[proc+1] = row_displs[proc] + counts[proc];
   }
   size = row_displs[new_num_procs];

   /* Gather all row lengths into A_seq_i[1..size] */
   A_seq_i      = hypre_CTAlloc(HYPRE_Int, size+1);
   A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1);

   hypre_MPI_Allgatherv ( &local_i[1], num_rows, HYPRE_MPI_INT,
                          &A_seq_i[1], counts, row_displs,
                          HYPRE_MPI_INT, new_comm );

   nnz_displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);

   /* Running sum turns the row lengths into CSR row pointers */
   A_seq_i[0]    = 0;
   nnz_displs[0] = 0;
   for (pos = 1; pos <= size; pos++)
   {
      A_seq_i[pos] = A_seq_i[pos] + A_seq_i[pos-1];
   }

   /* Nonzero offsets and counts per process; counts is reused for the
      nonzero counts from here on */
   nnz_displs[new_num_procs] = A_seq_i[size];
   for (proc = 1; proc <= new_num_procs; proc++)
   {
      nnz_displs[proc] = A_seq_i[row_displs[proc]];
      counts[proc-1]   = nnz_displs[proc] - nnz_displs[proc-1];
   }
   total_nnz = nnz_displs[new_num_procs];

   /* Gather column indices and values */
   A_seq_j    = hypre_CTAlloc(HYPRE_Int, total_nnz);
   A_seq_data = hypre_CTAlloc(double, total_nnz);

   hypre_MPI_Allgatherv ( local_j, num_nonzeros, HYPRE_MPI_INT,
                          A_seq_j, counts, nnz_displs,
                          HYPRE_MPI_INT, new_comm );

   hypre_MPI_Allgatherv ( local_data, num_nonzeros, hypre_MPI_DOUBLE,
                          A_seq_data, counts, nnz_displs,
                          hypre_MPI_DOUBLE, new_comm );

   hypre_TFree(row_displs);
   hypre_TFree(nnz_displs);
   hypre_TFree(local_i);
   hypre_TFree(local_j);
   hypre_TFree(local_data);

   /* One partition holding every row */
   row_starts = hypre_CTAlloc(HYPRE_Int,2);
   row_starts[0] = 0;
   row_starts[1] = size;

   /* Create 1 proc communicator */
   seq_comm = hypre_MPI_COMM_SELF;

   /* The gathered arrays become the diag block; the offd block only gets
      its (all zero) row pointer array */
   A_seq = hypre_ParCSRMatrixCreate(seq_comm, size, size,
                                    row_starts, row_starts,
                                    0, total_nnz, 0);

   A_seq_diag = hypre_ParCSRMatrixDiag(A_seq);
   A_seq_offd = hypre_ParCSRMatrixOffd(A_seq);

   hypre_CSRMatrixData(A_seq_diag) = A_seq_data;
   hypre_CSRMatrixI(A_seq_diag)    = A_seq_i;
   hypre_CSRMatrixJ(A_seq_diag)    = A_seq_j;
   hypre_CSRMatrixI(A_seq_offd)    = A_seq_offd_i;

   /* Work vectors share row_starts with the matrix without owning it */
   F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
   U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
   hypre_ParVectorOwnsPartitioning(F_seq) = 0;
   hypre_ParVectorOwnsPartitioning(U_seq) = 0;
   hypre_ParVectorInitialize(F_seq);
   hypre_ParVectorInitialize(U_seq);

   hypre_BoomerAMGSetup(coarse_solver, A_seq, F_seq, U_seq);

   /* Hand everything over to the AMG data */
   hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver;
   hypre_ParAMGDataACoarse(amg_data)      = A_seq;
   hypre_ParAMGDataFCoarse(amg_data)      = F_seq;
   hypre_ParAMGDataUCoarse(amg_data)      = U_seq;
   hypre_ParAMGDataNewComm(amg_data)      = new_comm;

   hypre_TFree(counts);
   hypre_TFree(ranks);

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_MatTCommPkgCreate_core
 *
 * Builds the send/receive description needed to form A*A^T: for every other
 * processor p, the list of local rows to ship to p, and for this processor
 * the list of processors that will ship rows here and how many.
 *
 * Input:
 *   comm                        communicator
 *   col_map_offd, num_cols_offd global column numbers of the offd block
 *   first_col_diag, num_cols_diag  column range of the diag block
 *   col_starts                  column partitioning, indexed by processor
 *   row_starts                  row partitioning, indexed by processor
 *   num_rows_diag               number of local rows
 *   firstColDiag, colMapOffd, mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd
 *                               matrix structure, passed on to RowsWithColumn
 *   data                        = 1 for a matrix with floating-point data,
 *                               = 0 for a Boolean matrix (not used here)
 *
 * Output (all arrays are allocated here; the caller takes them over):
 *   *p_num_sends, *p_send_procs, *p_send_map_starts, *p_send_map_elmts
 *   *p_num_recvs, *p_recv_procs, *p_recv_vec_starts
 *
 * Meaning of the send arrays:
 *   send_procs       processors p to send to
 *   send_map_starts  for the k-th entry of send_procs, the range
 *                    [send_map_starts[k], send_map_starts[k+1]) of
 *                    send_map_elmts that belongs to it
 *   send_map_elmts   local row numbers (as delimited by RowsWithColumn) of
 *                    rows of A to send; a row is selected because its range
 *                    covers a column that p also has.  Note that these
 *                    arrays do not mean quite the same thing as for A*B.
 *
 * Background.  For A*b or A*B each offd column i of A has a unique owner
 * (the processor holding row i), so only offd columns need to be looked at.
 * For A*A^T, A^T is not distributed by rows: for an entry (k,i) of A we need
 * every processor holding an entry in column i, in its diag or its offd
 * block, so no class of blocks can be skipped.
 *
 * Algorithm.
 *  1. Every processor builds tmp: for each processor i, the number of its
 *     own columns (offd first, then diag) that fall in i's column range,
 *     followed by those global column numbers.  Sizes are exchanged with an
 *     Allgather (local_info -> info), the lists with an Allgatherv
 *     (tmp -> recv_buf).
 *  2. For every other processor p, each column number in p's part of
 *     recv_buf is checked against the local columns.  On a match the rows
 *     delimited by RowsWithColumn are appended (once each) to
 *     send_map_elmts.
 *  3. Triples (destination, source, count) are exchanged with an Allgather
 *     of sizes followed by an Allgatherv, and the triples addressed to this
 *     processor give recv_procs and recv_vec_starts.  An Allgather was
 *     chosen for simplicity: it moves num_procs*num_sends items where
 *     num_procs would do, but that is small next to step 1.  Alternatives
 *     would be an Allgatherv restricted to communicating pairs (less data,
 *     one more step to get the counts) or individual Isend/Irecv pairs (no
 *     excess data, many small messages).
 *--------------------------------------------------------------------------*/
void
hypre_MatTCommPkgCreate_core (
   /* input args: */
   MPI_Comm comm, HYPRE_Int * col_map_offd, HYPRE_Int first_col_diag, HYPRE_Int * col_starts,
   HYPRE_Int num_rows_diag, HYPRE_Int num_cols_diag, HYPRE_Int num_cols_offd, HYPRE_Int * row_starts,
   HYPRE_Int firstColDiag, HYPRE_Int * colMapOffd,
   HYPRE_Int * mat_i_diag, HYPRE_Int * mat_j_diag, HYPRE_Int * mat_i_offd, HYPRE_Int * mat_j_offd,
   HYPRE_Int data,  /* = 1 for a matrix with floating-point data, =0 for Boolean matrix */
   /* pointers to output args: */
   HYPRE_Int * p_num_recvs, HYPRE_Int ** p_recv_procs, HYPRE_Int ** p_recv_vec_starts,
   HYPRE_Int * p_num_sends, HYPRE_Int ** p_send_procs, HYPRE_Int ** p_send_map_starts,
   HYPRE_Int ** p_send_map_elmts
   )
{
   HYPRE_Int   num_sends;
   HYPRE_Int  *send_procs;
   HYPRE_Int  *send_map_starts;
   HYPRE_Int  *send_map_elmts;
   HYPRE_Int   num_recvs;
   HYPRE_Int  *recv_procs;
   HYPRE_Int  *recv_vec_starts;
   HYPRE_Int   i, j, j2, k, ir, rowmin, rowmax;
   HYPRE_Int  *tmp, *recv_buf, *displs, *info, *send_buf, *all_num_sends3;
   HYPRE_Int   num_procs, my_id, num_elmts;
   HYPRE_Int   local_info, index, index2;
   HYPRE_Int   pmatch, col, kc, p;
   HYPRE_Int * recv_sz_buf;
   HYPRE_Int * row_marker;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   info = hypre_CTAlloc(HYPRE_Int, num_procs);

   /* ----------------------------------------------------------------------
    * Step 1a: exchange the size of every processor's column list.
    * The list holds one counter per processor plus every local column once.
    * num_recvs starts as an upper bound and is corrected in step 3.
    * ---------------------------------------------------------------------*/

   num_recvs = num_procs-1;
   local_info = num_procs + num_cols_offd + num_cols_diag;

   hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

   /* ----------------------------------------------------------------------
    * Step 1b: generate the information to be sent.  tmp contains, for each
    * processor i: the number of local columns lying in i's column range,
    * then the global indices of those columns (offd first, then diag).
    * ---------------------------------------------------------------------*/

   displs = hypre_CTAlloc(HYPRE_Int, num_procs+1);
   displs[0] = 0;
   for (i=1; i < num_procs+1; i++)
      displs[i] = displs[i-1]+info[i-1];
   recv_buf = hypre_CTAlloc(HYPRE_Int, displs[num_procs]);
   tmp = hypre_CTAlloc(HYPRE_Int, local_info);

   j = 0;
   for (i=0; i < num_procs; i++)
   {
      j2 = j++;        /* slot of the counter for processor i */
      tmp[j2] = 0;
      for (k=0; k < num_cols_offd; k++)
      {
         if (col_map_offd[k] >= col_starts[i] &&
             col_map_offd[k] < col_starts[i+1])
         {
            tmp[j++] = col_map_offd[k];
            ++(tmp[j2]);
         }
      }
      for (k=0; k < num_cols_diag; k++)
      {
         if ( k+first_col_diag >= col_starts[i] &&
              k+first_col_diag < col_starts[i+1] )
         {
            tmp[j++] = k + first_col_diag;
            ++(tmp[j2]);
         }
      }
   }

   hypre_MPI_Allgatherv(tmp,local_info,HYPRE_MPI_INT,recv_buf,info,displs,HYPRE_MPI_INT,comm);

   /* ----------------------------------------------------------------------
    * Step 2: determine send_procs and the actual elements to be sent (in
    * send_map_elmts), and send_map_starts, whose k-th entry points to the
    * beginning of the elements to be sent to send_procs[k].
    * ---------------------------------------------------------------------*/

   num_sends = num_procs; /* may turn out to be less, but we can't know yet */
   num_elmts = (num_procs-1)*num_rows_diag;
   /* ... a crude upper bound; should try to do better even if more comm required */
   send_procs = hypre_CTAlloc(HYPRE_Int, num_sends);
   send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1);
   send_map_elmts = hypre_CTAlloc(HYPRE_Int, num_elmts);
   row_marker = hypre_CTAlloc(HYPRE_Int,num_rows_diag);

   index = 0;
   index2 = 0;
   send_map_starts[0] = 0;
   for (i=0; i < num_procs; i++)
   {
      send_map_starts[index+1] = send_map_starts[index];

      /* Nothing is ever sent to this processor itself, so its own section
         of recv_buf does not need to be examined. */
      if ( i==my_id ) continue;

      j = displs[i];
      pmatch = 0;
      /* row_marker[ir]!=0 means row ir is already queued for processor i */
      for ( ir=0; ir<num_rows_diag; ++ir ) row_marker[ir] = 0;
      while ( j < displs[i+1])
      {
         num_elmts = recv_buf[j++];  /* no. of columns proc. i wants */
         for ( k=0; k<num_elmts; k++ )
         {
            col = recv_buf[j++]; /* a global column no. at proc. i */

            /* Is col one of my offd columns? */
            for ( kc=0; kc<num_cols_offd; kc++ )
            {
               if ( col_map_offd[kc]==col )
               {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
                  /* Columns cannot be sent, so plan to send all of my rows
                     which use this column... */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag,
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd
                     );
                  for ( ir=rowmin; ir<=rowmax; ++ir )
                  {
                     if ( row_marker[ir]==0 )
                     {
                        row_marker[ir] = 1;
                        ++send_map_starts[index+1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }

            /* Is col inside my own row range?  A direct range test replaces
               scanning every index of the range for equality with col. */
            if ( col>=row_starts[my_id] && col<row_starts[my_id+1] )
            {
               /* this processor has the same column as proc. i (but is different) */
               pmatch = 1;
               send_procs[index] = i;
               /* Plan to send all of my rows which use this column... */
               RowsWithColumn( &rowmin, &rowmax, col,
                               num_rows_diag,
                               firstColDiag, colMapOffd,
                               mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd
                  );
               for ( ir=rowmin; ir<=rowmax; ++ir )
               {
                  if ( row_marker[ir]==0 )
                  {
                     row_marker[ir] = 1;
                     ++send_map_starts[index+1];
                     send_map_elmts[index2++] = ir;
                  }
               }
            }
         }
      }
      if ( pmatch ) index++;
   }
   num_sends = index;  /* no. of proc. rows will be sent to */

   /* ----------------------------------------------------------------------
    * Step 3: compute the receive arrays recv_procs and recv_vec_starts.
    * For each processor p we need the number of rows p will send here.
    * Each processor knows what it sends (send_map_starts), so the sizes are
    * exchanged as triples (destination, source, count).
    * ---------------------------------------------------------------------*/

   recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs);
   recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1);

   /* preliminary list: every other processor (overwritten below) */
   j2 = 0;
   for (i=0; i < num_procs; i++)
   {
      if ( i!=my_id )
      {
         recv_procs[j2] = i;
         j2++;
      }
   }

   send_buf = hypre_CTAlloc( HYPRE_Int, 3*num_sends );
   all_num_sends3 = hypre_CTAlloc( HYPRE_Int, num_procs );

   /* scatter-gather num_sends, to set up the size for the main comm. step */
   i = 3*num_sends;
   hypre_MPI_Allgather( &i, 1, HYPRE_MPI_INT, all_num_sends3, 1, HYPRE_MPI_INT, comm );
   displs[0] = 0;
   for ( p=0; p<num_procs; ++p )
   {
      displs[p+1] = displs[p] + all_num_sends3[p];
   }
   recv_sz_buf = hypre_CTAlloc( HYPRE_Int, displs[num_procs] );

   /* scatter-gather size of row info to send, and proc. to send to */
   index = 0;
   for ( i=0; i<num_sends; ++i )
   {
      send_buf[index++] = send_procs[i];   /* processor to send to */
      send_buf[index++] = my_id;
      send_buf[index++] = send_map_starts[i+1] - send_map_starts[i];
      /* ... sizes of info to send */
   }

   hypre_MPI_Allgatherv( send_buf, 3*num_sends, HYPRE_MPI_INT,
                         recv_sz_buf, all_num_sends3, displs, HYPRE_MPI_INT, comm);

   /* pick out the triples whose destination is this processor */
   recv_vec_starts[0] = 0;
   j2 = 0;
   j = 0;
   for ( i=0; i<displs[num_procs]; i=i+3 )
   {
      j = i;
      if ( recv_sz_buf[j++]==my_id )
      {
         recv_procs[j2] = recv_sz_buf[j++];
         recv_vec_starts[j2+1] = recv_vec_starts[j2] + recv_sz_buf[j++];
         j2++;
      }
   }
   num_recvs = j2;

#if 0
   hypre_printf("num_procs=%i send_map_starts (%i):",num_procs,num_sends+1);
   for( i=0; i<=num_sends; ++i ) hypre_printf(" %i", send_map_starts[i] );
   hypre_printf(" send_procs (%i):",num_sends);
   for( i=0; i<num_sends; ++i ) hypre_printf(" %i", send_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_sends=%i send_buf[0,1,2]=%i %i %i",
                my_id, num_sends, send_buf[0], send_buf[1], send_buf[2] );
   hypre_printf(" all_num_sends3[0,1]=%i %i\n", all_num_sends3[0], all_num_sends3[1] );
   hypre_printf("my_id=%i rcv_sz_buf (%i):", my_id, displs[num_procs] );
   for( i=0; i<displs[num_procs]; ++i ) hypre_printf(" %i", recv_sz_buf[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i recv_vec_starts (%i):",my_id,num_recvs+1);
   for( i=0; i<=num_recvs; ++i ) hypre_printf(" %i", recv_vec_starts[i] );
   hypre_printf(" recv_procs (%i):",num_recvs);
   for( i=0; i<num_recvs; ++i ) hypre_printf(" %i", recv_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_recvs=%i recv_sz_buf[0,1,2]=%i %i %i\n",
                my_id, num_recvs, recv_sz_buf[0], recv_sz_buf[1], recv_sz_buf[2] );
#endif

   hypre_TFree(send_buf);
   hypre_TFree(all_num_sends3);
   hypre_TFree(tmp);
   hypre_TFree(recv_buf);
   hypre_TFree(displs);
   hypre_TFree(info);
   hypre_TFree(recv_sz_buf);
   hypre_TFree(row_marker);

   /* finish up with the hand-coded call-by-reference... */
   *p_num_recvs = num_recvs;
   *p_recv_procs = recv_procs;
   *p_recv_vec_starts = recv_vec_starts;
   *p_num_sends = num_sends;
   *p_send_procs = send_procs;
   *p_send_map_starts = send_map_starts;
   *p_send_map_elmts = send_map_elmts;
}
/*--------------------------------------------------------------------------
 * hypre_seqAMGCycle
 *
 * Coarse-level solve on gathered data.  Ranks for which
 * hypre_ParAMGDataParticipate(amg_data) is set collect the right-hand side
 * and the current iterate of level p_level over new_comm into
 * F_coarse / U_coarse, call hypre_BoomerAMGSolve on the coarse system and
 * write their slice of the coarse solution back into Par_U_array[p_level].
 *
 * redundant != 0 : every participating rank gathers the full vectors
 *                  (Allgatherv), solves, and copies its own slice back.
 * redundant == 0 : only rank 0 of new_comm gathers (Gatherv) and solves;
 *                  the solution is handed back with Scatterv.
 *
 * Returns Solve_err_flag, which this routine never changes from 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData  *amg_data,
                   HYPRE_Int          p_level,
                   hypre_ParVector  **Par_F_array,
                   hypre_ParVector  **Par_U_array )
{
   /* Sequential-solve data stored on the AMG object */
   MPI_Comm            new_comm      = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver        coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix *A_coarse      = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector    *F_coarse      = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector    *U_coarse      = hypre_ParAMGDataUCoarse(amg_data);
   HYPRE_Int           redundant     = hypre_ParAMGDataRedundant(amg_data);

   /* Distributed vectors of this level and the local piece of the iterate */
   hypre_ParVector    *par_u         = Par_U_array[p_level];
   hypre_ParVector    *par_f         = Par_F_array[p_level];
   HYPRE_Int           u_offset      = hypre_ParVectorFirstIndex(par_u);
   hypre_Vector       *u_local       = hypre_ParVectorLocalVector(par_u);
   HYPRE_Real         *u_data        = hypre_VectorData(u_local);
   HYPRE_Int           u_len         = hypre_VectorSize(u_local);

   hypre_Vector       *f_local;
   HYPRE_Real         *f_data;
   HYPRE_Int           f_len;
   HYPRE_Real         *gathered      = NULL;   /* receive buffer (F, then U)     */
   HYPRE_Real         *solution      = NULL;   /* coarse solution data           */
   HYPRE_Int          *counts        = NULL;   /* per-rank local lengths         */
   HYPRE_Int          *offsets       = NULL;   /* displacements built from counts */
   HYPRE_Int           new_num_procs, my_id;
   HYPRE_Int           holds_coarse;
   HYPRE_Int           Solve_err_flag = 0;
   HYPRE_Int           k;

   /* Ranks that do not take part in the coarse solve have nothing to do */
   if (!hypre_ParAMGDataParticipate(amg_data))
   {
      return (Solve_err_flag);
   }

   hypre_MPI_Comm_size(new_comm, &new_num_procs);
   hypre_MPI_Comm_rank(new_comm, &my_id);

   /* ranks that receive the gathered vectors and run the coarse solve */
   holds_coarse = (redundant || my_id == 0);

   f_local = hypre_ParVectorLocalVector(par_f);
   f_data  = hypre_VectorData(f_local);
   f_len   = hypre_VectorSize(f_local);

   /* 1. exchange the local lengths of f */
   counts = hypre_CTAlloc(HYPRE_Int, new_num_procs);
   if (redundant)
   {
      hypre_MPI_Allgather(&f_len, 1, HYPRE_MPI_INT,
                          counts, 1, HYPRE_MPI_INT, new_comm);
   }
   else
   {
      hypre_MPI_Gather(&f_len, 1, HYPRE_MPI_INT,
                       counts, 1, HYPRE_MPI_INT, 0, new_comm);
   }

   /* 2. receiving ranks turn the counts into displacements */
   if (holds_coarse)
   {
      offsets = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
      offsets[0] = 0;
      for (k = 0; k < new_num_procs; k++)
      {
         offsets[k+1] = offsets[k] + counts[k];
      }
      if (F_coarse)
      {
         gathered = hypre_VectorData(hypre_ParVectorLocalVector(F_coarse));
      }
   }

   /* 3. gather f into the coarse right-hand side */
   if (redundant)
   {
      hypre_MPI_Allgatherv(f_data, f_len, HYPRE_MPI_REAL,
                           gathered, counts, offsets, HYPRE_MPI_REAL, new_comm);
   }
   else
   {
      hypre_MPI_Gatherv(f_data, f_len, HYPRE_MPI_REAL,
                        gathered, counts, offsets, HYPRE_MPI_REAL, 0, new_comm);
   }

   /* 4. gather u into the coarse iterate.
    * NOTE(review): the counts/displacements computed from f are reused here,
    * so this presumably relies on u and f having the same local length on
    * every rank - confirm. */
   if (holds_coarse)
   {
      gathered = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
   }
   if (redundant)
   {
      hypre_MPI_Allgatherv(u_data, u_len, HYPRE_MPI_REAL,
                           gathered, counts, offsets, HYPRE_MPI_REAL, new_comm);
      /* the redundant path needs no further communication */
      hypre_TFree(offsets);
      hypre_TFree(counts);
   }
   else
   {
      hypre_MPI_Gatherv(u_data, u_len, HYPRE_MPI_REAL,
                        gathered, counts, offsets, HYPRE_MPI_REAL, 0, new_comm);
   }

   /* 5. coarse solve on the ranks that own the gathered system */
   if (holds_coarse)
   {
      hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);
   }

   /* 6. bring my part of the coarse solution back into the parallel vector */
   if (redundant)
   {
      solution = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
      for (k = 0; k < u_len; k++)
      {
         u_data[k] = solution[u_offset + k];
      }
   }
   else
   {
      if (my_id == 0)
      {
         solution = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
      }
      hypre_MPI_Scatterv(solution, counts, offsets, HYPRE_MPI_REAL,
                         u_data, u_len, HYPRE_MPI_REAL, 0, new_comm);
      /*if (my_id == 0)
           local_data = hypre_VectorData(hypre_ParVectorLocalVector(F_coarse));
        hypre_MPI_Scatterv ( local_data, info, displs, HYPRE_MPI_REAL,
                             f_data, n, HYPRE_MPI_REAL, 0, new_comm );*/
      if (my_id == 0)
      {
         hypre_TFree(offsets);   /* displacements exist on the root only */
      }
      hypre_TFree(counts);
   }

   return (Solve_err_flag);
}
/*--------------------------------------------------------------------------
 * hypre_seqAMGCycle
 *
 * Redundant coarse-level solve.  When a coarse matrix is present
 * (A_coarse != NULL) every rank of new_comm all-gathers the right-hand side
 * and the current iterate of level p_level into F_coarse / U_coarse, calls
 * hypre_BoomerAMGSolve on the coarse system, and copies its own slice of the
 * coarse solution (starting at the first index of Par_U_array[p_level]) back
 * into the local data of Par_U_array[p_level].
 *
 * When A_coarse is NULL nothing is done.
 *
 * Returns Solve_err_flag, which this routine never changes from 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData  *amg_data,
                   HYPRE_Int          p_level,
                   hypre_ParVector  **Par_F_array,
                   hypre_ParVector  **Par_U_array )
{
   hypre_ParVector *Aux_U;
   hypre_ParVector *Aux_F;

   /* Local variables */
   HYPRE_Int        Solve_err_flag = 0;
   HYPRE_Int        n;
   HYPRE_Int        i;
   hypre_Vector    *u_local;
   double          *u_data;
   HYPRE_Int        first_index;

   /* Acquire seq data */
   MPI_Comm            new_comm      = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver        coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix *A_coarse      = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector    *F_coarse      = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector    *U_coarse      = hypre_ParAMGDataUCoarse(amg_data);

   Aux_U = Par_U_array[p_level];
   Aux_F = Par_F_array[p_level];

   first_index = hypre_ParVectorFirstIndex(Aux_U);
   u_local     = hypre_ParVectorLocalVector(Aux_U);
   u_data      = hypre_VectorData(u_local);
   n           = hypre_VectorSize(u_local);

   if (A_coarse)
   {
      double       *f_data;
      hypre_Vector *f_local;
      hypre_Vector *tmp_vec;
      HYPRE_Int     nf;
      HYPRE_Int     local_info;
      double       *recv_buf;
      HYPRE_Int    *displs, *info;
      HYPRE_Int     new_num_procs;

      hypre_MPI_Comm_size(new_comm, &new_num_procs);

      f_local = hypre_ParVectorLocalVector(Aux_F);
      f_data  = hypre_VectorData(f_local);
      nf      = hypre_VectorSize(f_local);

      /* first f: exchange the local lengths ... */
      info = hypre_CTAlloc(HYPRE_Int, new_num_procs);
      local_info = nf;
      hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT,
                          info, 1, HYPRE_MPI_INT, new_comm);

      /* ... and turn them into displacements (displs[p] = sum of info[0..p-1]) */
      displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
      displs[0] = 0;
      for (i = 1; i < new_num_procs+1; i++)
      {
         displs[i] = displs[i-1] + info[i-1];
      }

      tmp_vec  = hypre_ParVectorLocalVector(F_coarse);
      recv_buf = hypre_VectorData(tmp_vec);
      hypre_MPI_Allgatherv ( f_data, nf, hypre_MPI_DOUBLE,
                             recv_buf, info, displs, hypre_MPI_DOUBLE, new_comm );

      /* then u.
       * NOTE(review): the counts/displacements computed from f are reused,
       * so this presumably relies on u and f having the same local length
       * on every rank - confirm. */
      tmp_vec  = hypre_ParVectorLocalVector(U_coarse);
      recv_buf = hypre_VectorData(tmp_vec);
      hypre_MPI_Allgatherv ( u_data, n, hypre_MPI_DOUBLE,
                             recv_buf, info, displs, hypre_MPI_DOUBLE, new_comm );

      /* clean up the communication bookkeeping */
      hypre_TFree(displs);
      hypre_TFree(info);

      hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);

      /* copy my part of U to parallel vector */
      {
         double *local_data;

         local_data = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
         for (i = 0; i < n; i++)
         {
            u_data[i] = local_data[first_index+i];
         }
      }
   }

   return(Solve_err_flag);
}
/* AmgCGCPrepare
 * ================================================================================================
 * Prepares the assembly of the graph representing the connections between the grids:
 * determines the global vertex range of the locally created coarse grids, shifts the positive
 * entries of CF_marker into that global numbering and exchanges the shifted markers for the
 * off-processor columns of S.
 *
 * S              : the strength matrix (its communication package is created here if missing)
 * nlocal         : the number of locally created coarse grids (one more vertex is reserved
 *                  when empty grids are allowed, i.e. when coarsen_type is even)
 * CF_marker      : in/out, the coarse/fine markers of the local variables; entries > 0 are
 *                  shifted by vstart (or vstart+1 when empty grids are allowed)
 * CF_marker_offd : out, newly allocated array with one entry per off-diagonal column of S;
 *                  filled by the exchange only when there is more than one process
 * coarsen_type   : the coarsening type; an even value means "allow empty grids"
 * vrange         : out, newly allocated ranges of the vertices representing coarse grids
 *                  (2 entries with HYPRE_NO_GLOBAL_PARTITION, mpisize+1 entries otherwise)
 *
 * Both output arrays are allocated with hypre_CTAlloc and are not freed here.
 * Returns ierr, which is always 0.
 * ================================================================================================*/
HYPRE_Int AmgCGCPrepare (hypre_ParCSRMatrix *S,HYPRE_Int nlocal,HYPRE_Int *CF_marker,HYPRE_Int **CF_marker_offd,HYPRE_Int coarsen_type,HYPRE_Int **vrange)
{
  HYPRE_Int ierr=0;
  HYPRE_Int mpisize,mpirank;
  HYPRE_Int num_sends;
  HYPRE_Int *vertexrange=NULL;
  HYPRE_Int vstart;
  HYPRE_Int *int_buf_data;
  HYPRE_Int start;
  HYPRE_Int i,ii,j;
  /* Evaluated once so that the extra vertex reserved below and the marker offset applied
     further down can never disagree (the remainder of a negative odd value is -1, not 1). */
  HYPRE_Int allow_emptygrids = (coarsen_type % 2 == 0);

  HYPRE_Int num_variables = hypre_CSRMatrixNumRows (hypre_ParCSRMatrixDiag(S));
  HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (hypre_ParCSRMatrixOffd (S));

  MPI_Comm comm = hypre_ParCSRMatrixComm(S);
  /* hypre_MPI_Status status; */

  hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg (S);
  hypre_ParCSRCommHandle *comm_handle;

  hypre_MPI_Comm_size (comm,&mpisize);
  hypre_MPI_Comm_rank (comm,&mpirank);

  if (!comm_pkg) {
    hypre_MatvecCommPkgCreate (S);
    comm_pkg = hypre_ParCSRMatrixCommPkg (S);
  }
  num_sends = hypre_ParCSRCommPkgNumSends (comm_pkg);

  if (allow_emptygrids) nlocal++; /* even coarsen_type means allow_emptygrids */

#ifdef HYPRE_NO_GLOBAL_PARTITION
  {
    HYPRE_Int scan_recv;

    vertexrange = hypre_CTAlloc(HYPRE_Int,2);
    hypre_MPI_Scan(&nlocal, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
    /* first point in my range */
    vertexrange[0] = scan_recv - nlocal;
    /* first point in next proc's range */
    vertexrange[1] = scan_recv;
    vstart = vertexrange[0];
  }
#else
  /* gather every process' vertex count and turn the counts into a prefix sum */
  vertexrange = hypre_CTAlloc (HYPRE_Int,mpisize+1);
  hypre_MPI_Allgather (&nlocal,1,HYPRE_MPI_INT,vertexrange+1,1,HYPRE_MPI_INT,comm);
  vertexrange[0]=0;
  for (i=2;i<=mpisize;i++) vertexrange[i]+=vertexrange[i-1];
  vstart = vertexrange[mpirank];
#endif

  /* Note: vstart uses 0-based indexing, while CF_marker uses 1-based indexing */
  if (!allow_emptygrids) {
    for (i=0;i<num_variables;i++)
      if (CF_marker[i]>0) CF_marker[i]+=vstart;
  }
  else {
    /* hypre_printf ("processor %d: empty grid allowed\n",mpirank); */
    for (i=0;i<num_variables;i++) {
      if (CF_marker[i]>0)
        CF_marker[i]+=vstart+1; /* add one because vertexrange[mpirank]+1 denotes the empty grid.
                                   Hence, vertexrange[mpirank]+2 is the first coarse grid denoted in
                                   global indices, ... */
    }
  }

  /* exchange data: pack the shifted markers of all variables listed in the send map ... */
  *CF_marker_offd = hypre_CTAlloc (HYPRE_Int,num_cols_offd);
  int_buf_data = hypre_CTAlloc (HYPRE_Int,hypre_ParCSRCommPkgSendMapStart (comm_pkg,num_sends));

  for (i=0,ii=0;i<num_sends;i++) {
    start = hypre_ParCSRCommPkgSendMapStart (comm_pkg,i);
    for (j=start;j<hypre_ParCSRCommPkgSendMapStart (comm_pkg,i+1);j++)
      int_buf_data [ii++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
  }

  /* ... and ship them into *CF_marker_offd (job code 11; presumably the integer
     exchange - confirm against hypre_ParCSRCommHandleCreate) */
  if (mpisize>1) {
    comm_handle = hypre_ParCSRCommHandleCreate (11,comm_pkg,int_buf_data,*CF_marker_offd);
    hypre_ParCSRCommHandleDestroy (comm_handle);
  }
  hypre_TFree (int_buf_data);

  *vrange=vertexrange;
  return (ierr);
}
/* hypre_BoomerAMGCoarsenCGC - CGC algorithm
 * ====================================================================================================
 * S             : the strong couplings
 * numberofgrids : the number of grids
 * coarsen_type  : the coarsening type
 * CF_marker     : in/out; on entry the grid markers handed to AmgCGCPrepare, on exit every
 *                 local variable is marked C_PT or F_PT
 *
 * Steps: AmgCGCPrepare -> AmgCGCGraphAssemble -> gather the graph on every process
 * (hypre_ParCSRMatrixToCSRMatrixAll) -> AmgCGCChoose -> keep as C_PT exactly the variables
 * whose marker equals the grid chosen for this process.
 *
 * Returns ierr, which is always 0.
 *
 * The "#if 0" blocks are timing/debug output; they use a variable wall_time whose declaration
 * is commented out below and has to be restored before any of them is enabled.
 * =====================================================================================================*/
HYPRE_Int hypre_BoomerAMGCoarsenCGC (hypre_ParCSRMatrix *S,HYPRE_Int numberofgrids,HYPRE_Int coarsen_type,HYPRE_Int *CF_marker)
{
  HYPRE_Int ierr=0;
  HYPRE_Int j;
  HYPRE_Int mpisize,mpirank;
  HYPRE_Int elected;               /* the grid chosen for this process */
  HYPRE_Int *coarse;               /* chosen grid of every process, set by AmgCGCChoose */
  HYPRE_Int *vertexrange = NULL;
  HYPRE_Int *vertexrange_all = NULL;
  HYPRE_Int *CF_marker_offd = NULL;
  HYPRE_Int num_variables = hypre_CSRMatrixNumRows (hypre_ParCSRMatrixDiag(S));
  /* HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (hypre_ParCSRMatrixOffd (S)); */
  /* HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd (S); */

  /* HYPRE_Real wall_time; */

  HYPRE_IJMatrix ijG;
  hypre_ParCSRMatrix *G;
  hypre_CSRMatrix *Gseq;
  MPI_Comm comm = hypre_ParCSRMatrixComm(S);

  hypre_MPI_Comm_size (comm,&mpisize);
  hypre_MPI_Comm_rank (comm,&mpirank);

#if 0
  if (!mpirank) {
    wall_time = time_getWallclockSeconds();
    hypre_printf ("Starting CGC preparation\n");
  }
#endif

  /* shift the markers to global grid numbers and fetch the off-processor markers */
  AmgCGCPrepare (S,numberofgrids,CF_marker,&CF_marker_offd,coarsen_type,&vertexrange);

#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC preparation, wall_time = %f s\n",wall_time);
    wall_time = time_getWallclockSeconds();
    hypre_printf ("Starting CGC matrix assembly\n");
  }
#endif

  /* build the distributed grid-connection graph and get hold of its ParCSR object */
  AmgCGCGraphAssemble (S,vertexrange,CF_marker,CF_marker_offd,coarsen_type,&ijG);

#if 0
  HYPRE_IJMatrixPrint (ijG,"graph.txt");
#endif

  HYPRE_IJMatrixGetObject (ijG,(void**)&G);

#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC matrix assembly, wall_time = %f s\n",wall_time);
    wall_time = time_getWallclockSeconds();
    hypre_printf ("Starting CGC matrix communication\n");
  }
#endif

#ifdef HYPRE_NO_GLOBAL_PARTITION
  {
    /* classical CGC does not really make sense in combination with HYPRE_NO_GLOBAL_PARTITION,
       but anyway, here it is: */
    HYPRE_Int nlocal = vertexrange[1]-vertexrange[0];

    vertexrange_all = hypre_CTAlloc (HYPRE_Int,mpisize+1);
    hypre_MPI_Allgather (&nlocal,1,HYPRE_MPI_INT,vertexrange_all+1,1,HYPRE_MPI_INT,comm);
    vertexrange_all[0]=0;
    for (j=2;j<=mpisize;j++) vertexrange_all[j]+=vertexrange_all[j-1];
  }
#else
  /* the global partition is already what AmgCGCPrepare handed back */
  vertexrange_all = vertexrange;
#endif

  Gseq = hypre_ParCSRMatrixToCSRMatrixAll (G);

#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC matrix communication, wall_time = %f s\n",wall_time);
  }
#endif

  if (!Gseq) {
    /* BM Aug 31, 2006: Gseq==NULL if G has no local rows */
    for (j=0;j<num_variables;j++)
      CF_marker[j] = F_PT;
  }
  else {
#if 0 /* debugging */
    if (!mpirank) {
      wall_time = time_getWallclockSeconds();
      hypre_printf ("Starting CGC election\n");
    }
#endif

    AmgCGCChoose (Gseq,vertexrange_all,mpisize,&coarse);

#if 0 /* debugging */
    if (!mpirank) {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf ("Finished CGC election, wall_time = %f s\n",wall_time);
    }
#endif

#if 0 /* debugging */
    if (!mpirank) {
      for (j=0;j<mpisize;j++)
        hypre_printf ("Processor %d, choice = %d of range %d - %d\n",j,coarse[j],vertexrange_all[j]+1,vertexrange_all[j+1]);
    }
    fflush(stdout);
#endif

#if 0 /* debugging */
    if (!mpirank) {
      wall_time = time_getWallclockSeconds();
      hypre_printf ("Starting CGC CF assignment\n");
    }
#endif

    /* variables of the elected grid become coarse points, all others fine points */
    elected = coarse[mpirank];
    for (j=0;j<num_variables;j++)
      CF_marker[j] = (CF_marker[j]==elected) ? C_PT : F_PT;

    hypre_CSRMatrixDestroy (Gseq);
    hypre_TFree (coarse);
  }

#if 0
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC CF assignment, wall_time = %f s\n",wall_time);
  }
#endif

#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds();
    hypre_printf ("Starting CGC cleanup\n");
  }
#endif

  HYPRE_IJMatrixDestroy (ijG);
  hypre_TFree (vertexrange);
#ifdef HYPRE_NO_GLOBAL_PARTITION
  /* only a separate allocation in this configuration; otherwise it aliases vertexrange */
  hypre_TFree (vertexrange_all);
#endif
  hypre_TFree (CF_marker_offd);

#if 0
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC cleanup, wall_time = %f s\n",wall_time);
  }
#endif

  return(ierr);
}
/******************************************************************************
 *
 * hypre_IJMatrixCreatePETSc
 *
 * Creates the AuxParCSRMatrix translator if the IJ matrix has none, builds
 * the arrays row_starts and col_starts from the local sizes local_m and
 * local_n stored in the translator (when they are set, i.e. non-negative),
 * and creates the ParCSRMatrix that becomes the local storage of the matrix.
 *
 * When a local size is not set, NULL is handed to hypre_ParCSRMatrixCreate
 * for the corresponding array (presumably it then generates a default
 * partitioning itself - confirm).
 *
 * Returns ierr, which is always 0.
 *
 *****************************************************************************/
HYPRE_Int
hypre_IJMatrixCreatePETSc(hypre_IJMatrix *matrix)
{
   MPI_Comm               comm       = hypre_IJMatrixContext(matrix);
   HYPRE_Int              global_m   = hypre_IJMatrixM(matrix);
   HYPRE_Int              global_n   = hypre_IJMatrixN(matrix);
   hypre_AuxParCSRMatrix *aux_matrix = hypre_IJMatrixTranslator(matrix);

   HYPRE_Int  local_m = -1;       /* -1: not defined by the user */
   HYPRE_Int  local_n = -1;
   HYPRE_Int *row_starts = NULL;
   HYPRE_Int *col_starts = NULL;
   HYPRE_Int  num_procs, my_id;
   HYPRE_Int  same_partition;
   HYPRE_Int  p;
   HYPRE_Int  ierr = 0;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   if (!aux_matrix)
   {
      /* no translator yet: attach an empty one, local sizes stay undefined */
      aux_matrix = hypre_AuxParCSRMatrixCreate(-1,-1,NULL);
      hypre_IJMatrixTranslator(matrix) = aux_matrix;
   }
   else
   {
      local_m = hypre_AuxParCSRMatrixLocalNumRows(aux_matrix);
      local_n = hypre_AuxParCSRMatrixLocalNumCols(aux_matrix);
   }

   /* Per-process row counts go into row_starts[1..num_procs]; entry 0 stays 0
    * (hypre_CTAlloc is used for the allocation).
    * NOTE(review): rank 0 skips the collective when it holds all rows while
    * other ranks with local_m >= 0 would still enter it - confirm that this
    * combination cannot occur with more than one process. */
   if (local_m >= 0)
   {
      row_starts = hypre_CTAlloc(HYPRE_Int,num_procs+1);
      if (my_id == 0 && local_m == global_m)
      {
         row_starts[1] = local_m;
      }
      else
      {
         hypre_MPI_Allgather(&local_m,1,HYPRE_MPI_INT,
                             &row_starts[1],1,HYPRE_MPI_INT,comm);
      }
   }

   /* same for the columns */
   if (local_n >= 0)
   {
      col_starts = hypre_CTAlloc(HYPRE_Int,num_procs+1);
      if (my_id == 0 && local_n == global_n)
      {
         col_starts[1] = local_n;
      }
      else
      {
         hypre_MPI_Allgather(&local_n,1,HYPRE_MPI_INT,
                             &col_starts[1],1,HYPRE_MPI_INT,comm);
      }
   }

   /* Turn the counts into running offsets and share one array when the row
    * and column partitionings coincide.
    * NOTE(review): the prefix sums are only formed when BOTH arrays exist;
    * if just one is defined it is passed on still holding plain counts -
    * confirm this is intended. */
   if (row_starts && col_starts)
   {
      same_partition = 1;
      for (p = 1; p <= num_procs; p++)
      {
         row_starts[p] += row_starts[p-1];
         col_starts[p] += col_starts[p-1];
         if (row_starts[p] != col_starts[p])
         {
            same_partition = 0;
         }
      }
      if (same_partition)
      {
         hypre_TFree(col_starts);
         col_starts = row_starts;
      }
   }

   /* the matrix starts out empty: no off-diagonal columns and no nonzeros */
   hypre_IJMatrixLocalStorage(matrix) =
      hypre_ParCSRMatrixCreate(comm, global_m, global_n,
                               row_starts, col_starts,
                               0  /* num_cols_offd     */,
                               0  /* num_nonzeros_diag */,
                               0  /* num_nonzeros_offd */);

   return ierr;
}
HYPRE_Int hypre_ParCSRMatrixToParChordMatrix( hypre_ParCSRMatrix *Ap, MPI_Comm comm, hypre_ParChordMatrix **pAc ) { HYPRE_Int * row_starts = hypre_ParCSRMatrixRowStarts(Ap); HYPRE_Int * col_starts = hypre_ParCSRMatrixColStarts(Ap); hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(Ap); hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(Ap); HYPRE_Int * offd_j = hypre_CSRMatrixJ(offd); HYPRE_Int * diag_j = hypre_CSRMatrixJ(diag); HYPRE_Int * col_map_offd = hypre_ParCSRMatrixColMapOffd(Ap); HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(Ap); hypre_ParChordMatrix * Ac; hypre_NumbersNode * rdofs, * offd_cols_me; hypre_NumbersNode ** offd_cols; HYPRE_Int ** offd_col_array; HYPRE_Int * len_offd_col_array, * offd_col_array_me; HYPRE_Int len_offd_col_array_me; HYPRE_Int num_idofs, num_rdofs, j_local, j_global, row_global; HYPRE_Int i, j, jj, p, pto, q, qto, my_id, my_q, row, ireq; HYPRE_Int num_inprocessors, num_toprocessors, num_procs, len_num_rdofs_toprocessor; HYPRE_Int *inprocessor, *toprocessor, *pcr, *qcr, *num_inchords, *chord, *chordto; HYPRE_Int *inproc, *toproc, *num_rdofs_toprocessor; HYPRE_Int **inchord_idof, **inchord_rdof, **rdof_toprocessor; double **inchord_data; double data; HYPRE_Int *first_index_idof, *first_index_rdof; hypre_MPI_Request * request; hypre_MPI_Status * status; hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); num_idofs = row_starts[my_id+1] - row_starts[my_id]; num_rdofs = col_starts[my_id+1] - col_starts[my_id]; hypre_ParChordMatrixCreate( pAc, comm, num_idofs, num_rdofs ); Ac = *pAc; /* The following block sets Inprocessor: On each proc. my_id, we find the columns in the offd and diag blocks (global no.s). The columns are rdofs (contrary to what I wrote in ChordMatrix.txt). For each such col/rdof r, find the proc. p which owns row/idof r. We set the temporary array pcr[p]=1 for such p. An MPI all-to-all will exchange such arrays so my_id's array qcr has qcr[q]=1 iff, on proc. q, pcr[my_id]=1. 
In other words, qcr[q]=1 if my_id owns a row/idof i which is the same as a col/rdof owned by q. Collect all such q's into in the array Inprocessor. While constructing pcr, we also construct pj such that for any index jj into offd_j,offd_data, pj[jj] is the processor which owns jj as a row (idof) (the number jj is local to this processor). */ pcr = hypre_CTAlloc( HYPRE_Int, num_procs ); qcr = hypre_CTAlloc( HYPRE_Int, num_procs ); for ( p=0; p<num_procs; ++p ) pcr[p]=0; for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(offd); ++jj ) { j_local = offd_j[jj]; j_global = col_map_offd[j_local]; for ( p=0; p<num_procs; ++p ) { if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) { pcr[p]=1; /* not used yet... pj[jj] = p;*/ break; } } } /* jjd = jj; ...not used yet */ /* pcr[my_id] = 1; ...for square matrices (with nonzero diag block) this one line would do the job of the following nested loop. For non-square matrices, the data distribution is too arbitrary. */ for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(diag); ++jj ) { j_local = diag_j[jj]; j_global = j_local + first_col_diag; for ( p=0; p<num_procs; ++p ) { if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) { pcr[p]=1; /* not used yet... pj[jj+jjd] = p;*/ break; } } } /* Now pcr[p]=1 iff my_id owns a col/rdof r which proc. p owns as a row/idof */ hypre_MPI_Alltoall( pcr, 1, HYPRE_MPI_INT, qcr, 1, HYPRE_MPI_INT, comm ); /* Now qcr[q]==1 if my_id owns a row/idof i which is a col/rdof of proc. q The array of such q's is the array Inprocessor. 
*/ num_inprocessors = 0; for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) ++num_inprocessors; inprocessor = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); p = 0; for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) inprocessor[ p++ ] = q; num_toprocessors = 0; for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) ++num_toprocessors; toprocessor = hypre_CTAlloc( HYPRE_Int, num_toprocessors ); p = 0; for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) toprocessor[ p++ ] = q; hypre_ParChordMatrixNumInprocessors(Ac) = num_inprocessors; hypre_ParChordMatrixInprocessor(Ac) = inprocessor; hypre_ParChordMatrixNumToprocessors(Ac) = num_toprocessors; hypre_ParChordMatrixToprocessor(Ac) = toprocessor; hypre_TFree( qcr ); /* FirstIndexIdof[p] is the global index of proc. p's row 0 */ /* FirstIndexRdof[p] is the global index of proc. p's col 0 */ /* Fir FirstIndexIdof, we copy the array row_starts rather than its pointers, because the chord matrix will think it's free to delete FirstIndexIdof */ /* col_starts[p] contains the global index of the first column in the diag block of p. But for first_index_rdof we want the global index of the first column in p (whether that's in the diag or offd block). So it's more involved than row/idof: we also check the offd block, and have to do a gather to get first_index_rdof for every proc. on every proc. */ first_index_idof = hypre_CTAlloc( HYPRE_Int, num_procs+1 ); first_index_rdof = hypre_CTAlloc( HYPRE_Int, num_procs+1 ); for ( p=0; p<=num_procs; ++p ) { first_index_idof[p] = row_starts[p]; first_index_rdof[p] = col_starts[p]; }; if ( hypre_CSRMatrixNumRows(offd) > 0 && hypre_CSRMatrixNumCols(offd) > 0 ) first_index_rdof[my_id] = col_starts[my_id]<col_map_offd[0] ? col_starts[my_id] : col_map_offd[0]; hypre_MPI_Allgather( &first_index_rdof[my_id], 1, HYPRE_MPI_INT, first_index_rdof, 1, HYPRE_MPI_INT, comm ); /* Set num_inchords: num_inchords[p] is no. chords on my_id connected to p. Set each chord (idof,jdof,data). 
We go through each matrix element in the diag block, find what processor owns its column no. as a row, then update num_inchords[p], inchord_idof[p], inchord_rdof[p], inchord_data[p]. */ inchord_idof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); inchord_rdof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); inchord_data = hypre_CTAlloc( double*, num_inprocessors ); num_inchords = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); chord = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); chordto = hypre_CTAlloc( HYPRE_Int, num_toprocessors ); num_rdofs = 0; for ( q=0; q<num_inprocessors; ++q ) num_inchords[q] = 0; my_q = -1; for ( q=0; q<num_inprocessors; ++q ) if ( inprocessor[q]==my_id ) my_q = q; hypre_assert( my_q>=0 ); /* diag block: first count chords (from my_id to my_id), then set them from diag block's CSR data structure */ num_idofs = hypre_CSRMatrixNumRows(diag); rdofs = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) { for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(diag)[i]; hypre_NumbersEnter( rdofs, j_local ); ++num_inchords[my_q]; } }; num_rdofs = hypre_NumbersNEntered( rdofs ); inchord_idof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] ); inchord_rdof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] ); inchord_data[my_q] = hypre_CTAlloc( double, num_inchords[my_q] ); chord[0] = 0; for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) { for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(diag)[i]; data = hypre_CSRMatrixData(diag)[i]; inchord_idof[my_q][chord[0]] = row; /* Here We need to convert from j_local - a column local to the diag of this proc., to a j which is local only to this processor - a column (rdof) numbering scheme to be shared by the diag and offd blocks... 
*/ j_global = j_local + hypre_ParCSRMatrixColStarts(Ap)[my_q]; j = j_global - first_index_rdof[my_q]; inchord_rdof[my_q][chord[0]] = j; inchord_data[my_q][chord[0]] = data; hypre_assert( chord[0] < num_inchords[my_q] ); ++chord[0]; } }; hypre_NumbersDeleteNode(rdofs); /* offd block: */ /* >>> offd_cols_me duplicates rdofs */ offd_cols_me = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) { for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(offd)[i]; j_global = col_map_offd[j_local]; hypre_NumbersEnter( offd_cols_me, j_global ); } } offd_col_array = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); len_offd_col_array = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); offd_col_array_me = hypre_NumbersArray( offd_cols_me ); len_offd_col_array_me = hypre_NumbersNEntered( offd_cols_me ); request = hypre_CTAlloc(hypre_MPI_Request, 2*num_procs ); ireq = 0; for ( q=0; q<num_inprocessors; ++q ) hypre_MPI_Irecv( &len_offd_col_array[q], 1, HYPRE_MPI_INT, inprocessor[q], 0, comm, &request[ireq++] ); for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) { hypre_MPI_Isend( &len_offd_col_array_me, 1, HYPRE_MPI_INT, p, 0, comm, &request[ireq++] ); } status = hypre_CTAlloc(hypre_MPI_Status, ireq ); hypre_MPI_Waitall( ireq, request, status ); hypre_TFree(status); ireq = 0; for ( q=0; q<num_inprocessors; ++q ) offd_col_array[q] = hypre_CTAlloc( HYPRE_Int, len_offd_col_array[q] ); for ( q=0; q<num_inprocessors; ++q ) hypre_MPI_Irecv( offd_col_array[q], len_offd_col_array[q], HYPRE_MPI_INT, inprocessor[q], 0, comm, &request[ireq++] ); for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) { hypre_MPI_Isend( offd_col_array_me, len_offd_col_array_me, HYPRE_MPI_INT, p, 0, comm, &request[ireq++] ); } status = hypre_CTAlloc(hypre_MPI_Status, ireq ); hypre_MPI_Waitall( ireq, request, status ); hypre_TFree(request); hypre_TFree(status); offd_cols = hypre_CTAlloc( hypre_NumbersNode *, num_inprocessors ); for ( q=0; 
q<num_inprocessors; ++q ) { offd_cols[q] = hypre_NumbersNewNode(); for ( i=0; i<len_offd_col_array[q]; ++i ) hypre_NumbersEnter( offd_cols[q], offd_col_array[q][i] ); } len_num_rdofs_toprocessor = 1 + hypre_CSRMatrixI(offd) [hypre_CSRMatrixNumRows(offd)]; inproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); toproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); num_rdofs_toprocessor = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); for ( qto=0; qto<len_num_rdofs_toprocessor; ++qto ) { inproc[qto] = -1; toproc[qto] = -1; num_rdofs_toprocessor[qto] = 0; }; rdofs = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) { for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(offd)[i]; j_global = col_map_offd[j_local]; hypre_NumbersEnter( rdofs, j_local ); /* TO DO: find faster ways to do the two processor lookups below.*/ /* Find a processor p (local index q) from the inprocessor list, which owns the column(rdof) whichis the same as this processor's row(idof) row. Update num_inchords for p. Save q as inproc[i] for quick recall later. It represents an inprocessor (not unique) connected to a chord i. */ inproc[i] = -1; for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { p = inprocessor[q]; if ( hypre_NumbersQuery( offd_cols[q], row+hypre_ParCSRMatrixFirstRowIndex(Ap) ) == 1 ) { /* row is one of the offd columns of p */ ++num_inchords[q]; inproc[i] = q; break; } } if ( inproc[i]<0 ) { /* For square matrices, we would have found the column in some other processor's offd. But for non-square matrices it could exist only in some other processor's diag...*/ /* Note that all data in a diag block is stored. We don't check whether the value of a data entry is zero. 
*/ for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { p = inprocessor[q]; row_global = row+hypre_ParCSRMatrixFirstRowIndex(Ap); if ( row_global>=col_starts[p] && row_global< col_starts[p+1] ) { /* row is one of the diag columns of p */ ++num_inchords[q]; inproc[i] = q; break; } } } hypre_assert( inproc[i]>=0 ); /* Find the processor pto (local index qto) from the toprocessor list, which owns the row(idof) which is the same as this processor's column(rdof) j_global. Update num_rdofs_toprocessor for pto. Save pto as toproc[i] for quick recall later. It represents the toprocessor connected to a chord i. */ for ( qto=0; qto<num_toprocessors; ++qto ) { pto = toprocessor[qto]; if ( j_global >= row_starts[pto] && j_global<row_starts[pto+1] ) { hypre_assert( qto < len_num_rdofs_toprocessor ); ++num_rdofs_toprocessor[qto]; /* ... an overestimate, as if two chords share an rdof, that rdof will be counted twice in num_rdofs_toprocessor. It can be fixed up later.*/ toproc[i] = qto; break; } } } }; num_rdofs += hypre_NumbersNEntered(rdofs); hypre_NumbersDeleteNode(rdofs); for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { inchord_idof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] ); inchord_rdof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] ); inchord_data[q] = hypre_CTAlloc( double, num_inchords[q] ); chord[q] = 0; };
HYPRE_Int HYPRE_IJMatrixCreate( MPI_Comm comm, HYPRE_Int ilower, HYPRE_Int iupper, HYPRE_Int jlower, HYPRE_Int jupper, HYPRE_IJMatrix *matrix ) { HYPRE_Int *row_partitioning; HYPRE_Int *col_partitioning; HYPRE_Int *info; HYPRE_Int num_procs; HYPRE_Int myid; hypre_IJMatrix *ijmatrix; #ifdef HYPRE_NO_GLOBAL_PARTITION HYPRE_Int row0, col0, rowN, colN; #else HYPRE_Int *recv_buf; HYPRE_Int i, i4; HYPRE_Int square; #endif ijmatrix = hypre_CTAlloc(hypre_IJMatrix, 1); hypre_IJMatrixComm(ijmatrix) = comm; hypre_IJMatrixObject(ijmatrix) = NULL; hypre_IJMatrixTranslator(ijmatrix) = NULL; hypre_IJMatrixObjectType(ijmatrix) = HYPRE_UNITIALIZED; hypre_IJMatrixAssembleFlag(ijmatrix) = 0; hypre_IJMatrixPrintLevel(ijmatrix) = 0; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm, &myid); if (ilower > iupper+1 || ilower < 0) { hypre_error_in_arg(2); hypre_TFree(ijmatrix); return hypre_error_flag; } if (iupper < -1) { hypre_error_in_arg(3); hypre_TFree(ijmatrix); return hypre_error_flag; } if (jlower > jupper+1 || jlower < 0) { hypre_error_in_arg(4); hypre_TFree(ijmatrix); return hypre_error_flag; } if (jupper < -1) { hypre_error_in_arg(5); hypre_TFree(ijmatrix); return hypre_error_flag; } #ifdef HYPRE_NO_GLOBAL_PARTITION info = hypre_CTAlloc(HYPRE_Int,2); row_partitioning = hypre_CTAlloc(HYPRE_Int, 2); col_partitioning = hypre_CTAlloc(HYPRE_Int, 2); row_partitioning[0] = ilower; row_partitioning[1] = iupper+1; col_partitioning[0] = jlower; col_partitioning[1] = jupper+1; /* now we need the global number of rows and columns as well as the global first row and column index */ /* proc 0 has the first row and col */ if (myid==0) { info[0] = ilower; info[1] = jlower; } hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, 0, comm); row0 = info[0]; col0 = info[1]; /* proc (num_procs-1) has the last row and col */ if (myid == (num_procs-1)) { info[0] = iupper; info[1] = jupper; } hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, num_procs-1, comm); rowN = info[0]; colN = info[1]; 
hypre_IJMatrixGlobalFirstRow(ijmatrix) = row0; hypre_IJMatrixGlobalFirstCol(ijmatrix) = col0; hypre_IJMatrixGlobalNumRows(ijmatrix) = rowN - row0 + 1; hypre_IJMatrixGlobalNumCols(ijmatrix) = colN - col0 + 1; hypre_TFree(info); #else info = hypre_CTAlloc(HYPRE_Int,4); recv_buf = hypre_CTAlloc(HYPRE_Int,4*num_procs); row_partitioning = hypre_CTAlloc(HYPRE_Int, num_procs+1); info[0] = ilower; info[1] = iupper; info[2] = jlower; info[3] = jupper; /* Generate row- and column-partitioning through information exchange across all processors, check whether the matrix is square, and if the partitionings match. i.e. no overlaps or gaps, if there are overlaps or gaps in the row partitioning or column partitioning , ierr will be set to -9 or -10, respectively */ hypre_MPI_Allgather(info,4,HYPRE_MPI_INT,recv_buf,4,HYPRE_MPI_INT,comm); row_partitioning[0] = recv_buf[0]; square = 1; for (i=0; i < num_procs-1; i++) { i4 = 4*i; if ( recv_buf[i4+1] != (recv_buf[i4+4]-1) ) { hypre_error(HYPRE_ERROR_GENERIC); hypre_TFree(ijmatrix); hypre_TFree(info); hypre_TFree(recv_buf); hypre_TFree(row_partitioning); return hypre_error_flag; } else row_partitioning[i+1] = recv_buf[i4+4]; if ((square && (recv_buf[i4] != recv_buf[i4+2])) || (recv_buf[i4+1] != recv_buf[i4+3]) ) { square = 0; } } i4 = (num_procs-1)*4; row_partitioning[num_procs] = recv_buf[i4+1]+1; if ((recv_buf[i4] != recv_buf[i4+2]) || (recv_buf[i4+1] != recv_buf[i4+3])) square = 0; if (square) col_partitioning = row_partitioning; else { col_partitioning = hypre_CTAlloc(HYPRE_Int,num_procs+1); col_partitioning[0] = recv_buf[2]; for (i=0; i < num_procs-1; i++) { i4 = 4*i; if (recv_buf[i4+3] != recv_buf[i4+6]-1) { hypre_error(HYPRE_ERROR_GENERIC); hypre_TFree(ijmatrix); hypre_TFree(info); hypre_TFree(recv_buf); hypre_TFree(row_partitioning); hypre_TFree(col_partitioning); return hypre_error_flag; } else col_partitioning[i+1] = recv_buf[i4+6]; } col_partitioning[num_procs] = recv_buf[num_procs*4-1]+1; } 
hypre_IJMatrixGlobalFirstRow(ijmatrix) = row_partitioning[0]; hypre_IJMatrixGlobalFirstCol(ijmatrix) = col_partitioning[0]; hypre_IJMatrixGlobalNumRows(ijmatrix) = row_partitioning[num_procs] - row_partitioning[0]; hypre_IJMatrixGlobalNumCols(ijmatrix) = col_partitioning[num_procs] - col_partitioning[0]; hypre_TFree(info); hypre_TFree(recv_buf); #endif hypre_IJMatrixRowPartitioning(ijmatrix) = row_partitioning; hypre_IJMatrixColPartitioning(ijmatrix) = col_partitioning; *matrix = (HYPRE_IJMatrix) ijmatrix; return hypre_error_flag; }