HYPRE_Int hypre_ParCSRMatrix_dof_func_offd( hypre_ParCSRMatrix *A, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int **dof_func_offd ) { hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_ParCSRCommHandle *comm_handle; hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int num_cols_offd = 0; HYPRE_Int Solve_err_flag = 0; HYPRE_Int num_sends; HYPRE_Int *int_buf_data; HYPRE_Int index, start, i, j; num_cols_offd = hypre_CSRMatrixNumCols(A_offd); *dof_func_offd = NULL; if (num_cols_offd) { if (num_functions > 1) *dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); } /*------------------------------------------------------------------- * Get the dof_func data for the off-processor columns *-------------------------------------------------------------------*/ if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); if (num_functions > 1) { int_buf_data = hypre_CTAlloc(HYPRE_Int,hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) int_buf_data[index++] = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, *dof_func_offd); hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(int_buf_data); } return(Solve_err_flag); }
/* Based on hypre_ParCSRMatrixMatvec in par_csr_matvec.c */ void hypre_ParCSRMatrixBooleanMatvec(hypre_ParCSRMatrix *A, HYPRE_Bool alpha, HYPRE_Bool *x, HYPRE_Bool beta, HYPRE_Bool *y) { hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd); HYPRE_Int num_sends, i, j, index; HYPRE_Bool *x_tmp, *x_buf; x_tmp = hypre_CTAlloc(HYPRE_Bool, num_cols_offd); /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); x_buf = hypre_CTAlloc(HYPRE_Bool, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { j = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for ( ; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { x_buf[index++] = x[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)]; } } comm_handle = hypre_ParCSRCommHandleCreate_bool(1, comm_pkg, x_buf, x_tmp); hypre_CSRMatrixBooleanMatvec(diag, alpha, x, beta, y); hypre_ParCSRCommHandleDestroy(comm_handle); if (num_cols_offd) { hypre_CSRMatrixBooleanMatvec(offd, alpha, x_tmp, 1, y); } hypre_TFree(x_buf); hypre_TFree(x_tmp); }
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { HYPRE_Int num_procs, myid; HYPRE_Int verbose = 0, build_matrix_type = 1; HYPRE_Int index, matrix_arg_index, commpkg_flag=3; HYPRE_Int i, k, ierr=0; HYPRE_Int row_start, row_end; HYPRE_Int col_start, col_end, global_num_rows; HYPRE_Int *row_part, *col_part; char *csrfilename; HYPRE_Int preload = 0, loop = 0, loop2 = LOOP2; HYPRE_Int bcast_rows[2], *info; hypre_ParCSRMatrix *parcsr_A, *small_A; HYPRE_ParCSRMatrix A_temp, A_temp_small; hypre_CSRMatrix *A_CSR; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int nx, ny, nz; HYPRE_Int P, Q, R; HYPRE_Int p, q, r; HYPRE_Real values[4]; hypre_ParVector *x_new; hypre_ParVector *y_new, *y; HYPRE_Int *row_starts; HYPRE_Real ans; HYPRE_Real start_time, end_time, total_time, *loop_times; HYPRE_Real T_avg, T_std; HYPRE_Int noparmprint = 0; #if mydebug HYPRE_Int j, tmp_int; #endif /*----------------------------------------------------------- * Initialize MPI *-----------------------------------------------------------*/ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs ); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid ); /*----------------------------------------------------------- * default - is 27pt laplace *-----------------------------------------------------------*/ build_matrix_type = 2; matrix_arg_index = argc; /*----------------------------------------------------------- * Parse command line *-----------------------------------------------------------*/ index = 1; while ( index < argc) { if ( strcmp(argv[index], "-verbose") == 0 ) { index++; verbose = 1; } else if ( strcmp(argv[index], "-fromonecsrfile") == 0 ) { index++; build_matrix_type = 1; matrix_arg_index = index; /*this tells where the name is*/ } else if ( strcmp(argv[index], "-commpkg") == 0 ) { index++; commpkg_flag = atoi(argv[index++]); } else if ( strcmp(argv[index], "-laplacian") == 0 ) { index++; build_matrix_type = 2; matrix_arg_index = index; } else if ( strcmp(argv[index], "-27pt") == 0 ) { index++; build_matrix_type = 4; matrix_arg_index = index; } /* else if ( strcmp(argv[index], "-nopreload") == 0 ) { index++; preload = 0; } */ else if ( strcmp(argv[index], "-loop") == 0 ) { index++; loop = atoi(argv[index++]); } else if ( strcmp(argv[index], "-noparmprint") == 0 ) { index++; noparmprint = 1; } else { index++; /*hypre_printf("Warning: Unrecogized option '%s'\n",argv[index++] );*/ } } /*----------------------------------------------------------- * Setup the Matrix problem *-----------------------------------------------------------*/ /*----------------------------------------------------------- * Get actual partitioning- * read in an actual csr matrix. *-----------------------------------------------------------*/ if (build_matrix_type ==1) /*read in a csr matrix from one file */ { if (matrix_arg_index < argc) { csrfilename = argv[matrix_arg_index]; } else { hypre_printf("Error: No filename specified \n"); exit(1); } if (myid == 0) { /*hypre_printf(" FromFile: %s\n", csrfilename);*/ A_CSR = hypre_CSRMatrixRead(csrfilename); } row_part = NULL; col_part = NULL; parcsr_A = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, A_CSR, row_part, col_part); if (myid == 0) hypre_CSRMatrixDestroy(A_CSR); } else if (build_matrix_type ==2) { myBuildParLaplacian(argc, argv, matrix_arg_index, &A_temp, !noparmprint); parcsr_A = (hypre_ParCSRMatrix *) A_temp; } else if (build_matrix_type ==4) { myBuildParLaplacian27pt(argc, argv, matrix_arg_index, &A_temp, !noparmprint); parcsr_A = (hypre_ParCSRMatrix *) A_temp; } /*----------------------------------------------------------- * create a small problem so that timings are more accurate - * code gets run twice (small laplace) *-----------------------------------------------------------*/ /*this is no longer being used - preload = 0 is set at the beginning */ if (preload == 1) { /*hypre_printf("preload!\n");*/ values[1] = -1; values[2] = -1; values[3] = -1; values[0] = - 6.0 ; nx = 2; ny = num_procs; nz = 2; P = 1; Q = num_procs; R = 1; p = myid % P; q = (( myid - p)/P) % Q; r = ( myid - p - P*q)/( P*Q ); A_temp_small = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD, nx, ny, nz, P, Q, R, p, q, r, values); small_A = (hypre_ParCSRMatrix *) A_temp_small; /*do comm packages*/ hypre_NewCommPkgCreate(small_A); hypre_NewCommPkgDestroy(small_A); hypre_MatvecCommPkgCreate(small_A); hypre_ParCSRMatrixDestroy(small_A); } /*----------------------------------------------------------- * Prepare for timing *-----------------------------------------------------------*/ /* instead of preloading, let's not time the first one if more than one*/ if (!loop) { loop = 1; /* and don't do any timings */ } else { loop +=1; if (loop < 2) loop = 2; } loop_times = hypre_CTAlloc(HYPRE_Real, loop); /******************************************************************************************/ hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); if (commpkg_flag == 1 || commpkg_flag ==3 ) { /*----------------------------------------------------------- * Create new comm package *-----------------------------------------------------------*/ if (!myid) hypre_printf("********************************************************\n" ); /*do loop times*/ for (i=0; i< loop; i++) { loop_times[i] = 0.0; for (k=0; k< loop2; k++) { hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); start_time = hypre_MPI_Wtime(); #if mpip_on if (i==(loop-1)) hypre_MPI_Pcontrol(1); #endif hypre_NewCommPkgCreate(parcsr_A); #if mpip_on if (i==(loop-1)) hypre_MPI_Pcontrol(0); #endif end_time = hypre_MPI_Wtime(); end_time = end_time - start_time; hypre_MPI_Allreduce(&end_time, &total_time, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD); loop_times[i] += total_time; if ( !((i+1)== loop && (k+1) == loop2)) hypre_NewCommPkgDestroy(parcsr_A); }/*end of loop2 */ } /*end of loop*/ /* calculate the avg and std. */ if (loop > 1) { /* calculate the avg and std. */ stats_mo(loop_times, loop, &T_avg, &T_std); if (!myid) hypre_printf(" NewCommPkgCreate: AVG. wall clock time = %f seconds\n", T_avg); if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std); if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n"); if (!myid) hypre_printf("********************************************************\n" ); for (i=0; i< loop; i++) { if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]); } if (!myid) hypre_printf("********************************************************\n" ); } else { if (!myid) hypre_printf("********************************************************\n" ); if (!myid) hypre_printf(" NewCommPkgCreate:\n"); if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]); if (!myid) hypre_printf("********************************************************\n" ); } /*----------------------------------------------------------- * Verbose printing *-----------------------------------------------------------*/ /*some verification*/ global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); if (verbose) { ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , &col_start, &col_end ); comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A); hypre_printf("myid = %i, my ACTUAL local range: [%i, %i]\n", myid, row_start, row_end); ierr = hypre_GetAssumedPartitionRowRange( myid, global_num_rows, &row_start, &row_end); hypre_printf("myid = %i, my assumed local range: [%i, %i]\n", myid, row_start, row_end); hypre_printf("myid = %d, num_recvs = %d\n", myid, hypre_ParCSRCommPkgNumRecvs(comm_pkg) ); #if mydebug for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) { hypre_printf("myid = %d, recv proc = %d, vec_starts = [%d : %d]\n", myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1); } #endif hypre_printf("myid = %d, num_sends = %d\n", myid, hypre_ParCSRCommPkgNumSends(comm_pkg) ); #if mydebug for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) { tmp_int = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] - hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; for (j=0; j< tmp_int; j++) { hypre_printf("myid = %d, send proc = %d, send element = %d\n",myid, hypre_ParCSRCommPkgSendProcs(comm_pkg)[i], hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); } } #endif } /*----------------------------------------------------------- * To verify correctness (if commpkg_flag = 3) *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { /*do a matvec - we are assuming a square matrix */ row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A); x_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts); hypre_ParVectorSetPartitioningOwner(x_new, 0); hypre_ParVectorInitialize(x_new); hypre_ParVectorSetRandomValues(x_new, 1); y_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts); hypre_ParVectorSetPartitioningOwner(y_new, 0); hypre_ParVectorInitialize(y_new); hypre_ParVectorSetConstantValues(y_new, 0.0); /*y = 1.0*A*x+1.0*y */ hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y_new); } /*----------------------------------------------------------- * Clean up after MyComm *-----------------------------------------------------------*/ hypre_NewCommPkgDestroy(parcsr_A); } /******************************************************************************************/ /******************************************************************************************/ hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); if (commpkg_flag > 1 ) { /*----------------------------------------------------------- * Set up standard comm package *-----------------------------------------------------------*/ bcast_rows[0] = 23; bcast_rows[1] = 1789; if (!myid) hypre_printf("********************************************************\n" ); /*do loop times*/ for (i=0; i< loop; i++) { loop_times[i] = 0.0; for (k=0; k< loop2; k++) { hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); start_time = hypre_MPI_Wtime(); #if time_gather info = hypre_CTAlloc(HYPRE_Int, num_procs); hypre_MPI_Allgather(bcast_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hypre_MPI_COMM_WORLD); #endif hypre_MatvecCommPkgCreate(parcsr_A); end_time = hypre_MPI_Wtime(); end_time = end_time - start_time; hypre_MPI_Allreduce(&end_time, &total_time, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD); loop_times[i] += total_time; if ( !((i+1)== loop && (k+1) == loop2)) hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(parcsr_A)); }/* end of loop 2*/ } /*end of loop*/ /* calculate the avg and std. */ if (loop > 1) { stats_mo(loop_times, loop, &T_avg, &T_std); if (!myid) hypre_printf("Current CommPkgCreate: AVG. wall clock time = %f seconds\n", T_avg); if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std); if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n"); if (!myid) hypre_printf("********************************************************\n" ); for (i=0; i< loop; i++) { if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]); } if (!myid) hypre_printf("********************************************************\n" ); } else { if (!myid) hypre_printf("********************************************************\n" ); if (!myid) hypre_printf(" Current CommPkgCreate:\n"); if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]); if (!myid) hypre_printf("********************************************************\n" ); } /*----------------------------------------------------------- * Verbose printing *-----------------------------------------------------------*/ /*some verification*/ if (verbose) { ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , &col_start, &col_end ); comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A); hypre_printf("myid = %i, std - my local range: [%i, %i]\n", myid, row_start, row_end); ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , &col_start, &col_end ); hypre_printf("myid = %d, std - num_recvs = %d\n", myid, hypre_ParCSRCommPkgNumRecvs(comm_pkg) ); #if mydebug for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) { hypre_printf("myid = %d, std - recv proc = %d, vec_starts = [%d : %d]\n", myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1); } #endif hypre_printf("myid = %d, std - num_sends = %d\n", myid, hypre_ParCSRCommPkgNumSends(comm_pkg)); #if mydebug for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) { tmp_int = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] - hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; for (j=0; j< tmp_int; j++) { hypre_printf("myid = %d, std - send proc = %d, send element = %d\n",myid, hypre_ParCSRCommPkgSendProcs(comm_pkg)[i], hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); } } #endif } /*----------------------------------------------------------- * Verify correctness *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A); y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows,row_starts); hypre_ParVectorSetPartitioningOwner(y, 0); hypre_ParVectorInitialize(y); hypre_ParVectorSetConstantValues(y, 0.0); hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y); } } /*----------------------------------------------------------- * Compare matvecs for both comm packages (3) *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { /*make sure that y and y_new are the same - now y_new should=0*/ hypre_ParVectorAxpy( -1.0, y, y_new ); hypre_ParVectorSetRandomValues(y, 1); ans = hypre_ParVectorInnerProd( y, y_new ); if (!myid) { if ( fabs(ans) > 1e-8 ) { hypre_printf("!!!!! WARNING !!!!! should be zero if correct = %6.10f\n", ans); } else { hypre_printf("Matvecs match ( should be zero = %6.10f )\n", ans); } } } /*----------------------------------------------------------- * Clean up *-----------------------------------------------------------*/ hypre_ParCSRMatrixDestroy(parcsr_A); /*this calls the standard comm package destroy - but we'll destroy ours separately until it is incorporated */ if (commpkg_flag == 3 ) { hypre_ParVectorDestroy(x_new); hypre_ParVectorDestroy(y); hypre_ParVectorDestroy(y_new); } hypre_MPI_Finalize(); return(ierr); }
HYPRE_Int hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix *A, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int option, HYPRE_Int diag_option, hypre_ParCSRMatrix **AN_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); double *A_diag_data = hypre_CSRMatrixData(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); double *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(A); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int num_variables = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int num_nonzeros_offd = 0; HYPRE_Int num_cols_offd = 0; hypre_ParCSRMatrix *AN; hypre_CSRMatrix *AN_diag; HYPRE_Int *AN_diag_i; HYPRE_Int *AN_diag_j; double *AN_diag_data; hypre_CSRMatrix *AN_offd; HYPRE_Int *AN_offd_i; HYPRE_Int *AN_offd_j; double *AN_offd_data; HYPRE_Int *col_map_offd_AN; HYPRE_Int *new_col_map_offd; HYPRE_Int *row_starts_AN; HYPRE_Int AN_num_nonzeros_diag = 0; HYPRE_Int AN_num_nonzeros_offd = 0; HYPRE_Int num_cols_offd_AN; HYPRE_Int new_num_cols_offd; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); HYPRE_Int num_sends; HYPRE_Int num_recvs; HYPRE_Int *send_procs; HYPRE_Int *send_map_starts; HYPRE_Int *send_map_elmts; HYPRE_Int *new_send_map_elmts; HYPRE_Int *recv_procs; HYPRE_Int *recv_vec_starts; hypre_ParCSRCommPkg *comm_pkg_AN; HYPRE_Int *send_procs_AN; HYPRE_Int *send_map_starts_AN; HYPRE_Int *send_map_elmts_AN; HYPRE_Int *recv_procs_AN; HYPRE_Int *recv_vec_starts_AN; HYPRE_Int i, j, k, k_map; HYPRE_Int ierr = 0; HYPRE_Int index, row; HYPRE_Int start_index; HYPRE_Int num_procs; HYPRE_Int node, cnt; HYPRE_Int mode; HYPRE_Int new_send_elmts_size; HYPRE_Int global_num_nodes; HYPRE_Int num_nodes; HYPRE_Int num_fun2; HYPRE_Int *map_to_node; HYPRE_Int *map_to_map; HYPRE_Int *counter; double sum; double *data; hypre_MPI_Comm_size(comm,&num_procs); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } mode = fabs(option); comm_pkg_AN = NULL; col_map_offd_AN = NULL; #ifdef HYPRE_NO_GLOBAL_PARTITION row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2); for (i=0; i < 2; i++) { row_starts_AN[i] = row_starts[i]/num_functions; if (row_starts_AN[i]*num_functions < row_starts[i]) { hypre_printf("nodes not properly aligned or incomplete info!\n"); return (87); } } global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions; #else row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1); for (i=0; i < num_procs+1; i++) { row_starts_AN[i] = row_starts[i]/num_functions; if (row_starts_AN[i]*num_functions < row_starts[i]) { hypre_printf("nodes not properly aligned or incomplete info!\n"); return (87); } } global_num_nodes = row_starts_AN[num_procs]; #endif num_nodes = num_variables/num_functions; num_fun2 = num_functions*num_functions; map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables); AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); counter = hypre_CTAlloc(HYPRE_Int, num_nodes); for (i=0; i < num_variables; i++) map_to_node[i] = i/num_functions; for (i=0; i < num_nodes; i++) counter[i] = -1; AN_num_nonzeros_diag = 0; row = 0; for (i=0; i < num_nodes; i++) { AN_diag_i[i] = AN_num_nonzeros_diag; for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < i) { counter[k_map] = i; AN_num_nonzeros_diag++; } } row++; } } AN_diag_i[num_nodes] = AN_num_nonzeros_diag; AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag); AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag); AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag); hypre_CSRMatrixI(AN_diag) = AN_diag_i; hypre_CSRMatrixJ(AN_diag) = AN_diag_j; hypre_CSRMatrixData(AN_diag) = AN_diag_data; for (i=0; i < num_nodes; i++) counter[i] = -1; index = 0; start_index = 0; row = 0; switch (mode) { case 1: /* frobenius norm */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = A_diag_data[k]*A_diag_data[k]; index++; } else { AN_diag_data[counter[k_map]] += A_diag_data[k]*A_diag_data[k]; } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) AN_diag_data[i] = sqrt(AN_diag_data[i]); } break; case 2: /* sum of abs. value of all elements in each block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = fabs(A_diag_data[k]); index++; } else { AN_diag_data[counter[k_map]] += fabs(A_diag_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) AN_diag_data[i] /= num_fun2; } break; case 3: /* largest element of each block (sets true value - not abs. value) */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = A_diag_data[k]; index++; } else { if (fabs(A_diag_data[k]) > fabs(AN_diag_data[counter[k_map]])) AN_diag_data[counter[k_map]] = A_diag_data[k]; } } row++; } start_index = index; } } break; case 4: /* inf. norm (row-sum) */ { data = hypre_CTAlloc(double, AN_num_nonzeros_diag*num_functions); for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; data[index*num_functions + j] = fabs(A_diag_data[k]); index++; } else { data[(counter[k_map])*num_functions + j] += fabs(A_diag_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) { AN_diag_data[i] = data[i*num_functions]; for (j=1; j< num_functions; j++) { AN_diag_data[i] = hypre_max( AN_diag_data[i],data[i*num_functions+j]); } } hypre_TFree(data); } break; case 6: /* sum of all elements in each block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = (A_diag_data[k]); index++; } else { AN_diag_data[counter[k_map]] += (A_diag_data[k]); } } row++; } start_index = index; } } break; } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (DO MORE BELOW) */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; sum = 0.0; for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++) { sum += AN_diag_data[k]; } AN_diag_data[index] = -sum; } } else if (diag_option == 2) { /* make all diagonal entries negative */ /* the diagonal is the first element listed in each row - */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; AN_diag_data[index] = - AN_diag_data[index]; } } num_nonzeros_offd = A_offd_i[num_variables]; AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); num_cols_offd_AN = 0; if (comm_pkg) { comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm; num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends; num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs; send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); send_procs_AN = NULL; send_map_elmts_AN = NULL; if (num_sends) { send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends); send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]); } send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1); recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1); recv_procs_AN = NULL; if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs); for (i=0; i < num_sends; i++) send_procs_AN[i] = send_procs[i]; for (i=0; i < num_recvs; i++) recv_procs_AN[i] = recv_procs[i]; send_map_starts_AN[0] = 0; cnt = 0; for (i=0; i < num_sends; i++) { k_map = send_map_starts[i]; if (send_map_starts[i+1]-k_map) send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions; for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++) { node = send_map_elmts[j]/num_functions; if (node > send_map_elmts_AN[cnt-1]) send_map_elmts_AN[cnt++] = node; } send_map_starts_AN[i+1] = cnt; } hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN; hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN; hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN; hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN; hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN; } num_cols_offd = hypre_CSRMatrixNumCols(A_offd); if (num_cols_offd) { if (num_cols_offd > num_variables) { hypre_TFree(map_to_node); map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd); } num_cols_offd_AN = 1; map_to_node[0] = col_map_offd[0]/num_functions; for (i=1; i < num_cols_offd; i++) { map_to_node[i] = col_map_offd[i]/num_functions; if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++; } if (num_cols_offd_AN > num_nodes) { hypre_TFree(counter); counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN); } map_to_map = NULL; col_map_offd_AN = NULL; map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd); col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN); col_map_offd_AN[0] = map_to_node[0]; recv_vec_starts_AN[0] = 0; cnt = 1; for (i=0; i < num_recvs; i++) { for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++) { node = map_to_node[j]; if (node > col_map_offd_AN[cnt-1]) { col_map_offd_AN[cnt++] = node; } map_to_map[j] = cnt-1; } recv_vec_starts_AN[i+1] = cnt; } for (i=0; i < num_cols_offd_AN; i++) counter[i] = -1; AN_num_nonzeros_offd = 0; row = 0; for (i=0; i < num_nodes; i++) { AN_offd_i[i] = AN_num_nonzeros_offd; for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < i) { counter[k_map] = i; AN_num_nonzeros_offd++; } } row++; } } AN_offd_i[num_nodes] = AN_num_nonzeros_offd; } AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN, AN_num_nonzeros_offd); hypre_CSRMatrixI(AN_offd) = AN_offd_i; if (AN_num_nonzeros_offd) { AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd); AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd); hypre_CSRMatrixJ(AN_offd) = AN_offd_j; hypre_CSRMatrixData(AN_offd) = AN_offd_data; for (i=0; i < num_cols_offd_AN; i++) counter[i] = -1; index = 0; row = 0; AN_offd_i[0] = 0; start_index = 0; switch (mode) { case 1: /* frobenius norm */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = A_offd_data[k]*A_offd_data[k]; index++; } else { AN_offd_data[counter[k_map]] += A_offd_data[k]*A_offd_data[k]; } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) AN_offd_data[i] = sqrt(AN_offd_data[i]); } break; case 2: /* sum of abs. value of all elements in block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = fabs(A_offd_data[k]); index++; } else { AN_offd_data[counter[k_map]] += fabs(A_offd_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) AN_offd_data[i] /= num_fun2; } break; case 3: /* largest element in each block (not abs. value ) */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = A_offd_data[k]; index++; } else { if (fabs(A_offd_data[k]) > fabs(AN_offd_data[counter[k_map]])) AN_offd_data[counter[k_map]] = A_offd_data[k]; } } row++; } start_index = index; } } break; case 4: /* inf. norm (row-sum) */ { data = hypre_CTAlloc(double, AN_num_nonzeros_offd*num_functions); for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; data[index*num_functions + j] = fabs(A_offd_data[k]); index++; } else { data[(counter[k_map])*num_functions + j] += fabs(A_offd_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) { AN_offd_data[i] = data[i*num_functions]; for (j=1; j< num_functions; j++) { AN_offd_data[i] = hypre_max( AN_offd_data[i],data[i*num_functions+j]); } } hypre_TFree(data); } break; case 6: /* sum of value of all elements in block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = (A_offd_data[k]); index++; } else { AN_offd_data[counter[k_map]] += (A_offd_data[k]); } } row++; } start_index = index; } } break; } hypre_TFree(map_to_map); } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (here we are adding the off_diag contribution)*/ /* the diagonal is the first element listed in each row of AN_diag_data - */ for (i=0; i < num_nodes; i++) { sum = 0.0; for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++) { sum += AN_offd_data[k]; } index = AN_diag_i[i];/* location of diag entry in data */ AN_diag_data[index] -= sum; /* subtract from current value */ } } AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes, row_starts_AN, row_starts_AN, num_cols_offd_AN, AN_num_nonzeros_diag, AN_num_nonzeros_offd); /* we already created the diag and offd matrices - so we don't need the ones created above */ hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN)); hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN)); hypre_ParCSRMatrixDiag(AN) = AN_diag; hypre_ParCSRMatrixOffd(AN) = AN_offd; hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN; hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN; new_num_cols_offd = num_functions*num_cols_offd_AN; if (new_num_cols_offd > num_cols_offd) { new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd); cnt = 0; for (i=0; i < num_cols_offd_AN; i++) { for (j=0; j < num_functions; j++) { new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j; } } cnt = 0; for (i=0; i < num_cols_offd; i++) { while (col_map_offd[i] > new_col_map_offd[cnt]) cnt++; col_map_offd[i] = cnt++; } for (i=0; i < num_recvs+1; i++) { recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i]; } for (i=0; i < num_nonzeros_offd; i++) { j = A_offd_j[i]; A_offd_j[i] = col_map_offd[j]; } hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd; hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd; hypre_TFree(col_map_offd); } hypre_TFree(map_to_node); new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions; if (new_send_elmts_size > send_map_starts[num_sends]) { new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size); cnt = 0; send_map_starts[0] = 0; for (i=0; i < num_sends; i++) { send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions; for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++) { for (k=0; k < num_functions; k++) new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k; } } hypre_TFree(send_map_elmts); hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts; } *AN_ptr = AN; hypre_TFree(counter); return (ierr); }
/*-------------------------------------------------------------------------- * hypre_ParCSRMatrixMatvec_FF *--------------------------------------------------------------------------*/ HYPRE_Int hypre_ParCSRMatrixMatvec_FF( HYPRE_Complex alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y, HYPRE_Int *CF_marker, HYPRE_Int fpt ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); HYPRE_Int num_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int num_cols = hypre_ParCSRMatrixGlobalNumCols(A); hypre_Vector *x_tmp; HYPRE_Int x_size = hypre_ParVectorGlobalSize(x); HYPRE_Int y_size = hypre_ParVectorGlobalSize(y); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd); HYPRE_Int ierr = 0; HYPRE_Int num_sends, i, j, index, start, num_procs; HYPRE_Int *int_buf_data = NULL; HYPRE_Int *CF_marker_offd = NULL; HYPRE_Complex *x_tmp_data = NULL; HYPRE_Complex *x_buf_data = NULL; HYPRE_Complex *x_local_data = hypre_VectorData(x_local); /*--------------------------------------------------------------------- * Check for size compatibility. ParMatvec returns ierr = 11 if * length of X doesn't equal the number of columns of A, * ierr = 12 if the length of Y doesn't equal the number of rows * of A, and ierr = 13 if both are true. * * Because temporary vectors are often used in ParMatvec, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ hypre_MPI_Comm_size(comm,&num_procs); if (num_cols != x_size) ierr = 11; if (num_rows != y_size) ierr = 12; if (num_cols != x_size && num_rows != y_size) ierr = 13; if (num_procs > 1) { if (num_cols_offd) { x_tmp = hypre_SeqVectorCreate( num_cols_offd ); hypre_SeqVectorInitialize(x_tmp); x_tmp_data = hypre_VectorData(x_tmp); } /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); if (num_sends) x_buf_data = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart (comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) x_buf_data[index++] = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data, x_tmp_data ); } hypre_CSRMatrixMatvec_FF( alpha, diag, x_local, beta, y_local, CF_marker, CF_marker, fpt); if (num_procs > 1) { hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; if (num_sends) int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart (comm_pkg, num_sends)); if (num_cols_offd) CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) int_buf_data[index++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate(11,comm_pkg,int_buf_data,CF_marker_offd ); hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; if (num_cols_offd) hypre_CSRMatrixMatvec_FF( alpha, offd, x_tmp, 1.0, y_local, CF_marker, CF_marker_offd, fpt); hypre_SeqVectorDestroy(x_tmp); x_tmp = NULL; hypre_TFree(x_buf_data); hypre_TFree(int_buf_data); hypre_TFree(CF_marker_offd); } return ierr; }
HYPRE_Int hypre_ParCSRMatrixMatvec( HYPRE_Complex alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y ) { hypre_ParCSRCommHandle **comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); HYPRE_Int num_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int num_cols = hypre_ParCSRMatrixGlobalNumCols(A); hypre_Vector *x_tmp; HYPRE_Int x_size = hypre_ParVectorGlobalSize(x); HYPRE_Int y_size = hypre_ParVectorGlobalSize(y); HYPRE_Int num_vectors = hypre_VectorNumVectors(x_local); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd); HYPRE_Int ierr = 0; HYPRE_Int num_sends, i, j, jv, index, start; HYPRE_Int vecstride = hypre_VectorVectorStride( x_local ); HYPRE_Int idxstride = hypre_VectorIndexStride( x_local ); HYPRE_Complex *x_tmp_data, **x_buf_data; HYPRE_Complex *x_local_data = hypre_VectorData(x_local); /*--------------------------------------------------------------------- * Check for size compatibility. ParMatvec returns ierr = 11 if * length of X doesn't equal the number of columns of A, * ierr = 12 if the length of Y doesn't equal the number of rows * of A, and ierr = 13 if both are true. * * Because temporary vectors are often used in ParMatvec, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ hypre_assert( idxstride>0 ); if (num_cols != x_size) ierr = 11; if (num_rows != y_size) ierr = 12; if (num_cols != x_size && num_rows != y_size) ierr = 13; hypre_assert( hypre_VectorNumVectors(y_local)==num_vectors ); if ( num_vectors==1 ) x_tmp = hypre_SeqVectorCreate( num_cols_offd ); else { hypre_assert( num_vectors>1 ); x_tmp = hypre_SeqMultiVectorCreate( num_cols_offd, num_vectors ); } hypre_SeqVectorInitialize(x_tmp); x_tmp_data = hypre_VectorData(x_tmp); comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors); /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); x_buf_data = hypre_CTAlloc( HYPRE_Complex*, num_vectors ); for ( jv=0; jv<num_vectors; ++jv ) x_buf_data[jv] = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart (comm_pkg, num_sends)); if ( num_vectors==1 ) { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) x_buf_data[0][index++] = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } } else for ( jv=0; jv<num_vectors; ++jv ) { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) x_buf_data[jv][index++] = x_local_data[ jv*vecstride + idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j) ]; } } hypre_assert( idxstride==1 ); /* ... The assert is because the following loop only works for 'column' storage of a multivector. This needs to be fixed to work more generally, at least for 'row' storage. This in turn, means either change CommPkg so num_sends is no.zones*no.vectors (not no.zones) or, less dangerously, put a stride in the logic of CommHandleCreate (stride either from a new arg or a new variable inside CommPkg). Or put the num_vector iteration inside CommHandleCreate (perhaps a new multivector variant of it). */ for ( jv=0; jv<num_vectors; ++jv ) { comm_handle[jv] = hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data[jv], &(x_tmp_data[jv*num_cols_offd]) ); } hypre_CSRMatrixMatvec( alpha, diag, x_local, beta, y_local); for ( jv=0; jv<num_vectors; ++jv ) { hypre_ParCSRCommHandleDestroy(comm_handle[jv]); comm_handle[jv] = NULL; } hypre_TFree(comm_handle); if (num_cols_offd) hypre_CSRMatrixMatvec( alpha, offd, x_tmp, 1.0, y_local); hypre_SeqVectorDestroy(x_tmp); x_tmp = NULL; for ( jv=0; jv<num_vectors; ++jv ) hypre_TFree(x_buf_data[jv]); hypre_TFree(x_buf_data); return ierr; }
HYPRE_Int hypre_ParCSRMatrixMatvecT( HYPRE_Complex alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y ) { hypre_ParCSRCommHandle **comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); hypre_Vector *y_tmp; HYPRE_Int vecstride = hypre_VectorVectorStride( y_local ); HYPRE_Int idxstride = hypre_VectorIndexStride( y_local ); HYPRE_Complex *y_tmp_data, **y_buf_data; HYPRE_Complex *y_local_data = hypre_VectorData(y_local); HYPRE_Int num_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int num_cols = hypre_ParCSRMatrixGlobalNumCols(A); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd); HYPRE_Int x_size = hypre_ParVectorGlobalSize(x); HYPRE_Int y_size = hypre_ParVectorGlobalSize(y); HYPRE_Int num_vectors = hypre_VectorNumVectors(y_local); HYPRE_Int i, j, jv, index, start, num_sends; HYPRE_Int ierr = 0; /*--------------------------------------------------------------------- * Check for size compatibility. MatvecT returns ierr = 1 if * length of X doesn't equal the number of rows of A, * ierr = 2 if the length of Y doesn't equal the number of * columns of A, and ierr = 3 if both are true. * * Because temporary vectors are often used in MatvecT, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ if (num_rows != x_size) ierr = 1; if (num_cols != y_size) ierr = 2; if (num_rows != x_size && num_cols != y_size) ierr = 3; /*----------------------------------------------------------------------- *-----------------------------------------------------------------------*/ comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors); if ( num_vectors==1 ) { y_tmp = hypre_SeqVectorCreate(num_cols_offd); } else { y_tmp = hypre_SeqMultiVectorCreate(num_cols_offd,num_vectors); } hypre_SeqVectorInitialize(y_tmp); /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); y_buf_data = hypre_CTAlloc( HYPRE_Complex*, num_vectors ); for ( jv=0; jv<num_vectors; ++jv ) y_buf_data[jv] = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart (comm_pkg, num_sends)); y_tmp_data = hypre_VectorData(y_tmp); y_local_data = hypre_VectorData(y_local); hypre_assert( idxstride==1 ); /* only 'column' storage of multivectors * implemented so far */ if (num_cols_offd) hypre_CSRMatrixMatvecT(alpha, offd, x_local, 0.0, y_tmp); for ( jv=0; jv<num_vectors; ++jv ) { /* this is where we assume multivectors are 'column' storage */ comm_handle[jv] = hypre_ParCSRCommHandleCreate ( 2, comm_pkg, &(y_tmp_data[jv*num_cols_offd]), y_buf_data[jv] ); } hypre_CSRMatrixMatvecT(alpha, diag, x_local, beta, y_local); for ( jv=0; jv<num_vectors; ++jv ) { hypre_ParCSRCommHandleDestroy(comm_handle[jv]); comm_handle[jv] = NULL; } hypre_TFree(comm_handle); if ( num_vectors==1 ) { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) y_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)] += y_buf_data[0][index++]; } } else for ( jv=0; jv<num_vectors; ++jv ) { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) y_local_data[ jv*vecstride + idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j) ] += y_buf_data[jv][index++]; } } hypre_SeqVectorDestroy(y_tmp); y_tmp = NULL; for ( jv=0; jv<num_vectors; ++jv ) hypre_TFree(y_buf_data[jv]); hypre_TFree(y_buf_data); return ierr; }
HYPRE_Int AmgCGCPrepare (hypre_ParCSRMatrix *S,HYPRE_Int nlocal,HYPRE_Int *CF_marker,HYPRE_Int **CF_marker_offd,HYPRE_Int coarsen_type,HYPRE_Int **vrange) /* assemble a graph representing the connections between the grids * ================================================================================================ * S : the strength matrix * nlocal : the number of locally created coarse grids * CF_marker, CF_marker_offd : the coare/fine markers * coarsen_type : the coarsening type * vrange : the ranges of the vertices representing coarse grids * ================================================================================================*/ { HYPRE_Int ierr=0; HYPRE_Int mpisize,mpirank; HYPRE_Int num_sends; HYPRE_Int *vertexrange=NULL; HYPRE_Int vstart,vend; HYPRE_Int *int_buf_data; HYPRE_Int start; HYPRE_Int i,ii,j; HYPRE_Int num_variables = hypre_CSRMatrixNumRows (hypre_ParCSRMatrixDiag(S)); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (hypre_ParCSRMatrixOffd (S)); MPI_Comm comm = hypre_ParCSRMatrixComm(S); /* hypre_MPI_Status status; */ hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg (S); hypre_ParCSRCommHandle *comm_handle; hypre_MPI_Comm_size (comm,&mpisize); hypre_MPI_Comm_rank (comm,&mpirank); if (!comm_pkg) { hypre_MatvecCommPkgCreate (S); comm_pkg = hypre_ParCSRMatrixCommPkg (S); } num_sends = hypre_ParCSRCommPkgNumSends (comm_pkg); if (coarsen_type % 2 == 0) nlocal++; /* even coarsen_type means allow_emptygrids */ #ifdef HYPRE_NO_GLOBAL_PARTITION { HYPRE_Int scan_recv; vertexrange = hypre_CTAlloc(HYPRE_Int,2); hypre_MPI_Scan(&nlocal, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); /* first point in my range */ vertexrange[0] = scan_recv - nlocal; /* first point in next proc's range */ vertexrange[1] = scan_recv; vstart = vertexrange[0]; vend = vertexrange[1]; } #else vertexrange = hypre_CTAlloc (HYPRE_Int,mpisize+1); hypre_MPI_Allgather (&nlocal,1,HYPRE_MPI_INT,vertexrange+1,1,HYPRE_MPI_INT,comm); vertexrange[0]=0; for (i=2;i<=mpisize;i++) vertexrange[i]+=vertexrange[i-1]; vstart = vertexrange[mpirank]; vend = vertexrange[mpirank+1]; #endif /* Note: vstart uses 0-based indexing, while CF_marker uses 1-based indexing */ if (coarsen_type % 2 == 1) { /* see above */ for (i=0;i<num_variables;i++) if (CF_marker[i]>0) CF_marker[i]+=vstart; } else { /* hypre_printf ("processor %d: empty grid allowed\n",mpirank); */ for (i=0;i<num_variables;i++) { if (CF_marker[i]>0) CF_marker[i]+=vstart+1; /* add one because vertexrange[mpirank]+1 denotes the empty grid. Hence, vertexrange[mpirank]+2 is the first coarse grid denoted in global indices, ... */ } } /* exchange data */ *CF_marker_offd = hypre_CTAlloc (HYPRE_Int,num_cols_offd); int_buf_data = hypre_CTAlloc (HYPRE_Int,hypre_ParCSRCommPkgSendMapStart (comm_pkg,num_sends)); for (i=0,ii=0;i<num_sends;i++) { start = hypre_ParCSRCommPkgSendMapStart (comm_pkg,i); for (j=start;j<hypre_ParCSRCommPkgSendMapStart (comm_pkg,i+1);j++) int_buf_data [ii++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } if (mpisize>1) { comm_handle = hypre_ParCSRCommHandleCreate (11,comm_pkg,int_buf_data,*CF_marker_offd); hypre_ParCSRCommHandleDestroy (comm_handle); } hypre_TFree (int_buf_data); *vrange=vertexrange; return (ierr); }
/************************************************************** * * CGC Coarsening routine * **************************************************************/ HYPRE_Int hypre_BoomerAMGCoarsenCGCb( hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *A, HYPRE_Int measure_type, HYPRE_Int coarsen_type, HYPRE_Int cgc_its, HYPRE_Int debug_flag, HYPRE_Int **CF_marker_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(S); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(S); hypre_ParCSRCommHandle *comm_handle; hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S); hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S); HYPRE_Int *S_i = hypre_CSRMatrixI(S_diag); HYPRE_Int *S_j = hypre_CSRMatrixJ(S_diag); HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd); HYPRE_Int *S_offd_j; HYPRE_Int num_variables = hypre_CSRMatrixNumRows(S_diag); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(S_offd); hypre_CSRMatrix *S_ext; HYPRE_Int *S_ext_i; HYPRE_Int *S_ext_j; hypre_CSRMatrix *ST; HYPRE_Int *ST_i; HYPRE_Int *ST_j; HYPRE_Int *CF_marker; HYPRE_Int *CF_marker_offd=NULL; HYPRE_Int ci_tilde = -1; HYPRE_Int ci_tilde_mark = -1; HYPRE_Int *measure_array; HYPRE_Int *measure_array_master; HYPRE_Int *graph_array; HYPRE_Int *int_buf_data=NULL; /*HYPRE_Int *ci_array=NULL;*/ HYPRE_Int i, j, k, l, jS; HYPRE_Int ji, jj, index; HYPRE_Int set_empty = 1; HYPRE_Int C_i_nonempty = 0; HYPRE_Int num_nonzeros; HYPRE_Int num_procs, my_id; HYPRE_Int num_sends = 0; HYPRE_Int first_col, start; HYPRE_Int col_0, col_n; hypre_LinkList LoL_head; hypre_LinkList LoL_tail; HYPRE_Int *lists, *where; HYPRE_Int measure, new_meas; HYPRE_Int num_left; HYPRE_Int nabor, nabor_two; HYPRE_Int ierr = 0; HYPRE_Int use_commpkg_A = 0; HYPRE_Real wall_time; HYPRE_Int measure_max; /* BM Aug 30, 2006: maximal measure, needed for CGC */ if (coarsen_type < 0) coarsen_type = -coarsen_type; /*------------------------------------------------------- * Initialize the C/F marker, LoL_head, LoL_tail arrays *-------------------------------------------------------*/ LoL_head = NULL; LoL_tail = NULL; lists = hypre_CTAlloc(HYPRE_Int, num_variables); where = hypre_CTAlloc(HYPRE_Int, num_variables); #if 0 /* debugging */ char filename[256]; FILE *fp; HYPRE_Int iter = 0; #endif /*-------------------------------------------------------------- * Compute a CSR strength matrix, S. * * For now, the "strength" of dependence/influence is defined in * the following way: i depends on j if * aij > hypre_max (k != i) aik, aii < 0 * or * aij < hypre_min (k != i) aik, aii >= 0 * Then S_ij = 1, else S_ij = 0. * * NOTE: the entries are negative initially, corresponding * to "unaccounted-for" dependence. *----------------------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); if (!comm_pkg) { use_commpkg_A = 1; comm_pkg = hypre_ParCSRMatrixCommPkg(A); } if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); if (num_cols_offd) S_offd_j = hypre_CSRMatrixJ(S_offd); jS = S_i[num_variables]; ST = hypre_CSRMatrixCreate(num_variables, num_variables, jS); ST_i = hypre_CTAlloc(HYPRE_Int,num_variables+1); ST_j = hypre_CTAlloc(HYPRE_Int,jS); hypre_CSRMatrixI(ST) = ST_i; hypre_CSRMatrixJ(ST) = ST_j; /*---------------------------------------------------------- * generate transpose of S, ST *----------------------------------------------------------*/ for (i=0; i <= num_variables; i++) ST_i[i] = 0; for (i=0; i < jS; i++) { ST_i[S_j[i]+1]++; } for (i=0; i < num_variables; i++) { ST_i[i+1] += ST_i[i]; } for (i=0; i < num_variables; i++) { for (j=S_i[i]; j < S_i[i+1]; j++) { index = S_j[j]; ST_j[ST_i[index]] = i; ST_i[index]++; } } for (i = num_variables; i > 0; i--) { ST_i[i] = ST_i[i-1]; } ST_i[0] = 0; /*---------------------------------------------------------- * Compute the measures * * The measures are given by the row sums of ST. * Hence, measure_array[i] is the number of influences * of variable i. * correct actual measures through adding influences from * neighbor processors *----------------------------------------------------------*/ measure_array_master = hypre_CTAlloc(HYPRE_Int, num_variables); measure_array = hypre_CTAlloc(HYPRE_Int, num_variables); for (i = 0; i < num_variables; i++) { measure_array_master[i] = ST_i[i+1]-ST_i[i]; } if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) && num_procs > 1) { if (use_commpkg_A) S_ext = hypre_ParCSRMatrixExtractBExt(S,A,0); else S_ext = hypre_ParCSRMatrixExtractBExt(S,S,0); S_ext_i = hypre_CSRMatrixI(S_ext); S_ext_j = hypre_CSRMatrixJ(S_ext); num_nonzeros = S_ext_i[num_cols_offd]; first_col = hypre_ParCSRMatrixFirstColDiag(S); col_0 = first_col-1; col_n = col_0+num_variables; if (measure_type) { for (i=0; i < num_nonzeros; i++) { index = S_ext_j[i] - first_col; if (index > -1 && index < num_variables) measure_array_master[index]++; } } } /*--------------------------------------------------- * Loop until all points are either fine or coarse. *---------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); /* first coarsening phase */ /************************************************************* * * Initialize the lists * *************************************************************/ CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables); num_left = 0; for (j = 0; j < num_variables; j++) { if ((S_i[j+1]-S_i[j])== 0 && (S_offd_i[j+1]-S_offd_i[j]) == 0) { CF_marker[j] = SF_PT; measure_array_master[j] = 0; } else { CF_marker[j] = UNDECIDED; /* num_left++; */ /* BM May 19, 2006: see below*/ } } if (coarsen_type==22) { /* BM Sep 8, 2006: allow_emptygrids only if the following holds for all points j: (a) the point has no strong connections at all, OR (b) the point has a strong connection across a boundary */ for (j=0;j<num_variables;j++) if (S_i[j+1]>S_i[j] && S_offd_i[j+1] == S_offd_i[j]) {coarsen_type=21;break;} } for (l = 1; l <= cgc_its; l++) { LoL_head = NULL; LoL_tail = NULL; num_left = 0; /* compute num_left before each RS coarsening loop */ memcpy (measure_array,measure_array_master,num_variables*sizeof(HYPRE_Int)); memset (lists,0,sizeof(HYPRE_Int)*num_variables); memset (where,0,sizeof(HYPRE_Int)*num_variables); for (j = 0; j < num_variables; j++) { measure = measure_array[j]; if (CF_marker[j] != SF_PT) { if (measure > 0) { enter_on_lists(&LoL_head, &LoL_tail, measure, j, lists, where); num_left++; /* compute num_left before each RS coarsening loop */ } else if (CF_marker[j] == 0) /* increase weight of strongly coupled neighbors only if j is not conained in a previously constructed coarse grid. Reason: these neighbors should start with the same initial weight in each CGC iteration. BM Aug 30, 2006 */ { if (measure < 0) hypre_printf("negative measure!\n"); /* CF_marker[j] = f_pnt; */ for (k = S_i[j]; k < S_i[j+1]; k++) { nabor = S_j[k]; /* if (CF_marker[nabor] != SF_PT) */ if (CF_marker[nabor] == 0) /* BM Aug 30, 2006: don't alter weights of points contained in other candidate coarse grids */ { if (nabor < j) { new_meas = measure_array[nabor]; if (new_meas > 0) remove_point(&LoL_head, &LoL_tail, new_meas, nabor, lists, where); else num_left++; /* BM Aug 29, 2006 */ new_meas = ++(measure_array[nabor]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor, lists, where); } else { new_meas = ++(measure_array[nabor]); } } } /* --num_left; */ /* BM May 19, 2006 */ } } } /* BM Aug 30, 2006: first iteration: determine maximal weight */ if (num_left && l==1) measure_max = measure_array[LoL_head->head]; /* BM Aug 30, 2006: break CGC iteration if no suitable starting point is available any more */ if (!num_left || measure_array[LoL_head->head]<measure_max) { while (LoL_head) { hypre_LinkList list_ptr = LoL_head; LoL_head = LoL_head->next_elt; dispose_elt (list_ptr); } break; } /**************************************************************** * * Main loop of Ruge-Stueben first coloring pass. * * WHILE there are still points to classify DO: * 1) find first point, i, on list with max_measure * make i a C-point, remove it from the lists * 2) For each point, j, in S_i^T, * a) Set j to be an F-point * b) For each point, k, in S_j * move k to the list in LoL with measure one * greater than it occupies (creating new LoL * entry if necessary) * 3) For each point, j, in S_i, * move j to the list in LoL with measure one * smaller than it occupies (creating new LoL * entry if necessary) * ****************************************************************/ while (num_left > 0) { index = LoL_head -> head; /* index = LoL_head -> tail; */ /* CF_marker[index] = C_PT; */ CF_marker[index] = l; /* BM Aug 18, 2006 */ measure = measure_array[index]; measure_array[index] = 0; measure_array_master[index] = 0; /* BM May 19: for CGC */ --num_left; remove_point(&LoL_head, &LoL_tail, measure, index, lists, where); for (j = ST_i[index]; j < ST_i[index+1]; j++) { nabor = ST_j[j]; /* if (CF_marker[nabor] == UNDECIDED) */ if (measure_array[nabor]>0) /* undecided point */ { /* CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */ measure = measure_array[nabor]; measure_array[nabor]=0; remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where); --num_left; for (k = S_i[nabor]; k < S_i[nabor+1]; k++) { nabor_two = S_j[k]; /* if (CF_marker[nabor_two] == UNDECIDED) */ if (measure_array[nabor_two]>0) /* undecided point */ { measure = measure_array[nabor_two]; remove_point(&LoL_head, &LoL_tail, measure, nabor_two, lists, where); new_meas = ++(measure_array[nabor_two]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); } } } } for (j = S_i[index]; j < S_i[index+1]; j++) { nabor = S_j[j]; /* if (CF_marker[nabor] == UNDECIDED) */ if (measure_array[nabor]>0) /* undecided point */ { measure = measure_array[nabor]; remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where); measure_array[nabor] = --measure; if (measure > 0) enter_on_lists(&LoL_head, &LoL_tail, measure, nabor, lists, where); else { /* CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */ --num_left; for (k = S_i[nabor]; k < S_i[nabor+1]; k++) { nabor_two = S_j[k]; /* if (CF_marker[nabor_two] == UNDECIDED) */ if (measure_array[nabor_two]>0) { new_meas = measure_array[nabor_two]; remove_point(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); new_meas = ++(measure_array[nabor_two]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); } } } } } } if (LoL_head) hypre_printf ("Linked list not empty! head: %d\n",LoL_head->head); } l--; /* BM Aug 15, 2006 */ hypre_TFree(measure_array); hypre_TFree(measure_array_master); hypre_CSRMatrixDestroy(ST); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen 1st pass = %f\n", my_id, wall_time); } hypre_TFree(lists); hypre_TFree(where); if (num_procs>1) { if (debug_flag == 3) wall_time = time_getWallclockSeconds(); hypre_BoomerAMGCoarsenCGC (S,l,coarsen_type,CF_marker); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen CGC = %f\n", my_id, wall_time); } } else { /* the first candiate coarse grid is the coarse grid */ for (j=0;j<num_variables;j++) { if (CF_marker[j]==1) CF_marker[j]=C_PT; else CF_marker[j]=F_PT; } } /* BM May 19, 2006: Set all undecided points to be fine grid points. */ for (j=0;j<num_variables;j++) if (!CF_marker[j]) CF_marker[j]=F_PT; /*--------------------------------------------------- * Initialize the graph array *---------------------------------------------------*/ graph_array = hypre_CTAlloc(HYPRE_Int, num_variables); for (i = 0; i < num_variables; i++) { graph_array[i] = -1; } if (debug_flag == 3) wall_time = time_getWallclockSeconds(); for (i=0; i < num_variables; i++) { if (ci_tilde_mark != i) ci_tilde = -1; if (CF_marker[i] == -1) { for (ji = S_i[i]; ji < S_i[i+1]; ji++) { j = S_j[ji]; if (CF_marker[j] > 0) graph_array[j] = i; } for (ji = S_i[i]; ji < S_i[i+1]; ji++) { j = S_j[ji]; if (CF_marker[j] == -1) { set_empty = 1; for (jj = S_i[j]; jj < S_i[j+1]; jj++) { index = S_j[jj]; if (graph_array[index] == i) { set_empty = 0; break; } } if (set_empty) { if (C_i_nonempty) { CF_marker[i] = 1; if (ci_tilde > -1) { CF_marker[ci_tilde] = -1; ci_tilde = -1; } C_i_nonempty = 0; break; } else { ci_tilde = j; ci_tilde_mark = i; CF_marker[j] = 1; C_i_nonempty = 1; i--; break; } } } } } } if (debug_flag == 3 && coarsen_type != 2) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen 2nd pass = %f\n", my_id, wall_time); } /* third pass, check boundary fine points for coarse neighbors */ /*------------------------------------------------ * Exchange boundary data for CF_marker *------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) int_buf_data[index++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } if (num_procs > 1) { comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, CF_marker_offd); hypre_ParCSRCommHandleDestroy(comm_handle); } AmgCGCBoundaryFix (S,CF_marker,CF_marker_offd); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d CGC boundary fix = %f\n", my_id, wall_time); } /*--------------------------------------------------- * Clean up and return *---------------------------------------------------*/ /*if (coarsen_type != 1) { */ if (CF_marker_offd) hypre_TFree(CF_marker_offd); /* BM Aug 21, 2006 */ if (int_buf_data) hypre_TFree(int_buf_data); /* BM Aug 21, 2006 */ /*if (ci_array) hypre_TFree(ci_array);*/ /* BM Aug 21, 2006 */ /*} */ hypre_TFree(graph_array); if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) && num_procs > 1) hypre_CSRMatrixDestroy(S_ext); *CF_marker_ptr = CF_marker; return (ierr); }
void hypre_BoomerAMGJacobiInterp_1( hypre_ParCSRMatrix * A, hypre_ParCSRMatrix ** P, hypre_ParCSRMatrix * S, HYPRE_Int * CF_marker, HYPRE_Int level, HYPRE_Real truncation_threshold, HYPRE_Real truncation_threshold_minus, HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd, HYPRE_Real weight_AF) /* One step of Jacobi interpolation: A is the linear system. P is an interpolation matrix, input and output CF_marker identifies coarse and fine points If we imagine P and A as split into coarse and fine submatrices, [ AFF AFC ] [ AF ] [ IFC ] A = [ ] = [ ] , P = [ ] [ ACF ACC ] [ AC ] [ ICC ] (note that ICC is an identity matrix, applied to coarse points only) then this function computes IFCnew = IFCold - DFF(-1) * ( AFF*IFCold + AFC ) = IFCold - DFF(-1) * AF * Pold) where DFF is the diagonal of AFF, (-1) represents the inverse, and where "old" denotes a value on entry to this function, "new" a returned value. */ { hypre_ParCSRMatrix * Pnew; hypre_ParCSRMatrix * C; hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(*P); hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(*P); HYPRE_Real *P_diag_data = hypre_CSRMatrixData(P_diag); HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag); HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag); HYPRE_Real *P_offd_data = hypre_CSRMatrixData(P_offd); HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd); hypre_CSRMatrix *C_diag; hypre_CSRMatrix *C_offd; hypre_CSRMatrix *Pnew_diag; hypre_CSRMatrix *Pnew_offd; HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag); HYPRE_Int i; HYPRE_Int Jnochanges=0, Jchanges, Pnew_num_nonzeros; HYPRE_Int CF_coarse=0; HYPRE_Int * J_marker = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P ); HYPRE_Int nc, ncmax, ncmin, nc1; HYPRE_Int num_procs, my_id; MPI_Comm comm = hypre_ParCSRMatrixComm( A ); #ifdef HYPRE_JACINT_PRINT_ROW_SUMS HYPRE_Int m, nmav, npav; HYPRE_Real PIi, PIimax, PIimin, PIimav, PIipav, randthresh; HYPRE_Real eps = 1.0e-17; #endif #ifdef HYPRE_JACINT_PRINT_MATRICES char filename[80]; HYPRE_Int i_dummy, j_dummy; HYPRE_Int *base_i_ptr = &i_dummy; HYPRE_Int *base_j_ptr = &j_dummy; #endif #ifdef HYPRE_JACINT_PRINT_SOME_ROWS HYPRE_Int sample_rows[50], n_sample_rows=0, isamp; #endif hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm,&my_id); for ( i=0; i<num_rows_diag_P; ++i ) { J_marker[i] = CF_marker[i]; if (CF_marker[i]>=0) ++CF_coarse; } #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1, P has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(P_diag), hypre_CSRMatrixNumNonzeros(P_offd), hypre_CSRMatrixNumNonzeros(P_diag)+hypre_CSRMatrixNumNonzeros(P_offd), hypre_ParCSRMatrixLocalSumElts(*P) ); #endif /* row sum computations, for output */ #ifdef HYPRE_JACINT_PRINT_ROW_SUMS PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0; nmav=0, npav=0; for ( i=0; i<num_rows_diag_P; ++i ) { PIi = 0; /* i-th value of P*1, i.e. sum of row i of P */ for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) PIi += P_diag_data[m]; for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m ) PIi += P_offd_data[m]; if (CF_marker[i]<0) { PIimax = hypre_max( PIimax, PIi ); PIimin = hypre_min( PIimin, PIi ); if (PIi<=1-eps) { PIimav+=PIi; ++nmav; }; if (PIi>=1+eps) { PIipav+=PIi; ++npav; }; } } if ( nmav>0 ) PIimav = PIimav/nmav; if ( npav>0 ) PIipav = PIipav/npav; hypre_printf("%i %i P in max,min row sums %e %e\n", my_id, level, PIimax, PIimin ); #endif ncmax=0; ncmin=num_rows_diag_P; nc1=0; for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc<=1) { ++nc1; } ncmax = hypre_max( nc, ncmax ); ncmin = hypre_min( nc, ncmin ); } #if 0 /* a very agressive reduction in how much the Jacobi step does: */ for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc>ncmin+1) /*if ( nc > ncmin + 0.5*(ncmax-ncmin) )*/ { J_marker[i] = 1; ++Jnochanges; } } #endif Jchanges = num_rows_diag_P - Jnochanges - CF_coarse; #ifdef HYPRE_JACINT_PRINT_SOME_ROWS hypre_printf("some rows to be changed: "); randthresh = 15/(HYPRE_Real)Jchanges; for ( i=0; i<num_rows_diag_P; ++i ) { if ( J_marker[i]<0 ) { if ( ((HYPRE_Real)rand())/RAND_MAX < randthresh ) { hypre_printf( "%i: ", i ); for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] ); hypre_printf("; "); sample_rows[n_sample_rows] = i; ++n_sample_rows; } } } hypre_printf("\n"); #endif #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i P has %i rows, %i changeable, %i don't change-good, %i coarse\n", my_id, level, num_rows_diag_P, Jchanges, Jnochanges, CF_coarse ); hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 ); #endif #ifdef HYPRE_JACINT_PRINT_MATRICES if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) { hypre_sprintf( filename, "Ain%i", level ); hypre_ParCSRMatrixPrintIJ( A,0,0,filename); hypre_sprintf( filename, "Sin%i", level ); hypre_ParCSRMatrixPrintIJ( S,0,0,filename); hypre_sprintf( filename, "Pin%i", level ); hypre_ParCSRMatrixPrintIJ( *P,0,0,filename); } #endif C = hypre_ParMatmul_FC( A, *P, J_marker, dof_func, dof_func_offd ); /* hypre_parMatmul_FC creates and returns C, a variation of the matrix product A*P in which only the "Fine"-designated rows have been computed. (all columns are Coarse because all columns of P are). "Fine" is defined solely by the marker array, and for example could be a proper subset of the fine points of a multigrid hierarchy. As a matrix, C is the size of A*P. But only the marked rows have been computed. */ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "C%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename); #endif C_diag = hypre_ParCSRMatrixDiag(C); C_offd = hypre_ParCSRMatrixOffd(C); #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1 after matmul, C has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(C_diag), hypre_CSRMatrixNumNonzeros(C_offd), hypre_CSRMatrixNumNonzeros(C_diag)+hypre_CSRMatrixNumNonzeros(C_offd), hypre_ParCSRMatrixLocalSumElts(C) ); #endif hypre_ParMatScaleDiagInv_F( C, A, weight_AF, J_marker ); /* hypre_ParMatScaleDiagInv scales of its first argument by premultiplying with a submatrix of the inverse of the diagonal of its second argument. The marker array determines which diagonal elements are used. The marker array should select exactly the right number of diagonal elements (the number of rows of AP_FC). */ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "Cout%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename); #endif Pnew = hypre_ParMatMinus_F( *P, C, J_marker ); /* hypre_ParMatMinus_F subtracts rows of its second argument from selected rows of its first argument. The marker array determines which rows of the first argument are affected, and they should exactly correspond to all the rows of the second argument. */ Pnew_diag = hypre_ParCSRMatrixDiag(Pnew); Pnew_offd = hypre_ParCSRMatrixOffd(Pnew); Pnew_num_nonzeros = hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd); #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1 after MatMinus, Pnew has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd), Pnew_num_nonzeros, hypre_ParCSRMatrixLocalSumElts(Pnew) ); #endif /* Transfer ownership of col_starts from P to Pnew ... */ if ( hypre_ParCSRMatrixColStarts(*P) && hypre_ParCSRMatrixColStarts(*P)==hypre_ParCSRMatrixColStarts(Pnew) ) { if ( hypre_ParCSRMatrixOwnsColStarts(*P) && !hypre_ParCSRMatrixOwnsColStarts(Pnew) ) { hypre_ParCSRMatrixSetColStartsOwner(*P,0); hypre_ParCSRMatrixSetColStartsOwner(Pnew,1); } } hypre_ParCSRMatrixDestroy( C ); hypre_ParCSRMatrixDestroy( *P ); /* Note that I'm truncating all the fine rows, not just the J-marked ones. */ #if 0 if ( Pnew_num_nonzeros < 10000 ) /* a fixed number like this makes it no.procs.-depdendent */ { /* ad-hoc attempt to reduce zero-matrix problems seen in testing..*/ truncation_threshold = 1.0e-6 * truncation_threshold; truncation_threshold_minus = 1.0e-6 * truncation_threshold_minus; } #endif hypre_BoomerAMGTruncateInterp( Pnew, truncation_threshold, truncation_threshold_minus, CF_marker ); hypre_MatvecCommPkgCreate ( Pnew ); *P = Pnew; P_diag = hypre_ParCSRMatrixDiag(*P); P_offd = hypre_ParCSRMatrixOffd(*P); P_diag_data = hypre_CSRMatrixData(P_diag); P_diag_i = hypre_CSRMatrixI(P_diag); P_diag_j = hypre_CSRMatrixJ(P_diag); P_offd_data = hypre_CSRMatrixData(P_offd); P_offd_i = hypre_CSRMatrixI(P_offd); /* row sum computations, for output */ #ifdef HYPRE_JACINT_PRINT_ROW_SUMS PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0; nmav=0, npav=0; for ( i=0; i<num_rows_diag_P; ++i ) { PIi = 0; /* i-th value of P*1, i.e. sum of row i of P */ for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) PIi += P_diag_data[m]; for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m ) PIi += P_offd_data[m]; if (CF_marker[i]<0) { PIimax = hypre_max( PIimax, PIi ); PIimin = hypre_min( PIimin, PIi ); if (PIi<=1-eps) { PIimav+=PIi; ++nmav; }; if (PIi>=1+eps) { PIipav+=PIi; ++npav; }; } } if ( nmav>0 ) PIimav = PIimav/nmav; if ( npav>0 ) PIipav = PIipav/npav; hypre_printf("%i %i P out max,min row sums %e %e\n", my_id, level, PIimax, PIimin ); #endif #ifdef HYPRE_JACINT_PRINT_SOME_ROWS hypre_printf("some changed rows: "); for ( isamp=0; isamp<n_sample_rows; ++isamp ) { i = sample_rows[isamp]; hypre_printf( "%i: ", i ); for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] ); hypre_printf("; "); } hypre_printf("\n"); #endif ncmax=0; ncmin=num_rows_diag_P; nc1=0; for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc<=1) ++nc1; ncmax = hypre_max( nc, ncmax ); ncmin = hypre_min( nc, ncmin ); } #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i P has %i rows, %i changeable, %i too good, %i coarse\n", my_id, level, num_rows_diag_P, num_rows_diag_P-Jnochanges-CF_coarse, Jnochanges, CF_coarse ); hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 ); hypre_printf("%i %i Jacobi_Interp_1 after truncation (%e), Pnew has %i+%i=%i nonzeros, local sum %e\n", my_id, level, truncation_threshold, hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd), hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd), hypre_ParCSRMatrixLocalSumElts(Pnew) ); #endif /* Programming Notes: 1. Judging by around line 299 of par_interp.c, they typical use of CF_marker is that CF_marker>=0 means Coarse, CF_marker<0 means Fine. */ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "Pout%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( *P,0,0,filename); #endif hypre_TFree( J_marker ); }
HYPRE_Int hypre_AMESetup(void *esolver) { HYPRE_Int ne, *edge_bc; hypre_AMEData *ame_data = esolver; hypre_AMSData *ams_data = ame_data -> precond; if (ams_data -> beta_is_zero) { ame_data -> t1 = hypre_ParVectorInDomainOf(ams_data -> G); ame_data -> t2 = hypre_ParVectorInDomainOf(ams_data -> G); } else { ame_data -> t1 = ams_data -> r1; ame_data -> t2 = ams_data -> g1; } ame_data -> t3 = ams_data -> r0; /* Eliminate boundary conditions in G = [Gii, Gib; 0, Gbb], i.e., compute [Gii, 0; 0, 0] */ { HYPRE_Int i, j, k, nv; HYPRE_Int *offd_edge_bc; hypre_ParCSRMatrix *Gt; nv = hypre_ParCSRMatrixNumCols(ams_data -> G); ne = hypre_ParCSRMatrixNumRows(ams_data -> G); edge_bc = hypre_TAlloc(HYPRE_Int, ne); for (i = 0; i < ne; i++) edge_bc[i] = 0; /* Find boundary (eliminated) edges */ { hypre_CSRMatrix *Ad = hypre_ParCSRMatrixDiag(ams_data -> A); HYPRE_Int *AdI = hypre_CSRMatrixI(Ad); HYPRE_Int *AdJ = hypre_CSRMatrixJ(Ad); HYPRE_Real *AdA = hypre_CSRMatrixData(Ad); hypre_CSRMatrix *Ao = hypre_ParCSRMatrixOffd(ams_data -> A); HYPRE_Int *AoI = hypre_CSRMatrixI(Ao); HYPRE_Real *AoA = hypre_CSRMatrixData(Ao); HYPRE_Real l1_norm; /* A row (edge) is boundary if its off-diag l1 norm is less than eps */ HYPRE_Real eps = DBL_EPSILON * 1e+4; for (i = 0; i < ne; i++) { l1_norm = 0.0; for (j = AdI[i]; j < AdI[i+1]; j++) if (AdJ[j] != i) l1_norm += fabs(AdA[j]); if (AoI) for (j = AoI[i]; j < AoI[i+1]; j++) l1_norm += fabs(AoA[j]); if (l1_norm < eps) edge_bc[i] = 1; } } hypre_ParCSRMatrixTranspose(ams_data -> G, &Gt, 1); /* Use a Matvec communication to find which of the edges connected to local vertices are on the boundary */ { hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int num_sends, *int_buf_data; HYPRE_Int index, start; offd_edge_bc = hypre_CTAlloc(HYPRE_Int, hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(Gt))); hypre_MatvecCommPkgCreate(Gt); comm_pkg = hypre_ParCSRMatrixCommPkg(Gt); num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j); int_buf_data[index++] = edge_bc[k]; } } comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_edge_bc); hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(int_buf_data); } /* Eliminate boundary vertex entries in G^t */ { hypre_CSRMatrix *Gtd = hypre_ParCSRMatrixDiag(Gt); HYPRE_Int *GtdI = hypre_CSRMatrixI(Gtd); HYPRE_Int *GtdJ = hypre_CSRMatrixJ(Gtd); HYPRE_Real *GtdA = hypre_CSRMatrixData(Gtd); hypre_CSRMatrix *Gto = hypre_ParCSRMatrixOffd(Gt); HYPRE_Int *GtoI = hypre_CSRMatrixI(Gto); HYPRE_Int *GtoJ = hypre_CSRMatrixJ(Gto); HYPRE_Real *GtoA = hypre_CSRMatrixData(Gto); HYPRE_Int bdr; for (i = 0; i < nv; i++) { bdr = 0; /* A vertex is boundary if it belongs to a boundary edge */ for (j = GtdI[i]; j < GtdI[i+1]; j++) if (edge_bc[GtdJ[j]]) { bdr = 1; break; } if (!bdr && GtoI) for (j = GtoI[i]; j < GtoI[i+1]; j++) if (offd_edge_bc[GtoJ[j]]) { bdr = 1; break; } if (bdr) { for (j = GtdI[i]; j < GtdI[i+1]; j++) /* if (!edge_bc[GtdJ[j]]) */ GtdA[j] = 0.0; if (GtoI) for (j = GtoI[i]; j < GtoI[i+1]; j++) /* if (!offd_edge_bc[GtoJ[j]]) */ GtoA[j] = 0.0; } } } hypre_ParCSRMatrixTranspose(Gt, &ame_data -> G, 1); hypre_ParCSRMatrixDestroy(Gt); hypre_TFree(offd_edge_bc); } /* Compute G^t M G */ { if (!hypre_ParCSRMatrixCommPkg(ame_data -> G)) hypre_MatvecCommPkgCreate(ame_data -> G); if (!hypre_ParCSRMatrixCommPkg(ame_data -> M)) hypre_MatvecCommPkgCreate(ame_data -> M); hypre_BoomerAMGBuildCoarseOperator(ame_data -> G, ame_data -> M, ame_data -> G, &ame_data -> A_G); hypre_ParCSRMatrixFixZeroRows(ame_data -> A_G); } /* Create AMG preconditioner and PCG-AMG solver for G^tMG */ { HYPRE_BoomerAMGCreate(&ame_data -> B1_G); HYPRE_BoomerAMGSetCoarsenType(ame_data -> B1_G, ams_data -> B_G_coarsen_type); HYPRE_BoomerAMGSetAggNumLevels(ame_data -> B1_G, ams_data -> B_G_agg_levels); HYPRE_BoomerAMGSetRelaxType(ame_data -> B1_G, ams_data -> B_G_relax_type); HYPRE_BoomerAMGSetNumSweeps(ame_data -> B1_G, 1); HYPRE_BoomerAMGSetMaxLevels(ame_data -> B1_G, 25); HYPRE_BoomerAMGSetTol(ame_data -> B1_G, 0.0); HYPRE_BoomerAMGSetMaxIter(ame_data -> B1_G, 1); HYPRE_BoomerAMGSetStrongThreshold(ame_data -> B1_G, ams_data -> B_G_theta); /* don't use exact solve on the coarsest level (matrix may be singular) */ HYPRE_BoomerAMGSetCycleRelaxType(ame_data -> B1_G, ams_data -> B_G_relax_type, 3); HYPRE_ParCSRPCGCreate(hypre_ParCSRMatrixComm(ame_data->A_G), &ame_data -> B2_G); HYPRE_PCGSetPrintLevel(ame_data -> B2_G, 0); HYPRE_PCGSetTol(ame_data -> B2_G, 1e-12); HYPRE_PCGSetMaxIter(ame_data -> B2_G, 20); HYPRE_PCGSetPrecond(ame_data -> B2_G, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, ame_data -> B1_G); HYPRE_ParCSRPCGSetup(ame_data -> B2_G, (HYPRE_ParCSRMatrix)ame_data->A_G, (HYPRE_ParVector)ame_data->t1, (HYPRE_ParVector)ame_data->t2); } /* Setup LOBPCG */ { HYPRE_Int seed = 75; mv_InterfaceInterpreter* interpreter; mv_MultiVectorPtr eigenvectors; ame_data -> interpreter = hypre_CTAlloc(mv_InterfaceInterpreter,1); interpreter = (mv_InterfaceInterpreter*) ame_data -> interpreter; HYPRE_ParCSRSetupInterpreter(interpreter); ame_data -> eigenvalues = hypre_CTAlloc(HYPRE_Real, ame_data -> block_size); ame_data -> eigenvectors = mv_MultiVectorCreateFromSampleVector(interpreter, ame_data -> block_size, ame_data -> t3); eigenvectors = (mv_MultiVectorPtr) ame_data -> eigenvectors; mv_MultiVectorSetRandom (eigenvectors, seed); /* Make the initial vectors discretely divergence free */ { HYPRE_Int i, j; HYPRE_Real *data; mv_TempMultiVector* tmp = mv_MultiVectorGetData(eigenvectors); HYPRE_ParVector *v = (HYPRE_ParVector*)(tmp -> vector); hypre_ParVector *vi; for (i = 0; i < ame_data -> block_size; i++) { vi = (hypre_ParVector*) v[i]; data = hypre_VectorData(hypre_ParVectorLocalVector(vi)); for (j = 0; j < ne; j++) if (edge_bc[j]) data[j] = 0.0; hypre_AMEDiscrDivFreeComponent(esolver, vi); } } } hypre_TFree(edge_bc); return hypre_error_flag; }
void hypre_ParCSRMatrixSplit(hypre_ParCSRMatrix *A, HYPRE_Int nr, HYPRE_Int nc, hypre_ParCSRMatrix **blocks, int interleaved_rows, int interleaved_cols) { HYPRE_Int i, j, k; MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *Adiag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *Aoffd = hypre_ParCSRMatrixOffd(A); HYPRE_Int global_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int global_cols = hypre_ParCSRMatrixGlobalNumCols(A); HYPRE_Int local_rows = hypre_CSRMatrixNumRows(Adiag); HYPRE_Int local_cols = hypre_CSRMatrixNumCols(Adiag); HYPRE_Int offd_cols = hypre_CSRMatrixNumCols(Aoffd); hypre_assert(local_rows % nr == 0 && local_cols % nc == 0); hypre_assert(global_rows % nr == 0 && global_cols % nc == 0); HYPRE_Int block_rows = local_rows / nr; HYPRE_Int block_cols = local_cols / nc; HYPRE_Int num_blocks = nr * nc; /* mark local rows and columns with block number */ HYPRE_Int *row_block_num = hypre_TAlloc(HYPRE_Int, local_rows); HYPRE_Int *col_block_num = hypre_TAlloc(HYPRE_Int, local_cols); for (i = 0; i < local_rows; i++) { row_block_num[i] = interleaved_rows ? (i % nr) : (i / block_rows); } for (i = 0; i < local_cols; i++) { col_block_num[i] = interleaved_cols ? (i % nc) : (i / block_cols); } /* determine the block numbers for offd columns */ HYPRE_Int* offd_col_block_num = hypre_TAlloc(HYPRE_Int, offd_cols); hypre_ParCSRCommHandle *comm_handle; HYPRE_Int *int_buf_data; { /* make sure A has a communication package */ hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } /* calculate the final global column numbers for each block */ HYPRE_Int *count = hypre_CTAlloc(HYPRE_Int, nc); HYPRE_Int *block_global_col = hypre_TAlloc(HYPRE_Int, local_cols); HYPRE_Int first_col = hypre_ParCSRMatrixFirstColDiag(A) / nc; for (i = 0; i < local_cols; i++) { block_global_col[i] = first_col + count[col_block_num[i]]++; } hypre_TFree(count); /* use a Matvec communication pattern to determine offd_col_block_num */ HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); HYPRE_Int start, index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); int_buf_data[index++] = col_block_num[k] + nc*block_global_col[k]; } } hypre_TFree(block_global_col); comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_col_block_num); } /* create the block matrices */ HYPRE_Int num_procs = 1; if (!hypre_ParCSRMatrixAssumedPartition(A)) { hypre_MPI_Comm_size(comm, &num_procs); } HYPRE_Int *row_starts = hypre_TAlloc(HYPRE_Int, num_procs+1); HYPRE_Int *col_starts = hypre_TAlloc(HYPRE_Int, num_procs+1); for (i = 0; i <= num_procs; i++) { row_starts[i] = hypre_ParCSRMatrixRowStarts(A)[i] / nr; col_starts[i] = hypre_ParCSRMatrixColStarts(A)[i] / nc; } for (i = 0; i < num_blocks; i++) { blocks[i] = hypre_ParCSRMatrixCreate(comm, global_rows/nr, global_cols/nc, row_starts, col_starts, 0, 0, 0); } /* split diag part */ hypre_CSRMatrix **csr_blocks = hypre_TAlloc(hypre_CSRMatrix*, nr*nc); hypre_CSRMatrixSplit(Adiag, nr, nc, row_block_num, col_block_num, csr_blocks); for (i = 0; i < num_blocks; i++) { hypre_TFree(hypre_ParCSRMatrixDiag(blocks[i])); hypre_ParCSRMatrixDiag(blocks[i]) = csr_blocks[i]; } /* finish communication, receive offd_col_block_num */ hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(int_buf_data); /* decode global offd column numbers */ HYPRE_Int* offd_global_col = hypre_TAlloc(HYPRE_Int, offd_cols); for (i = 0; i < offd_cols; i++) { offd_global_col[i] = offd_col_block_num[i] / nc; offd_col_block_num[i] %= nc; } /* split offd part */ hypre_CSRMatrixSplit(Aoffd, nr, nc, row_block_num, offd_col_block_num, csr_blocks); for (i = 0; i < num_blocks; i++) { hypre_TFree(hypre_ParCSRMatrixOffd(blocks[i])); hypre_ParCSRMatrixOffd(blocks[i]) = csr_blocks[i]; } hypre_TFree(csr_blocks); hypre_TFree(col_block_num); hypre_TFree(row_block_num); /* update block col-maps */ for (int bi = 0; bi < nr; bi++) { for (int bj = 0; bj < nc; bj++) { hypre_ParCSRMatrix *block = blocks[bi*nc + bj]; hypre_CSRMatrix *block_offd = hypre_ParCSRMatrixOffd(block); HYPRE_Int block_offd_cols = hypre_CSRMatrixNumCols(block_offd); HYPRE_Int *block_col_map = hypre_TAlloc(HYPRE_Int, block_offd_cols); for (i = j = 0; i < offd_cols; i++) { HYPRE_Int bn = offd_col_block_num[i]; if (bn == bj) { block_col_map[j++] = offd_global_col[i]; } } hypre_assert(j == block_offd_cols); hypre_ParCSRMatrixColMapOffd(block) = block_col_map; } } hypre_TFree(offd_global_col); hypre_TFree(offd_col_block_num); /* finish the new matrices, make them own all the stuff */ for (i = 0; i < num_blocks; i++) { hypre_ParCSRMatrixSetNumNonzeros(blocks[i]); hypre_MatvecCommPkgCreate(blocks[i]); hypre_ParCSRMatrixOwnsData(blocks[i]) = 1; /* only the first block will own the row/col_starts */ hypre_ParCSRMatrixOwnsRowStarts(blocks[i]) = !i; hypre_ParCSRMatrixOwnsColStarts(blocks[i]) = !i; } }
/* Function: hypre_ParCSRMatrixEliminateAAe (input) (output) / A_ii | A_ib \ / A_ii | 0 \ A = | -----+----- | ---> | -----+----- | \ A_bi | A_bb / \ 0 | I / / 0 | A_ib \ Ae = | -----+--------- | \ A_bi | A_bb - I / */ void hypre_ParCSRMatrixEliminateAAe(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **Ae, HYPRE_Int num_rowscols_to_elim, HYPRE_Int *rowscols_to_elim) { HYPRE_Int i, j, k; hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int A_diag_nrows = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int A_offd_ncols = hypre_CSRMatrixNumCols(A_offd); *Ae = hypre_ParCSRMatrixCreate(hypre_ParCSRMatrixComm(A), hypre_ParCSRMatrixGlobalNumRows(A), hypre_ParCSRMatrixGlobalNumCols(A), hypre_ParCSRMatrixRowStarts(A), hypre_ParCSRMatrixColStarts(A), 0, 0, 0); hypre_ParCSRMatrixSetRowStartsOwner(*Ae, 0); hypre_ParCSRMatrixSetColStartsOwner(*Ae, 0); hypre_CSRMatrix *Ae_diag = hypre_ParCSRMatrixDiag(*Ae); hypre_CSRMatrix *Ae_offd = hypre_ParCSRMatrixOffd(*Ae); HYPRE_Int Ae_offd_ncols; HYPRE_Int num_offd_cols_to_elim; HYPRE_Int *offd_cols_to_elim; HYPRE_Int *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int *Ae_col_map_offd; HYPRE_Int *col_mark; HYPRE_Int *col_remap; /* figure out which offd cols should be eliminated */ { hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int num_sends, *int_buf_data; HYPRE_Int index, start; HYPRE_Int *eliminate_row = hypre_CTAlloc(HYPRE_Int, A_diag_nrows); HYPRE_Int *eliminate_col = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); /* make sure A has a communication package */ comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } /* which of the local rows are to be eliminated */ for (i = 0; i < A_diag_nrows; i++) { eliminate_row[i] = 0; } for (i = 0; i < num_rowscols_to_elim; i++) { eliminate_row[rowscols_to_elim[i]] = 1; } /* use a Matvec communication pattern to find (in eliminate_col) which of the local offd columns are to be eliminated */ num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); int_buf_data[index++] = eliminate_row[k]; } } comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, eliminate_col); /* eliminate diagonal part, overlapping it with communication */ hypre_CSRMatrixElimCreate(A_diag, Ae_diag, num_rowscols_to_elim, rowscols_to_elim, num_rowscols_to_elim, rowscols_to_elim, NULL); hypre_CSRMatrixEliminateRowsCols(A_diag, Ae_diag, num_rowscols_to_elim, rowscols_to_elim, num_rowscols_to_elim, rowscols_to_elim, 1, NULL); hypre_CSRMatrixReorder(Ae_diag); /* finish the communication */ hypre_ParCSRCommHandleDestroy(comm_handle); /* received eliminate_col[], count offd columns to eliminate */ num_offd_cols_to_elim = 0; for (i = 0; i < A_offd_ncols; i++) { if (eliminate_col[i]) { num_offd_cols_to_elim++; } } offd_cols_to_elim = hypre_CTAlloc(HYPRE_Int, num_offd_cols_to_elim); /* get a list of offd column indices and coefs */ num_offd_cols_to_elim = 0; for (i = 0; i < A_offd_ncols; i++) { if (eliminate_col[i]) { offd_cols_to_elim[num_offd_cols_to_elim++] = i; } } hypre_TFree(int_buf_data); hypre_TFree(eliminate_row); hypre_TFree(eliminate_col); } /* eliminate the off-diagonal part */ col_mark = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); col_remap = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); hypre_CSRMatrixElimCreate(A_offd, Ae_offd, num_rowscols_to_elim, rowscols_to_elim, num_offd_cols_to_elim, offd_cols_to_elim, col_mark); for (i = k = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { col_remap[i] = k++; } } hypre_CSRMatrixEliminateRowsCols(A_offd, Ae_offd, num_rowscols_to_elim, rowscols_to_elim, num_offd_cols_to_elim, offd_cols_to_elim, 0, col_remap); /* create col_map_offd for Ae */ Ae_offd_ncols = 0; for (i = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { Ae_offd_ncols++; } } Ae_col_map_offd = hypre_CTAlloc(HYPRE_Int, Ae_offd_ncols); Ae_offd_ncols = 0; for (i = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { Ae_col_map_offd[Ae_offd_ncols++] = A_col_map_offd[i]; } } hypre_ParCSRMatrixColMapOffd(*Ae) = Ae_col_map_offd; hypre_CSRMatrixNumCols(Ae_offd) = Ae_offd_ncols; hypre_TFree(col_remap); hypre_TFree(col_mark); hypre_TFree(offd_cols_to_elim); hypre_ParCSRMatrixSetNumNonzeros(*Ae); hypre_MatvecCommPkgCreate(*Ae); }
/* Function: hypre_ParCSRMatrixEliminateAXB This function eliminates the global rows and columns of a matrix A corresponding to given lists of sorted (!) local row numbers, so that the solution to the system A*X = B is X_b for the given rows. The elimination is done as follows: (input) (output) / A_ii | A_ib \ / A_ii | 0 \ A = | -----+----- | ---> | -----+----- | \ A_bi | A_bb / \ 0 | I / / X_i \ / X_i \ X = | --- | ---> | --- | (no change) \ X_b / \ X_b / / B_i \ / B_i - A_ib * X_b \ B = | --- | ---> | ---------------- | \ B_b / \ X_b / */ void hypre_ParCSRMatrixEliminateAXB(hypre_ParCSRMatrix *A, HYPRE_Int num_rowscols_to_elim, HYPRE_Int *rowscols_to_elim, hypre_ParVector *X, hypre_ParVector *B) { hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int diag_nrows = hypre_CSRMatrixNumRows(diag); HYPRE_Int offd_ncols = hypre_CSRMatrixNumCols(offd); hypre_Vector *Xlocal = hypre_ParVectorLocalVector(X); hypre_Vector *Blocal = hypre_ParVectorLocalVector(B); HYPRE_Real *Bdata = hypre_VectorData(Blocal); HYPRE_Real *Xdata = hypre_VectorData(Xlocal); HYPRE_Int num_offd_cols_to_elim; HYPRE_Int *offd_cols_to_elim; HYPRE_Real *eliminate_coefs; /* figure out which offd cols should be eliminated and with what coef */ hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int num_sends; HYPRE_Int index, start; HYPRE_Int i, j, k, irow; HYPRE_Real *eliminate_row = hypre_CTAlloc(HYPRE_Real, diag_nrows); HYPRE_Real *eliminate_col = hypre_CTAlloc(HYPRE_Real, offd_ncols); HYPRE_Real *buf_data, coef; /* make sure A has a communication package */ comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } /* HACK: rows that shouldn't be eliminated are marked with quiet NaN; those that should are set to the boundary value from X; this is to avoid sending complex type (int+double) or communicating twice. */ for (i = 0; i < diag_nrows; i++) { eliminate_row[i] = std::numeric_limits<HYPRE_Real>::quiet_NaN(); } for (i = 0; i < num_rowscols_to_elim; i++) { irow = rowscols_to_elim[i]; eliminate_row[irow] = Xdata[irow]; } /* use a Matvec communication pattern to find (in eliminate_col) which of the local offd columns are to be eliminated */ num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j); buf_data[index++] = eliminate_row[k]; } } comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, buf_data, eliminate_col); /* do sequential part of the elimination while stuff is getting sent */ hypre_CSRMatrixEliminateAXB(diag, num_rowscols_to_elim, rowscols_to_elim, Xlocal, Blocal); /* finish the communication */ hypre_ParCSRCommHandleDestroy(comm_handle); /* received eliminate_col[], count offd columns to eliminate */ num_offd_cols_to_elim = 0; for (i = 0; i < offd_ncols; i++) { coef = eliminate_col[i]; if (coef == coef) // test for NaN { num_offd_cols_to_elim++; } } offd_cols_to_elim = hypre_CTAlloc(HYPRE_Int, num_offd_cols_to_elim); eliminate_coefs = hypre_CTAlloc(HYPRE_Real, num_offd_cols_to_elim); /* get a list of offd column indices and coefs */ num_offd_cols_to_elim = 0; for (i = 0; i < offd_ncols; i++) { coef = eliminate_col[i]; if (coef == coef) // test for NaN { offd_cols_to_elim[num_offd_cols_to_elim] = i; eliminate_coefs[num_offd_cols_to_elim] = coef; num_offd_cols_to_elim++; } } hypre_TFree(buf_data); hypre_TFree(eliminate_row); hypre_TFree(eliminate_col); /* eliminate the off-diagonal part */ hypre_CSRMatrixEliminateOffdColsAXB(offd, num_offd_cols_to_elim, offd_cols_to_elim, eliminate_coefs, Blocal); hypre_CSRMatrixEliminateOffdRowsAXB(offd, num_rowscols_to_elim, rowscols_to_elim); /* set boundary values in the rhs */ for (int i = 0; i < num_rowscols_to_elim; i++) { irow = rowscols_to_elim[i]; Bdata[irow] = Xdata[irow]; } hypre_TFree(offd_cols_to_elim); hypre_TFree(eliminate_coefs); }
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { hypre_CSRMatrix *matrix; hypre_CSRMatrix *matrix1; hypre_ParCSRMatrix *par_matrix; hypre_Vector *x_local; hypre_Vector *y_local; hypre_Vector *y2_local; hypre_ParVector *x; hypre_ParVector *x2; hypre_ParVector *y; hypre_ParVector *y2; HYPRE_Int vecstride_x, idxstride_x, vecstride_y, idxstride_y; HYPRE_Int num_procs, my_id; HYPRE_Int local_size; HYPRE_Int num_vectors; HYPRE_Int global_num_rows, global_num_cols; HYPRE_Int first_index; HYPRE_Int i, j, ierr=0; double *data, *data2; HYPRE_Int *row_starts, *col_starts; char file_name[80]; /* Initialize MPI */ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &my_id); hypre_printf(" my_id: %d num_procs: %d\n", my_id, num_procs); if (my_id == 0) { matrix = hypre_CSRMatrixRead("input"); hypre_printf(" read input\n"); } row_starts = NULL; col_starts = NULL; par_matrix = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, matrix, row_starts, col_starts); hypre_printf(" converted\n"); matrix1 = hypre_ParCSRMatrixToCSRMatrixAll(par_matrix); hypre_sprintf(file_name,"matrix1.%d",my_id); if (matrix1) hypre_CSRMatrixPrint(matrix1, file_name); hypre_ParCSRMatrixPrint(par_matrix,"matrix"); hypre_ParCSRMatrixPrintIJ(par_matrix,0,0,"matrixIJ"); par_matrix = hypre_ParCSRMatrixRead(hypre_MPI_COMM_WORLD,"matrix"); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix); hypre_printf(" global_num_cols %d\n", global_num_cols); global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix); col_starts = hypre_ParCSRMatrixColStarts(par_matrix); first_index = col_starts[my_id]; local_size = col_starts[my_id+1] - first_index; num_vectors = 3; x = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols, col_starts, num_vectors ); hypre_ParVectorSetPartitioningOwner(x,0); hypre_ParVectorInitialize(x); x_local = hypre_ParVectorLocalVector(x); data = hypre_VectorData(x_local); vecstride_x = hypre_VectorVectorStride(x_local); idxstride_x = hypre_VectorIndexStride(x_local); for ( j=0; j<num_vectors; ++j ) for (i=0; i < local_size; i++) data[i*idxstride_x + j*vecstride_x] = first_index+i+1 + 100*j; x2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols, col_starts, num_vectors ); hypre_ParVectorSetPartitioningOwner(x2,0); hypre_ParVectorInitialize(x2); hypre_ParVectorSetConstantValues(x2,2.0); row_starts = hypre_ParCSRMatrixRowStarts(par_matrix); first_index = row_starts[my_id]; local_size = row_starts[my_id+1] - first_index; y = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows, row_starts, num_vectors ); hypre_ParVectorSetPartitioningOwner(y,0); hypre_ParVectorInitialize(y); y_local = hypre_ParVectorLocalVector(y); y2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows, row_starts, num_vectors ); hypre_ParVectorSetPartitioningOwner(y2,0); hypre_ParVectorInitialize(y2); y2_local = hypre_ParVectorLocalVector(y2); data2 = hypre_VectorData(y2_local); vecstride_y = hypre_VectorVectorStride(y2_local); idxstride_y = hypre_VectorIndexStride(y2_local); for ( j=0; j<num_vectors; ++j ) for (i=0; i < local_size; i++) data2[i*idxstride_y+j*vecstride_y] = first_index+i+1 + 100*j; hypre_ParVectorSetConstantValues(y,1.0); hypre_printf(" initialized vectors, first_index=%i\n", first_index); hypre_ParVectorPrint(x, "vectorx"); hypre_ParVectorPrint(y, "vectory"); hypre_MatvecCommPkgCreate(par_matrix); hypre_ParCSRMatrixMatvec ( 1.0, par_matrix, x, 1.0, y); hypre_printf(" did matvec\n"); hypre_ParVectorPrint(y, "result"); ierr = hypre_ParCSRMatrixMatvecT ( 1.0, par_matrix, y2, 1.0, x2); hypre_printf(" did matvecT %d\n", ierr); hypre_ParVectorPrint(x2, "transp"); hypre_ParCSRMatrixDestroy(par_matrix); hypre_ParVectorDestroy(x); hypre_ParVectorDestroy(x2); hypre_ParVectorDestroy(y); hypre_ParVectorDestroy(y2); if (my_id == 0) hypre_CSRMatrixDestroy(matrix); if (matrix1) hypre_CSRMatrixDestroy(matrix1); /* Finalize MPI */ hypre_MPI_Finalize(); return 0; }