/*--------------------------------------------------------------------------
 * hypre_IJMatrixInitializePETSc
 *
 * Initializes the storage behind an IJ matrix: the auxiliary (translator)
 * matrix first, then the ParCSR matrix held as local storage.
 *
 * If the auxiliary matrix has a negative local row or column count, that
 * count is first taken from the diag block of the ParCSR matrix.
 *
 * Returns the sum of the error codes of the two initialize calls.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_IJMatrixInitializePETSc(hypre_IJMatrix *matrix)
{
   hypre_ParCSRMatrix    *par_matrix = hypre_IJMatrixLocalStorage(matrix);
   hypre_AuxParCSRMatrix *aux_matrix = hypre_IJMatrixTranslator(matrix);
   HYPRE_Int              ierr;

   /* No per-row nonzero estimate is computed here: nothing consumes it, and
    * deriving it would require dividing by the global row count, which may
    * be zero for an empty matrix. */

   if (hypre_AuxParCSRMatrixLocalNumRows(aux_matrix) < 0)
   {
      hypre_AuxParCSRMatrixLocalNumRows(aux_matrix) =
         hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(par_matrix));
   }

   if (hypre_AuxParCSRMatrixLocalNumCols(aux_matrix) < 0)
   {
      hypre_AuxParCSRMatrixLocalNumCols(aux_matrix) =
         hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(par_matrix));
   }

   ierr  = hypre_AuxParCSRMatrixInitialize(aux_matrix);
   ierr += hypre_ParCSRMatrixInitialize(par_matrix);

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_ParMultiVectorCreate
 *
 * Allocates a parallel multivector descriptor on communicator comm.
 *
 * global_size   global length stored in the descriptor
 * partitioning  array indexed by rank (entries rank and rank+1 are read);
 *               when NULL, one is generated with hypre_GeneratePartitioning
 * num_vectors   number of vectors stored in the descriptor
 *
 * The local sequential multivector is created with
 * partitioning[rank+1] - partitioning[rank] rows. The result is flagged as
 * owning both its data and its partitioning.
 *--------------------------------------------------------------------------*/
hypre_ParMultiVector *
hypre_ParMultiVectorCreate(MPI_Comm   comm,
                           HYPRE_Int  global_size,
                           HYPRE_Int *partitioning,
                           HYPRE_Int  num_vectors)
{
   hypre_ParMultiVector *mv = hypre_CTAlloc(hypre_ParMultiVector, 1);
   HYPRE_Int             nprocs, rank;
   HYPRE_Int             first, local_len;

   hypre_MPI_Comm_rank(comm, &rank);

   if (partitioning == NULL)
   {
      hypre_MPI_Comm_size(comm, &nprocs);
      hypre_GeneratePartitioning(global_size, nprocs, &partitioning);
   }

   first     = partitioning[rank];
   local_len = partitioning[rank + 1] - first;

   hypre_ParMultiVectorComm(vector_alias_guard(mv)) = comm;
   hypre_ParMultiVectorGlobalSize(mv)   = global_size;
   hypre_ParMultiVectorPartitioning(mv) = partitioning;
   hypre_ParMultiVectorNumVectors(mv)   = num_vectors;
   hypre_ParMultiVectorLocalVector(mv)  =
      hypre_SeqMultivectorCreate(local_len, num_vectors);
   hypre_ParMultiVectorFirstIndex(mv)   = first;

   /* same two ownership defaults as the single-vector constructor */
   hypre_ParMultiVectorOwnsData(mv)         = 1;
   hypre_ParMultiVectorOwnsPartitioning(mv) = 1;

   return mv;
}
/*--------------------------------------------------------------------------
 * hypre_ParKrylovCommInfo
 *
 * Reports the rank (my_id) and size (num_procs) of the communicator stored
 * in A, which is treated as a hypre_ParCSRMatrix. Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParKrylovCommInfo( void *A, HYPRE_Int *my_id, HYPRE_Int *num_procs )
{
   hypre_ParCSRMatrix *par_A       = (hypre_ParCSRMatrix *) A;
   MPI_Comm            matrix_comm = hypre_ParCSRMatrixComm(par_A);

   hypre_MPI_Comm_size(matrix_comm, num_procs);
   hypre_MPI_Comm_rank(matrix_comm, my_id);

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_StructKrylovCommInfo
 *
 * Reports the rank (my_id) and size (num_procs) of the communicator stored
 * in A, which is treated as a hypre_StructMatrix. Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_StructKrylovCommInfo( void *A, HYPRE_Int *my_id, HYPRE_Int *num_procs )
{
   hypre_StructMatrix *struct_A    = (hypre_StructMatrix *) A;
   MPI_Comm            matrix_comm = hypre_StructMatrixComm(struct_A);

   hypre_MPI_Comm_size(matrix_comm, num_procs);
   hypre_MPI_Comm_rank(matrix_comm, my_id);

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_ParVectorRead
 *
 * Reads a parallel vector from per-rank files:
 *   "<file_name>.INFO.<rank>"  global size followed by partitioning entries
 *                              (2 entries with HYPRE_NO_GLOBAL_PARTITION,
 *                              otherwise one per process)
 *   "<file_name>.<rank>"       local data, read by hypre_SeqVectorRead
 *
 * Returns the new vector, flagged as owning its data and partitioning.
 * Returns NULL, with an error recorded against argument 2, when the INFO
 * file cannot be opened.
 *
 * NOTE(review): new_file_name is a fixed 80-byte buffer filled with an
 * unbounded sprintf; callers are presumably expected to pass short names.
 *--------------------------------------------------------------------------*/
hypre_ParVector *
hypre_ParVectorRead( MPI_Comm comm, const char *file_name )
{
   char             new_file_name[80];
   hypre_ParVector *par_vector;
   HYPRE_Int        my_id, num_procs;
   HYPRE_Int       *partitioning;
   HYPRE_Int        global_size, i;
   FILE            *fp;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   /* Open the INFO file before allocating anything, so that a missing
    * file can be reported without leaking or dereferencing NULL. */
   hypre_sprintf(new_file_name, "%s.INFO.%d", file_name, my_id);
   fp = fopen(new_file_name, "r");
   if (fp == NULL)
   {
      hypre_error_in_arg(2);
      return NULL;
   }

   partitioning = hypre_CTAlloc(HYPRE_Int, num_procs+1);

   hypre_fscanf(fp, "%d\n", &global_size);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   for (i = 0; i < 2; i++)
      hypre_fscanf(fp, "%d\n", &partitioning[i]);
   fclose(fp);
#else
   for (i = 0; i < num_procs; i++)
      hypre_fscanf(fp, "%d\n", &partitioning[i]);
   fclose(fp);
   /* the file stores only the starts; the final entry is the global size */
   partitioning[num_procs] = global_size;
#endif

   par_vector = hypre_CTAlloc(hypre_ParVector, 1);

   hypre_ParVectorComm(par_vector) = comm;
   hypre_ParVectorGlobalSize(par_vector) = global_size;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   hypre_ParVectorFirstIndex(par_vector) = partitioning[0];
   hypre_ParVectorLastIndex(par_vector) = partitioning[1]-1;
#else
   hypre_ParVectorFirstIndex(par_vector) = partitioning[my_id];
   hypre_ParVectorLastIndex(par_vector) = partitioning[my_id+1]-1;
#endif

   hypre_ParVectorPartitioning(par_vector) = partitioning;

   hypre_ParVectorOwnsData(par_vector) = 1;
   hypre_ParVectorOwnsPartitioning(par_vector) = 1;

   hypre_sprintf(new_file_name, "%s.%d", file_name, my_id);
   hypre_ParVectorLocalVector(par_vector) = hypre_SeqVectorRead(new_file_name);

   /* multivector code not written yet */
   hypre_assert( hypre_ParVectorNumVectors(par_vector) == 1 );

   return par_vector;
}
/*--------------------------------------------------------------------------
 * hypre_ParVectorCreate
 *
 * Allocates a parallel vector descriptor on communicator comm.
 *
 * global_size   global length; a negative value records an error against
 *               argument 2 and yields NULL
 * partitioning  when NULL, a partitioning is generated (a local one with
 *               HYPRE_NO_GLOBAL_PARTITION, otherwise a global one)
 *
 * With HYPRE_NO_GLOBAL_PARTITION the local range is read from entries 0 and
 * 1 of the partitioning; otherwise from entries rank and rank+1. The result
 * is flagged as owning both its data and its partitioning.
 *--------------------------------------------------------------------------*/
hypre_ParVector *
hypre_ParVectorCreate( MPI_Comm   comm,
                       HYPRE_Int  global_size,
                       HYPRE_Int *partitioning )
{
   hypre_ParVector *vector;
   HYPRE_Int        nprocs, rank;
   HYPRE_Int        slot;              /* partitioning entry holding our start */
   HYPRE_Int        first, past_last;

   if (global_size < 0)
   {
      hypre_error_in_arg(2);
      return NULL;
   }

   vector = hypre_CTAlloc(hypre_ParVector, 1);
   hypre_MPI_Comm_rank(comm, &rank);

   if (partitioning == NULL)
   {
      hypre_MPI_Comm_size(comm, &nprocs);
#ifdef HYPRE_NO_GLOBAL_PARTITION
      hypre_GenerateLocalPartitioning(global_size, nprocs, rank, &partitioning);
#else
      hypre_GeneratePartitioning(global_size, nprocs, &partitioning);
#endif
   }

#ifdef HYPRE_NO_GLOBAL_PARTITION
   slot = 0;
#else
   slot = rank;
#endif
   first     = partitioning[slot];
   past_last = partitioning[slot + 1];

   hypre_ParVectorAssumedPartition(vector) = NULL;

   hypre_ParVectorComm(vector)         = comm;
   hypre_ParVectorGlobalSize(vector)   = global_size;
   hypre_ParVectorFirstIndex(vector)   = first;
   hypre_ParVectorLastIndex(vector)    = past_last - 1;
   hypre_ParVectorPartitioning(vector) = partitioning;
   hypre_ParVectorLocalVector(vector)  = hypre_SeqVectorCreate(past_last - first);

   /* ownership defaults */
   hypre_ParVectorOwnsData(vector)         = 1;
   hypre_ParVectorOwnsPartitioning(vector) = 1;

   return vector;
}
/*--------------------------------------------------------------------------
 * Test driver for hypre_ParBooleanAAt.
 *
 * Rank 0 reads a sequential boolean matrix from the file "inpr"; the matrix
 * is converted to a parallel boolean matrix A, A is echoed to "echo_A" /
 * "echo_AIJ", C = hypre_ParBooleanAAt(A) is computed and written to
 * "result" / "resultIJ".
 *--------------------------------------------------------------------------*/
HYPRE_Int
main( HYPRE_Int argc, char *argv[] )
{
   hypre_ParCSRBooleanMatrix *A;
   hypre_ParCSRBooleanMatrix *C;
   /* Only rank 0 reads the sequential matrix; every other rank must still
    * pass a well-defined pointer value to the conversion routine. */
   hypre_CSRBooleanMatrix    *As = NULL;
   HYPRE_Int                 *row_starts = NULL;
   HYPRE_Int                 *col_starts = NULL;
   HYPRE_Int                  num_procs, my_id;

   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &my_id);

   if (my_id == 0)
   {
      As = hypre_CSRBooleanMatrixRead("inpr");
      hypre_printf(" read input A\n");
   }

   /* NULL row/column starts are passed through to the conversion routine */
   A = hypre_CSRBooleanMatrixToParCSRBooleanMatrix(hypre_MPI_COMM_WORLD, As,
                                                   row_starts, col_starts);

   hypre_ParCSRBooleanMatrixPrint(A, "echo_A" );
   hypre_ParCSRBooleanMatrixPrintIJ(A, "echo_AIJ" );

   C = hypre_ParBooleanAAt( A );
   hypre_ParCSRBooleanMatrixPrint(C, "result");
   hypre_ParCSRBooleanMatrixPrintIJ(C, "resultIJ");

   if (my_id == 0)
   {
      hypre_CSRBooleanMatrixDestroy(As);
   }
   hypre_ParCSRBooleanMatrixDestroy(A);
   hypre_ParCSRBooleanMatrixDestroy(C);

   hypre_MPI_Finalize();

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRBooleanMatrixPrint
 *
 * Writes a parallel boolean matrix to per-rank files:
 *   "<file_name>.D.<rank>"     diag block
 *   "<file_name>.O.<rank>"     offd block (only when it has columns)
 *   "<file_name>.INFO.<rank>"  global rows, global cols, number of offd
 *                              columns, one "row_start col_start" pair per
 *                              process, then the offd column map
 *
 * Returns 0 on success, or 1 when the INFO file cannot be opened (the D/O
 * files have already been written at that point).
 *
 * NOTE(review): the three name buffers are fixed at 80 bytes and filled
 * with an unbounded sprintf; callers are presumably expected to pass short
 * names.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParCSRBooleanMatrixPrint( hypre_ParCSRBooleanMatrix *matrix,
                                const char                *file_name )
{
   MPI_Comm   comm            = hypre_ParCSRBooleanMatrix_Get_Comm(matrix);
   HYPRE_Int  global_num_rows = hypre_ParCSRBooleanMatrix_Get_GlobalNRows(matrix);
   HYPRE_Int  global_num_cols = hypre_ParCSRBooleanMatrix_Get_GlobalNCols(matrix);
   HYPRE_Int *col_map_offd    = hypre_ParCSRBooleanMatrix_Get_ColMapOffd(matrix);
   HYPRE_Int *row_starts      = hypre_ParCSRBooleanMatrix_Get_RowStarts(matrix);
   HYPRE_Int *col_starts      = hypre_ParCSRBooleanMatrix_Get_ColStarts(matrix);
   HYPRE_Int  my_id, i, num_procs;
   char       new_file_d[80], new_file_o[80], new_file_info[80];
   HYPRE_Int  ierr = 0;
   FILE      *fp;
   HYPRE_Int  num_cols_offd = 0;

   if (hypre_ParCSRBooleanMatrix_Get_Offd(matrix))
      num_cols_offd =
         hypre_CSRBooleanMatrix_Get_NCols(hypre_ParCSRBooleanMatrix_Get_Offd(matrix));

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   hypre_sprintf(new_file_d, "%s.D.%d", file_name, my_id);
   hypre_sprintf(new_file_o, "%s.O.%d", file_name, my_id);
   hypre_sprintf(new_file_info, "%s.INFO.%d", file_name, my_id);

   hypre_CSRBooleanMatrixPrint(hypre_ParCSRBooleanMatrix_Get_Diag(matrix), new_file_d);
   if (num_cols_offd != 0)
      hypre_CSRBooleanMatrixPrint(hypre_ParCSRBooleanMatrix_Get_Offd(matrix),
                                  new_file_o);

   fp = fopen(new_file_info, "w");
   if (fp == NULL)
   {
      /* cannot create the INFO file: report failure instead of writing
       * through a NULL stream */
      ierr = 1;
      return ierr;
   }

   hypre_fprintf(fp, "%d\n", global_num_rows);
   hypre_fprintf(fp, "%d\n", global_num_cols);
   hypre_fprintf(fp, "%d\n", num_cols_offd);
   for (i = 0; i < num_procs; i++)
      hypre_fprintf(fp, "%d %d\n", row_starts[i], col_starts[i]);
   for (i = 0; i < num_cols_offd; i++)
      hypre_fprintf(fp, "%d\n", col_map_offd[i]);
   fclose(fp);

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_ParVectorPrint
 *
 * Writes a parallel vector to per-rank files:
 *   "<file_name>.<rank>"       local data, written by hypre_SeqVectorPrint
 *   "<file_name>.INFO.<rank>"  global size followed by partitioning entries
 *                              (2 entries with HYPRE_NO_GLOBAL_PARTITION,
 *                              otherwise one per process)
 *
 * Records an error against argument 1 when vector is NULL, and against
 * argument 2 when the INFO file cannot be opened. Returns hypre_error_flag.
 *
 * NOTE(review): new_file_name is a fixed 80-byte buffer filled with an
 * unbounded sprintf; callers are presumably expected to pass short names.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParVectorPrint( hypre_ParVector *vector, const char *file_name )
{
   char          new_file_name[80];
   hypre_Vector *local_vector;
   MPI_Comm      comm;
   HYPRE_Int     my_id, num_procs, i;
   HYPRE_Int    *partitioning;
   HYPRE_Int     global_size;
   FILE         *fp;

   if (!vector)
   {
      hypre_error_in_arg(1);
      return hypre_error_flag;
   }

   local_vector = hypre_ParVectorLocalVector(vector);
   comm         = hypre_ParVectorComm(vector);
   partitioning = hypre_ParVectorPartitioning(vector);
   global_size  = hypre_ParVectorGlobalSize(vector);

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   hypre_sprintf(new_file_name, "%s.%d", file_name, my_id);
   hypre_SeqVectorPrint(local_vector, new_file_name);

   hypre_sprintf(new_file_name, "%s.INFO.%d", file_name, my_id);
   fp = fopen(new_file_name, "w");
   if (fp == NULL)
   {
      /* cannot create the INFO file: report it instead of writing through
       * a NULL stream */
      hypre_error_in_arg(2);
      return hypre_error_flag;
   }

   hypre_fprintf(fp, "%d\n", global_size);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   for (i = 0; i < 2; i++)
      hypre_fprintf(fp, "%d\n", partitioning[i]);
#else
   for (i = 0; i < num_procs; i++)
      hypre_fprintf(fp, "%d\n", partitioning[i]);
#endif
   fclose(fp);

   return hypre_error_flag;
}
/******************************************************************************
 *
 * hypre_IJVectorCreatePar
 *
 * Builds a ParVector for an IJ vector and stores it as the IJ vector's
 * object.
 *
 * IJpartitioning is copied into a freshly allocated array and shifted so
 * that it is zero-based: 2 entries shifted by the IJ vector's global first
 * row with HYPRE_NO_GLOBAL_PARTITION, otherwise num_procs+1 entries shifted
 * by IJpartitioning[0]. The shifted copy is handed to hypre_ParVectorCreate.
 *
 * Returns hypre_error_flag.
 *
 *****************************************************************************/
HYPRE_Int
hypre_IJVectorCreatePar(hypre_IJVector *vector, HYPRE_Int *IJpartitioning)
{
   MPI_Comm   comm = hypre_IJVectorComm(vector);
   HYPRE_Int  nprocs;
   HYPRE_Int  shift;        /* global index of the first row            */
   HYPRE_Int  global_len;   /* global length handed to the ParVector    */
   HYPRE_Int  n_entries;    /* number of partitioning entries to copy   */
   HYPRE_Int *shifted;
   HYPRE_Int  idx;

   hypre_MPI_Comm_size(comm, &nprocs);

#ifdef HYPRE_NO_GLOBAL_PARTITION
   shift      = hypre_IJVectorGlobalFirstRow(vector);
   global_len = hypre_IJVectorGlobalNumRows(vector);
   n_entries  = 2;
#else
   shift      = IJpartitioning[0];
   global_len = IJpartitioning[nprocs] - shift;
   n_entries  = nprocs + 1;
#endif

   shifted = hypre_CTAlloc(HYPRE_Int, n_entries);

   /* zero-based partitioning for the ParVector object */
   idx = 0;
   while (idx < n_entries)
   {
      shifted[idx] = IJpartitioning[idx] - shift;
      idx++;
   }

   hypre_IJVectorObject(vector) =
      hypre_ParVectorCreate(comm, global_len, shifted);

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_BoomerAMGCreateNodalA
 *
 * Builds a "nodal" matrix AN from A by collapsing rows and columns in
 * groups of num_functions (local index i maps to node i/num_functions) and
 * returns it through AN_ptr.
 *
 * Each AN entry is reduced from the A entries that map to it, selected by
 * mode = |option|:
 *   1  Frobenius norm (sqrt of the sum of squares)
 *   2  sum of absolute values divided by num_functions^2
 *   3  entry of largest magnitude (signed value kept)
 *   4  infinity norm (largest per-row sum of absolute values)
 *   6  plain sum of the entries
 * No other mode value is handled by the switch statements.
 *
 * diag_option:
 *   1  the first entry of each AN diag row is replaced by the negative sum
 *      of the other entries of that row (diag and offd parts)
 *   2  the first entry of each AN diag row is negated
 * The code treats the first stored entry of each diag row as the diagonal.
 *
 * Returns 87 when a row start of A is not a multiple of num_functions,
 * otherwise ierr (never modified from 0 in this function).
 *
 * Side effects on A: a comm package is created for A if it has none, and
 * near the end A's offd column map, offd column indices, receive starts and
 * send map may be rewritten in place (see the note before that section).
 *
 * NOTE(review): dof_func is not referenced in the body.
 * NOTE(review): the early "return (87)" paths do not free row_starts_AN.
 * NOTE(review): col_map_offd is assumed sorted ascending (duplicates of a
 * node are detected by comparing with the previous entry) - confirm.
 *--------------------------------------------------------------------------*/
HYPRE_Int hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix *A, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int option, HYPRE_Int diag_option, hypre_ParCSRMatrix **AN_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); double *A_diag_data = hypre_CSRMatrixData(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); double *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(A); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int num_variables = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int num_nonzeros_offd = 0; HYPRE_Int num_cols_offd = 0; hypre_ParCSRMatrix *AN; hypre_CSRMatrix *AN_diag; HYPRE_Int *AN_diag_i; HYPRE_Int *AN_diag_j; double *AN_diag_data; hypre_CSRMatrix *AN_offd; HYPRE_Int *AN_offd_i; HYPRE_Int *AN_offd_j; double *AN_offd_data; HYPRE_Int *col_map_offd_AN; HYPRE_Int *new_col_map_offd; HYPRE_Int *row_starts_AN; HYPRE_Int AN_num_nonzeros_diag = 0; HYPRE_Int AN_num_nonzeros_offd = 0; HYPRE_Int num_cols_offd_AN; HYPRE_Int new_num_cols_offd; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); HYPRE_Int num_sends; HYPRE_Int num_recvs; HYPRE_Int *send_procs; HYPRE_Int *send_map_starts; HYPRE_Int *send_map_elmts; HYPRE_Int *new_send_map_elmts; HYPRE_Int *recv_procs; HYPRE_Int *recv_vec_starts; hypre_ParCSRCommPkg *comm_pkg_AN; HYPRE_Int *send_procs_AN; HYPRE_Int *send_map_starts_AN; HYPRE_Int *send_map_elmts_AN; HYPRE_Int *recv_procs_AN; HYPRE_Int *recv_vec_starts_AN; HYPRE_Int i, j, k, k_map; HYPRE_Int ierr = 0; HYPRE_Int index, row; HYPRE_Int start_index; HYPRE_Int num_procs; HYPRE_Int node, cnt; HYPRE_Int mode; HYPRE_Int new_send_elmts_size; HYPRE_Int global_num_nodes; HYPRE_Int num_nodes; HYPRE_Int num_fun2; HYPRE_Int *map_to_node; 
HYPRE_Int *map_to_map; HYPRE_Int *counter; double sum; double *data; hypre_MPI_Comm_size(comm,&num_procs); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } /* only the magnitude of option selects the reduction rule used below */ mode = fabs(option); comm_pkg_AN = NULL; col_map_offd_AN = NULL; #ifdef HYPRE_NO_GLOBAL_PARTITION row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2); for (i=0; i < 2; i++) { row_starts_AN[i] = row_starts[i]/num_functions; if (row_starts_AN[i]*num_functions < row_starts[i]) { hypre_printf("nodes not properly aligned or incomplete info!\n"); return (87); } } global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions; #else row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1); for (i=0; i < num_procs+1; i++) { row_starts_AN[i] = row_starts[i]/num_functions; if (row_starts_AN[i]*num_functions < row_starts[i]) { hypre_printf("nodes not properly aligned or incomplete info!\n"); return (87); } } global_num_nodes = row_starts_AN[num_procs]; #endif num_nodes = num_variables/num_functions; num_fun2 = num_functions*num_functions; map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables); AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); counter = hypre_CTAlloc(HYPRE_Int, num_nodes); for (i=0; i < num_variables; i++) map_to_node[i] = i/num_functions; for (i=0; i < num_nodes; i++) counter[i] = -1; /* counting pass: number of distinct node columns in each node row of the diag part */ AN_num_nonzeros_diag = 0; row = 0; for (i=0; i < num_nodes; i++) { AN_diag_i[i] = AN_num_nonzeros_diag; for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < i) { counter[k_map] = i; AN_num_nonzeros_diag++; } } row++; } } AN_diag_i[num_nodes] = AN_num_nonzeros_diag; AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag); AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag); AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag); hypre_CSRMatrixI(AN_diag) = AN_diag_i; hypre_CSRMatrixJ(AN_diag) = AN_diag_j; hypre_CSRMatrixData(AN_diag) = AN_diag_data; for (i=0; i < num_nodes; 
i++) counter[i] = -1; index = 0; start_index = 0; row = 0; switch (mode) { case 1: /* frobenius norm */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = A_diag_data[k]*A_diag_data[k]; index++; } else { AN_diag_data[counter[k_map]] += A_diag_data[k]*A_diag_data[k]; } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) AN_diag_data[i] = sqrt(AN_diag_data[i]); } break; case 2: /* sum of abs. value of all elements in each block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = fabs(A_diag_data[k]); index++; } else { AN_diag_data[counter[k_map]] += fabs(A_diag_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) AN_diag_data[i] /= num_fun2; } break; case 3: /* largest element of each block (sets true value - not abs. value) */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = A_diag_data[k]; index++; } else { if (fabs(A_diag_data[k]) > fabs(AN_diag_data[counter[k_map]])) AN_diag_data[counter[k_map]] = A_diag_data[k]; } } row++; } start_index = index; } } break; case 4: /* inf. 
norm (row-sum) */ { data = hypre_CTAlloc(double, AN_num_nonzeros_diag*num_functions); for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; data[index*num_functions + j] = fabs(A_diag_data[k]); index++; } else { data[(counter[k_map])*num_functions + j] += fabs(A_diag_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) { AN_diag_data[i] = data[i*num_functions]; for (j=1; j< num_functions; j++) { AN_diag_data[i] = hypre_max( AN_diag_data[i],data[i*num_functions+j]); } } hypre_TFree(data); } break; case 6: /* sum of all elements in each block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = (A_diag_data[k]); index++; } else { AN_diag_data[counter[k_map]] += (A_diag_data[k]); } } row++; } start_index = index; } } break; } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (DO MORE BELOW) */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; sum = 0.0; for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++) { sum += AN_diag_data[k]; } AN_diag_data[index] = -sum; } } else if (diag_option == 2) { /* make all diagonal entries negative */ /* the diagonal is the first element listed in each row - */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; AN_diag_data[index] = - AN_diag_data[index]; } } /* from here on: communication package and offd part of AN */ num_nonzeros_offd = A_offd_i[num_variables]; AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); num_cols_offd_AN = 0; if (comm_pkg) { comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm; num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); 
hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends; num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs; send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); send_procs_AN = NULL; send_map_elmts_AN = NULL; if (num_sends) { send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends); send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]); } send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1); recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1); recv_procs_AN = NULL; if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs); for (i=0; i < num_sends; i++) send_procs_AN[i] = send_procs[i]; for (i=0; i < num_recvs; i++) recv_procs_AN[i] = recv_procs[i]; send_map_starts_AN[0] = 0; cnt = 0; for (i=0; i < num_sends; i++) { k_map = send_map_starts[i]; if (send_map_starts[i+1]-k_map) send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions; for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++) { node = send_map_elmts[j]/num_functions; if (node > send_map_elmts_AN[cnt-1]) send_map_elmts_AN[cnt++] = node; } send_map_starts_AN[i+1] = cnt; } hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN; hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN; hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN; hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN; hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN; } num_cols_offd = hypre_CSRMatrixNumCols(A_offd); if (num_cols_offd) { if (num_cols_offd > num_variables) { hypre_TFree(map_to_node); map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd); } num_cols_offd_AN = 1; map_to_node[0] = col_map_offd[0]/num_functions; for (i=1; i < num_cols_offd; i++) { 
map_to_node[i] = col_map_offd[i]/num_functions; if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++; } if (num_cols_offd_AN > num_nodes) { hypre_TFree(counter); counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN); } map_to_map = NULL; col_map_offd_AN = NULL; map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd); col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN); col_map_offd_AN[0] = map_to_node[0]; recv_vec_starts_AN[0] = 0; cnt = 1; for (i=0; i < num_recvs; i++) { for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++) { node = map_to_node[j]; if (node > col_map_offd_AN[cnt-1]) { col_map_offd_AN[cnt++] = node; } map_to_map[j] = cnt-1; } recv_vec_starts_AN[i+1] = cnt; } for (i=0; i < num_cols_offd_AN; i++) counter[i] = -1; AN_num_nonzeros_offd = 0; row = 0; for (i=0; i < num_nodes; i++) { AN_offd_i[i] = AN_num_nonzeros_offd; for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < i) { counter[k_map] = i; AN_num_nonzeros_offd++; } } row++; } } AN_offd_i[num_nodes] = AN_num_nonzeros_offd; } AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN, AN_num_nonzeros_offd); hypre_CSRMatrixI(AN_offd) = AN_offd_i; if (AN_num_nonzeros_offd) { AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd); AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd); hypre_CSRMatrixJ(AN_offd) = AN_offd_j; hypre_CSRMatrixData(AN_offd) = AN_offd_data; for (i=0; i < num_cols_offd_AN; i++) counter[i] = -1; index = 0; row = 0; AN_offd_i[0] = 0; start_index = 0; switch (mode) { case 1: /* frobenius norm */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = A_offd_data[k]*A_offd_data[k]; index++; } else { AN_offd_data[counter[k_map]] += A_offd_data[k]*A_offd_data[k]; } } row++; } 
start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) AN_offd_data[i] = sqrt(AN_offd_data[i]); } break; case 2: /* sum of abs. value of all elements in block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = fabs(A_offd_data[k]); index++; } else { AN_offd_data[counter[k_map]] += fabs(A_offd_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) AN_offd_data[i] /= num_fun2; } break; case 3: /* largest element in each block (not abs. value ) */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = A_offd_data[k]; index++; } else { if (fabs(A_offd_data[k]) > fabs(AN_offd_data[counter[k_map]])) AN_offd_data[counter[k_map]] = A_offd_data[k]; } } row++; } start_index = index; } } break; case 4: /* inf. 
norm (row-sum) */ { data = hypre_CTAlloc(double, AN_num_nonzeros_offd*num_functions); for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; data[index*num_functions + j] = fabs(A_offd_data[k]); index++; } else { data[(counter[k_map])*num_functions + j] += fabs(A_offd_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) { AN_offd_data[i] = data[i*num_functions]; for (j=1; j< num_functions; j++) { AN_offd_data[i] = hypre_max( AN_offd_data[i],data[i*num_functions+j]); } } hypre_TFree(data); } break; case 6: /* sum of value of all elements in block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = (A_offd_data[k]); index++; } else { AN_offd_data[counter[k_map]] += (A_offd_data[k]); } } row++; } start_index = index; } } break; } hypre_TFree(map_to_map); } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (here we are adding the off_diag contribution)*/ /* the diagonal is the first element listed in each row of AN_diag_data - */ for (i=0; i < num_nodes; i++) { sum = 0.0; for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++) { sum += AN_offd_data[k]; } index = AN_diag_i[i];/* location of diag entry in data */ AN_diag_data[index] -= sum; /* subtract from current value */ } } AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes, row_starts_AN, row_starts_AN, num_cols_offd_AN, AN_num_nonzeros_diag, AN_num_nonzeros_offd); /* we already created the diag and offd matrices - so we don't need the ones created above */ hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN)); 
hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN)); hypre_ParCSRMatrixDiag(AN) = AN_diag; hypre_ParCSRMatrixOffd(AN) = AN_offd; hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN; hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN; /* the remainder may rewrite A itself: when whole nodes need more offd columns / send entries than A currently has, A's offd column map, offd column indices, receive starts and send map are expanded in place */ new_num_cols_offd = num_functions*num_cols_offd_AN; if (new_num_cols_offd > num_cols_offd) { new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd); cnt = 0; for (i=0; i < num_cols_offd_AN; i++) { for (j=0; j < num_functions; j++) { new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j; } } cnt = 0; for (i=0; i < num_cols_offd; i++) { while (col_map_offd[i] > new_col_map_offd[cnt]) cnt++; col_map_offd[i] = cnt++; } for (i=0; i < num_recvs+1; i++) { recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i]; } for (i=0; i < num_nonzeros_offd; i++) { j = A_offd_j[i]; A_offd_j[i] = col_map_offd[j]; } hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd; hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd; hypre_TFree(col_map_offd); } hypre_TFree(map_to_node); new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions; if (new_send_elmts_size > send_map_starts[num_sends]) { new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size); cnt = 0; send_map_starts[0] = 0; for (i=0; i < num_sends; i++) { send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions; for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++) { for (k=0; k < num_functions; k++) new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k; } } hypre_TFree(send_map_elmts); hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts; } *AN_ptr = AN; hypre_TFree(counter); return (ierr); }
/*--------------------------------------------------------------------------
 * GenerateRotate7pt
 *
 * Builds the local part of a ParCSR matrix for a 7-point stencil on an
 * nx x ny grid (global size nx*ny) that is split into P x Q blocks; this
 * call produces block (p, q).
 *
 * Stencil coefficients, with x = pi*alpha/180, s = sin(x), c = cos(x):
 *   ac = -(c*c + eps*s*s),  bc = 2*(1 - eps)*s*c,  cc = -(s*s + eps*c*c)
 *   value[0] = -2*(2*ac + bc + 2*cc)   centre
 *   value[1] = 2*ac + bc               (ix-1, iy) and (ix+1, iy)
 *   value[2] = bc + 2*cc               (ix, iy-1) and (ix, iy+1)
 *   value[3] = -bc                     (ix-1, iy-1) and (ix+1, iy+1)
 *
 * nx and ny are partitioned with hypre_GeneratePartitioning; global_part
 * holds the running sum of block sizes, with ix varying fastest. Neighbours
 * inside the local block go to the diag part, all others to the offd part
 * with global column indices obtained from hypre_map2, which are afterwards
 * renumbered against the sorted col_map_offd.
 *
 * global_part and col_map_offd are handed to the returned matrix; nx_part,
 * ny_part and value are freed before returning.
 *
 * NOTE(review): offd_j / offd_data are allocated only when P*Q > 1 -
 * presumably no offd entry can occur for a single block; confirm.
 * NOTE(review): when P*Q > 1 but this block has no offd entries
 * (o_cnt == 0), work[0] is read and col_map_offd[0] is written although
 * both arrays were requested with zero length - confirm this is safe.
 *--------------------------------------------------------------------------*/
HYPRE_ParCSRMatrix GenerateRotate7pt( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int p, HYPRE_Int q, HYPRE_Real alpha, HYPRE_Real eps ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; HYPRE_Real *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j; HYPRE_Real *offd_data; HYPRE_Real *value; HYPRE_Real ac, bc, cc, s, c, pi, x; HYPRE_Int *global_part; HYPRE_Int ix, iy; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int *work; HYPRE_Int row_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local; HYPRE_Int nx_size, ny_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny; /* stencil coefficients from the angle alpha (degrees) and eps */ value = hypre_CTAlloc(HYPRE_Real,4); pi = 4.0*atan(1.0); x = pi*alpha/180.0; s = sin(x); c = cos(x); ac = -(c*c + eps*s*s); bc = 2.0*(1.0 - eps)*s*c; cc = -(s*s + eps*c*c); value[0] = -2*(2*ac+bc+2*cc); value[1] = 2*ac+bc; value[2] = bc+2*cc; value[3] = -bc; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1); global_part[0] = 0; cnt = 1; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size; } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; /* the MPI rank/size queried above are replaced by the logical P x Q grid values */ my_id = q*P + p; num_procs = P*Q; local_num_rows = nx_local*ny_local; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); num_cols_offd = 0; if (p) num_cols_offd += ny_local; if (p < P_busy-1) num_cols_offd += ny_local; if (q) num_cols_offd += nx_local; if (q < Q_busy-1) num_cols_offd += nx_local; if (p && 
q) num_cols_offd++; if (p && q < Q_busy-1 ) num_cols_offd++; if (p < P_busy-1 && q ) num_cols_offd++; if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); /* pass 1: build the diag and offd row pointers by counting entries per grid point */ cnt = 0; o_cnt = 0; diag_i[0] = 0; offd_i[0] = 0; for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { cnt++; o_cnt++; diag_i[cnt] = diag_i[cnt-1]; offd_i[o_cnt] = offd_i[o_cnt-1]; diag_i[cnt]++; if (iy > ny_part[q]) { diag_i[cnt]++; if (ix > nx_part[p]) { diag_i[cnt]++; } else { if (ix) offd_i[o_cnt]++; } } else { if (iy) { offd_i[o_cnt]++; if (ix > nx_part[p]) { offd_i[o_cnt]++; } else if (ix) { offd_i[o_cnt]++; } } } if (ix > nx_part[p]) diag_i[cnt]++; else { if (ix) { offd_i[o_cnt]++; } } if (ix+1 < nx_part[p+1]) diag_i[cnt]++; else { if (ix+1 < nx) { offd_i[o_cnt]++; } } if (iy+1 < ny_part[q+1]) { diag_i[cnt]++; if (ix < nx_part[p+1]-1) { diag_i[cnt]++; } else { if (ix+1 < nx) offd_i[o_cnt]++; } } else { if (iy+1 < ny) { offd_i[o_cnt]++; if (ix < nx_part[p+1]-1) { offd_i[o_cnt]++; } else if (ix < nx-1) { offd_i[o_cnt]++; } } } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]); } /* pass 2: fill column indices and values, visiting neighbours in the same order as pass 1 */ row_index = 0; cnt = 0; o_cnt = 0; for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_j[cnt] = row_index; diag_data[cnt++] = value[0]; if (iy > ny_part[q]) { if (ix > nx_part[p]) { diag_j[cnt] = row_index-nx_local-1 ; diag_data[cnt++] = value[3]; } else { if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } diag_j[cnt] = row_index-nx_local; diag_data[cnt++] = value[2]; } else { if (iy) { if (ix > nx_part[p]) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p,q-1,P,Q, 
nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } else if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } offd_j[o_cnt] = hypre_map2(ix,iy-1,p,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[2]; } } if (ix > nx_part[p]) { diag_j[cnt] = row_index-1; diag_data[cnt++] = value[1]; } else { if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy,p-1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[1]; } } if (ix+1 < nx_part[p+1]) { diag_j[cnt] = row_index+1; diag_data[cnt++] = value[1]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map2(ix+1,iy,p+1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[1]; } } if (iy+1 < ny_part[q+1]) { diag_j[cnt] = row_index+nx_local; diag_data[cnt++] = value[2]; if (ix < nx_part[p+1]-1) { diag_j[cnt] = row_index+nx_local+1 ; diag_data[cnt++] = value[3]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } } else { if (iy+1 < ny) { offd_j[o_cnt] = hypre_map2(ix,iy+1,p,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[2]; if (ix < nx_part[p+1]-1) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } else if (ix < nx-1) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } } row_index++; } } /* collect the distinct offd global columns in ascending order into col_map_offd, then replace each offd_j entry by its position in that map */ if (num_procs > 1) { work = hypre_CTAlloc(HYPRE_Int,o_cnt); for (i=0; i < o_cnt; i++) work[i] = offd_j[i]; qsort0(work, 0, o_cnt-1); col_map_offd[0] = work[0]; cnt = 0; for (i=0; i < o_cnt; i++) { if (work[i] > col_map_offd[cnt]) { cnt++; col_map_offd[cnt] = work[i]; } } for (i=0; i < o_cnt; i++) { for (j=0; j < num_cols_offd; j++) { if (offd_j[i] == col_map_offd[j]) { offd_j[i] = j; break; } } } hypre_TFree(work); } A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size, global_part, global_part, num_cols_offd, 
diag_i[local_num_rows], offd_i[local_num_rows]); hypre_ParCSRMatrixColMapOffd(A) = col_map_offd; diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrixI(diag) = diag_i; hypre_CSRMatrixJ(diag) = diag_j; hypre_CSRMatrixData(diag) = diag_data; offd = hypre_ParCSRMatrixOffd(A); hypre_CSRMatrixI(offd) = offd_i; if (num_cols_offd) { hypre_CSRMatrixJ(offd) = offd_j; hypre_CSRMatrixData(offd) = offd_data; } hypre_TFree(nx_part); hypre_TFree(ny_part); hypre_TFree(value); return (HYPRE_ParCSRMatrix) A; }
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { hypre_ParCSRMatrix *par_matrix, *g_matrix, **submatrices; hypre_CSRMatrix *A_diag, *A_offd; hypre_CSRBlockMatrix *diag; hypre_CSRBlockMatrix *offd; hypre_ParCSRBlockMatrix *par_blk_matrix, *par_blk_matrixT, *rap_matrix; hypre_Vector *x_local; hypre_Vector *y_local; hypre_ParVector *x; hypre_ParVector *y; HYPRE_Solver gmres_solver, precon; HYPRE_Int *diag_i, *diag_j, *offd_i, *offd_j; HYPRE_Int *diag_i2, *diag_j2, *offd_i2, *offd_j2; double *diag_d, *diag_d2, *offd_d, *offd_d2; HYPRE_Int mypid, local_size, nprocs; HYPRE_Int global_num_rows, global_num_cols, num_cols_offd; HYPRE_Int num_nonzeros_diag, num_nonzeros_offd, *colMap; HYPRE_Int ii, jj, kk, row, col, nnz, *indices, *colMap2; double *data, ddata, *y_data; HYPRE_Int *row_starts, *col_starts, *rstarts, *cstarts; HYPRE_Int *row_starts2, *col_starts2; HYPRE_Int block_size=2, bnnz=4, *index_set; FILE *fp; /* --------------------------------------------- */ /* Initialize MPI */ /* --------------------------------------------- */ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mypid); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs); /* build and fetch matrix */ MyBuildParLaplacian9pt((HYPRE_ParCSRMatrix *) &par_matrix); global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix); row_starts = hypre_ParCSRMatrixRowStarts(par_matrix); col_starts = hypre_ParCSRMatrixColStarts(par_matrix); A_diag = hypre_ParCSRMatrixDiag(par_matrix); A_offd = hypre_ParCSRMatrixOffd(par_matrix); num_cols_offd = hypre_CSRMatrixNumCols(A_offd); num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(A_diag); num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(A_offd); /* --------------------------------------------- */ /* build vector and apply matvec */ /* --------------------------------------------- */ x = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_cols,col_starts); 
hypre_ParVectorSetPartitioningOwner(x,0); hypre_ParVectorInitialize(x); x_local = hypre_ParVectorLocalVector(x); data = hypre_VectorData(x_local); local_size = col_starts[mypid+1] - col_starts[mypid]; for (ii = 0; ii < local_size; ii++) data[ii] = 1.0; y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_rows,row_starts); hypre_ParVectorSetPartitioningOwner(y,0); hypre_ParVectorInitialize(y); hypre_ParCSRMatrixMatvec (1.0, par_matrix, x, 0.0, y); ddata = hypre_ParVectorInnerProd(y, y); if (mypid == 0) hypre_printf("y inner product = %e\n", ddata); hypre_ParVectorDestroy(x); hypre_ParVectorDestroy(y); /* --------------------------------------------- */ /* build block matrix */ /* --------------------------------------------- */ rstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) rstarts[ii] = row_starts[ii]; cstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) cstarts[ii] = col_starts[ii]; par_blk_matrix = hypre_ParCSRBlockMatrixCreate(hypre_MPI_COMM_WORLD,block_size, global_num_rows, global_num_cols, rstarts, cstarts, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); colMap = hypre_ParCSRMatrixColMapOffd(par_matrix); if (num_cols_offd > 0) colMap2 = hypre_CTAlloc(HYPRE_Int, num_cols_offd); else colMap2 = NULL; for (ii = 0; ii < num_cols_offd; ii++) colMap2[ii] = colMap[ii]; hypre_ParCSRBlockMatrixColMapOffd(par_blk_matrix) = colMap2; diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix)); diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix)); diag_d = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix)); diag = hypre_ParCSRBlockMatrixDiag(par_blk_matrix); diag_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); diag_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag); diag_d2 = hypre_CTAlloc(double, num_nonzeros_diag*bnnz); for (ii = 0; ii <= local_size; ii++) diag_i2[ii] = diag_i[ii]; for (ii = 0; ii < num_nonzeros_diag; ii++) diag_j2[ii] = diag_j[ii]; hypre_CSRBlockMatrixI(diag) = diag_i2; 
hypre_CSRBlockMatrixJ(diag) = diag_j2; for (ii = 0; ii < num_nonzeros_diag; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) diag_d2[ii*bnnz+jj*block_size+kk] = diag_d[ii]; else diag_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(diag) = diag_d2; offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix)); offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix)); offd_d = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix)); offd = hypre_ParCSRBlockMatrixOffd(par_blk_matrix); offd_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); for (ii = 0; ii <= local_size; ii++) offd_i2[ii] = offd_i[ii]; hypre_CSRBlockMatrixI(offd) = offd_i2; if (num_cols_offd) { offd_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd); for (ii = 0; ii < num_nonzeros_offd; ii++) offd_j2[ii] = offd_j[ii]; hypre_CSRBlockMatrixJ(offd) = offd_j2; offd_d2 = hypre_CTAlloc(double, num_nonzeros_offd*bnnz); for (ii = 0; ii < num_nonzeros_offd; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) offd_d2[ii*bnnz+jj*block_size+kk] = offd_d[ii]; else offd_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(offd) = offd_d2; } else {
/*--------------------------------------------------------------------------
 * hypre_seqAMGSetup
 *
 * Gathers the matrix of level p_level of the AMG hierarchy onto every
 * process that owns at least one of its rows, wraps the gathered copy as a
 * one-process ParCSR matrix on hypre_MPI_COMM_SELF and calls
 * hypre_BoomerAMGSetup on it.  The resulting solver, matrix, the two work
 * vectors and the communicator of the participating processes are stored
 * in amg_data.  Processes owning no rows only take part in the collective
 * calls made on the original communicator.
 *
 * coarse_threshold is accepted but not read by this routine.
 * Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGSetup( hypre_ParAMGData *amg_data,
                   HYPRE_Int         p_level,
                   HYPRE_Int         coarse_threshold )
{
   /* hierarchy and communicators */
   hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data);
   MPI_Comm             comm = hypre_ParCSRMatrixComm(Par_A_array[0]);
   MPI_Comm             new_comm, seq_comm;
   hypre_MPI_Group      orig_group, new_group;

   /* the gathered (sequential) problem */
   hypre_ParCSRMatrix  *A_seq = NULL;
   hypre_CSRMatrix     *A_seq_diag;
   hypre_CSRMatrix     *A_seq_offd;
   hypre_ParVector     *F_seq = NULL;
   hypre_ParVector     *U_seq = NULL;
   HYPRE_Solver         coarse_solver;
   double              *A_seq_data = NULL;
   HYPRE_Int           *A_seq_i = NULL;
   HYPRE_Int           *A_seq_offd_i = NULL;
   HYPRE_Int           *A_seq_j = NULL;

   /* local rows with diag and offd parts merged, global column indices */
   double              *A_tmp_data = NULL;
   HYPRE_Int           *A_tmp_i = NULL;
   HYPRE_Int           *A_tmp_j = NULL;

   /* the distributed matrix at the requested level */
   hypre_ParCSRMatrix  *A;
   hypre_CSRMatrix     *A_diag;
   hypre_CSRMatrix     *A_offd;
   HYPRE_Int           *col_map_offd;
   HYPRE_Int           *A_diag_i, *A_diag_j;
   HYPRE_Int           *A_offd_i, *A_offd_j;
   double              *A_diag_data, *A_offd_data;
   HYPRE_Int            num_rows, first_row_index;

   HYPRE_Int          **dof_func_array;
   HYPRE_Int            num_procs, my_id;
   HYPRE_Int            not_finished_coarsening;
   HYPRE_Int            level;
   HYPRE_Int           *info, *ranks, *displs, *displs2, *row_starts;
   HYPRE_Int            new_num_procs;
   HYPRE_Int            proc, row, k, pos;
   HYPRE_Int            size, num_nonzeros, total_nnz;

   /* set but not used further in this routine */
   dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data);

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   level = p_level;
   not_finished_coarsening = 1;

   /* pick the matrix that is to be converted to sequential form */
   A               = Par_A_array[level];
   A_diag          = hypre_ParCSRMatrixDiag(A);
   A_offd          = hypre_ParCSRMatrixOffd(A);
   col_map_offd    = hypre_ParCSRMatrixColMapOffd(A);
   A_diag_i        = hypre_CSRMatrixI(A_diag);
   A_offd_i        = hypre_CSRMatrixI(A_offd);
   A_diag_j        = hypre_CSRMatrixJ(A_diag);
   A_offd_j        = hypre_CSRMatrixJ(A_offd);
   A_diag_data     = hypre_CSRMatrixData(A_diag);
   A_offd_data     = hypre_CSRMatrixData(A_offd);
   num_rows        = hypre_CSRMatrixNumRows(A_diag);
   first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);

   /* every process learns how many rows each process owns */
   info = hypre_CTAlloc(HYPRE_Int, num_procs);
   hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

   /* keep only processes with rows; info is compacted in place so that
      info[0..new_num_procs-1] holds the row counts of those processes */
   ranks = hypre_CTAlloc(HYPRE_Int, num_procs);
   new_num_procs = 0;
   for (proc = 0; proc < num_procs; proc++)
   {
      if (info[proc])
      {
         ranks[new_num_procs] = proc;
         info[new_num_procs]  = info[proc];
         new_num_procs++;
      }
   }

   /* communicator made of the processes listed in ranks */
   MPI_Comm_group(comm, &orig_group);
   hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group);
   MPI_Comm_create(comm, new_group, &new_comm);
   hypre_MPI_Group_free(&new_group);
   hypre_MPI_Group_free(&orig_group);

   if (num_rows)
   {
      /* only participating processes allocate the sequential structures */
      HYPRE_BoomerAMGCreate(&coarse_solver);
      HYPRE_BoomerAMGSetMaxRowSum(coarse_solver,
                                  hypre_ParAMGDataMaxRowSum(amg_data));
      HYPRE_BoomerAMGSetStrongThreshold(coarse_solver,
                                        hypre_ParAMGDataStrongThreshold(amg_data));
      HYPRE_BoomerAMGSetCoarsenType(coarse_solver,
                                    hypre_ParAMGDataCoarsenType(amg_data));
      HYPRE_BoomerAMGSetInterpType(coarse_solver,
                                   hypre_ParAMGDataInterpType(amg_data));
      HYPRE_BoomerAMGSetTruncFactor(coarse_solver,
                                    hypre_ParAMGDataTruncFactor(amg_data));
      HYPRE_BoomerAMGSetPMaxElmts(coarse_solver,
                                  hypre_ParAMGDataPMaxElmts(amg_data));
      if (hypre_ParAMGDataUserRelaxType(amg_data) > -1)
      {
         HYPRE_BoomerAMGSetRelaxType(coarse_solver,
                                     hypre_ParAMGDataUserRelaxType(amg_data));
      }
      HYPRE_BoomerAMGSetRelaxOrder(coarse_solver,
                                   hypre_ParAMGDataRelaxOrder(amg_data));
      HYPRE_BoomerAMGSetRelaxWt(coarse_solver,
                                hypre_ParAMGDataUserRelaxWeight(amg_data));
      if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1)
      {
         HYPRE_BoomerAMGSetNumSweeps(coarse_solver,
                                     hypre_ParAMGDataUserNumSweeps(amg_data));
      }
      HYPRE_BoomerAMGSetNumFunctions(coarse_solver,
                                     hypre_ParAMGDataNumFunctions(amg_data));
      HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1);
      HYPRE_BoomerAMGSetTol(coarse_solver, 0);

      /* A_tmp_i[1..num_rows] holds the length of each local row
         (diag plus offd entries); this becomes the diag part of A_seq */
      A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows+1);
      A_tmp_i[0] = 0;
      for (row = 0; row < num_rows; row++)
      {
         A_tmp_i[row+1] = A_diag_i[row+1] - A_diag_i[row]
                        + A_offd_i[row+1] - A_offd_i[row];
      }

      num_nonzeros = A_offd_i[num_rows] + A_diag_i[num_rows];
      A_tmp_j      = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
      A_tmp_data   = hypre_CTAlloc(double, num_nonzeros);

      /* copy each row: diag entries first (shifted by first_row_index),
         then offd entries (translated through col_map_offd) */
      pos = 0;
      for (row = 0; row < num_rows; row++)
      {
         for (k = A_diag_i[row]; k < A_diag_i[row+1]; k++)
         {
            A_tmp_j[pos]    = A_diag_j[k] + first_row_index;
            A_tmp_data[pos] = A_diag_data[k];
            pos++;
         }
         for (k = A_offd_i[row]; k < A_offd_i[row+1]; k++)
         {
            A_tmp_j[pos]    = col_map_offd[A_offd_j[k]];
            A_tmp_data[pos] = A_offd_data[k];
            pos++;
         }
      }

      /* row displacements of each participating process */
      displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
      displs[0] = 0;
      for (proc = 0; proc < new_num_procs; proc++)
      {
         displs[proc+1] = displs[proc] + info[proc];
      }
      size = displs[new_num_procs];

      A_seq_i      = hypre_CTAlloc(HYPRE_Int, size+1);
      A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1);

      /* gather all row lengths into A_seq_i[1..size] */
      hypre_MPI_Allgatherv( &A_tmp_i[1], num_rows, HYPRE_MPI_INT,
                            &A_seq_i[1], info, displs,
                            HYPRE_MPI_INT, new_comm );

      displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
      A_seq_i[0] = 0;
      displs2[0] = 0;

      /* running sum turns the row lengths into CSR row pointers */
      for (k = 1; k <= size; k++)
      {
         A_seq_i[k] += A_seq_i[k-1];
      }

      /* nonzero displacements per process; info is reused for the
         per-process nonzero counts */
      displs2[new_num_procs] = A_seq_i[size];
      for (proc = 1; proc <= new_num_procs; proc++)
      {
         displs2[proc] = A_seq_i[displs[proc]];
         info[proc-1]  = displs2[proc] - displs2[proc-1];
      }
      total_nnz = displs2[new_num_procs];

      A_seq_j    = hypre_CTAlloc(HYPRE_Int, total_nnz);
      A_seq_data = hypre_CTAlloc(double, total_nnz);

      hypre_MPI_Allgatherv( A_tmp_j, num_nonzeros, HYPRE_MPI_INT,
                            A_seq_j, info, displs2,
                            HYPRE_MPI_INT, new_comm );
      hypre_MPI_Allgatherv( A_tmp_data, num_nonzeros, hypre_MPI_DOUBLE,
                            A_seq_data, info, displs2,
                            hypre_MPI_DOUBLE, new_comm );

      hypre_TFree(displs);
      hypre_TFree(displs2);
      hypre_TFree(A_tmp_i);
      hypre_TFree(A_tmp_j);
      hypre_TFree(A_tmp_data);

      row_starts = hypre_CTAlloc(HYPRE_Int, 2);
      row_starts[0] = 0;
      row_starts[1] = size;

      /* one-process communicator for the gathered matrix */
      seq_comm = hypre_MPI_COMM_SELF;

      A_seq = hypre_ParCSRMatrixCreate(seq_comm, size, size,
                                       row_starts, row_starts,
                                       0, total_nnz, 0);
      A_seq_diag = hypre_ParCSRMatrixDiag(A_seq);
      A_seq_offd = hypre_ParCSRMatrixOffd(A_seq);

      hypre_CSRMatrixData(A_seq_diag) = A_seq_data;
      hypre_CSRMatrixI(A_seq_diag)    = A_seq_i;
      hypre_CSRMatrixJ(A_seq_diag)    = A_seq_j;
      hypre_CSRMatrixI(A_seq_offd)    = A_seq_offd_i;

      /* the vectors share row_starts with A_seq but do not own it */
      F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
      U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
      hypre_ParVectorOwnsPartitioning(F_seq) = 0;
      hypre_ParVectorOwnsPartitioning(U_seq) = 0;
      hypre_ParVectorInitialize(F_seq);
      hypre_ParVectorInitialize(U_seq);

      hypre_BoomerAMGSetup(coarse_solver, A_seq, F_seq, U_seq);

      hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver;
      hypre_ParAMGDataACoarse(amg_data)      = A_seq;
      hypre_ParAMGDataFCoarse(amg_data)      = F_seq;
      hypre_ParAMGDataUCoarse(amg_data)      = U_seq;
      hypre_ParAMGDataNewComm(amg_data)      = new_comm;
   }

   hypre_TFree(info);
   hypre_TFree(ranks);

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_CSRBooleanMatrixToParCSRBooleanMatrix
 *
 * Distributes the CSR boolean matrix A, which is read on rank 0 only,
 * over the processes of comm and returns the resulting parallel matrix.
 *
 *   comm        communicator over which the matrix is distributed
 *   A           matrix to distribute; only dereferenced where my_id == 0
 *   row_starts  row partitioning handed to hypre_ParCSRBooleanMatrixCreate
 *   col_starts  column partitioning handed to
 *               hypre_ParCSRBooleanMatrixCreate
 *
 * After creation the partitionings stored in the new matrix are the ones
 * actually used.  Rank 0 sends every other rank its block of rows and
 * keeps its own block by aliasing A's arrays; the alias is removed again
 * before the temporary local matrix is destroyed.
 *--------------------------------------------------------------------------*/
hypre_ParCSRBooleanMatrix *
hypre_CSRBooleanMatrixToParCSRBooleanMatrix( MPI_Comm                comm,
                                             hypre_CSRBooleanMatrix *A,
                                             HYPRE_Int              *row_starts,
                                             HYPRE_Int              *col_starts )
{
   HYPRE_Int                  global_data[2];
   HYPRE_Int                  global_num_rows;
   HYPRE_Int                  global_num_cols;
   HYPRE_Int                 *local_num_rows;

   HYPRE_Int                  num_procs, my_id;
   /* The pointers below are assigned on rank 0 only.  They start out as
      NULL so that no indeterminate pointer value is ever read, e.g. when
      local_num_nonzeros is passed as the send buffer of the scatter on
      the other ranks. */
   HYPRE_Int                 *local_num_nonzeros = NULL;
   HYPRE_Int                  num_nonzeros;

   HYPRE_Int                 *a_i = NULL;
   HYPRE_Int                 *a_j = NULL;

   hypre_CSRBooleanMatrix    *local_A;

   hypre_MPI_Request         *requests = NULL;
   hypre_MPI_Status          *status = NULL, status0;
   hypre_MPI_Datatype        *csr_matrix_datatypes;

   hypre_ParCSRBooleanMatrix *par_matrix;

   HYPRE_Int                  first_col_diag;
   HYPRE_Int                  last_col_diag;
   HYPRE_Int                  i, j, ind;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   /* rank 0 reads the global sizes and the CSR arrays of A */
   if (my_id == 0)
   {
      global_data[0] = hypre_CSRBooleanMatrix_Get_NRows(A);
      global_data[1] = hypre_CSRBooleanMatrix_Get_NCols(A);
      a_i = hypre_CSRBooleanMatrix_Get_I(A);
      a_j = hypre_CSRBooleanMatrix_Get_J(A);
   }
   hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_INT, 0, comm);
   global_num_rows = global_data[0];
   global_num_cols = global_data[1];

   local_num_rows = hypre_CTAlloc(HYPRE_Int, num_procs);
   csr_matrix_datatypes = hypre_CTAlloc(hypre_MPI_Datatype, num_procs);

   par_matrix = hypre_ParCSRBooleanMatrixCreate(comm, global_num_rows,
                                                global_num_cols,
                                                row_starts, col_starts,
                                                0, 0, 0);

   /* continue with the partitionings stored in the new matrix */
   row_starts = hypre_ParCSRBooleanMatrix_Get_RowStarts(par_matrix);
   col_starts = hypre_ParCSRBooleanMatrix_Get_ColStarts(par_matrix);

   for (i = 0; i < num_procs; i++)
   {
      local_num_rows[i] = row_starts[i+1] - row_starts[i];
   }

   /* rank 0 computes the number of nonzeros each rank will receive */
   if (my_id == 0)
   {
      local_num_nonzeros = hypre_CTAlloc(HYPRE_Int, num_procs);
      for (i = 0; i < num_procs-1; i++)
      {
         local_num_nonzeros[i] = a_i[row_starts[i+1]] - a_i[row_starts[i]];
      }
      local_num_nonzeros[num_procs-1] =
         a_i[global_num_rows] - a_i[row_starts[num_procs-1]];
   }
   hypre_MPI_Scatter(local_num_nonzeros, 1, HYPRE_MPI_INT,
                     &num_nonzeros, 1, HYPRE_MPI_INT, 0, comm);

   if (my_id == 0)
   {
      num_nonzeros = local_num_nonzeros[0];
   }

   local_A = hypre_CSRBooleanMatrixCreate(local_num_rows[my_id],
                                          global_num_cols, num_nonzeros);

   if (my_id == 0)
   {
      requests = hypre_CTAlloc(hypre_MPI_Request, num_procs-1);
      status   = hypre_CTAlloc(hypre_MPI_Status, num_procs-1);
      j = 0;
      for (i = 1; i < num_procs; i++)
      {
         ind = a_i[row_starts[i]];
         /* NOTE(review): the message is sent from hypre_MPI_BOTTOM, so the
            datatype presumably carries the addresses of the row and
            column arrays - confirm against
            hypre_BuildCSRBooleanMatrixMPIDataType */
         hypre_BuildCSRBooleanMatrixMPIDataType(local_num_nonzeros[i],
                                                local_num_rows[i],
                                                &a_i[row_starts[i]],
                                                &a_j[ind],
                                                &csr_matrix_datatypes[i]);
         hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0,
                         comm, &requests[j++]);
         hypre_MPI_Type_free(&csr_matrix_datatypes[i]);
      }
      /* rank 0 uses A's arrays directly for its own block of rows */
      hypre_CSRBooleanMatrix_Get_I(local_A) = a_i;
      hypre_CSRBooleanMatrix_Get_J(local_A) = a_j;
      hypre_MPI_Waitall(num_procs-1, requests, status);
      hypre_TFree(requests);
      hypre_TFree(status);
      hypre_TFree(local_num_nonzeros);
   }
   else
   {
      hypre_CSRBooleanMatrixInitialize(local_A);
      hypre_BuildCSRBooleanMatrixMPIDataType(num_nonzeros,
                                             local_num_rows[my_id],
                                             hypre_CSRBooleanMatrix_Get_I(local_A),
                                             hypre_CSRBooleanMatrix_Get_J(local_A),
                                             csr_matrix_datatypes);
      hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0], 0, 0,
                     comm, &status0);
      hypre_MPI_Type_free(csr_matrix_datatypes);
   }

   first_col_diag = col_starts[my_id];
   last_col_diag  = col_starts[my_id+1] - 1;

   BooleanGenerateDiagAndOffd(local_A, par_matrix, first_col_diag,
                              last_col_diag);

   /* set pointers back to NULL before destroying, so that the arrays
      borrowed from A on rank 0 are not touched through local_A */
   if (my_id == 0)
   {
      hypre_CSRBooleanMatrix_Get_I(local_A) = NULL;
      hypre_CSRBooleanMatrix_Get_J(local_A) = NULL;
   }
   hypre_CSRBooleanMatrixDestroy(local_A);
   hypre_TFree(local_num_rows);
   hypre_TFree(csr_matrix_datatypes);

   return par_matrix;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRBooleanMatrixCreate
 *
 * Allocates a parallel boolean matrix on comm and fills in its header.
 *
 * A NULL row_starts is replaced by a partitioning produced by
 * hypre_GeneratePartitioning.  A NULL col_starts is replaced by row_starts
 * when the matrix is globally square and by a generated partitioning
 * otherwise.  The diag and offd parts are created with the local sizes
 * taken from the partitionings and the given nonzero counts.
 *
 * The new matrix is flagged as owning its data and its row partitioning;
 * it is flagged as owning the column partitioning only when that is a
 * different array than the row partitioning.
 *--------------------------------------------------------------------------*/
hypre_ParCSRBooleanMatrix *
hypre_ParCSRBooleanMatrixCreate( MPI_Comm  comm,
                                 HYPRE_Int global_num_rows,
                                 HYPRE_Int global_num_cols,
                                 HYPRE_Int *row_starts,
                                 HYPRE_Int *col_starts,
                                 HYPRE_Int num_cols_offd,
                                 HYPRE_Int num_nonzeros_diag,
                                 HYPRE_Int num_nonzeros_offd )
{
   hypre_ParCSRBooleanMatrix *new_matrix;
   HYPRE_Int                  rank, nprocs;
   HYPRE_Int                  row_begin, col_begin;
   HYPRE_Int                  nrows_local, ncols_local;

   new_matrix = hypre_CTAlloc(hypre_ParCSRBooleanMatrix, 1);

   hypre_MPI_Comm_rank(comm, &rank);
   hypre_MPI_Comm_size(comm, &nprocs);

   /* supply default partitionings where the caller gave none */
   if (row_starts == NULL)
   {
      hypre_GeneratePartitioning(global_num_rows, nprocs, &row_starts);
   }
   if (col_starts == NULL)
   {
      if (global_num_rows != global_num_cols)
      {
         hypre_GeneratePartitioning(global_num_cols, nprocs, &col_starts);
      }
      else
      {
         col_starts = row_starts;
      }
   }

   /* local extent of this process */
   row_begin   = row_starts[rank];
   nrows_local = row_starts[rank+1] - row_begin;
   col_begin   = col_starts[rank];
   ncols_local = col_starts[rank+1] - col_begin;

   /* sizes and communicator */
   hypre_ParCSRBooleanMatrix_Get_Comm(matrix_alias_guard(new_matrix)) = comm;
   hypre_ParCSRBooleanMatrix_Get_GlobalNRows(new_matrix)  = global_num_rows;
   hypre_ParCSRBooleanMatrix_Get_GlobalNCols(new_matrix)  = global_num_cols;
   hypre_ParCSRBooleanMatrix_Get_StartRow(new_matrix)     = row_begin;
   hypre_ParCSRBooleanMatrix_Get_FirstColDiag(new_matrix) = col_begin;

   /* local blocks */
   hypre_ParCSRBooleanMatrix_Get_Diag(new_matrix) =
      hypre_CSRBooleanMatrixCreate(nrows_local, ncols_local,
                                   num_nonzeros_diag);
   hypre_ParCSRBooleanMatrix_Get_Offd(new_matrix) =
      hypre_CSRBooleanMatrixCreate(nrows_local, num_cols_offd,
                                   num_nonzeros_offd);

   /* partitionings and not-yet-built members */
   hypre_ParCSRBooleanMatrix_Get_RowStarts(new_matrix)  = row_starts;
   hypre_ParCSRBooleanMatrix_Get_ColStarts(new_matrix)  = col_starts;
   hypre_ParCSRBooleanMatrix_Get_ColMapOffd(new_matrix) = NULL;
   hypre_ParCSRBooleanMatrix_Get_CommPkg(new_matrix)    = NULL;
   hypre_ParCSRBooleanMatrix_Get_Rowindices(new_matrix) = NULL;
   hypre_ParCSRBooleanMatrix_Get_Getrowactive(new_matrix) = 0;

   /* ownership flags; a shared partitioning is owned once, as row_starts */
   hypre_ParCSRBooleanMatrix_Get_OwnsData(new_matrix)      = 1;
   hypre_ParCSRBooleanMatrix_Get_OwnsRowStarts(new_matrix) = 1;
   hypre_ParCSRBooleanMatrix_Get_OwnsColStarts(new_matrix) =
      (row_starts == col_starts) ? 0 : 1;

   return new_matrix;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRBooleanMatrixRead
 *
 * Builds a parallel boolean matrix from three per-process files named
 * "<file_name>.INFO.<rank>", "<file_name>.D.<rank>" and
 * "<file_name>.O.<rank>".
 *
 * The INFO file supplies, in this order: the global number of rows, the
 * global number of columns, the number of off-diagonal columns, one
 * (row start, column start) pair per process, and the off-diagonal column
 * map.  The D file is read as the diag block.  The O file is read as the
 * offd block only when the number of off-diagonal columns is nonzero;
 * otherwise an empty offd block is created.
 *
 * Returns the new matrix, or NULL (after printing a message) when the
 * INFO file cannot be opened.
 *--------------------------------------------------------------------------*/
hypre_ParCSRBooleanMatrix *
hypre_ParCSRBooleanMatrixRead( MPI_Comm    comm,
                               const char *file_name )
{
   hypre_ParCSRBooleanMatrix *matrix;
   hypre_CSRBooleanMatrix    *diag;
   hypre_CSRBooleanMatrix    *offd;
   HYPRE_Int                  my_id, i, num_procs;
   /* NOTE(review): the names are formatted into fixed 80-byte buffers
      without a length check, so file_name has to be short enough for
      "<file_name>.INFO.<rank>" to fit - consider a bounded formatter. */
   char                       new_file_d[80], new_file_o[80], new_file_info[80];
   HYPRE_Int                  global_num_rows, global_num_cols, num_cols_offd;
   HYPRE_Int                  local_num_rows;
   HYPRE_Int                 *row_starts;
   HYPRE_Int                 *col_starts;
   HYPRE_Int                 *col_map_offd;
   FILE                      *fp;
   HYPRE_Int                  equal = 1;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   hypre_sprintf(new_file_d, "%s.D.%d", file_name, my_id);
   hypre_sprintf(new_file_o, "%s.O.%d", file_name, my_id);
   hypre_sprintf(new_file_info, "%s.INFO.%d", file_name, my_id);

   /* Open the INFO file before anything is allocated, so that a missing
      file can be reported without leaking and without handing a NULL
      stream to the read calls below. */
   fp = fopen(new_file_info, "r");
   if (fp == NULL)
   {
      hypre_printf("Error: can't open input file %s\n", new_file_info);
      return NULL;
   }

   row_starts = hypre_CTAlloc(HYPRE_Int, num_procs+1);
   col_starts = hypre_CTAlloc(HYPRE_Int, num_procs+1);

   hypre_fscanf(fp, "%d", &global_num_rows);
   hypre_fscanf(fp, "%d", &global_num_cols);
   hypre_fscanf(fp, "%d", &num_cols_offd);
   for (i = 0; i < num_procs; i++)
   {
      hypre_fscanf(fp, "%d %d", &row_starts[i], &col_starts[i]);
   }
   /* the closing entries are not stored in the file */
   row_starts[num_procs] = global_num_rows;
   col_starts[num_procs] = global_num_cols;

   col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
   for (i = 0; i < num_cols_offd; i++)
   {
      hypre_fscanf(fp, "%d", &col_map_offd[i]);
   }

   fclose(fp);

   /* identical partitionings are stored once and shared */
   for (i = num_procs; i >= 0; i--)
   {
      if (row_starts[i] != col_starts[i])
      {
         equal = 0;
         break;
      }
   }
   if (equal)
   {
      hypre_TFree(col_starts);
      col_starts = row_starts;
   }

   diag = hypre_CSRBooleanMatrixRead(new_file_d);
   local_num_rows = hypre_CSRBooleanMatrix_Get_NRows(diag);

   if (num_cols_offd)
   {
      offd = hypre_CSRBooleanMatrixRead(new_file_o);
   }
   else
   {
      offd = hypre_CSRBooleanMatrixCreate(local_num_rows, 0, 0);
   }

   matrix = hypre_CTAlloc(hypre_ParCSRBooleanMatrix, 1);

   hypre_ParCSRBooleanMatrix_Get_Comm(matrix) = comm;
   hypre_ParCSRBooleanMatrix_Get_GlobalNRows(matrix) = global_num_rows;
   hypre_ParCSRBooleanMatrix_Get_GlobalNCols(matrix) = global_num_cols;
   hypre_ParCSRBooleanMatrix_Get_StartRow(matrix) = row_starts[my_id];
   hypre_ParCSRBooleanMatrix_Get_FirstColDiag(matrix) = col_starts[my_id];
   hypre_ParCSRBooleanMatrix_Get_RowStarts(matrix) = row_starts;
   hypre_ParCSRBooleanMatrix_Get_ColStarts(matrix) = col_starts;
   hypre_ParCSRBooleanMatrix_Get_CommPkg(matrix) = NULL;

   /* set defaults */
   hypre_ParCSRBooleanMatrix_Get_OwnsData(matrix) = 1;
   hypre_ParCSRBooleanMatrix_Get_OwnsRowStarts(matrix) = 1;
   hypre_ParCSRBooleanMatrix_Get_OwnsColStarts(matrix) = 1;
   if (row_starts == col_starts)
   {
      /* shared array: owned through the row partitioning only */
      hypre_ParCSRBooleanMatrix_Get_OwnsColStarts(matrix) = 0;
   }

   hypre_ParCSRBooleanMatrix_Get_Diag(matrix) = diag;
   hypre_ParCSRBooleanMatrix_Get_Offd(matrix) = offd;
   if (num_cols_offd)
   {
      hypre_ParCSRBooleanMatrix_Get_ColMapOffd(matrix) = col_map_offd;
   }
   else
   {
      hypre_ParCSRBooleanMatrix_Get_ColMapOffd(matrix) = NULL;
   }

   return matrix;
}
/*--------------------------------------------------------------------------
 * hypre_ParMatmul_FC
 *
 * Creates and returns the "Fine"-designated rows of the matrix product A*P.
 * A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC where nC is the
 * number of coarse rows/columns, nF the number of fine rows/columns.
 * The size of C=A*P is (nC+nF)*nC, even though not all rows of C are
 * actually computed.  If we were to construct a matrix consisting only of
 * the computed rows of C, its size would be nF*nC.
 * "Fine" is defined solely by the marker array, and for example could be
 * a proper subset of the fine points of a multigrid hierarchy.
 *
 *   A, P           the factors of the product
 *   CF_marker      row i1 is treated as fine when CF_marker[i1] < 0;
 *                  every other row is treated as coarse and receives a
 *                  copy of the corresponding row of P
 *   dof_func       when not NULL, an entry of A only contributes if the
 *                  row and the column carry the same function number
 *   dof_func_offd  function numbers for the off-diagonal columns of A;
 *                  read only when dof_func is not NULL
 *
 * Returns the new matrix, which does not own its row/column partitionings
 * (they are those of A and P), or NULL after printing a message when the
 * dimensions of A and P do not match.
 *--------------------------------------------------------------------------*/
hypre_ParCSRMatrix *
hypre_ParMatmul_FC( hypre_ParCSRMatrix *A,
                    hypre_ParCSRMatrix *P,
                    HYPRE_Int          *CF_marker,
                    HYPRE_Int          *dof_func,
                    HYPRE_Int          *dof_func_offd )
{
   /* To compute a submatrix of C containing only the computed data, i.e.
      only "Fine" rows, we would have to do a lot of computational work,
      with a lot of communication.  The communication is because such a
      matrix would need global information that depends on which rows are
      "Fine". */

   MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   double          *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int       *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int        num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int        num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int       *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int       *P_diag_j = hypre_CSRMatrixJ(P_diag);

   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int       *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int       *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int       *P_offd_j = hypre_CSRMatrixJ(P_offd);

   HYPRE_Int        first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_Int        last_col_diag_P;
   HYPRE_Int       *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int        num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int        num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int        num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *C;
   /* Pointers that are only assigned under a condition start out as NULL
      so that none of them is ever read or passed on uninitialised. */
   HYPRE_Int       *col_map_offd_C = NULL;
   HYPRE_Int       *map_P_to_C = NULL;

   hypre_CSRMatrix *C_diag;
   double          *C_diag_data;
   HYPRE_Int       *C_diag_i = NULL;
   HYPRE_Int       *C_diag_j;

   hypre_CSRMatrix *C_offd;
   double          *C_offd_data = NULL;
   HYPRE_Int       *C_offd_i = NULL;
   HYPRE_Int       *C_offd_j = NULL;

   HYPRE_Int        C_diag_size;
   HYPRE_Int        C_offd_size;
   HYPRE_Int        num_cols_offd_C = 0;

   hypre_CSRMatrix *Ps_ext = NULL;
   double          *Ps_ext_data = NULL;
   HYPRE_Int       *Ps_ext_i = NULL;
   HYPRE_Int       *Ps_ext_j = NULL;

   double          *P_ext_diag_data = NULL;
   HYPRE_Int       *P_ext_diag_i;
   HYPRE_Int       *P_ext_diag_j = NULL;
   HYPRE_Int        P_ext_diag_size;

   double          *P_ext_offd_data = NULL;
   HYPRE_Int       *P_ext_offd_i;
   HYPRE_Int       *P_ext_offd_j = NULL;
   HYPRE_Int        P_ext_offd_size;

   HYPRE_Int       *P_marker;
   HYPRE_Int       *temp = NULL;

   HYPRE_Int        i, j;
   HYPRE_Int        i1, i2, i3;
   HYPRE_Int        jj2, jj3;

   HYPRE_Int        jj_count_diag, jj_count_offd;
   HYPRE_Int        jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int        start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int        n_rows_A_global, n_cols_A_global;
   HYPRE_Int        n_rows_P_global, n_cols_P_global;
   HYPRE_Int        allsquare = 0;
   HYPRE_Int        cnt, cnt_offd, cnt_diag;
   HYPRE_Int        num_procs;
   HYPRE_Int        value;

   double           a_entry;
   double           a_b_product;

   n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P);

   if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P)
   {
      hypre_printf(" Error! Incompatible matrix dimensions!\n");
      return NULL;
   }
   /* if (num_rows_A==num_cols_P) allsquare = 1; */

   /*-----------------------------------------------------------------------
    *  Extract P_ext, i.e. portion of P that is stored on neighbor procs
    *  and needed locally for matrix matrix product
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);

   if (num_procs > 1)
   {
      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings within
       * hypre_ParCSRMatrixExtractBExt
       *--------------------------------------------------------------------*/
      Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1);
      Ps_ext_data = hypre_CSRMatrixData(Ps_ext);
      Ps_ext_i    = hypre_CSRMatrixI(Ps_ext);
      Ps_ext_j    = hypre_CSRMatrixJ(Ps_ext);
   }

   /* Split the external rows into a part whose columns fall into the
      local column range of P (diag) and a part outside of it (offd).
      First count the entries of each kind per row ... */
   P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_diag_size = 0;
   P_ext_offd_size = 0;
   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;

   for (i = 0; i < num_cols_offd_A; i++)
   {
      for (j = Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
      {
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
            P_ext_offd_size++;
         else
            P_ext_diag_size++;
      }
      P_ext_diag_i[i+1] = P_ext_diag_size;
      P_ext_offd_i[i+1] = P_ext_offd_size;
   }

   if (P_ext_diag_size)
   {
      P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size);
      P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size);
   }
   if (P_ext_offd_size)
   {
      P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size);
      P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size);
   }

   /* ... then copy them; diag columns become local indices, offd columns
      stay global for now */
   cnt_offd = 0;
   cnt_diag = 0;
   for (i = 0; i < num_cols_offd_A; i++)
   {
      for (j = Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
      {
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_j[cnt_offd] = Ps_ext_j[j];
            P_ext_offd_data[cnt_offd++] = Ps_ext_data[j];
         }
         else
         {
            P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P;
            P_ext_diag_data[cnt_diag++] = Ps_ext_data[j];
         }
      }
   }

   if (num_procs > 1)
   {
      hypre_CSRMatrixDestroy(Ps_ext);
      Ps_ext = NULL;
   }

   /* Build col_map_offd_C as the sorted union (without duplicates) of the
      global offd columns of P_ext and the offd columns of P */
   cnt = 0;
   if (P_ext_offd_size || num_cols_offd_P)
   {
      temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P);
      for (i = 0; i < P_ext_offd_size; i++)
         temp[i] = P_ext_offd_j[i];
      cnt = P_ext_offd_size;
      for (i = 0; i < num_cols_offd_P; i++)
         temp[cnt++] = col_map_offd_P[i];
   }
   if (cnt)
   {
      qsort0(temp, 0, cnt-1);

      num_cols_offd_C = 1;
      value = temp[0];
      for (i = 1; i < cnt; i++)
      {
         if (temp[i] > value)
         {
            value = temp[i];
            temp[num_cols_offd_C++] = value;
         }
      }
   }

   if (num_cols_offd_C)
      col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);

   for (i = 0; i < num_cols_offd_C; i++)
      col_map_offd_C[i] = temp[i];

   if (P_ext_offd_size || num_cols_offd_P)
      hypre_TFree(temp);

   /* replace the global offd columns of P_ext by positions in
      col_map_offd_C */
   for (i = 0 ; i < P_ext_offd_size; i++)
      P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C,
                                           P_ext_offd_j[i],
                                           num_cols_offd_C);

   /* map_P_to_C[k] is the position in col_map_offd_C of offd column k
      of P */
   if (num_cols_offd_P)
   {
      map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P);

      cnt = 0;
      for (i = 0; i < num_cols_offd_C; i++)
      {
         if (col_map_offd_C[i] == col_map_offd_P[cnt])
         {
            map_P_to_C[cnt++] = i;
            if (cnt == num_cols_offd_P) break;
         }
      }
   }

   /*-----------------------------------------------------------------------
    *  Allocate marker array.
    *-----------------------------------------------------------------------*/

   P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C);

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {
      P_marker[i1] = -1;
   }

   /* no changes for the marked version above this point */
   /* This function call is the first pass: */
   hypre_ParMatmul_RowSizes_Marked(
      &C_diag_i, &C_offd_i, &P_marker,
      A_diag_i, A_diag_j, A_offd_i, A_offd_j,
      P_diag_i, P_diag_j, P_offd_i, P_offd_j,
      P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j,
      map_P_to_C,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A, allsquare,
      num_cols_diag_P, num_cols_offd_P,
      num_cols_offd_C, CF_marker, dof_func, dof_func_offd
      );
   /* The above call of hypre_ParMatmul_RowSizes_Marked computed
      two scalars: C_diag_size, C_offd_size,
      and two arrays: C_diag_i, C_offd_i
      ( P_marker is also computed, but only used internally )
   */

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/

   C_diag_data = hypre_CTAlloc(double, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   {
      C_offd_data = hypre_CTAlloc(double, C_offd_size);
      C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   }

   /*-----------------------------------------------------------------------
    *  Second Pass: Fill in C_diag_data and C_diag_j.
    *  Second Pass: Fill in C_offd_data and C_offd_j.
    *-----------------------------------------------------------------------*/

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {
      P_marker[i1] = -1;
   }

   /*-----------------------------------------------------------------------
    *  Loop over interior c-points.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {
      if ( CF_marker[i1] < 0 )  /* i1 is a fine row */
         /* ... This and the coarse row code are the only parts between
            first pass and near the end where hypre_ParMatmul_FC is
            different from the regular hypre_ParMatmul */
      {
         /*--------------------------------------------------------------------
          *  Create diagonal entry, C_{i1,i1}
          *--------------------------------------------------------------------*/

         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_offd.
          *-----------------------------------------------------------------*/

         if (num_cols_offd_A)
         {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];
               /* Interpolate only like "functions".  Where i1 and i2
                  correspond to different "functions" the interpolation
                  matrix should be 0; as no entry has been created for
                  C(i1,i2), nothing needs to be done in that case. */
               if ( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] )
               {
                  a_entry = A_offd_data[jj2];

                  /*-----------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext.
                   *-----------------------------------------------------------*/

                  for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+P_ext_offd_j[jj3];
                     a_b_product = a_entry * P_ext_offd_data[jj3];

                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new
                      *  entry.  If it has, add new contribution.
                      *--------------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
                  for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
                  {
                     i3 = P_ext_diag_j[jj3];
                     a_b_product = a_entry * P_ext_diag_data[jj3];

                     if (P_marker[i3] < jj_row_begin_diag)
                     {
                        P_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                     {
                        C_diag_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
         }

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];
            /* Interpolate only like "functions"; see the remark in the
               A_offd loop of this row. */
            if ( dof_func==NULL || dof_func[i1] == dof_func[i2] )
            {
               a_entry = A_diag_data[jj2];

               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_diag.
                *-----------------------------------------------------------*/

               for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
               {
                  i3 = P_diag_j[jj3];
                  a_b_product = a_entry * P_diag_data[jj3];

                  /*--------------------------------------------------------
                   *  Check P_marker to see that C_{i1,i3} has not already
                   *  been accounted for. If it has not, create a new entry.
                   *  If it has, add new contribution.
                   *--------------------------------------------------------*/

                  if (P_marker[i3] < jj_row_begin_diag)
                  {
                     P_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               if (num_cols_offd_P)
               {
                  for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]];
                     a_b_product = a_entry * P_offd_data[jj3];

                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new
                      *  entry.  If it has, add new contribution.
                      *--------------------------------------------------------*/

                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
         }
      }
      else  /* i1 is a coarse row.*/
         /* Copy P coarse-row values to C.  This is useful if C is meant to
            become a replacement for P */
      {
         if (num_cols_offd_P)
         {
            for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++)
            {
               /* Read row i1 of P at position jj2 (not at the write
                  cursor jj_count_offd, which indexes C), and translate
                  the column from P's offd numbering to C's, exactly as
                  the fine-row code does through map_P_to_C. */
               C_offd_j[jj_count_offd] = map_P_to_C[P_offd_j[jj2]];
               C_offd_data[jj_count_offd] = P_offd_data[jj2];
               ++jj_count_offd;
            }
         }
         for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++)
         {
            C_diag_j[jj_count_diag] = P_diag_j[jj2];
            C_diag_data[jj_count_diag] = P_diag_data[jj2];
            ++jj_count_diag;
         }
      }
   }

   C = hypre_ParCSRMatrixCreate( comm, n_rows_A_global, n_cols_P_global,
                                 row_starts_A, col_starts_P,
                                 num_cols_offd_C, C_diag_size, C_offd_size );

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C,0);
   hypre_ParCSRMatrixSetColStartsOwner(C,0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data;
   hypre_CSRMatrixI(C_diag) = C_diag_i;
   hypre_CSRMatrixJ(C_diag) = C_diag_j;

   C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrixI(C_offd) = C_offd_i;

   if (num_cols_offd_C)
   {
      hypre_CSRMatrixData(C_offd) = C_offd_data;
      hypre_CSRMatrixJ(C_offd) = C_offd_j;
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
   }

   /*-----------------------------------------------------------------------
    *  Free various arrays
    *-----------------------------------------------------------------------*/

   hypre_TFree(P_marker);
   hypre_TFree(P_ext_diag_i);
   if (P_ext_diag_size)
   {
      hypre_TFree(P_ext_diag_j);
      hypre_TFree(P_ext_diag_data);
   }
   hypre_TFree(P_ext_offd_i);
   if (P_ext_offd_size)
   {
      hypre_TFree(P_ext_offd_j);
      hypre_TFree(P_ext_offd_data);
   }
   if (num_cols_offd_P)
      hypre_TFree(map_P_to_C);

   return C;
}
/*--------------------------------------------------------------------------
 * hypre_MatTCommPkgCreate_core
 *
 * Builds the send/receive description needed for a product of the form
 * A*A^T.  Every process publishes (via Allgather/Allgatherv) the global
 * column numbers it holds; each process then schedules, for every other
 * process i, the local rows that touch a column also held by i.
 *
 * Input:
 *   comm                        communicator
 *   col_map_offd, num_cols_offd global numbers / count of off-diagonal columns
 *   first_col_diag, num_cols_diag  first global number / count of diagonal columns
 *   col_starts, row_starts      partitionings indexed by process id
 *   num_rows_diag               number of local rows
 *   firstColDiag, colMapOffd,
 *   mat_i_diag, mat_j_diag,
 *   mat_i_offd, mat_j_offd      forwarded unchanged to RowsWithColumn
 *   data                        not referenced in this function
 *
 * Output (through the p_* pointers):
 *   num_recvs, recv_procs, recv_vec_starts,
 *   num_sends, send_procs, send_map_starts, send_map_elmts.
 *   The arrays are allocated here with hypre_CTAlloc and handed back without
 *   being freed; presumably the caller takes ownership.
 *--------------------------------------------------------------------------*/
void
hypre_MatTCommPkgCreate_core (
   /* input args: */
   MPI_Comm   comm,
   HYPRE_Int *col_map_offd,
   HYPRE_Int  first_col_diag,
   HYPRE_Int *col_starts,
   HYPRE_Int  num_rows_diag,
   HYPRE_Int  num_cols_diag,
   HYPRE_Int  num_cols_offd,
   HYPRE_Int *row_starts,
   HYPRE_Int  firstColDiag,
   HYPRE_Int *colMapOffd,
   HYPRE_Int *mat_i_diag,
   HYPRE_Int *mat_j_diag,
   HYPRE_Int *mat_i_offd,
   HYPRE_Int *mat_j_offd,
   HYPRE_Int  data,  /* = 1 for a matrix with floating-point data, =0 for Boolean matrix */
   /* pointers to output args: */
   HYPRE_Int  *p_num_recvs,
   HYPRE_Int **p_recv_procs,
   HYPRE_Int **p_recv_vec_starts,
   HYPRE_Int  *p_num_sends,
   HYPRE_Int **p_send_procs,
   HYPRE_Int **p_send_map_starts,
   HYPRE_Int **p_send_map_elmts
   )
{
   HYPRE_Int   num_sends;
   HYPRE_Int  *send_procs;
   HYPRE_Int  *send_map_starts;
   HYPRE_Int  *send_map_elmts;
   HYPRE_Int   num_recvs;
   HYPRE_Int  *recv_procs;
   HYPRE_Int  *recv_vec_starts;
   HYPRE_Int   i, j, j2, k, ir, rowmin, rowmax;
   HYPRE_Int  *tmp, *recv_buf, *displs, *info, *send_buf, *all_num_sends3;
   HYPRE_Int   num_procs, my_id, num_elmts;
   HYPRE_Int   local_info, index, index2;
   HYPRE_Int   pmatch, col, kc, p;
   HYPRE_Int  *recv_sz_buf;
   HYPRE_Int  *row_marker;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   info = hypre_CTAlloc(HYPRE_Int, num_procs);

   /* ----------------------------------------------------------------------
    * determine which processors to receive from (set proc_mark) and num_recvs,
    * at the end of the loop proc_mark[i] contains the number of elements to be
    * received from Proc. i
    *
    * - For A*b or A*B: for each off-diagonal column i of A, you want to identify
    * the processor which has the corresponding element i of b (row i of B)
    * (columns in the local diagonal block, just multiply local rows of B).
    * You do it by finding the processor which has that column of A in its
    * _diagonal_ block - assuming b or B is distributed the same, which I believe
    * is evenly among processors, by row.  There is a unique solution because
    * the diag/offd blocking is defined by which processor owns which rows of A.
    *
    * - For A*A^T: A^T is not distributed by rows as B or any 'normal' matrix is.
    * For each off-diagonal row,column k,i element of A, you want to identify
    * the processors which have the corresponding row,column i,j elements of A^T
    * i.e., row,column j,i elements of A (all i,j,k for which these entries are
    * nonzero, row k of A lives on this processor, and row j of A lives on
    * a different processor).  So, given a column i in the local A-offd or A-diag,
    * we want to find all the processors which have column i, in diag or offd
    * blocks.  Unlike the A*B case, I don't think you can eliminate looking at
    * any class of blocks.
    * ---------------------------------------------------------------------*/

   /* The algorithm for A*B was:
      For each of my columns i (in offd block), use known information on data
      distribution of columns in _diagonal_ blocks to find the processor p which
      owns row i. (Note that for i in diag block, I own the row, nothing to do.)
      Count up such i's for each processor in proc_mark.  Construct a data
      structure, recv_buf, made by appending a structure tmp from each processor.
      The data structure tmp looks like (p, no. of i's, i1, i2,...) (p=0,...) .
      There are two communication steps: gather size information (local_info)
      from all processors (into info), then gather the data (tmp) from all
      processors (into recv_buf).  Then you go through recv_buf.  For each (sink)
      processor p you search for the appearance of my (source) processor number
      (most of recv_buf pertains to other processors and is ignored).  When you
      find the appropriate section, pull out the i's, count them and save them,
      in send_map_elmts, and save p in send_procs and index information in
      send_map_starts.
   */
   /* The algorithm for A*A^T:
      [ Originally I had planned to figure out approximately which processors
      had the information (for A*B it could be done exactly) to save on
      communication.  But even for A*B where the data owner is known, all data is
      sent to all processors, so that's not worth worrying about on the first cut.
      One consequence is that proc_mark is not needed.]
      Construct a data structure, recv_buf, made by appending a structure tmp for
      each processor.  It simply consists of (no. of i's, i1, i2,...) where i is
      the global number of a column in the offd block.  There are still two
      communication steps: gather size information (local_info) from all
      processors (into info), then gather the data (tmp) from all processors
      (into recv_buf).  Then you go through recv_buf.  For each (sink) processor p
      you go through all its column numbers in recv_buf.  Check each one for
      whether you have data in that column.  If so, put it in send_map_elmts,
      p in send_procs, and update the index information in send_map_starts.
      Note that these arrays don't mean quite the same thing as for A*B.
   */

   num_recvs = num_procs-1;
   local_info = num_procs + num_cols_offd + num_cols_diag;

   hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

   /* ----------------------------------------------------------------------
    * generate information to be sent: tmp contains for each recv_proc:
    * {deleted: id of recv_procs}, number of elements to be received for this
    * processor, indices of elements (in this order)
    * ---------------------------------------------------------------------*/

   displs = hypre_CTAlloc(HYPRE_Int, num_procs+1);
   displs[0] = 0;
   for (i=1; i < num_procs+1; i++)
   {
      displs[i] = displs[i-1]+info[i-1];
   }
   recv_buf = hypre_CTAlloc(HYPRE_Int, displs[num_procs]);
   tmp = hypre_CTAlloc(HYPRE_Int, local_info);

   j = 0;
   for (i=0; i < num_procs; i++)
   {
      j2 = j++;      /* slot holding the count for process i */
      tmp[j2] = 0;
      for (k=0; k < num_cols_offd; k++)
      {
         if (col_map_offd[k] >= col_starts[i] &&
             col_map_offd[k] < col_starts[i+1])
         {
            tmp[j++] = col_map_offd[k];
            ++(tmp[j2]);
         }
      }
      for (k=0; k < num_cols_diag; k++)
      {
         if ( k+first_col_diag >= col_starts[i] &&
              k+first_col_diag < col_starts[i+1] )
         {
            tmp[j++] = k + first_col_diag;
            ++(tmp[j2]);
         }
      }
   }

   hypre_MPI_Allgatherv(tmp,local_info,HYPRE_MPI_INT,recv_buf,info,displs,HYPRE_MPI_INT,comm);

   /* ----------------------------------------------------------------------
    * determine send_procs and actual elements to be sent (in send_map_elmts)
    * and send_map_starts whose i-th entry points to the beginning of the
    * elements to be sent to proc. i
    * ---------------------------------------------------------------------*/
   /* Meanings of arrays being set here, more verbosely stated:
      send_procs: processors p to send to
      send_map_starts: for each p, gives range of indices in send_map_elmts;
      send_map_elmts:  Each element is a send_map_elmts[i], with i in a range
      given by send_map_starts[p..p+1], for some p. This element is the global
      column number for a column in the offd block of p which is to be multiplied
      by data from this processor.
      For A*B, send_map_elmts[i] is therefore a row of B belonging to this
      processor, to be sent to p.  For A*A^T, send_map_elmts[i] is a row of A
      belonging to this processor, to be sent to p; this row was selected
      because it has a nonzero on a _column_ needed by p.
   */

   num_sends = num_procs;   /* may turn out to be less, but we can't know yet */
   num_elmts = (num_procs-1)*num_rows_diag;
   /* ... a crude upper bound; should try to do better even if more comm required */
   send_procs = hypre_CTAlloc(HYPRE_Int, num_sends);
   send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1);
   send_map_elmts = hypre_CTAlloc(HYPRE_Int, num_elmts);
   row_marker = hypre_CTAlloc(HYPRE_Int,num_rows_diag);

   index = 0;
   index2 = 0;
   send_map_starts[0] = 0;
   for (i=0; i < num_procs; i++)
   {
      send_map_starts[index+1] = send_map_starts[index];

      /* Nothing is ever scheduled for sending to myself, so the section of
         recv_buf that this process contributed need not be walked. */
      if ( i == my_id )
      {
         continue;
      }

      j = displs[i];
      pmatch = 0;
      for ( ir=0; ir<num_rows_diag; ++ir )
      {
         row_marker[ir] = 0;
      }

      while ( j < displs[i+1] )
      {
         num_elmts = recv_buf[j++];   /* no. of columns proc. i wants */
         for ( k=0; k<num_elmts; k++ )
         {
            col = recv_buf[j++];      /* a global column no. at proc. i */

            for ( kc=0; kc<num_cols_offd; kc++ )
            {
               if ( col_map_offd[kc]==col )
               {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
                  /* Sending the column itself is not possible, so plan to send
                     all of my rows which use this column.  RowsWithColumn
                     reports an inclusive range [rowmin,rowmax]; every row in
                     it that is not yet marked for proc. i is scheduled. */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag,
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd );
                  for ( ir=rowmin; ir<=rowmax; ++ir )
                  {
                     if ( row_marker[ir]==0 )
                     {
                        row_marker[ir] = 1;
                        ++send_map_starts[index+1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }

            /* Is col inside [row_starts[my_id], row_starts[my_id+1])?
               (An earlier, unfinished alternative compared col against the
               diagonal column range first_col_diag + 0..num_cols_diag-1.)
               A direct range test replaces a linear scan over every index in
               that interval; the outcome is the same. */
            if ( col >= row_starts[my_id] && col < row_starts[my_id+1] )
            {
               /* this processor has the same column as proc. i (but is different) */
               pmatch = 1;
               send_procs[index] = i;
               /* Plan to send all of my rows which use this column... */
               RowsWithColumn( &rowmin, &rowmax, col,
                               num_rows_diag,
                               firstColDiag, colMapOffd,
                               mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd );
               for ( ir=rowmin; ir<=rowmax; ++ir )
               {
                  if ( row_marker[ir]==0 )
                  {
                     row_marker[ir] = 1;
                     ++send_map_starts[index+1];
                     send_map_elmts[index2++] = ir;
                  }
               }
            }
         }
      }
      if ( pmatch )
      {
         index++;
      }
   }
   num_sends = index;  /* no. of proc. rows will be sent to */

   /* Compute receive arrays recv_procs, recv_vec_starts ... */
   recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs);
   recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1);
   j2 = 0;
   for (i=0; i < num_procs; i++)
   {
      if ( i!=my_id )
      {
         recv_procs[j2] = i;
         j2++;
      }
   }

   /* Compute recv_vec_starts.
      The real job is, for each processor p, to figure out how many rows
      p will send to me (me=this processor).  I now know how many (and which)
      rows I will send to each p.  Indeed, if send_procs[index]=p, then the
      number is send_map_starts[index+1]-send_map_starts[index].
      More communication is needed.
      options:
      hypre_MPI_Allgather of communication sizes. <--- my choice, for now
        good: simple  bad: send num_procs*num_sends data, only need num_procs
                      but: not that much data compared to previous communication
      hypre_MPI_Allgatherv of communication sizes, only for pairs of procs. that communicate
        good: less data than above   bad: need extra commun. step to get recvcounts
      hypre_MPI_ISend,hypre_MPI_IRecv of each size, separately between each pair of procs.
        good: no excess data sent    bad: lots of little messages
                                     but: Allgather might be done the same under the hood
      may be much slower than Allgather or may be a bit faster depending on
      implementations
   */
   send_buf = hypre_CTAlloc( HYPRE_Int, 3*num_sends );
   all_num_sends3 = hypre_CTAlloc( HYPRE_Int, num_procs );

   /* scatter-gather num_sends, to set up the size for the main comm. step */
   i = 3*num_sends;
   hypre_MPI_Allgather( &i, 1, HYPRE_MPI_INT, all_num_sends3, 1, HYPRE_MPI_INT, comm );
   displs[0] = 0;
   for ( p=0; p<num_procs; ++p )
   {
      displs[p+1] = displs[p] + all_num_sends3[p];
   }
   recv_sz_buf = hypre_CTAlloc( HYPRE_Int, displs[num_procs] );

   /* scatter-gather size of row info to send, and proc. to send to;
      each send contributes the triple (destination, source, row count) */
   index = 0;
   for ( i=0; i<num_sends; ++i )
   {
      send_buf[index++] = send_procs[i];   /* processor to send to */
      send_buf[index++] = my_id;
      send_buf[index++] = send_map_starts[i+1] - send_map_starts[i];
      /* ... sizes of info to send */
   }

   hypre_MPI_Allgatherv( send_buf, 3*num_sends, HYPRE_MPI_INT,
                         recv_sz_buf, all_num_sends3, displs, HYPRE_MPI_INT, comm);

   /* Keep only the triples addressed to this process; they overwrite the
      provisional "everybody but me" list written into recv_procs above. */
   recv_vec_starts[0] = 0;
   j2 = 0;
   for ( i=0; i<displs[num_procs]; i=i+3 )
   {
      if ( recv_sz_buf[i]==my_id )
      {
         recv_procs[j2] = recv_sz_buf[i+1];
         recv_vec_starts[j2+1] = recv_vec_starts[j2] + recv_sz_buf[i+2];
         j2++;
      }
   }
   num_recvs = j2;

#if 0
   hypre_printf("num_procs=%i send_map_starts (%i):",num_procs,num_sends+1);
   for( i=0; i<=num_sends; ++i ) hypre_printf(" %i", send_map_starts[i] );
   hypre_printf("  send_procs (%i):",num_sends);
   for( i=0; i<num_sends; ++i ) hypre_printf(" %i", send_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_sends=%i send_buf[0,1,2]=%i %i %i",
                my_id, num_sends, send_buf[0], send_buf[1], send_buf[2] );
   hypre_printf(" all_num_sends3[0,1]=%i %i\n", all_num_sends3[0], all_num_sends3[1] );
   hypre_printf("my_id=%i rcv_sz_buf (%i):", my_id, displs[num_procs] );
   for( i=0; i<displs[num_procs]; ++i ) hypre_printf(" %i", recv_sz_buf[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i recv_vec_starts (%i):",my_id,num_recvs+1);
   for( i=0; i<=num_recvs; ++i ) hypre_printf(" %i", recv_vec_starts[i] );
   hypre_printf("  recv_procs (%i):",num_recvs);
   for( i=0; i<num_recvs; ++i ) hypre_printf(" %i", recv_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_recvs=%i recv_sz_buf[0,1,2]=%i %i %i\n",
                my_id, num_recvs, recv_sz_buf[0], recv_sz_buf[1], recv_sz_buf[2] );
#endif

   hypre_TFree(send_buf);
   hypre_TFree(all_num_sends3);
   hypre_TFree(tmp);
   hypre_TFree(recv_buf);
   hypre_TFree(displs);
   hypre_TFree(info);
   hypre_TFree(recv_sz_buf);
   hypre_TFree(row_marker);

   /* finish up with the hand-coded call-by-reference... */
   *p_num_recvs = num_recvs;
   *p_recv_procs = recv_procs;
   *p_recv_vec_starts = recv_vec_starts;
   *p_num_sends = num_sends;
   *p_send_procs = send_procs;
   *p_send_map_starts = send_map_starts;
   *p_send_map_elmts = send_map_elmts;
}
/*--------------------------------------------------------------------------
 * GenerateLaplacian9pt
 *
 * Sets up local CSR storage for a 9-point stencil (centre plus eight
 * neighbours) on an nx-by-ny grid split over a P-by-Q process grid, for the
 * process at position (p, q).
 *
 * Visible steps:
 *  - nx_part / ny_part come from hypre_GeneratePartitioning; global_part holds
 *    the running total of grid points per process, with ix varying fastest.
 *  - my_id and num_procs obtained from the communicator are overwritten with
 *    q*P + p and P*Q.
 *  - num_cols_offd counts the off-process columns contributed by up to four
 *    edge neighbours and four corner neighbours; it is forced to 0 when this
 *    process owns no rows.
 *  - The double loop over the local grid points only counts entries:
 *    diag_i / offd_i become the CSR row-pointer arrays of the diagonal and
 *    off-diagonal blocks.
 *  - diag_j / diag_data are sized from diag_i[local_num_rows]; offd_j /
 *    offd_data are allocated only when num_procs > 1.
 *
 * `value` is not referenced in the visible part; presumably it holds the
 * stencil coefficients (TODO confirm).
 *
 * NOTE(review): the definition is cut off here right after the offd
 * allocations - the code that fills the column indices and values, builds the
 * matrix and returns it is not visible, and no closing brace is present.
 *--------------------------------------------------------------------------*/
HYPRE_ParCSRMatrix GenerateLaplacian9pt( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int p, HYPRE_Int q, double *value ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; double *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j; double *offd_data; HYPRE_Int *global_part; HYPRE_Int ix, iy; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int *work; HYPRE_Int row_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local; HYPRE_Int nx_size, ny_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1); global_part[0] = 0; cnt = 1; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size; } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; my_id = q*P + p; num_procs = P*Q; local_num_rows = nx_local*ny_local; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); num_cols_offd = 0; if (p) num_cols_offd += ny_local; if (p < P_busy-1) num_cols_offd += ny_local; if (q) num_cols_offd += nx_local; if (q < Q_busy-1) num_cols_offd += nx_local; if (p && q) num_cols_offd++; if (p && q < Q_busy-1 ) num_cols_offd++; if (p < P_busy-1 && q ) num_cols_offd++; if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 0; o_cnt = 0; diag_i[0] = 0; offd_i[0] = 0; for (iy = ny_part[q]; iy < 
ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { cnt++; o_cnt++; diag_i[cnt] = diag_i[cnt-1]; offd_i[o_cnt] = offd_i[o_cnt-1]; diag_i[cnt]++; if (iy > ny_part[q]) { diag_i[cnt]++; if (ix > nx_part[p]) { diag_i[cnt]++; } else { if (ix) offd_i[o_cnt]++; } if (ix < nx_part[p+1]-1) { diag_i[cnt]++; } else { if (ix+1 < nx) offd_i[o_cnt]++; } } else { if (iy) { offd_i[o_cnt]++; if (ix > nx_part[p]) { offd_i[o_cnt]++; } else if (ix) { offd_i[o_cnt]++; } if (ix < nx_part[p+1]-1) { offd_i[o_cnt]++; } else if (ix < nx-1) { offd_i[o_cnt]++; } } } if (ix > nx_part[p]) diag_i[cnt]++; else { if (ix) { offd_i[o_cnt]++; } } if (ix+1 < nx_part[p+1]) diag_i[cnt]++; else { if (ix+1 < nx) { offd_i[o_cnt]++; } } if (iy+1 < ny_part[q+1]) { diag_i[cnt]++; if (ix > nx_part[p]) { diag_i[cnt]++; } else { if (ix) offd_i[o_cnt]++; } if (ix < nx_part[p+1]-1) { diag_i[cnt]++; } else { if (ix+1 < nx) offd_i[o_cnt]++; } } else { if (iy+1 < ny) { offd_i[o_cnt]++; if (ix > nx_part[p]) { offd_i[o_cnt]++; } else if (ix) { offd_i[o_cnt]++; } if (ix < nx_part[p+1]-1) { offd_i[o_cnt]++; } else if (ix < nx-1) { offd_i[o_cnt]++; } } } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(double, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(double, offd_i[local_num_rows]); }
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvec_FF
 *
 * Parallel wrapper around hypre_CSRMatrixMatvec_FF.  The diagonal block is
 * applied as (alpha, beta) to x_local / y_local with CF_marker for both rows
 * and columns; when more than one process is present the off-diagonal block
 * is then accumulated into y_local (beta = 1.0) using the exchanged x values
 * and the exchanged CF markers of the off-diagonal columns.
 * NOTE(review): the exact role of `fpt` is defined by
 * hypre_CSRMatrixMatvec_FF, which is not visible here - presumably only
 * entries whose marker equals fpt take part.
 *
 * Returns an informational code only: 11 if the global size of x differs
 * from the number of columns of A, 12 if the global size of y differs from
 * the number of rows of A, 13 if both, 0 otherwise.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParCSRMatrixMatvec_FF( HYPRE_Complex       alpha,
                             hypre_ParCSRMatrix *A,
                             hypre_ParVector    *x,
                             HYPRE_Complex       beta,
                             hypre_ParVector    *y,
                             HYPRE_Int          *CF_marker,
                             HYPRE_Int           fpt )
{
   MPI_Comm                comm = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommHandle *comm_handle;
   hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix        *diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix        *offd = hypre_ParCSRMatrixOffd(A);
   hypre_Vector           *x_local = hypre_ParVectorLocalVector(x);
   hypre_Vector           *y_local = hypre_ParVectorLocalVector(y);
   HYPRE_Int               num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int               num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   /* Only created when there are off-diagonal columns; must start out NULL
      because the cleanup below runs whenever num_procs > 1. */
   hypre_Vector           *x_tmp = NULL;
   HYPRE_Int               x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int               y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int               num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int               ierr = 0;
   HYPRE_Int               num_sends = 0, i, j, index, start, num_procs;
   HYPRE_Int              *int_buf_data = NULL;
   HYPRE_Int              *CF_marker_offd = NULL;

   HYPRE_Complex          *x_tmp_data = NULL;
   HYPRE_Complex          *x_buf_data = NULL;
   HYPRE_Complex          *x_local_data = hypre_VectorData(x_local);

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  ParMatvec returns ierr = 11 if
    *  length of X doesn't equal the number of columns of A,
    *  ierr = 12 if the length of Y doesn't equal the number of rows
    *  of A, and ierr = 13 if both are true.
    *
    *  Because temporary vectors are often used in ParMatvec, none of
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm,&num_procs);

   if (num_cols != x_size)
   {
      ierr = 11;
   }

   if (num_rows != y_size)
   {
      ierr = 12;
   }

   if (num_cols != x_size && num_rows != y_size)
   {
      ierr = 13;
   }

   if (num_procs > 1)
   {
      if (num_cols_offd)
      {
         x_tmp = hypre_SeqVectorCreate( num_cols_offd );
         hypre_SeqVectorInitialize(x_tmp);
         x_tmp_data = hypre_VectorData(x_tmp);
      }

      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings
       *--------------------------------------------------------------------*/
      if (!comm_pkg)
      {
         hypre_MatvecCommPkgCreate(A);
         comm_pkg = hypre_ParCSRMatrixCommPkg(A);
      }

      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      if (num_sends)
      {
         x_buf_data = hypre_CTAlloc(HYPRE_Complex,
                                    hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
      }

      /* Pack the locally owned x entries requested by the other processes. */
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
         {
            x_buf_data[index++]
               = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
         }
      }
      /* Start the exchange of x values; it is completed after the diagonal
         block has been applied. */
      comm_handle =
         hypre_ParCSRCommHandleCreate( 1, comm_pkg, x_buf_data, x_tmp_data );
   }

   hypre_CSRMatrixMatvec_FF( alpha, diag, x_local, beta, y_local, CF_marker,
                             CF_marker, fpt);

   if (num_procs > 1)
   {
      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      if (num_sends)
      {
         int_buf_data = hypre_CTAlloc(HYPRE_Int,
                                      hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
      }
      if (num_cols_offd)
      {
         CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      }

      /* Exchange the CF markers belonging to the off-diagonal columns. */
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
         {
            int_buf_data[index++]
               = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
         }
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate(11,comm_pkg,int_buf_data,CF_marker_offd );
      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      if (num_cols_offd)
      {
         hypre_CSRMatrixMatvec_FF( alpha, offd, x_tmp, 1.0, y_local, CF_marker,
                                   CF_marker_offd, fpt);
      }

      /* x_tmp exists only if off-diagonal columns were present. */
      if (x_tmp)
      {
         hypre_SeqVectorDestroy(x_tmp);
         x_tmp = NULL;
      }
      hypre_TFree(x_buf_data);
      hypre_TFree(int_buf_data);
      hypre_TFree(CF_marker_offd);
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * myBuildParLaplacian
 *
 * Test-driver helper: parses the command line and builds a matrix with
 * GenerateLaplacian on hypre_MPI_COMM_WORLD, returning it through A_ptr.
 *
 * Recognised options (each takes exactly three values):
 *   -n <nx> <ny> <nz>   grid size            (default 10 10 10)
 *   -P <Px> <Py> <Pz>   process topology     (default 1 num_procs 1)
 *   -c <cx> <cy> <cz>   coefficients         (default 1 1 1)
 * Any other argument is skipped.
 *
 * arg_index : accepted for interface compatibility; it is overwritten and
 *             the scan always starts at argv[0].
 * parmprint : when nonzero, rank 0 prints the parameters in use.
 *
 * Returns 0.  Calls exit(1) if an option is missing one of its three values
 * or if Px*Py*Pz differs from the number of processes.
 *--------------------------------------------------------------------------*/
HYPRE_Int
myBuildParLaplacian( HYPRE_Int            argc,
                     char                *argv[],
                     HYPRE_Int            arg_index,
                     HYPRE_ParCSRMatrix  *A_ptr,
                     HYPRE_Int            parmprint )
{
   HYPRE_Int           nx, ny, nz;
   HYPRE_Int           P, Q, R;
   HYPRE_Real          cx, cy, cz;

   HYPRE_ParCSRMatrix  A;

   HYPRE_Int           num_procs, myid;
   HYPRE_Int           p, q, r;
   HYPRE_Real         *values;

   /*-----------------------------------------------------------
    * Initialize some stuff
    *-----------------------------------------------------------*/

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );

   /*-----------------------------------------------------------
    * Set defaults
    *-----------------------------------------------------------*/

   nx = 10;
   ny = 10;
   nz = 10;

   P  = 1;
   Q  = num_procs;
   R  = 1;

   cx = 1.;
   cy = 1.;
   cz = 1.;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/
   arg_index = 0;
   while (arg_index < argc)
   {
      const char *opt    = argv[arg_index];
      HYPRE_Int   is_n   = ( strcmp(opt, "-n") == 0 );
      HYPRE_Int   is_P   = ( strcmp(opt, "-P") == 0 );
      HYPRE_Int   is_c   = ( strcmp(opt, "-c") == 0 );

      if ( !(is_n || is_P || is_c) )
      {
         arg_index++;
         continue;
      }

      /* Each option consumes argv[arg_index+1 .. arg_index+3]; refuse to
         read past the end of argv on a truncated command line. */
      if ( arg_index + 3 >= argc )
      {
         hypre_printf("Error: option %s requires three values \n", opt);
         exit(1);
      }

      arg_index++;
      if ( is_n )
      {
         nx = atoi(argv[arg_index++]);
         ny = atoi(argv[arg_index++]);
         nz = atoi(argv[arg_index++]);
      }
      else if ( is_P )
      {
         P  = atoi(argv[arg_index++]);
         Q  = atoi(argv[arg_index++]);
         R  = atoi(argv[arg_index++]);
      }
      else
      {
         cx = atof(argv[arg_index++]);
         cy = atof(argv[arg_index++]);
         cz = atof(argv[arg_index++]);
      }
   }

   /*-----------------------------------------------------------
    * Check a few things
    *-----------------------------------------------------------*/

   if ((P*Q*R) != num_procs)
   {
      hypre_printf("Error: Invalid number of processors or processor topology \n");
      exit(1);
   }

   /*-----------------------------------------------------------
    * Print driver parameters
    *-----------------------------------------------------------*/

   if (myid == 0 && parmprint)
   {
      hypre_printf(" Laplacian:\n");
      hypre_printf(" (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
      hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
      hypre_printf(" (cx, cy, cz) = (%f, %f, %f)\n\n", cx, cy, cz);
   }

   /*-----------------------------------------------------------
    * Set up the grid structure
    *-----------------------------------------------------------*/

   /* compute p,q,r from P,Q,R and myid (P, Q, R are nonzero here because
      their product equals num_procs) */
   p = myid % P;
   q = (( myid - p)/P) % Q;
   r = ( myid - p - P*q)/( P*Q );

   /*-----------------------------------------------------------
    * Generate the matrix
    *-----------------------------------------------------------*/

   /* values[0] is the diagonal entry, values[1..3] the x/y/z off-diagonal
      entries; a direction with a single grid point adds nothing to the
      diagonal. */
   values = hypre_CTAlloc(HYPRE_Real, 4);

   values[1] = -cx;
   values[2] = -cy;
   values[3] = -cz;

   values[0] = 0.;
   if (nx > 1)
   {
      values[0] += 2.0*cx;
   }
   if (ny > 1)
   {
      values[0] += 2.0*cy;
   }
   if (nz > 1)
   {
      values[0] += 2.0*cz;
   }

   A = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD,
                                              nx, ny, nz, P, Q, R, p, q, r, values);

   hypre_TFree(values);

   *A_ptr = A;

   return (0);
}
HYPRE_Int hypre_BoomerAMGCreateScalarCFS(hypre_ParCSRMatrix *SN, HYPRE_Int *CFN_marker, HYPRE_Int *col_offd_SN_to_AN, HYPRE_Int num_functions, HYPRE_Int nodal, HYPRE_Int data, HYPRE_Int **dof_func_ptr, HYPRE_Int **CF_marker_ptr, HYPRE_Int **col_offd_S_to_A_ptr, hypre_ParCSRMatrix **S_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(SN); hypre_ParCSRMatrix *S; hypre_CSRMatrix *S_diag; HYPRE_Int *S_diag_i; HYPRE_Int *S_diag_j; double *S_diag_data; hypre_CSRMatrix *S_offd; HYPRE_Int *S_offd_i; HYPRE_Int *S_offd_j; double *S_offd_data; HYPRE_Int *row_starts_S; HYPRE_Int *col_starts_S; HYPRE_Int *row_starts_SN = hypre_ParCSRMatrixRowStarts(SN); HYPRE_Int *col_starts_SN = hypre_ParCSRMatrixColStarts(SN); hypre_CSRMatrix *SN_diag = hypre_ParCSRMatrixDiag(SN); HYPRE_Int *SN_diag_i = hypre_CSRMatrixI(SN_diag); HYPRE_Int *SN_diag_j = hypre_CSRMatrixJ(SN_diag); double *SN_diag_data; hypre_CSRMatrix *SN_offd = hypre_ParCSRMatrixOffd(SN); HYPRE_Int *SN_offd_i = hypre_CSRMatrixI(SN_offd); HYPRE_Int *SN_offd_j = hypre_CSRMatrixJ(SN_offd); double *SN_offd_data; HYPRE_Int *CF_marker; HYPRE_Int *col_map_offd_SN = hypre_ParCSRMatrixColMapOffd(SN); HYPRE_Int *col_map_offd_S; HYPRE_Int *dof_func; HYPRE_Int num_nodes = hypre_CSRMatrixNumRows(SN_diag); HYPRE_Int num_variables; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(SN); HYPRE_Int num_sends; HYPRE_Int num_recvs; HYPRE_Int *send_procs; HYPRE_Int *send_map_starts; HYPRE_Int *send_map_elmts; HYPRE_Int *recv_procs; HYPRE_Int *recv_vec_starts; hypre_ParCSRCommPkg *comm_pkg_S; HYPRE_Int *send_procs_S; HYPRE_Int *send_map_starts_S; HYPRE_Int *send_map_elmts_S; HYPRE_Int *recv_procs_S; HYPRE_Int *recv_vec_starts_S; HYPRE_Int *col_offd_S_to_A = NULL; HYPRE_Int num_coarse_nodes; HYPRE_Int i,j,k,k1,jj,cnt; HYPRE_Int row, start, end; HYPRE_Int num_procs; HYPRE_Int num_cols_offd_SN = hypre_CSRMatrixNumCols(SN_offd); HYPRE_Int num_cols_offd_S; HYPRE_Int SN_num_nonzeros_diag; HYPRE_Int SN_num_nonzeros_offd; HYPRE_Int S_num_nonzeros_diag; 
HYPRE_Int S_num_nonzeros_offd; HYPRE_Int global_num_vars; HYPRE_Int global_num_cols; HYPRE_Int global_num_nodes; HYPRE_Int ierr = 0; hypre_MPI_Comm_size(comm, &num_procs); num_variables = num_functions*num_nodes; CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables); if (nodal < 0) { cnt = 0; num_coarse_nodes = 0; for (i=0; i < num_nodes; i++) { if (CFN_marker[i] == 1) num_coarse_nodes++; for (j=0; j < num_functions; j++) CF_marker[cnt++] = CFN_marker[i]; } dof_func = hypre_CTAlloc(HYPRE_Int,num_coarse_nodes*num_functions); cnt = 0; for (i=0; i < num_nodes; i++) { if (CFN_marker[i] == 1) { for (k=0; k < num_functions; k++) dof_func[cnt++] = k; } } *dof_func_ptr = dof_func; } else { cnt = 0; for (i=0; i < num_nodes; i++) for (j=0; j < num_functions; j++) CF_marker[cnt++] = CFN_marker[i]; } *CF_marker_ptr = CF_marker; #ifdef HYPRE_NO_GLOBAL_PARTITION row_starts_S = hypre_CTAlloc(HYPRE_Int,2); for (i=0; i < 2; i++) row_starts_S[i] = num_functions*row_starts_SN[i]; if (row_starts_SN != col_starts_SN) { col_starts_S = hypre_CTAlloc(HYPRE_Int,2); for (i=0; i < 2; i++) col_starts_S[i] = num_functions*col_starts_SN[i]; } else { col_starts_S = row_starts_S; } #else row_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1); for (i=0; i < num_procs+1; i++) row_starts_S[i] = num_functions*row_starts_SN[i]; if (row_starts_SN != col_starts_SN) { col_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1); for (i=0; i < num_procs+1; i++) col_starts_S[i] = num_functions*col_starts_SN[i]; } else { col_starts_S = row_starts_S; } #endif SN_num_nonzeros_diag = SN_diag_i[num_nodes]; SN_num_nonzeros_offd = SN_offd_i[num_nodes]; global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(SN); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(SN)*num_functions; global_num_vars = global_num_nodes*num_functions; S_num_nonzeros_diag = num_functions*SN_num_nonzeros_diag; S_num_nonzeros_offd = num_functions*SN_num_nonzeros_offd; num_cols_offd_S = num_functions*num_cols_offd_SN; S = hypre_ParCSRMatrixCreate(comm, 
global_num_vars, global_num_cols, row_starts_S, col_starts_S, num_cols_offd_S, S_num_nonzeros_diag, S_num_nonzeros_offd); S_diag = hypre_ParCSRMatrixDiag(S); S_offd = hypre_ParCSRMatrixOffd(S); S_diag_i = hypre_CTAlloc(HYPRE_Int, num_variables+1); S_offd_i = hypre_CTAlloc(HYPRE_Int, num_variables+1); S_diag_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_diag); hypre_CSRMatrixI(S_diag) = S_diag_i; hypre_CSRMatrixJ(S_diag) = S_diag_j; if (data) { SN_diag_data = hypre_CSRMatrixData(SN_diag); S_diag_data = hypre_CTAlloc(double, S_num_nonzeros_diag); hypre_CSRMatrixData(S_diag) = S_diag_data; if (num_cols_offd_S) { SN_offd_data = hypre_CSRMatrixData(SN_offd); S_offd_data = hypre_CTAlloc(double, S_num_nonzeros_offd); hypre_CSRMatrixData(S_offd) = S_offd_data; } } hypre_CSRMatrixI(S_offd) = S_offd_i; if (comm_pkg) { comm_pkg_S = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgComm(comm_pkg_S) = comm; num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); hypre_ParCSRCommPkgNumSends(comm_pkg_S) = num_sends; num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); hypre_ParCSRCommPkgNumRecvs(comm_pkg_S) = num_recvs; send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); send_procs_S = NULL; send_map_elmts_S = NULL; if (num_sends) { send_procs_S = hypre_CTAlloc(HYPRE_Int,num_sends); send_map_elmts_S = hypre_CTAlloc(HYPRE_Int, num_functions*send_map_starts[num_sends]); } send_map_starts_S = hypre_CTAlloc(HYPRE_Int,num_sends+1); recv_vec_starts_S = hypre_CTAlloc(HYPRE_Int,num_recvs+1); recv_procs_S = NULL; if (num_recvs) recv_procs_S = hypre_CTAlloc(HYPRE_Int,num_recvs); send_map_starts_S[0] = 0; for (i=0; i < num_sends; i++) { send_procs_S[i] = send_procs[i]; send_map_starts_S[i+1] = num_functions*send_map_starts[i+1]; } recv_vec_starts_S[0] = 0; 
for (i=0; i < num_recvs; i++) { recv_procs_S[i] = recv_procs[i]; recv_vec_starts_S[i+1] = num_functions*recv_vec_starts[i+1]; } cnt = 0; for (i=0; i < send_map_starts[num_sends]; i++) { k1 = num_functions*send_map_elmts[i]; for (j=0; j < num_functions; j++) { send_map_elmts_S[cnt++] = k1+j; } } hypre_ParCSRCommPkgSendProcs(comm_pkg_S) = send_procs_S; hypre_ParCSRCommPkgSendMapStarts(comm_pkg_S) = send_map_starts_S; hypre_ParCSRCommPkgSendMapElmts(comm_pkg_S) = send_map_elmts_S; hypre_ParCSRCommPkgRecvProcs(comm_pkg_S) = recv_procs_S; hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_S) = recv_vec_starts_S; hypre_ParCSRMatrixCommPkg(S) = comm_pkg_S; } if (num_cols_offd_S) { S_offd_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_offd); hypre_CSRMatrixJ(S_offd) = S_offd_j; col_map_offd_S = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S); cnt = 0; for (i=0; i < num_cols_offd_SN; i++) { k1 = col_map_offd_SN[i]*num_functions; for (j=0; j < num_functions; j++) col_map_offd_S[cnt++] = k1+j; } hypre_ParCSRMatrixColMapOffd(S) = col_map_offd_S; } if (col_offd_SN_to_AN) { col_offd_S_to_A = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S); cnt = 0; for (i=0; i < num_cols_offd_SN; i++) { k1 = col_offd_SN_to_AN[i]*num_functions; for (j=0; j < num_functions; j++) col_offd_S_to_A[cnt++] = k1+j; } *col_offd_S_to_A_ptr = col_offd_S_to_A; } cnt = 0; row = 0; for (i=0; i < num_nodes; i++) { row++; start = cnt; for (j=SN_diag_i[i]; j < SN_diag_i[i+1]; j++) { jj = SN_diag_j[j]; if (data) S_diag_data[cnt] = SN_diag_data[j]; S_diag_j[cnt++] = jj*num_functions; } end = cnt; S_diag_i[row] = cnt; for (k1=1; k1 < num_functions; k1++) { row++; for (k=start; k < end; k++) { if (data) S_diag_data[cnt] = S_diag_data[k]; S_diag_j[cnt++] = S_diag_j[k]+k1; } S_diag_i[row] = cnt; } } cnt = 0; row = 0; for (i=0; i < num_nodes; i++) { row++; start = cnt; for (j=SN_offd_i[i]; j < SN_offd_i[i+1]; j++) { jj = SN_offd_j[j]; if (data) S_offd_data[cnt] = SN_offd_data[j]; S_offd_j[cnt++] = jj*num_functions; } end = cnt; S_offd_i[row] 
= cnt; for (k1=1; k1 < num_functions; k1++) { row++; for (k=start; k < end; k++) { if (data) S_offd_data[cnt] = S_offd_data[k]; S_offd_j[cnt++] = S_offd_j[k]+k1; } S_offd_i[row] = cnt; } } *S_ptr = S; return (ierr); }
/*--------------------------------------------------------------------------
 * hypre_DataExchangeList
 *
 * Point-to-point exchange in which this process knows whom it must contact
 * but not who will contact it.  The code below proceeds in these stages:
 *
 *   1. Size the buffers.  Every response message has the fixed length
 *      max_response_total_bytes = (max_response_size + overhead) objects,
 *      where the trailing overhead objects carry the true response length
 *      as a HYPRE_Int.
 *   2. Pre-post one non-blocking receive per contact for its response, then
 *      send the contact messages with non-blocking sends.
 *   3. When more than one process is present, build a binary tree over the
 *      ranks and pre-post receives for termination messages from children.
 *   4. Probe loop: answer every incoming contact through
 *      response_obj->fill_response, then test in turn for (a) all own
 *      responses received, (b) termination messages from all children,
 *      (c) the termination message from the parent, which is then passed
 *      down to the children.
 *
 * Parameters:
 *
 *   num_contacts            = how many procs to contact
 *   contact_proc_list       = list of processors to contact
 *   contact_send_buf        = array of data to send
 *   contact_send_buf_starts = index into contact_send_buf corresponding to
 *                             contact_proc_list; entry i+1 minus entry i is
 *                             the number of objects sent to contact i
 *   contact_obj_size        = sizeof() one obj in the contact list
 *                             (0 is replaced by sizeof(HYPRE_Int))
 *   response_obj_size       = sizeof() one obj in response_recv_buf
 *                             (0 is replaced by sizeof(HYPRE_Int))
 *   response_obj            = supplies the fill_response function used to
 *                             build each response, plus any data that
 *                             function needs; its send_response_overhead
 *                             and send_response_storage fields are set here
 *   max_response_size       = max size (in objects) of a single response
 *                             expected; does NOT need to be an absolute
 *                             upper bound, since any excess is sent as a
 *                             separate message with post_tag
 *   rnum                    = two consecutive exchanges should use different
 *                             rnums (alternate rnum = 1 and rnum = 2); all
 *                             message tags are even multiples of rnum, so
 *                             odd tags remain free for calling code
 *   comm                    = communicator used for every message
 *   p_response_recv_buf     = where to receive the responses - will be
 *                             allocated in this function
 *   p_response_recv_buf_starts = index into the response buffer
 *                             corresponding to the contact list - will be
 *                             allocated in this function
 *--------------------------------------------------------------------------*/
HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts,
                                 HYPRE_Int *contact_proc_list,
                                 void *contact_send_buf,
                                 HYPRE_Int *contact_send_buf_starts,
                                 HYPRE_Int contact_obj_size,
                                 HYPRE_Int response_obj_size,
                                 hypre_DataExchangeResponse *response_obj,
                                 HYPRE_Int max_response_size,
                                 HYPRE_Int rnum,
                                 MPI_Comm comm,
                                 void **p_response_recv_buf,
                                 HYPRE_Int **p_response_recv_buf_starts)
{
   HYPRE_Int num_procs, myid;
   HYPRE_Int i;
   HYPRE_Int terminate, responses_complete;
   HYPRE_Int children_complete;
   HYPRE_Int contact_flag;
   HYPRE_Int proc;
   HYPRE_Int contact_size;
   HYPRE_Int size, post_size, copy_size;
   HYPRE_Int total_size, count;

   void *start_ptr = NULL, *index_ptr=NULL;
   HYPRE_Int *int_ptr=NULL;

   void *response_recv_buf = NULL;
   void *send_response_buf = NULL;

   HYPRE_Int *response_recv_buf_starts = NULL;
   void *initial_recv_buf = NULL;

   void *recv_contact_buf = NULL;
   HYPRE_Int recv_contact_buf_size = 0;

   HYPRE_Int response_message_size = 0;

   HYPRE_Int overhead;

   HYPRE_Int max_response_size_bytes;
   HYPRE_Int max_response_total_bytes;

   /* must start out NULL because it is grown with a realloc-style call */
   void **post_array = NULL;
   HYPRE_Int post_array_storage = 0;
   HYPRE_Int post_array_size = 0;
   HYPRE_Int num_post_recvs =0;

   void **contact_ptrs = NULL, **response_ptrs=NULL, **post_ptrs=NULL;

   hypre_BinaryTree tree;

   hypre_MPI_Request *response_requests, *contact_requests;
   hypre_MPI_Status *response_statuses, *contact_statuses;

   hypre_MPI_Request *post_send_requests = NULL, *post_recv_requests = NULL;
   hypre_MPI_Status *post_send_statuses = NULL, *post_recv_statuses = NULL;

   hypre_MPI_Request *term_requests, term_request1, request_parent;
   hypre_MPI_Status *term_statuses, term_status1, status_parent;

   hypre_MPI_Status status, fill_status;

   /* one distinct tag per message kind, all scaled by rnum */
   const HYPRE_Int contact_tag = 1000*rnum;
   const HYPRE_Int response_tag = 1002*rnum;
   const HYPRE_Int term_tag = 1004*rnum;
   const HYPRE_Int post_tag = 1006*rnum;

   hypre_MPI_Comm_size(comm, &num_procs );
   hypre_MPI_Comm_rank(comm, &myid );

   /* ---------initializations ----------------*/

   /* if the response_obj_size or contact_obj_size is 0,
      set it to sizeof(HYPRE_Int) */
   if (!response_obj_size) response_obj_size = sizeof(HYPRE_Int);
   if (!contact_obj_size) contact_obj_size = sizeof(HYPRE_Int);

   max_response_size_bytes = max_response_size*response_obj_size;

   /* pre-allocate the max space for responding to contacts;
      overhead is the number of response objects needed to hold the one
      HYPRE_Int (the true response length) appended to every response */
   overhead = ceil((HYPRE_Real) sizeof(HYPRE_Int)/response_obj_size);

   max_response_total_bytes = (max_response_size+overhead)*response_obj_size;

   /* publish the buffer layout to the fill_response callback */
   response_obj->send_response_overhead = overhead;
   response_obj->send_response_storage = max_response_size;

   /* zero-initialized send buffer of max_response_size+overhead objects */
   send_response_buf = hypre_CAlloc(max_response_size+overhead, response_obj_size);

   /* allocate space for the initial recv array for the responses - one
      fixed-size slot of max_response_total_bytes per contacted processor */
   initial_recv_buf = hypre_MAlloc(max_response_total_bytes*num_contacts);
   response_recv_buf_starts = hypre_CTAlloc(HYPRE_Int, num_contacts+1);

   contact_ptrs = hypre_TAlloc( void *, num_contacts);
   response_ptrs = hypre_TAlloc(void *, num_contacts);

   /*-------------SEND CONTACTS AND POST RECVS FOR RESPONSES---*/

   /* initial starts (in objects): slot i begins at i fixed-size slots */
   for (i=0; i<= num_contacts; i++)
   {
      response_recv_buf_starts[i] = i*(max_response_size+overhead);
   }

   /* Send contact messages to the list of processors and
      pre-post receives to wait for their responses */

   responses_complete = 1;
   if (num_contacts > 0 )
   {
      responses_complete = 0;
      response_requests = hypre_CTAlloc(hypre_MPI_Request, num_contacts);
      response_statuses = hypre_CTAlloc(hypre_MPI_Status, num_contacts);
      contact_requests = hypre_CTAlloc(hypre_MPI_Request, num_contacts);
      contact_statuses = hypre_CTAlloc(hypre_MPI_Status, num_contacts);

      /* post receives - could be confirmation or data */
      /* the size to post is max_response_total_bytes */
      for (i=0; i< num_contacts; i++)
      {
         /* slot i of initial_recv_buf; the offset is applied through
            char * because arithmetic on void * is not standard C */
         response_ptrs[i] = (void *)((char *) initial_recv_buf + i*max_response_total_bytes) ;

         hypre_MPI_Irecv(response_ptrs[i], max_response_total_bytes, hypre_MPI_BYTE,
                         contact_proc_list[i], response_tag, comm,
                         &response_requests[i]);
      }

      /* send out contact messages; start_ptr walks through
         contact_send_buf one contact segment at a time */
      start_ptr = contact_send_buf;
      for (i=0; i< num_contacts; i++)
      {
         contact_ptrs[i] = start_ptr;
         size = contact_send_buf_starts[i+1] - contact_send_buf_starts[i] ;
         hypre_MPI_Isend(contact_ptrs[i], size*contact_obj_size, hypre_MPI_BYTE,
                         contact_proc_list[i], contact_tag, comm,
                         &contact_requests[i]);
         /* advance by the number of bytes just queued for sending */
         start_ptr = (void *) ((char *) start_ptr + (size*contact_obj_size));
      }
   }

   /*------------BINARY TREE-----------------------*/

   /* Now find out our binary tree information and
      initialize for the termination check sweep */
   terminate = 1;          /* indicates whether we can stop probing for contacts */
   children_complete = 1;  /* indicates whether we have received term messages
                              from all of our children */

   if (num_procs > 1)
   {
      hypre_CreateBinaryTree(myid, num_procs, &tree);

      /* we will get a message from each of our children when they
         have received responses for all of their contacts,
         so post those receives now */

      term_requests = hypre_CTAlloc(hypre_MPI_Request, tree.num_child);
      term_statuses = hypre_CTAlloc(hypre_MPI_Status, tree.num_child);

      for (i=0; i< tree.num_child; i++)
      {
         /* zero-length message: only its arrival matters */
         hypre_MPI_Irecv(NULL, 0, HYPRE_MPI_INT, tree.child_id[i], term_tag, comm,
                         &term_requests[i]);
      }

      terminate = 0;
      children_complete = 0;
   }
   else if (num_procs ==1 && num_contacts > 0 ) /* added 11/08 */
   {
      /* single process that contacts itself: must still run the probe loop */
      terminate = 0;
   }

   /*---------PROBE LOOP-----------------------------------------*/

   /* Look for incoming contact messages - the number of them is not
      known in advance */

   while (!terminate)
   {
      /* did I receive any contact messages? */
      hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, comm,
                       &contact_flag, &status);

      while (contact_flag)
      {
         /* received a contact - find out from whom and how large it is */
         proc = status.hypre_MPI_SOURCE;
         hypre_MPI_Get_count(&status, hypre_MPI_BYTE, &contact_size);
         contact_size = contact_size/contact_obj_size;   /* bytes -> objects */

         /*---------------FILL RESPONSE ------------------------*/

         /* first receive the contact buffer - then call a function
            to determine how to populate the send buffer for the response */

         /* do we have enough space to recv it? (the buffer only grows) */
         if(contact_size > recv_contact_buf_size)
         {
            recv_contact_buf = hypre_ReAlloc(recv_contact_buf,
                                             contact_obj_size*contact_size);
            recv_contact_buf_size = contact_size;
         }

         /* this must be blocking - the response cannot be filled
            before the contact data has arrived */
         hypre_MPI_Recv(recv_contact_buf, contact_size*contact_obj_size,
                        hypre_MPI_BYTE, proc, contact_tag, comm, &fill_status);

         /* the callback receives the address of send_response_buf and
            reports the response length (in objects) through
            response_message_size */
         response_obj->fill_response(recv_contact_buf, contact_size, proc,
                                     response_obj, comm, &send_response_buf,
                                     &response_message_size );

         /* we need to append the size of the send obj */
         /* first we copy out any part that may be needed to send later,
            so that appending the size does not overwrite it */
         post_size = response_message_size - max_response_size;
         if (post_size > 0) /* we will need to send the extra information later */
         {
            if (post_array_size == post_array_storage)
            {
               /* allocate room for more posts - add 20 */
               post_array_storage += 20;
               post_array = hypre_TReAlloc(post_array, void *, post_array_storage);
               post_send_requests = hypre_TReAlloc(post_send_requests,
                                                   hypre_MPI_Request,
                                                   post_array_storage);
            }

            /* allocate space for the data of this post only */
            /* this should not happen often (unless a poor max_size has been
               chosen) - so space for the data is allocated as needed */
            size = post_size*response_obj_size;
            post_array[post_array_size] = hypre_MAlloc(size);

            /* the empty statement below is all that remains of a
               commented-out assignment; it has no effect */
            ;
            /* overflow data starts right after the first
               max_response_size objects of the response buffer */
            index_ptr = (void *) ((char *) send_response_buf +
                                  max_response_size_bytes);

            memcpy(post_array[post_array_size], index_ptr, size);

            /* now post the part of the message that is too long with a
               non-blocking send and a different tag */
            hypre_MPI_Isend(post_array[post_array_size], size, hypre_MPI_BYTE,
                            proc, post_tag,
                            comm,
                            &post_send_requests[post_array_size]);

            post_array_size++;
         }

         /* now append the size information into the overhead storage,
            i.e. right after the first max_response_size objects */
         index_ptr = (void *) ((char *) send_response_buf + max_response_size_bytes);

         memcpy(index_ptr, &response_message_size, sizeof(HYPRE_Int));

         /* send the block of data that includes the overhead */
         /* this is a blocking send - the matching recv was pre-posted
            by the contacting process */
         hypre_MPI_Send(send_response_buf, max_response_total_bytes,
                        hypre_MPI_BYTE, proc, response_tag, comm);

         /*--------------------------------------------------------------*/

         /* look for any more contact messages */
         hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, comm,
                          &contact_flag, &status);
      }

      /* no more contact messages waiting - either
         (1) check to see if we have received all of our response messages
         (2) participate in termination (check for messages from children)
         (3) participate in termination sweep (check for message from parent) */

      if (!responses_complete)
      {
         hypre_MPI_Testall(num_contacts, response_requests, &responses_complete,
                           response_statuses);
         /* with a single process there is no tree sweep to wait for */
         if (responses_complete && num_procs == 1) terminate = 1; /* added 11/08 */
      }
      else if(!children_complete) /* have all of our children received all of
                                     their response messages? */
      {
         hypre_MPI_Testall(tree.num_child, term_requests, &children_complete,
                           term_statuses);

         /* if we have gotten term messages from all of our children, send a
            term message to our parent.  Then post a receive to hear back
            from the parent */
         /* NOTE(review): bitwise & rather than && - equivalent only if
            children_complete is exactly 0 or 1; confirm */
         if (children_complete & (myid > 0)) /* root does not have a parent */
         {
            hypre_MPI_Isend(NULL, 0, HYPRE_MPI_INT, tree.parent_id, term_tag,
                            comm, &request_parent);

            hypre_MPI_Irecv(NULL, 0, HYPRE_MPI_INT, tree.parent_id, term_tag,
                            comm, &term_request1);
         }
      }
      else /* have we gotten a term message from our parent? */
      {
         if (myid == 0) /* root does not have a parent */
         {
            terminate = 1;
         }
         else
         {
            hypre_MPI_Test(&term_request1, &terminate, &term_status1);
         }
         if (terminate) /* tell children to terminate */
         {
            /* complete our own non-blocking send to the parent first */
            if (myid > 0 ) hypre_MPI_Wait(&request_parent, &status_parent);

            for (i=0; i< tree.num_child; i++)
            {
               /* a blocking send - the child posted its recv when it
                  notified us */
               hypre_MPI_Send(NULL, 0, HYPRE_MPI_INT, tree.child_id[i],
                              term_tag, comm);
            }
         }
      }
   }
/*----------------------------------------------------------------------
 * BuildParLaplacian27pt
 *
 * Builds the test matrix for the driver by calling GenerateLaplacian27pt
 * on a fixed 20 x 20 x 20 grid, with the processes arranged as a
 * 1 x num_procs x 1 topology over hypre_MPI_COMM_WORLD.
 *
 * argc, argv, arg_index : accepted for interface compatibility; not read.
 * A_ptr                 : receives the generated matrix handle.
 *
 * Returns 0.  Terminates the process with exit(1) if the processor
 * topology does not multiply out to the communicator size.
 *----------------------------------------------------------------------*/
HYPRE_Int
BuildParLaplacian27pt( HYPRE_Int            argc,
                       char                *argv[],
                       HYPRE_Int            arg_index,
                       HYPRE_ParCSRMatrix  *A_ptr )
{
   HYPRE_Int           nprocs, rank;
   HYPRE_Int           dim_x, dim_y, dim_z;        /* global grid extents   */
   HYPRE_Int           procs_x, procs_y, procs_z;  /* processor topology    */
   HYPRE_Int           ip, iq, ir;                 /* this rank's position  */
   HYPRE_Real          center;
   HYPRE_Real         *stencil;
   HYPRE_ParCSRMatrix  matrix;

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &rank);

   /* defaults: cubic grid, all processes lined up along y */
   dim_x = 20;
   dim_y = 20;
   dim_z = 20;

   procs_x = 1;
   procs_y = nprocs;
   procs_z = 1;

   /* sanity check on the topology */
   if (procs_x * procs_y * procs_z != nprocs)
   {
      hypre_printf("Error: Invalid number of processors or processor topology \n");
      exit(1);
   }

   /* report the driver parameters once */
   if (rank == 0)
   {
      hypre_printf(" Laplacian_27pt:\n");
      hypre_printf(" (nx, ny, nz) = (%d, %d, %d)\n", dim_x, dim_y, dim_z);
      hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n", procs_x, procs_y, procs_z);
   }

   /* position of this rank inside the procs_x x procs_y x procs_z box */
   ip = rank % procs_x;
   iq = ((rank - ip) / procs_x) % procs_y;
   ir = (rank - ip - procs_x * iq) / (procs_x * procs_y);

   /* diagonal entry: 26 in the general case, 8 when some extent is 1,
      2 when two extents are 1 (the most degenerate case wins) */
   if (dim_x * dim_y == 1 || dim_x * dim_z == 1 || dim_y * dim_z == 1)
   {
      center = 2.0;
   }
   else if (dim_x == 1 || dim_y == 1 || dim_z == 1)
   {
      center = 8.0;
   }
   else
   {
      center = 26.0;
   }

   /* stencil coefficients: [0] diagonal, [1] off-diagonal */
   stencil = hypre_CTAlloc(HYPRE_Real, 2);
   stencil[0] = center;
   stencil[1] = -1.0;

   matrix = (HYPRE_ParCSRMatrix) GenerateLaplacian27pt(hypre_MPI_COMM_WORLD,
                                                       dim_x, dim_y, dim_z,
                                                       procs_x, procs_y, procs_z,
                                                       ip, iq, ir, stencil);

   hypre_TFree(stencil);

   *A_ptr = matrix;

   return (0);
}
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { HYPRE_Int arg_index; HYPRE_Int print_usage; HYPRE_Int build_matrix_arg_index; HYPRE_Int solver_id; HYPRE_Int ierr,i,j; HYPRE_Int num_iterations; HYPRE_ParCSRMatrix parcsr_A; HYPRE_Int num_procs, myid; HYPRE_Int local_row; HYPRE_Int time_index; MPI_Comm comm; HYPRE_Int M, N; HYPRE_Int first_local_row, last_local_row; HYPRE_Int first_local_col, last_local_col; HYPRE_Int size, *col_ind; HYPRE_Real *values; /* parameters for BoomerAMG */ HYPRE_Real strong_threshold; HYPRE_Int num_grid_sweeps; HYPRE_Real relax_weight; /* parameters for GMRES */ HYPRE_Int k_dim; char *paramString = new char[100]; /*----------------------------------------------------------- * Initialize some stuff *-----------------------------------------------------------*/ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs ); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid ); /*----------------------------------------------------------- * Set defaults *-----------------------------------------------------------*/ build_matrix_arg_index = argc; solver_id = 0; strong_threshold = 0.25; num_grid_sweeps = 2; relax_weight = 0.5; k_dim = 20; /*----------------------------------------------------------- * Parse command line *-----------------------------------------------------------*/ print_usage = 0; arg_index = 1; while ( (arg_index < argc) && (!print_usage) ) { if ( strcmp(argv[arg_index], "-solver") == 0 ) { arg_index++; solver_id = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-dbg") == 0 ) { arg_index++; atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-help") == 0 ) { print_usage = 1; } else { arg_index++; } } /*----------------------------------------------------------- * Print usage info *-----------------------------------------------------------*/ if ( (print_usage) && (myid == 0) ) { hypre_printf("\n"); hypre_printf("Usage: %s [<options>]\n", argv[0]); hypre_printf("\n"); hypre_printf(" 
-solver <ID> : solver ID\n"); hypre_printf(" 0=DS-PCG 1=ParaSails-PCG \n"); hypre_printf(" 2=AMG-PCG 3=DS-GMRES \n"); hypre_printf(" 4=PILUT-GMRES 5=AMG-GMRES \n"); hypre_printf("\n"); hypre_printf(" -rlx <val> : relaxation type\n"); hypre_printf(" 0=Weighted Jacobi \n"); hypre_printf(" 1=Gauss-Seidel (very slow!) \n"); hypre_printf(" 3=Hybrid Jacobi/Gauss-Seidel \n"); hypre_printf("\n"); exit(1); } /*----------------------------------------------------------- * Print driver parameters *-----------------------------------------------------------*/ if (myid == 0) { hypre_printf("Running with these driver parameters:\n"); hypre_printf(" solver ID = %d\n", solver_id); } /*----------------------------------------------------------- * Set up matrix *-----------------------------------------------------------*/ strcpy(paramString, "LS Interface"); time_index = hypre_InitializeTiming(paramString); hypre_BeginTiming(time_index); BuildParLaplacian27pt(argc, argv, build_matrix_arg_index, &parcsr_A); /*----------------------------------------------------------- * Copy the parcsr matrix into the LSI through interface calls *-----------------------------------------------------------*/ ierr = HYPRE_ParCSRMatrixGetComm( parcsr_A, &comm ); ierr += HYPRE_ParCSRMatrixGetDims( parcsr_A, &M, &N ); ierr = HYPRE_ParCSRMatrixGetLocalRange( parcsr_A, &first_local_row, &last_local_row , &first_local_col, &last_local_col ); HYPRE_LinSysCore H(hypre_MPI_COMM_WORLD); HYPRE_Int numLocalEqns = last_local_row - first_local_row + 1; H.createMatricesAndVectors(M,first_local_row+1,numLocalEqns); HYPRE_Int index; HYPRE_Int *rowLengths = new HYPRE_Int[numLocalEqns]; HYPRE_Int **colIndices = new HYPRE_Int*[numLocalEqns]; local_row = 0; for (i=first_local_row; i<= last_local_row; i++) { ierr += HYPRE_ParCSRMatrixGetRow(parcsr_A,i,&size,&col_ind,&values ); rowLengths[local_row] = size; colIndices[local_row] = new HYPRE_Int[size]; for (j=0; j<size; j++) colIndices[local_row][j] = col_ind[j] + 1; 
local_row++; HYPRE_ParCSRMatrixRestoreRow(parcsr_A,i,&size,&col_ind,&values); } H.allocateMatrix(colIndices, rowLengths); delete [] rowLengths; for (i=0; i< numLocalEqns; i++) delete [] colIndices[i]; delete [] colIndices; HYPRE_Int *newColInd; for (i=first_local_row; i<= last_local_row; i++) { ierr += HYPRE_ParCSRMatrixGetRow(parcsr_A,i,&size,&col_ind,&values ); newColInd = new HYPRE_Int[size]; for (j=0; j<size; j++) newColInd[j] = col_ind[j] + 1; H.sumIntoSystemMatrix(i+1,size,(const HYPRE_Real*)values, (const HYPRE_Int*)newColInd); delete [] newColInd; ierr += HYPRE_ParCSRMatrixRestoreRow(parcsr_A,i,&size,&col_ind,&values); } H.matrixLoadComplete(); HYPRE_ParCSRMatrixDestroy(parcsr_A); /*----------------------------------------------------------- * Set up the RHS and initial guess *-----------------------------------------------------------*/ HYPRE_Real ddata=1.0; HYPRE_Int status; for (i=first_local_row; i<= last_local_row; i++) { index = i + 1; H.sumIntoRHSVector(1,(const HYPRE_Real*) &ddata, (const HYPRE_Int*) &index); } hypre_EndTiming(time_index); strcpy(paramString, "LS Interface"); hypre_PrintTiming(paramString, hypre_MPI_COMM_WORLD); hypre_FinalizeTiming(time_index); hypre_ClearTiming(); /*----------------------------------------------------------- * Solve the system using PCG *-----------------------------------------------------------*/ if ( solver_id == 0 ) { strcpy(paramString, "solver cg"); H.parameters(1, ¶mString); if (myid == 0) hypre_printf("Solver: DS-PCG\n"); strcpy(paramString, "preconditioner diagonal"); H.parameters(1, ¶mString); } else if ( solver_id == 1 ) { strcpy(paramString, "solver cg"); H.parameters(1, ¶mString); if (myid == 0) hypre_printf("Solver: ParaSails-PCG\n"); strcpy(paramString, "preconditioner parasails"); H.parameters(1, ¶mString); strcpy(paramString, "parasailsNlevels 1"); H.parameters(1, ¶mString); strcpy(paramString, "parasailsThreshold 0.1"); H.parameters(1, ¶mString); } else if ( solver_id == 2 ) { strcpy(paramString, 
"solver cg"); H.parameters(1, ¶mString); if (myid == 0) hypre_printf("Solver: AMG-PCG\n"); strcpy(paramString, "preconditioner boomeramg"); H.parameters(1, ¶mString); strcpy(paramString, "amgCoarsenType falgout"); H.parameters(1, ¶mString); hypre_sprintf(paramString, "amgStrongThreshold %e", strong_threshold); H.parameters(1, ¶mString); hypre_sprintf(paramString, "amgNumSweeps %d", num_grid_sweeps); H.parameters(1, ¶mString); strcpy(paramString, "amgRelaxType jacobi"); H.parameters(1, ¶mString); hypre_sprintf(paramString, "amgRelaxWeight %e", relax_weight); H.parameters(1, ¶mString); } else if ( solver_id == 3 ) { strcpy(paramString, "solver cg"); H.parameters(1, ¶mString); if (myid == 0) hypre_printf("Solver: Poly-PCG\n"); strcpy(paramString, "preconditioner poly"); H.parameters(1, ¶mString); strcpy(paramString, "polyOrder 9"); H.parameters(1, ¶mString); } else if ( solver_id == 4 ) { strcpy(paramString, "solver gmres"); H.parameters(1, ¶mString); hypre_sprintf(paramString, "gmresDim %d", k_dim); H.parameters(1, ¶mString); if (myid == 0) hypre_printf("Solver: DS-GMRES\n"); strcpy(paramString, "preconditioner diagonal"); H.parameters(1, ¶mString); } else if ( solver_id == 5 ) { strcpy(paramString, "solver gmres"); H.parameters(1, ¶mString); hypre_sprintf(paramString, "gmresDim %d", k_dim); H.parameters(1, ¶mString); if (myid == 0) hypre_printf("Solver: PILUT-GMRES\n"); strcpy(paramString, "preconditioner pilut"); H.parameters(1, ¶mString); strcpy(paramString, "pilutRowSize 0"); H.parameters(1, ¶mString); strcpy(paramString, "pilutDropTol 0.0"); H.parameters(1, ¶mString); } else if ( solver_id == 6 ) { strcpy(paramString, "solver gmres"); H.parameters(1, ¶mString); hypre_sprintf(paramString, "gmresDim %d", k_dim); H.parameters(1, ¶mString); if (myid == 0) hypre_printf("Solver: AMG-GMRES\n"); strcpy(paramString, "preconditioner boomeramg"); H.parameters(1, ¶mString); strcpy(paramString, "amgCoarsenType falgout"); H.parameters(1, ¶mString); hypre_sprintf(paramString, 
"amgStrongThreshold %e", strong_threshold); H.parameters(1, ¶mString); hypre_sprintf(paramString, "amgNumSweeps %d", num_grid_sweeps); H.parameters(1, ¶mString); strcpy(paramString, "amgRelaxType jacobi"); H.parameters(1, ¶mString); hypre_sprintf(paramString, "amgRelaxWeight %e", relax_weight); H.parameters(1, ¶mString); } else if ( solver_id == 7 ) { strcpy(paramString, "solver gmres"); H.parameters(1, ¶mString); hypre_sprintf(paramString, "gmresDim %d", k_dim); H.parameters(1, ¶mString); if (myid == 0) hypre_printf("Solver: DDILUT-GMRES\n"); strcpy(paramString, "preconditioner ddilut"); H.parameters(1, ¶mString); strcpy(paramString, "ddilutFillin 5.0"); H.parameters(1, ¶mString); strcpy(paramString, "ddilutDropTol 0.0"); H.parameters(1, ¶mString); } else if ( solver_id == 8 ) { strcpy(paramString, "solver gmres"); H.parameters(1, ¶mString); hypre_sprintf(paramString, "gmresDim %d", k_dim); H.parameters(1, ¶mString); if (myid == 0) hypre_printf("Solver: POLY-GMRES\n"); strcpy(paramString, "preconditioner poly"); H.parameters(1, ¶mString); strcpy(paramString, "polyOrder 5"); H.parameters(1, ¶mString); } strcpy(paramString, "Krylov Solve"); time_index = hypre_InitializeTiming(paramString); hypre_BeginTiming(time_index); H.launchSolver(status, num_iterations); hypre_EndTiming(time_index); strcpy(paramString, "Solve phase times"); hypre_PrintTiming(paramString, hypre_MPI_COMM_WORLD); hypre_FinalizeTiming(time_index); hypre_ClearTiming(); if (myid == 0) { hypre_printf("\n Iterations = %d\n", num_iterations); hypre_printf("\n"); } /*----------------------------------------------------------- * Finalize things *-----------------------------------------------------------*/ delete [] paramString; hypre_MPI_Finalize(); return (0); }
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { HYPRE_Int num_procs, myid; HYPRE_Int verbose = 0, build_matrix_type = 1; HYPRE_Int index, matrix_arg_index, commpkg_flag=3; HYPRE_Int i, k, ierr=0; HYPRE_Int row_start, row_end; HYPRE_Int col_start, col_end, global_num_rows; HYPRE_Int *row_part, *col_part; char *csrfilename; HYPRE_Int preload = 0, loop = 0, loop2 = LOOP2; HYPRE_Int bcast_rows[2], *info; hypre_ParCSRMatrix *parcsr_A, *small_A; HYPRE_ParCSRMatrix A_temp, A_temp_small; hypre_CSRMatrix *A_CSR; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int nx, ny, nz; HYPRE_Int P, Q, R; HYPRE_Int p, q, r; HYPRE_Real values[4]; hypre_ParVector *x_new; hypre_ParVector *y_new, *y; HYPRE_Int *row_starts; HYPRE_Real ans; HYPRE_Real start_time, end_time, total_time, *loop_times; HYPRE_Real T_avg, T_std; HYPRE_Int noparmprint = 0; #if mydebug HYPRE_Int j, tmp_int; #endif /*----------------------------------------------------------- * Initialize MPI *-----------------------------------------------------------*/ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs ); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid ); /*----------------------------------------------------------- * default - is 27pt laplace *-----------------------------------------------------------*/ build_matrix_type = 2; matrix_arg_index = argc; /*----------------------------------------------------------- * Parse command line *-----------------------------------------------------------*/ index = 1; while ( index < argc) { if ( strcmp(argv[index], "-verbose") == 0 ) { index++; verbose = 1; } else if ( strcmp(argv[index], "-fromonecsrfile") == 0 ) { index++; build_matrix_type = 1; matrix_arg_index = index; /*this tells where the name is*/ } else if ( strcmp(argv[index], "-commpkg") == 0 ) { index++; commpkg_flag = atoi(argv[index++]); } else if ( strcmp(argv[index], "-laplacian") == 0 ) { index++; build_matrix_type = 2; matrix_arg_index = index; } else if ( strcmp(argv[index], "-27pt") == 
0 ) { index++; build_matrix_type = 4; matrix_arg_index = index; } /* else if ( strcmp(argv[index], "-nopreload") == 0 ) { index++; preload = 0; } */ else if ( strcmp(argv[index], "-loop") == 0 ) { index++; loop = atoi(argv[index++]); } else if ( strcmp(argv[index], "-noparmprint") == 0 ) { index++; noparmprint = 1; } else { index++; /*hypre_printf("Warning: Unrecogized option '%s'\n",argv[index++] );*/ } } /*----------------------------------------------------------- * Setup the Matrix problem *-----------------------------------------------------------*/ /*----------------------------------------------------------- * Get actual partitioning- * read in an actual csr matrix. *-----------------------------------------------------------*/ if (build_matrix_type ==1) /*read in a csr matrix from one file */ { if (matrix_arg_index < argc) { csrfilename = argv[matrix_arg_index]; } else { hypre_printf("Error: No filename specified \n"); exit(1); } if (myid == 0) { /*hypre_printf(" FromFile: %s\n", csrfilename);*/ A_CSR = hypre_CSRMatrixRead(csrfilename); } row_part = NULL; col_part = NULL; parcsr_A = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, A_CSR, row_part, col_part); if (myid == 0) hypre_CSRMatrixDestroy(A_CSR); } else if (build_matrix_type ==2) { myBuildParLaplacian(argc, argv, matrix_arg_index, &A_temp, !noparmprint); parcsr_A = (hypre_ParCSRMatrix *) A_temp; } else if (build_matrix_type ==4) { myBuildParLaplacian27pt(argc, argv, matrix_arg_index, &A_temp, !noparmprint); parcsr_A = (hypre_ParCSRMatrix *) A_temp; } /*----------------------------------------------------------- * create a small problem so that timings are more accurate - * code gets run twice (small laplace) *-----------------------------------------------------------*/ /*this is no longer being used - preload = 0 is set at the beginning */ if (preload == 1) { /*hypre_printf("preload!\n");*/ values[1] = -1; values[2] = -1; values[3] = -1; values[0] = - 6.0 ; nx = 2; ny = num_procs; nz = 2; P = 1; 
Q = num_procs; R = 1; p = myid % P; q = (( myid - p)/P) % Q; r = ( myid - p - P*q)/( P*Q ); A_temp_small = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD, nx, ny, nz, P, Q, R, p, q, r, values); small_A = (hypre_ParCSRMatrix *) A_temp_small; /*do comm packages*/ hypre_NewCommPkgCreate(small_A); hypre_NewCommPkgDestroy(small_A); hypre_MatvecCommPkgCreate(small_A); hypre_ParCSRMatrixDestroy(small_A); } /*----------------------------------------------------------- * Prepare for timing *-----------------------------------------------------------*/ /* instead of preloading, let's not time the first one if more than one*/ if (!loop) { loop = 1; /* and don't do any timings */ } else { loop +=1; if (loop < 2) loop = 2; } loop_times = hypre_CTAlloc(HYPRE_Real, loop); /******************************************************************************************/ hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); if (commpkg_flag == 1 || commpkg_flag ==3 ) { /*----------------------------------------------------------- * Create new comm package *-----------------------------------------------------------*/ if (!myid) hypre_printf("********************************************************\n" ); /*do loop times*/ for (i=0; i< loop; i++) { loop_times[i] = 0.0; for (k=0; k< loop2; k++) { hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); start_time = hypre_MPI_Wtime(); #if mpip_on if (i==(loop-1)) hypre_MPI_Pcontrol(1); #endif hypre_NewCommPkgCreate(parcsr_A); #if mpip_on if (i==(loop-1)) hypre_MPI_Pcontrol(0); #endif end_time = hypre_MPI_Wtime(); end_time = end_time - start_time; hypre_MPI_Allreduce(&end_time, &total_time, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD); loop_times[i] += total_time; if ( !((i+1)== loop && (k+1) == loop2)) hypre_NewCommPkgDestroy(parcsr_A); }/*end of loop2 */ } /*end of loop*/ /* calculate the avg and std. */ if (loop > 1) { /* calculate the avg and std. */ stats_mo(loop_times, loop, &T_avg, &T_std); if (!myid) hypre_printf(" NewCommPkgCreate: AVG. 
wall clock time = %f seconds\n", T_avg); if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std); if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n"); if (!myid) hypre_printf("********************************************************\n" ); for (i=0; i< loop; i++) { if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]); } if (!myid) hypre_printf("********************************************************\n" ); } else { if (!myid) hypre_printf("********************************************************\n" ); if (!myid) hypre_printf(" NewCommPkgCreate:\n"); if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]); if (!myid) hypre_printf("********************************************************\n" ); } /*----------------------------------------------------------- * Verbose printing *-----------------------------------------------------------*/ /*some verification*/ global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); if (verbose) { ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , &col_start, &col_end ); comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A); hypre_printf("myid = %i, my ACTUAL local range: [%i, %i]\n", myid, row_start, row_end); ierr = hypre_GetAssumedPartitionRowRange( myid, global_num_rows, &row_start, &row_end); hypre_printf("myid = %i, my assumed local range: [%i, %i]\n", myid, row_start, row_end); hypre_printf("myid = %d, num_recvs = %d\n", myid, hypre_ParCSRCommPkgNumRecvs(comm_pkg) ); #if mydebug for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) { hypre_printf("myid = %d, recv proc = %d, vec_starts = [%d : %d]\n", myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1); } #endif hypre_printf("myid = %d, num_sends = %d\n", myid, hypre_ParCSRCommPkgNumSends(comm_pkg) ); #if mydebug for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) { tmp_int = 
hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] - hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; for (j=0; j< tmp_int; j++) { hypre_printf("myid = %d, send proc = %d, send element = %d\n",myid, hypre_ParCSRCommPkgSendProcs(comm_pkg)[i], hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); } } #endif } /*----------------------------------------------------------- * To verify correctness (if commpkg_flag = 3) *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { /*do a matvec - we are assuming a square matrix */ row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A); x_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts); hypre_ParVectorSetPartitioningOwner(x_new, 0); hypre_ParVectorInitialize(x_new); hypre_ParVectorSetRandomValues(x_new, 1); y_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts); hypre_ParVectorSetPartitioningOwner(y_new, 0); hypre_ParVectorInitialize(y_new); hypre_ParVectorSetConstantValues(y_new, 0.0); /*y = 1.0*A*x+1.0*y */ hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y_new); } /*----------------------------------------------------------- * Clean up after MyComm *-----------------------------------------------------------*/ hypre_NewCommPkgDestroy(parcsr_A); } /******************************************************************************************/ /******************************************************************************************/ hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); if (commpkg_flag > 1 ) { /*----------------------------------------------------------- * Set up standard comm package *-----------------------------------------------------------*/ bcast_rows[0] = 23; bcast_rows[1] = 1789; if (!myid) hypre_printf("********************************************************\n" ); /*do loop times*/ for (i=0; i< loop; i++) { loop_times[i] = 0.0; for (k=0; k< loop2; k++) { 
hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); start_time = hypre_MPI_Wtime(); #if time_gather info = hypre_CTAlloc(HYPRE_Int, num_procs); hypre_MPI_Allgather(bcast_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hypre_MPI_COMM_WORLD); #endif hypre_MatvecCommPkgCreate(parcsr_A); end_time = hypre_MPI_Wtime(); end_time = end_time - start_time; hypre_MPI_Allreduce(&end_time, &total_time, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD); loop_times[i] += total_time; if ( !((i+1)== loop && (k+1) == loop2)) hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(parcsr_A)); }/* end of loop 2*/ } /*end of loop*/ /* calculate the avg and std. */ if (loop > 1) { stats_mo(loop_times, loop, &T_avg, &T_std); if (!myid) hypre_printf("Current CommPkgCreate: AVG. wall clock time = %f seconds\n", T_avg); if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std); if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n"); if (!myid) hypre_printf("********************************************************\n" ); for (i=0; i< loop; i++) { if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]); } if (!myid) hypre_printf("********************************************************\n" ); } else { if (!myid) hypre_printf("********************************************************\n" ); if (!myid) hypre_printf(" Current CommPkgCreate:\n"); if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]); if (!myid) hypre_printf("********************************************************\n" ); } /*----------------------------------------------------------- * Verbose printing *-----------------------------------------------------------*/ /*some verification*/ if (verbose) { ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , &col_start, &col_end ); comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A); hypre_printf("myid = %i, std - my local range: [%i, %i]\n", myid, row_start, row_end); ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , 
&col_start, &col_end ); hypre_printf("myid = %d, std - num_recvs = %d\n", myid, hypre_ParCSRCommPkgNumRecvs(comm_pkg) ); #if mydebug for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) { hypre_printf("myid = %d, std - recv proc = %d, vec_starts = [%d : %d]\n", myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1); } #endif hypre_printf("myid = %d, std - num_sends = %d\n", myid, hypre_ParCSRCommPkgNumSends(comm_pkg)); #if mydebug for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) { tmp_int = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] - hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; for (j=0; j< tmp_int; j++) { hypre_printf("myid = %d, std - send proc = %d, send element = %d\n",myid, hypre_ParCSRCommPkgSendProcs(comm_pkg)[i], hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); } } #endif } /*----------------------------------------------------------- * Verify correctness *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A); y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows,row_starts); hypre_ParVectorSetPartitioningOwner(y, 0); hypre_ParVectorInitialize(y); hypre_ParVectorSetConstantValues(y, 0.0); hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y); } } /*----------------------------------------------------------- * Compare matvecs for both comm packages (3) *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { /*make sure that y and y_new are the same - now y_new should=0*/ hypre_ParVectorAxpy( -1.0, y, y_new ); hypre_ParVectorSetRandomValues(y, 1); ans = hypre_ParVectorInnerProd( y, y_new ); if (!myid) { if ( fabs(ans) > 1e-8 ) { hypre_printf("!!!!! WARNING !!!!! 
should be zero if correct = %6.10f\n", ans); } else { hypre_printf("Matvecs match ( should be zero = %6.10f )\n", ans); } } } /*----------------------------------------------------------- * Clean up *-----------------------------------------------------------*/ hypre_ParCSRMatrixDestroy(parcsr_A); /*this calls the standard comm package destroy - but we'll destroy ours separately until it is incorporated */ if (commpkg_flag == 3 ) { hypre_ParVectorDestroy(x_new); hypre_ParVectorDestroy(y); hypre_ParVectorDestroy(y_new); } hypre_MPI_Finalize(); return(ierr); }
/*--------------------------------------------------------------------------
 * hypre_ParCSRCommMultiHandleCreate
 *
 * Allocates a communication handle, posts the nonblocking receives and
 * then the nonblocking sends described by comm_pkg, and returns the
 * handle.  num_sends, num_recvs, the send/recv processor lists,
 * recv_vec_starts and send_map_starts must be set in comm_pkg.
 *
 * Every offset and length taken from comm_pkg is multiplied by num_vecs
 * before it is used to address send_data / recv_data, and both buffers
 * are accessed as arrays of double.
 *
 *   job = 1 : receive from the recv processors (ranges given by
 *             recv_vec_starts) and send to the send processors (ranges
 *             given by send_map_starts); per the original notes this is
 *             the exchange needed for a Matvec.
 *   job = 2 : the reverse direction: receive from the send processors
 *             and send to the recv processors; per the original notes
 *             this is the exchange needed for a MatvecT.
 *
 * Any other job value posts nothing.  The handle then records zero
 * outstanding requests, so code that later waits on the handle never
 * touches request slots that were not started.
 *
 * The returned handle keeps the comm_pkg, send_data and recv_data
 * pointers as given; the request array is allocated here and stored in
 * the handle.
 *--------------------------------------------------------------------------*/
hypre_ParCSRCommMultiHandle *
hypre_ParCSRCommMultiHandleCreate( HYPRE_Int            job,
                                   hypre_ParCSRCommPkg *comm_pkg,
                                   void                *send_data,
                                   void                *recv_data,
                                   HYPRE_Int            num_vecs )
{
   HYPRE_Int   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   HYPRE_Int   num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
   MPI_Comm    comm      = hypre_ParCSRCommPkgComm(comm_pkg);

   hypre_ParCSRCommMultiHandle *comm_handle;
   hypre_MPI_Request           *requests;

   double     *d_send_data = (double *) send_data;
   double     *d_recv_data = (double *) recv_data;

   HYPRE_Int   num_posted = 0;   /* requests actually started so far */
   HYPRE_Int   i;
   HYPRE_Int   ip, vec_start, vec_len;

   /* one slot per neighbour, receives and sends together */
   requests = hypre_CTAlloc(hypre_MPI_Request, num_sends + num_recvs);

   switch (job)
   {
      case 1:
      {
         /* receives first, one per recv processor */
         for (i = 0; i < num_recvs; i++)
         {
            ip        = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
            vec_len   = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i+1) - vec_start;
            hypre_MPI_Irecv(&d_recv_data[vec_start*num_vecs], vec_len*num_vecs,
                            hypre_MPI_DOUBLE, ip, 0, comm,
                            &requests[num_posted++]);
         }
         /* then sends, one per send processor */
         for (i = 0; i < num_sends; i++)
         {
            vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            vec_len   = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start;
            ip        = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            hypre_MPI_Isend(&d_send_data[vec_start*num_vecs], vec_len*num_vecs,
                            hypre_MPI_DOUBLE, ip, 0, comm,
                            &requests[num_posted++]);
         }
         break;
      }
      case 2:
      {
         /* roles swapped: receive along the send lists ... */
         for (i = 0; i < num_sends; i++)
         {
            vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            vec_len   = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start;
            ip        = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
            hypre_MPI_Irecv(&d_recv_data[vec_start*num_vecs], vec_len*num_vecs,
                            hypre_MPI_DOUBLE, ip, 0, comm,
                            &requests[num_posted++]);
         }
         /* ... and send along the recv lists */
         for (i = 0; i < num_recvs; i++)
         {
            ip        = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
            vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
            vec_len   = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i+1) - vec_start;
            hypre_MPI_Isend(&d_send_data[vec_start*num_vecs], vec_len*num_vecs,
                            hypre_MPI_DOUBLE, ip, 0, comm,
                            &requests[num_posted++]);
         }
         break;
      }
      default:
      {
         /* unknown job: nothing is posted, num_posted stays 0 */
         break;
      }
   }

   /*--------------------------------------------------------------------
    * set up comm_handle and return
    *--------------------------------------------------------------------*/

   comm_handle = hypre_CTAlloc(hypre_ParCSRCommMultiHandle, 1);

   hypre_ParCSRCommMultiHandleCommPkg(comm_handle)     = comm_pkg;
   hypre_ParCSRCommMultiHandleSendData(comm_handle)    = send_data;
   hypre_ParCSRCommMultiHandleRecvData(comm_handle)    = recv_data;
   /* equals num_sends + num_recvs for job 1 and 2, and 0 otherwise */
   hypre_ParCSRCommMultiHandleNumRequests(comm_handle) = num_posted;
   hypre_ParCSRCommMultiHandleRequests(comm_handle)    = requests;

   return (comm_handle);
}
/*--------------------------------------------------------------------------
 * Test driver for parallel multivectors.
 *
 * Steps, in order:
 *   1. build vector1, a 3-vector parallel multivector of global size 20,
 *      and fill entry i of vector j with (first_index + i + 100*j);
 *   2. build a sequential multivector of the full global size filled with
 *      (i + 100*j) and convert it to the parallel vector2;
 *   3. gather vector2 back into a sequential vector on every process;
 *   4. copy vector2 into tmp_vector, scale it by 2, subtract vector1,
 *      and print the inner product of vector1 with the result.
 *
 * Intermediate vectors are written with hypre_ParVectorPrint under the
 * names "Vector", "Convert", "Scale" and "Axpy".
 *
 * Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
main( HYPRE_Int argc,
      char     *argv[] )
{
   hypre_ParVector *vector1;
   hypre_ParVector *vector2;
   hypre_ParVector *tmp_vector;

   HYPRE_Int        num_procs, my_id;
   HYPRE_Int        global_size = 20;
   HYPRE_Int        local_size;
   HYPRE_Int        first_index;
   HYPRE_Int        num_vectors, vecstride, idxstride;
   HYPRE_Int        i, j;
   HYPRE_Int       *partitioning;
   double           prod;
   double          *data, *data2;
   hypre_Vector    *vector;
   hypre_Vector    *local_vector;
   hypre_Vector    *local_vector2;

   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &my_id );

   hypre_printf(" my_id: %d num_procs: %d\n", my_id, num_procs);

   /*-----------------------------------------------------------
    * vector1: let the create routine generate the partitioning
    *-----------------------------------------------------------*/

   partitioning = NULL;
   num_vectors  = 3;
   vector1 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_size,
                                         partitioning, num_vectors );
   /* pick up the partitioning that was generated for vector1 */
   partitioning = hypre_ParVectorPartitioning(vector1);

   hypre_ParVectorInitialize(vector1);
   local_vector = hypre_ParVectorLocalVector(vector1);
   data         = hypre_VectorData(local_vector);
   local_size   = hypre_VectorSize(local_vector);
   vecstride    = hypre_VectorVectorStride(local_vector);
   idxstride    = hypre_VectorIndexStride(local_vector);
   first_index  = partitioning[my_id];

   hypre_printf("vecstride=%i idxstride=%i local_size=%i num_vectors=%i",
                vecstride, idxstride, local_size, num_vectors );

   for (j = 0; j < num_vectors; ++j)
   {
      for (i = 0; i < local_size; i++)
      {
         data[ j*vecstride + i*idxstride ] = first_index+i + 100*j;
      }
   }

   hypre_ParVectorPrint(vector1, "Vector");

   /*-----------------------------------------------------------
    * vector2: sequential multivector converted to a parallel one
    *-----------------------------------------------------------*/

   local_vector2 = hypre_SeqMultiVectorCreate( global_size, num_vectors );
   hypre_SeqVectorInitialize(local_vector2);
   data2     = hypre_VectorData(local_vector2);
   vecstride = hypre_VectorVectorStride(local_vector2);
   idxstride = hypre_VectorIndexStride(local_vector2);

   for (j = 0; j < num_vectors; ++j)
   {
      for (i = 0; i < global_size; i++)
      {
         data2[ j*vecstride + i*idxstride ] = i + 100*j;
      }
   }

   /* hypre_GeneratePartitioning stores a newly built array through its
    * last argument (hypre_ParMultiVectorCreate relies on exactly that when
    * it is handed a NULL partitioning), so nothing is allocated here
    * first: an array allocated beforehand would be overwritten and lost. */
   hypre_GeneratePartitioning( global_size, num_procs, &partitioning );

   vector2 = hypre_VectorToParVector(hypre_MPI_COMM_WORLD, local_vector2,
                                     partitioning);
   hypre_ParVectorSetPartitioningOwner(vector2, 0);

   hypre_ParVectorPrint(vector2, "Convert");

   vector = hypre_ParVectorToVectorAll(vector2);

   /*-----------------------------------------------------------
    * Copy the vector into tmp_vector
    * (reading "Convert" back with hypre_ParVectorRead does not
    * work for multivectors yet, so the copy is made in memory)
    *-----------------------------------------------------------*/

   tmp_vector = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_size,
                                            partitioning, num_vectors );
   hypre_ParVectorInitialize( tmp_vector );
   hypre_ParVectorCopy( vector2, tmp_vector );

   /*-----------------------------------------------------------
    * Scale tmp_vector
    *-----------------------------------------------------------*/

   hypre_ParVectorScale(2.0, tmp_vector);
   hypre_ParVectorPrint(tmp_vector, "Scale");

   /*-----------------------------------------------------------
    * Do an Axpy (2*vector - vector) = vector
    *-----------------------------------------------------------*/

   hypre_ParVectorAxpy(-1.0, vector1, tmp_vector);
   hypre_ParVectorPrint(tmp_vector, "Axpy");

   /*-----------------------------------------------------------
    * Do an inner product vector * tmp_vector
    *-----------------------------------------------------------*/

   prod = hypre_ParVectorInnerProd(vector1, tmp_vector);

   hypre_printf (" prod: %8.2f \n", prod);

   /*-----------------------------------------------------------
    * Finalize things
    *-----------------------------------------------------------*/

   hypre_ParVectorDestroy(vector1);
   hypre_ParVectorDestroy(vector2);
   hypre_ParVectorDestroy(tmp_vector);
   hypre_SeqVectorDestroy(local_vector2);
   if (vector) hypre_SeqVectorDestroy(vector);

   /* Finalize MPI */
   hypre_MPI_Finalize();

   return 0;
}
HYPRE_Int hypre_BoomerAMGSolveT( void *amg_vdata, hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_ParAMGData *amg_data = amg_vdata; /* Data Structure variables */ HYPRE_Int amg_print_level; HYPRE_Int amg_logging; HYPRE_Real *num_coeffs; HYPRE_Int *num_variables; HYPRE_Real cycle_op_count; HYPRE_Int num_levels; /* HYPRE_Int num_unknowns; */ HYPRE_Real tol; char *file_name; hypre_ParCSRMatrix **A_array; hypre_ParVector **F_array; hypre_ParVector **U_array; /* Local variables */ /*FILE *fp;*/ HYPRE_Int j; HYPRE_Int Solve_err_flag; HYPRE_Int min_iter; HYPRE_Int max_iter; HYPRE_Int cycle_count; HYPRE_Real total_coeffs; HYPRE_Int total_variables; HYPRE_Int num_procs, my_id; HYPRE_Real alpha = 1.0; HYPRE_Real beta = -1.0; HYPRE_Real cycle_cmplxty = 0.0; HYPRE_Real operat_cmplxty; HYPRE_Real grid_cmplxty; HYPRE_Real conv_factor; HYPRE_Real resid_nrm; HYPRE_Real resid_nrm_init; HYPRE_Real relative_resid; HYPRE_Real rhs_norm; HYPRE_Real old_resid; hypre_ParVector *Vtemp; hypre_ParVector *Residual; hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm,&my_id); amg_print_level = hypre_ParAMGDataPrintLevel(amg_data); amg_logging = hypre_ParAMGDataLogging(amg_data); if ( amg_logging>1 ) Residual = hypre_ParAMGDataResidual(amg_data); file_name = hypre_ParAMGDataLogFileName(amg_data); /* num_unknowns = hypre_ParAMGDataNumUnknowns(amg_data); */ num_levels = hypre_ParAMGDataNumLevels(amg_data); A_array = hypre_ParAMGDataAArray(amg_data); F_array = hypre_ParAMGDataFArray(amg_data); U_array = hypre_ParAMGDataUArray(amg_data); tol = hypre_ParAMGDataTol(amg_data); min_iter = hypre_ParAMGDataMinIter(amg_data); max_iter = hypre_ParAMGDataMaxIter(amg_data); num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels); num_variables = hypre_CTAlloc(HYPRE_Int, num_levels); num_coeffs[0] = hypre_ParCSRMatrixDNumNonzeros(A_array[0]); num_variables[0] = hypre_ParCSRMatrixGlobalNumRows(A_array[0]); A_array[0] = A; F_array[0] = f; 
U_array[0] = u; /* Vtemp = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A_array[0]), hypre_ParCSRMatrixGlobalNumRows(A_array[0]), hypre_ParCSRMatrixRowStarts(A_array[0])); hypre_ParVectorInitialize(Vtemp); hypre_ParVectorSetPartitioningOwner(Vtemp,0); hypre_ParAMGDataVtemp(amg_data) = Vtemp; */ Vtemp = hypre_ParAMGDataVtemp(amg_data); for (j = 1; j < num_levels; j++) { num_coeffs[j] = hypre_ParCSRMatrixDNumNonzeros(A_array[j]); num_variables[j] = hypre_ParCSRMatrixGlobalNumRows(A_array[j]); } /*----------------------------------------------------------------------- * Write the solver parameters *-----------------------------------------------------------------------*/ if (my_id == 0 && amg_print_level > 1) hypre_BoomerAMGWriteSolverParams(amg_data); /*----------------------------------------------------------------------- * Initialize the solver error flag and assorted bookkeeping variables *-----------------------------------------------------------------------*/ Solve_err_flag = 0; total_coeffs = 0; total_variables = 0; cycle_count = 0; operat_cmplxty = 0; grid_cmplxty = 0; /*----------------------------------------------------------------------- * open the log file and write some initial info *-----------------------------------------------------------------------*/ if (my_id == 0 && amg_print_level > 1) { /*fp = fopen(file_name, "a");*/ hypre_printf("\n\nAMG SOLUTION INFO:\n"); } /*----------------------------------------------------------------------- * Compute initial fine-grid residual and print to logfile *-----------------------------------------------------------------------*/ if ( amg_logging > 1 ) { hypre_ParVectorCopy(F_array[0], Residual ); hypre_ParCSRMatrixMatvecT(alpha, A_array[0], U_array[0], beta, Residual ); resid_nrm = sqrt(hypre_ParVectorInnerProd( Residual, Residual )); } else { hypre_ParVectorCopy(F_array[0], Vtemp); hypre_ParCSRMatrixMatvecT(alpha, A_array[0], U_array[0], beta, Vtemp); resid_nrm = sqrt(hypre_ParVectorInnerProd(Vtemp, 
Vtemp)); } resid_nrm_init = resid_nrm; rhs_norm = sqrt(hypre_ParVectorInnerProd(f, f)); relative_resid = 9999; if (rhs_norm) { relative_resid = resid_nrm_init / rhs_norm; } if (my_id ==0 && (amg_print_level > 1)) { hypre_printf(" relative\n"); hypre_printf(" residual factor residual\n"); hypre_printf(" -------- ------ --------\n"); hypre_printf(" Initial %e %e\n",resid_nrm_init, relative_resid); } /*----------------------------------------------------------------------- * Main V-cycle loop *-----------------------------------------------------------------------*/ while ((relative_resid >= tol || cycle_count < min_iter) && cycle_count < max_iter && Solve_err_flag == 0) { hypre_ParAMGDataCycleOpCount(amg_data) = 0; /* Op count only needed for one cycle */ Solve_err_flag = hypre_BoomerAMGCycleT(amg_data, F_array, U_array); old_resid = resid_nrm; /*--------------------------------------------------------------- * Compute fine-grid residual and residual norm *----------------------------------------------------------------*/ if ( amg_logging > 1 ) { hypre_ParVectorCopy(F_array[0], Residual ); hypre_ParCSRMatrixMatvecT(alpha, A_array[0], U_array[0], beta, Residual ); resid_nrm = sqrt(hypre_ParVectorInnerProd( Residual, Residual )); } else { hypre_ParVectorCopy(F_array[0], Vtemp); hypre_ParCSRMatrixMatvecT(alpha, A_array[0], U_array[0], beta, Vtemp); resid_nrm = sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp)); } conv_factor = resid_nrm / old_resid; relative_resid = 9999; if (rhs_norm) { relative_resid = resid_nrm / rhs_norm; } ++cycle_count; hypre_ParAMGDataRelativeResidualNorm(amg_data) = relative_resid; hypre_ParAMGDataNumIterations(amg_data) = cycle_count; if (my_id == 0 && (amg_print_level > 1)) { hypre_printf(" Cycle %2d %e %f %e \n", cycle_count, resid_nrm, conv_factor, relative_resid); } } if (cycle_count == max_iter) Solve_err_flag = 1; /*----------------------------------------------------------------------- * Compute closing statistics 
*-----------------------------------------------------------------------*/ conv_factor = pow((resid_nrm/resid_nrm_init),(1.0/((HYPRE_Real) cycle_count))); for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++) { total_coeffs += num_coeffs[j]; total_variables += num_variables[j]; } cycle_op_count = hypre_ParAMGDataCycleOpCount(amg_data); if (num_variables[0]) grid_cmplxty = ((HYPRE_Real) total_variables) / ((HYPRE_Real) num_variables[0]); if (num_coeffs[0]) { operat_cmplxty = total_coeffs / num_coeffs[0]; cycle_cmplxty = cycle_op_count / num_coeffs[0]; } if (my_id == 0 && amg_print_level > 1) { if (Solve_err_flag == 1) { hypre_printf("\n\n=============================================="); hypre_printf("\n NOTE: Convergence tolerance was not achieved\n"); hypre_printf(" within the allowed %d V-cycles\n",max_iter); hypre_printf("=============================================="); } hypre_printf("\n\n Average Convergence Factor = %f",conv_factor); hypre_printf("\n\n Complexity: grid = %f\n",grid_cmplxty); hypre_printf(" operator = %f\n",operat_cmplxty); hypre_printf(" cycle = %f\n\n",cycle_cmplxty); } /*---------------------------------------------------------- * Close the output file (if open) *----------------------------------------------------------*/ /*if (my_id == 0 && amg_print_level >= 1) { fclose(fp); }*/ hypre_TFree(num_coeffs); hypre_TFree(num_variables); return(Solve_err_flag); }