int hypre_BoomerAMGIndepSetInit( hypre_ParCSRMatrix *S, double *measure_array , int seq_rand) { hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S); MPI_Comm comm = hypre_ParCSRMatrixComm(S); int S_num_nodes = hypre_CSRMatrixNumRows(S_diag); HYPRE_BigInt i; int j, my_id; int ierr = 0; MPI_Comm_rank(comm,&my_id); j = 2747+my_id; if (seq_rand) j = 2747; hypre_SeedRand(j); if (seq_rand) { for (i = 0; i < hypre_ParCSRMatrixFirstRowIndex(S); i++) hypre_Rand(); } for (j = 0; j < S_num_nodes; j++) { measure_array[j] += hypre_Rand(); } return (ierr); }
/* Assume that we are given a fine and coarse topology and the coarse degrees of freedom (DOFs) have been chosen. Assume also, that the global interpolation matrix dof_DOF has a prescribed nonzero pattern. Then, the fine degrees of freedom can be split into 4 groups (here "i" stands for "interior"): NODEidof - dofs which are interpolated only from the DOF in one coarse vertex EDGEidof - dofs which are interpolated only from the DOFs in one coarse edge FACEidof - dofs which are interpolated only from the DOFs in one coarse face ELEMidof - dofs which are interpolated only from the DOFs in one coarse element The interpolation operator dof_DOF can be build in 4 steps, by consequently filling-in the rows corresponding to the above groups. The code below uses harmonic extension to extend the interpolation from one group to the next. */ HYPRE_Int hypre_ND1AMGeInterpolation (hypre_ParCSRMatrix * Aee, hypre_ParCSRMatrix * ELEM_idof, hypre_ParCSRMatrix * FACE_idof, hypre_ParCSRMatrix * EDGE_idof, hypre_ParCSRMatrix * ELEM_FACE, hypre_ParCSRMatrix * ELEM_EDGE, HYPRE_Int num_OffProcRows, hypre_MaxwellOffProcRow ** OffProcRows, hypre_IJMatrix * IJ_dof_DOF) { HYPRE_Int ierr = 0; HYPRE_Int i, j, k; HYPRE_Int *offproc_rnums, *swap; hypre_ParCSRMatrix * dof_DOF = hypre_IJMatrixObject(IJ_dof_DOF); hypre_ParCSRMatrix * ELEM_DOF = ELEM_EDGE; hypre_ParCSRMatrix * ELEM_FACEidof; hypre_ParCSRMatrix * ELEM_EDGEidof; hypre_CSRMatrix *A, *P; HYPRE_Int numELEM = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(ELEM_EDGE)); HYPRE_Int getrow_ierr; HYPRE_Int three_dimensional_problem; MPI_Comm comm= hypre_ParCSRMatrixComm(Aee); HYPRE_Int myproc; hypre_MPI_Comm_rank(comm, &myproc); #if 0 hypre_IJMatrix * ij_dof_DOF = hypre_CTAlloc(hypre_IJMatrix, 1); /* Convert dof_DOF to IJ matrix, so we can use AddToValues */ hypre_IJMatrixComm(ij_dof_DOF) = hypre_ParCSRMatrixComm(dof_DOF); hypre_IJMatrixRowPartitioning(ij_dof_DOF) = hypre_ParCSRMatrixRowStarts(dof_DOF); hypre_IJMatrixColPartitioning(ij_dof_DOF) = hypre_ParCSRMatrixColStarts(dof_DOF); hypre_IJMatrixObject(ij_dof_DOF) = dof_DOF; hypre_IJMatrixAssembleFlag(ij_dof_DOF) = 1; #endif /* sort the offproc rows to get quicker comparison for later */ if (num_OffProcRows) { offproc_rnums= hypre_TAlloc(HYPRE_Int, num_OffProcRows); swap = hypre_TAlloc(HYPRE_Int, num_OffProcRows); for (i= 0; i< num_OffProcRows; i++) { offproc_rnums[i]=(OffProcRows[i] -> row); swap[i] = i; } } if (num_OffProcRows > 1) { hypre_qsort2i(offproc_rnums, swap, 0, num_OffProcRows-1); } if (FACE_idof == EDGE_idof) three_dimensional_problem = 0; else three_dimensional_problem = 1; /* ELEM_FACEidof = ELEM_FACE x FACE_idof */ if (three_dimensional_problem) ELEM_FACEidof = hypre_ParMatmul(ELEM_FACE, FACE_idof); /* ELEM_EDGEidof = ELEM_EDGE x EDGE_idof */ ELEM_EDGEidof = hypre_ParMatmul(ELEM_EDGE, EDGE_idof); /* Loop over local coarse elements */ k = hypre_ParCSRMatrixFirstRowIndex(ELEM_EDGE); for (i = 0; i < numELEM; i++, k++) { HYPRE_Int size1, size2; HYPRE_Int *col_ind0, *col_ind1, *col_ind2; HYPRE_Int num_DOF, *DOF0, *DOF; HYPRE_Int num_idof, *idof0, *idof; HYPRE_Int num_bdof, *bdof; double *boolean_data; /* Determine the coarse DOFs */ hypre_ParCSRMatrixGetRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); DOF= hypre_TAlloc(HYPRE_Int, num_DOF); for (j= 0; j< num_DOF; j++) { DOF[j]= DOF0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); qsort0(DOF,0,num_DOF-1); /* Find the fine dofs interior for the current coarse element */ hypre_ParCSRMatrixGetRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); idof= hypre_TAlloc(HYPRE_Int, num_idof); for (j= 0; j< num_idof; j++) { idof[j]= idof0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); /* Sort the interior dofs according to their global number */ qsort0(idof,0,num_idof-1); /* Find the fine dofs on the boundary of the current coarse element */ if (three_dimensional_problem) { hypre_ParCSRMatrixGetRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data); col_ind1= hypre_TAlloc(HYPRE_Int, size1); for (j= 0; j< size1; j++) { col_ind1[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data); } else size1 = 0; hypre_ParCSRMatrixGetRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); col_ind2= hypre_TAlloc(HYPRE_Int, size2); for (j= 0; j< size2; j++) { col_ind2[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); /* Merge and sort the boundary dofs according to their global number */ num_bdof = size1 + size2; bdof = hypre_CTAlloc(HYPRE_Int, num_bdof); if (three_dimensional_problem) memcpy(bdof, col_ind1, size1*sizeof(HYPRE_Int)); memcpy(bdof+size1, col_ind2, size2*sizeof(HYPRE_Int)); qsort0(bdof,0,num_bdof-1); /* A = extract_rows(Aee, idof) */ A = hypre_CSRMatrixCreate (num_idof, num_idof + num_bdof, num_idof * (num_idof + num_bdof)); hypre_CSRMatrixInitialize(A); { HYPRE_Int *I = hypre_CSRMatrixI(A); HYPRE_Int *J = hypre_CSRMatrixJ(A); double *data = hypre_CSRMatrixData(A); HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr <0) hypre_printf("getrow Aee off proc[%d] = \n",myproc); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } } /* P = extract_rows(dof_DOF, idof+bdof) */ P = hypre_CSRMatrixCreate (num_idof + num_bdof, num_DOF, (num_idof + num_bdof) * num_DOF); hypre_CSRMatrixInitialize(P); { HYPRE_Int *I = hypre_CSRMatrixI(P); HYPRE_Int *J = hypre_CSRMatrixJ(P); double *data = hypre_CSRMatrixData(P); HYPRE_Int m; HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == idof[j]) { break; } else { m++; } } I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } for ( ; j < num_idof + num_bdof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == bdof[j-num_idof]) { break; } else { m++; } } if (m>= num_OffProcRows)hypre_printf("here the mistake\n"); I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } } /* Pi = Aii^{-1} Aib Pb */ hypre_HarmonicExtension (A, P, num_DOF, DOF, num_idof, idof, num_bdof, bdof); /* Insert Pi in dof_DOF */ { HYPRE_Int * ncols = hypre_CTAlloc(HYPRE_Int, num_idof); for (j = 0; j < num_idof; j++) ncols[j] = num_DOF; hypre_IJMatrixAddToValuesParCSR (IJ_dof_DOF, num_idof, ncols, idof, hypre_CSRMatrixJ(P), hypre_CSRMatrixData(P)); hypre_TFree(ncols); } hypre_TFree(DOF); hypre_TFree(idof); if (three_dimensional_problem) { hypre_TFree(col_ind1); } hypre_TFree(col_ind2); hypre_TFree(bdof); hypre_CSRMatrixDestroy(A); hypre_CSRMatrixDestroy(P); } #if 0 hypre_TFree(ij_dof_DOF); #endif if (three_dimensional_problem) hypre_ParCSRMatrixDestroy(ELEM_FACEidof); hypre_ParCSRMatrixDestroy(ELEM_EDGEidof); if (num_OffProcRows) { hypre_TFree(offproc_rnums); hypre_TFree(swap); } return ierr; }
HYPRE_Int hypre_seqAMGSetup( hypre_ParAMGData *amg_data, HYPRE_Int p_level, HYPRE_Int coarse_threshold) { /* Par Data Structure variables */ hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data); MPI_Comm comm = hypre_ParCSRMatrixComm(Par_A_array[0]); MPI_Comm new_comm, seq_comm; hypre_ParCSRMatrix *A_seq = NULL; hypre_CSRMatrix *A_seq_diag; hypre_CSRMatrix *A_seq_offd; hypre_ParVector *F_seq = NULL; hypre_ParVector *U_seq = NULL; hypre_ParCSRMatrix *A; HYPRE_Int **dof_func_array; HYPRE_Int num_procs, my_id; HYPRE_Int not_finished_coarsening; HYPRE_Int level; HYPRE_Solver coarse_solver; /* misc */ dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data); /*MPI Stuff */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm,&my_id); /*initial */ level = p_level; not_finished_coarsening = 1; /* convert A at this level to sequential */ A = Par_A_array[level]; { double *A_seq_data = NULL; HYPRE_Int *A_seq_i = NULL; HYPRE_Int *A_seq_offd_i = NULL; HYPRE_Int *A_seq_j = NULL; double *A_tmp_data = NULL; HYPRE_Int *A_tmp_i = NULL; HYPRE_Int *A_tmp_j = NULL; HYPRE_Int *info, *displs, *displs2; HYPRE_Int i, j, size, num_nonzeros, total_nnz, cnt; hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); double *A_diag_data = hypre_CSRMatrixData(A_diag); double *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A); hypre_MPI_Group orig_group, new_group; HYPRE_Int *ranks, new_num_procs, *row_starts; info = hypre_CTAlloc(HYPRE_Int, num_procs); hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm); ranks = hypre_CTAlloc(HYPRE_Int, num_procs); new_num_procs = 0; for (i=0; i < num_procs; i++) if (info[i]) { ranks[new_num_procs] = i; info[new_num_procs++] = info[i]; } MPI_Comm_group(comm, &orig_group); hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group); MPI_Comm_create(comm, new_group, &new_comm); hypre_MPI_Group_free(&new_group); hypre_MPI_Group_free(&orig_group); if (num_rows) { /* alloc space in seq data structure only for participating procs*/ HYPRE_BoomerAMGCreate(&coarse_solver); HYPRE_BoomerAMGSetMaxRowSum(coarse_solver, hypre_ParAMGDataMaxRowSum(amg_data)); HYPRE_BoomerAMGSetStrongThreshold(coarse_solver, hypre_ParAMGDataStrongThreshold(amg_data)); HYPRE_BoomerAMGSetCoarsenType(coarse_solver, hypre_ParAMGDataCoarsenType(amg_data)); HYPRE_BoomerAMGSetInterpType(coarse_solver, hypre_ParAMGDataInterpType(amg_data)); HYPRE_BoomerAMGSetTruncFactor(coarse_solver, hypre_ParAMGDataTruncFactor(amg_data)); HYPRE_BoomerAMGSetPMaxElmts(coarse_solver, hypre_ParAMGDataPMaxElmts(amg_data)); if (hypre_ParAMGDataUserRelaxType(amg_data) > -1) HYPRE_BoomerAMGSetRelaxType(coarse_solver, hypre_ParAMGDataUserRelaxType(amg_data)); HYPRE_BoomerAMGSetRelaxOrder(coarse_solver, hypre_ParAMGDataRelaxOrder(amg_data)); HYPRE_BoomerAMGSetRelaxWt(coarse_solver, hypre_ParAMGDataUserRelaxWeight(amg_data)); if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1) HYPRE_BoomerAMGSetNumSweeps(coarse_solver, hypre_ParAMGDataUserNumSweeps(amg_data)); HYPRE_BoomerAMGSetNumFunctions(coarse_solver, hypre_ParAMGDataNumFunctions(amg_data)); HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1); HYPRE_BoomerAMGSetTol(coarse_solver, 0); /* Create CSR Matrix, will be Diag part of new matrix */ A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows+1); A_tmp_i[0] = 0; for (i=1; i < num_rows+1; i++) A_tmp_i[i] = A_diag_i[i]-A_diag_i[i-1]+A_offd_i[i]-A_offd_i[i-1]; num_nonzeros = A_offd_i[num_rows]+A_diag_i[num_rows]; A_tmp_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros); A_tmp_data = hypre_CTAlloc(double, num_nonzeros); cnt = 0; for (i=0; i < num_rows; i++) { for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++) { A_tmp_j[cnt] = A_diag_j[j]+first_row_index; A_tmp_data[cnt++] = A_diag_data[j]; } for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++) { A_tmp_j[cnt] = col_map_offd[A_offd_j[j]]; A_tmp_data[cnt++] = A_offd_data[j]; } } displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1); displs[0] = 0; for (i=1; i < new_num_procs+1; i++) displs[i] = displs[i-1]+info[i-1]; size = displs[new_num_procs]; A_seq_i = hypre_CTAlloc(HYPRE_Int, size+1); A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1); hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, displs, HYPRE_MPI_INT, new_comm ); displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs+1); A_seq_i[0] = 0; displs2[0] = 0; for (j=1; j < displs[1]; j++) A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1]; for (i=1; i < new_num_procs; i++) { for (j=displs[i]; j < displs[i+1]; j++) { A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1]; } } A_seq_i[size] = A_seq_i[size]+A_seq_i[size-1]; displs2[new_num_procs] = A_seq_i[size]; for (i=1; i < new_num_procs+1; i++) { displs2[i] = A_seq_i[displs[i]]; info[i-1] = displs2[i] - displs2[i-1]; } total_nnz = displs2[new_num_procs]; A_seq_j = hypre_CTAlloc(HYPRE_Int, total_nnz); A_seq_data = hypre_CTAlloc(double, total_nnz); hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT, A_seq_j, info, displs2, HYPRE_MPI_INT, new_comm ); hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, hypre_MPI_DOUBLE, A_seq_data, info, displs2, hypre_MPI_DOUBLE, new_comm ); hypre_TFree(displs); hypre_TFree(displs2); hypre_TFree(A_tmp_i); hypre_TFree(A_tmp_j); hypre_TFree(A_tmp_data); row_starts = hypre_CTAlloc(HYPRE_Int,2); row_starts[0] = 0; row_starts[1] = size; /* Create 1 proc communicator */ seq_comm = hypre_MPI_COMM_SELF; A_seq = hypre_ParCSRMatrixCreate(seq_comm,size,size, row_starts, row_starts, 0,total_nnz,0); A_seq_diag = hypre_ParCSRMatrixDiag(A_seq); A_seq_offd = hypre_ParCSRMatrixOffd(A_seq); hypre_CSRMatrixData(A_seq_diag) = A_seq_data; hypre_CSRMatrixI(A_seq_diag) = A_seq_i; hypre_CSRMatrixJ(A_seq_diag) = A_seq_j; hypre_CSRMatrixI(A_seq_offd) = A_seq_offd_i; F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts); U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts); hypre_ParVectorOwnsPartitioning(F_seq) = 0; hypre_ParVectorOwnsPartitioning(U_seq) = 0; hypre_ParVectorInitialize(F_seq); hypre_ParVectorInitialize(U_seq); hypre_BoomerAMGSetup(coarse_solver,A_seq,F_seq,U_seq); hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver; hypre_ParAMGDataACoarse(amg_data) = A_seq; hypre_ParAMGDataFCoarse(amg_data) = F_seq; hypre_ParAMGDataUCoarse(amg_data) = U_seq; hypre_ParAMGDataNewComm(amg_data) = new_comm; } hypre_TFree(info); hypre_TFree(ranks); } return 0; }
HYPRE_Int hypre_BlockTridiagSetup(void *data, hypre_ParCSRMatrix *A, hypre_ParVector *b, hypre_ParVector *x) { HYPRE_Int i, j, *index_set1, print_level, nsweeps, relax_type; HYPRE_Int nrows, nrows1, nrows2, start1, start2, *index_set2; HYPRE_Int count, ierr; double threshold; hypre_ParCSRMatrix **submatrices; HYPRE_Solver precon1; HYPRE_Solver precon2; HYPRE_IJVector ij_u1, ij_u2, ij_f1, ij_f2; hypre_ParVector *vector; MPI_Comm comm; hypre_BlockTridiagData *b_data = (hypre_BlockTridiagData *) data; HYPRE_ParCSRMatrixGetComm((HYPRE_ParCSRMatrix) A, &comm); index_set1 = b_data->index_set1; nrows1 = index_set1[0]; nrows = hypre_ParCSRMatrixNumRows(A); nrows2 = nrows - nrows1; b_data->index_set2 = hypre_CTAlloc(HYPRE_Int, nrows2+1); index_set2 = b_data->index_set2; index_set2[0] = nrows2; count = 1; for (i = 0; i < index_set1[1]; i++) index_set2[count++] = i; for (i = 1; i < nrows1; i++) for (j = index_set1[i]+1; j < index_set1[i+1]; j++) index_set2[count++] = j; for (i = index_set1[nrows1]+1; i < nrows; i++) index_set2[count++] = i; submatrices = hypre_CTAlloc(hypre_ParCSRMatrix *, 4); hypre_ParCSRMatrixExtractSubmatrices(A, index_set1, &submatrices); nrows1 = hypre_ParCSRMatrixNumRows(submatrices[0]); nrows2 = hypre_ParCSRMatrixNumRows(submatrices[3]); start1 = hypre_ParCSRMatrixFirstRowIndex(submatrices[0]); start2 = hypre_ParCSRMatrixFirstRowIndex(submatrices[3]); HYPRE_IJVectorCreate(comm, start1, start1+nrows1-1, &ij_u1); HYPRE_IJVectorSetObjectType(ij_u1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(ij_u1); ierr += HYPRE_IJVectorAssemble(ij_u1); hypre_assert(!ierr); HYPRE_IJVectorCreate(comm, start1, start1+nrows1-1, &ij_f1); HYPRE_IJVectorSetObjectType(ij_f1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(ij_f1); ierr += HYPRE_IJVectorAssemble(ij_f1); hypre_assert(!ierr); HYPRE_IJVectorCreate(comm, start2, start2+nrows2-1, &ij_u2); HYPRE_IJVectorSetObjectType(ij_u2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(ij_u2); ierr += HYPRE_IJVectorAssemble(ij_u2); hypre_assert(!ierr); HYPRE_IJVectorCreate(comm, start2, start2+nrows1-1, &ij_f2); HYPRE_IJVectorSetObjectType(ij_f2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(ij_f2); ierr += HYPRE_IJVectorAssemble(ij_f2); hypre_assert(!ierr); HYPRE_IJVectorGetObject(ij_f1, (void **) &vector); b_data->F1 = vector; HYPRE_IJVectorGetObject(ij_u1, (void **) &vector); b_data->U1 = vector; HYPRE_IJVectorGetObject(ij_f2, (void **) &vector); b_data->F2 = vector; HYPRE_IJVectorGetObject(ij_u2, (void **) &vector); b_data->U2 = vector; print_level = b_data->print_level; threshold = b_data->threshold; nsweeps = b_data->num_sweeps; relax_type = b_data->relax_type; threshold = b_data->threshold; HYPRE_BoomerAMGCreate(&precon1); HYPRE_BoomerAMGSetMaxIter(precon1, 1); HYPRE_BoomerAMGSetCycleType(precon1, 1); HYPRE_BoomerAMGSetPrintLevel(precon1, print_level); HYPRE_BoomerAMGSetMaxLevels(precon1, 25); HYPRE_BoomerAMGSetMeasureType(precon1, 0); HYPRE_BoomerAMGSetCoarsenType(precon1, 0); HYPRE_BoomerAMGSetStrongThreshold(precon1, threshold); HYPRE_BoomerAMGSetNumFunctions(precon1, 1); HYPRE_BoomerAMGSetNumSweeps(precon1, nsweeps); HYPRE_BoomerAMGSetRelaxType(precon1, relax_type); hypre_BoomerAMGSetup(precon1, submatrices[0], b_data->U1, b_data->F1); HYPRE_BoomerAMGCreate(&precon2); HYPRE_BoomerAMGSetMaxIter(precon2, 1); HYPRE_BoomerAMGSetCycleType(precon2, 1); HYPRE_BoomerAMGSetPrintLevel(precon2, print_level); HYPRE_BoomerAMGSetMaxLevels(precon2, 25); HYPRE_BoomerAMGSetMeasureType(precon2, 0); HYPRE_BoomerAMGSetCoarsenType(precon2, 0); HYPRE_BoomerAMGSetMeasureType(precon2, 1); HYPRE_BoomerAMGSetStrongThreshold(precon2, threshold); HYPRE_BoomerAMGSetNumFunctions(precon2, 1); HYPRE_BoomerAMGSetNumSweeps(precon2, nsweeps); HYPRE_BoomerAMGSetRelaxType(precon2, relax_type); hypre_BoomerAMGSetup(precon2, submatrices[3], NULL, NULL); b_data->precon1 = precon1; b_data->precon2 = precon2; b_data->A11 = submatrices[0]; hypre_ParCSRMatrixDestroy(submatrices[1]); b_data->A21 = submatrices[2]; b_data->A22 = submatrices[3]; hypre_TFree(submatrices); return (0); }
/*-------------------------------------------------------------------------- * hypre_SStructSharedDOF_ParcsrMatRowsComm * Given a sstruct_grid & parcsr matrix with rows corresponding to the * sstruct_grid, determine and extract the rows that must be communicated. * These rows are for shared dof that geometrically lie on processor * boundaries but internally are stored on one processor. * Algo: * for each cellbox * RECVs: * i) stretch the cellbox to the variable box * ii) in the appropriate (dof-dependent) direction, take the * boundary and boxman_intersect to extract boxmanentries * that contain these boundary edges. * iii)loop over the boxmanentries and see if they belong * on this proc or another proc * a) if belong on another proc, these are the recvs: * count and prepare the communication buffers and * values. * * SENDs: * i) form layer of cells that is one layer off cellbox * (stretches in the appropriate direction) * ii) boxman_intersect with the cellgrid boxman * iii)loop over the boxmanentries and see if they belong * on this proc or another proc * a) if belong on another proc, these are the sends: * count and prepare the communication buffers and * values. * * Note: For the recv data, the dof can come from only one processor. * For the send data, the dof can go to more than one processor * (the same dof is on the boundary of several cells). *--------------------------------------------------------------------------*/ HYPRE_Int hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid *grid, hypre_ParCSRMatrix *A, HYPRE_Int *num_offprocrows_ptr, hypre_MaxwellOffProcRow ***OffProcRows_ptr) { MPI_Comm A_comm= hypre_ParCSRMatrixComm(A); MPI_Comm grid_comm= hypre_SStructGridComm(grid); HYPRE_Int matrix_type= HYPRE_PARCSR; HYPRE_Int nparts= hypre_SStructGridNParts(grid); HYPRE_Int ndim = hypre_SStructGridNDim(grid); hypre_SStructGrid *cell_ssgrid; hypre_SStructPGrid *pgrid; hypre_StructGrid *cellgrid; hypre_BoxArray *cellboxes; hypre_Box *box, *cellbox, vbox, boxman_entry_box; hypre_Index loop_size, start; HYPRE_Int loopi, loopj, loopk; HYPRE_Int start_rank, end_rank, rank; HYPRE_Int i, j, k, m, n, t, part, var, nvars; HYPRE_SStructVariable *vartypes; HYPRE_Int nbdry_slabs; hypre_BoxArray *recv_slabs, *send_slabs; hypre_Index varoffset; hypre_BoxManager **boxmans, *cell_boxman; hypre_BoxManEntry **boxman_entries, *entry; HYPRE_Int nboxman_entries; hypre_Index ishift, jshift, kshift, zero_index; hypre_Index ilower, iupper, index; HYPRE_Int proc, nprocs, myproc; HYPRE_Int *SendToProcs, *RecvFromProcs; HYPRE_Int **send_RowsNcols; /* buffer for rows & ncols */ HYPRE_Int *send_RowsNcols_alloc; HYPRE_Int *send_ColsData_alloc; HYPRE_Int *tot_nsendRowsNcols, *tot_sendColsData; double **vals; /* buffer for cols & data */ HYPRE_Int *col_inds; double *values; hypre_MPI_Request *requests; hypre_MPI_Status *status; HYPRE_Int **rbuffer_RowsNcols; double **rbuffer_ColsData; HYPRE_Int num_sends, num_recvs; hypre_MaxwellOffProcRow **OffProcRows; HYPRE_Int *starts; HYPRE_Int ierr= 0; hypre_MPI_Comm_rank(A_comm, &myproc); hypre_MPI_Comm_size(grid_comm, &nprocs); start_rank= hypre_ParCSRMatrixFirstRowIndex(A); end_rank = hypre_ParCSRMatrixLastRowIndex(A); hypre_SetIndex(ishift, 1, 0, 0); hypre_SetIndex(jshift, 0, 1, 0); hypre_SetIndex(kshift, 0, 0, 1); hypre_SetIndex(zero_index, 0, 0, 0); /* need a cellgrid boxman to determine the send boxes -> only the cell dofs are unique so a boxman intersect can be used to get the edges that must be sent. */ HYPRE_SStructGridCreate(grid_comm, ndim, nparts, &cell_ssgrid); vartypes= hypre_CTAlloc(HYPRE_SStructVariable, 1); vartypes[0]= HYPRE_SSTRUCT_VARIABLE_CELL; for (i= 0; i< nparts; i++) { pgrid= hypre_SStructGridPGrid(grid, i); cellgrid= hypre_SStructPGridCellSGrid(pgrid); cellboxes= hypre_StructGridBoxes(cellgrid); hypre_ForBoxI(j, cellboxes) { box= hypre_BoxArrayBox(cellboxes, j); HYPRE_SStructGridSetExtents(cell_ssgrid, i, hypre_BoxIMin(box), hypre_BoxIMax(box)); } HYPRE_SStructGridSetVariables(cell_ssgrid, i, 1, vartypes); }
hypre_CSRMatrix * hypre_ParCSRMatrixExtractAExt( hypre_ParCSRMatrix *A, HYPRE_Int data, HYPRE_Int ** pA_ext_row_map ) { /* Note that A's role as the first factor in A*A^T is used only through ...CommPkgT(A), which basically says which rows of A (columns of A^T) are needed. In all the other places where A serves as an input, it is through its role as A^T, the matrix whose data needs to be passed between processors. */ MPI_Comm comm = hypre_ParCSRMatrixComm(A); HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(A); HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkgT(A); /* ... CommPkgT(A) should identify all rows of A^T needed for A*A^T (that is * generally a bigger set than ...CommPkg(A), the rows of B needed for A*B) */ HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); HYPRE_Int *send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); HYPRE_Int *diag_i = hypre_CSRMatrixI(diag); HYPRE_Int *diag_j = hypre_CSRMatrixJ(diag); HYPRE_Complex *diag_data = hypre_CSRMatrixData(diag); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int *offd_i = hypre_CSRMatrixI(offd); HYPRE_Int *offd_j = hypre_CSRMatrixJ(offd); HYPRE_Complex *offd_data = hypre_CSRMatrixData(offd); HYPRE_Int num_cols_A, num_nonzeros; HYPRE_Int num_rows_A_ext; hypre_CSRMatrix *A_ext; HYPRE_Int *A_ext_i; HYPRE_Int *A_ext_j; HYPRE_Complex *A_ext_data; num_cols_A = hypre_ParCSRMatrixGlobalNumCols(A); num_rows_A_ext = recv_vec_starts[num_recvs]; hypre_ParCSRMatrixExtractBExt_Arrays ( &A_ext_i, &A_ext_j, &A_ext_data, pA_ext_row_map, &num_nonzeros, data, 1, comm, comm_pkg, num_cols_A, num_recvs, num_sends, first_col_diag, first_row_index, recv_vec_starts, send_map_starts, send_map_elmts, diag_i, diag_j, offd_i, offd_j, col_map_offd, diag_data, offd_data ); A_ext = hypre_CSRMatrixCreate(num_rows_A_ext,num_cols_A,num_nonzeros); hypre_CSRMatrixI(A_ext) = A_ext_i; hypre_CSRMatrixJ(A_ext) = A_ext_j; if (data) hypre_CSRMatrixData(A_ext) = A_ext_data; return A_ext; }
hypre_ParCSRMatrix *hypre_ParCSRAAt( hypre_ParCSRMatrix *A ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); HYPRE_Complex *A_diag_data = hypre_CSRMatrixData(A_diag); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Complex *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); HYPRE_Int *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int *A_ext_row_map; HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A); HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd); hypre_ParCSRMatrix *C; HYPRE_Int *col_map_offd_C; hypre_CSRMatrix *C_diag; HYPRE_Complex *C_diag_data; HYPRE_Int *C_diag_i; HYPRE_Int *C_diag_j; hypre_CSRMatrix *C_offd; HYPRE_Complex *C_offd_data=NULL; HYPRE_Int *C_offd_i=NULL; HYPRE_Int *C_offd_j=NULL; HYPRE_Int *new_C_offd_j; HYPRE_Int C_diag_size; HYPRE_Int C_offd_size; HYPRE_Int last_col_diag_C; HYPRE_Int num_cols_offd_C; hypre_CSRMatrix *A_ext; HYPRE_Complex *A_ext_data; HYPRE_Int *A_ext_i; HYPRE_Int *A_ext_j; HYPRE_Int num_rows_A_ext=0; HYPRE_Int first_row_index_A = hypre_ParCSRMatrixFirstRowIndex(A); HYPRE_Int first_col_diag_A = hypre_ParCSRMatrixFirstColDiag(A); HYPRE_Int *B_marker; HYPRE_Int i; HYPRE_Int i1, i2, i3; HYPRE_Int jj2, jj3; HYPRE_Int jj_count_diag, jj_count_offd; HYPRE_Int jj_row_begin_diag, jj_row_begin_offd; HYPRE_Int start_indexing = 0; /* start indexing for C_data at 0 */ HYPRE_Int count; HYPRE_Int n_rows_A, n_cols_A; HYPRE_Complex a_entry; HYPRE_Complex a_b_product; HYPRE_Complex zero = 0.0; n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A); n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A); if (n_cols_A != n_rows_A) { hypre_printf(" Error! Incompatible matrix dimensions!\n"); return NULL; } /*----------------------------------------------------------------------- * Extract A_ext, i.e. portion of A that is stored on neighbor procs * and needed locally for A^T in the matrix matrix product A*A^T *-----------------------------------------------------------------------*/ if (num_rows_diag_A != n_rows_A) { /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!hypre_ParCSRMatrixCommPkg(A)) { hypre_MatTCommPkgCreate(A); } A_ext = hypre_ParCSRMatrixExtractAExt( A, 1, &A_ext_row_map ); A_ext_data = hypre_CSRMatrixData(A_ext); A_ext_i = hypre_CSRMatrixI(A_ext); A_ext_j = hypre_CSRMatrixJ(A_ext); num_rows_A_ext = hypre_CSRMatrixNumRows(A_ext); } /*----------------------------------------------------------------------- * Allocate marker array. *-----------------------------------------------------------------------*/ B_marker = hypre_CTAlloc(HYPRE_Int, num_rows_diag_A+num_rows_A_ext ); /*----------------------------------------------------------------------- * Initialize some stuff. *-----------------------------------------------------------------------*/ for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 ) { B_marker[i1] = -1; } hypre_ParAat_RowSizes( &C_diag_i, &C_offd_i, B_marker, A_diag_i, A_diag_j, A_offd_i, A_offd_j, A_col_map_offd, A_ext_i, A_ext_j, A_ext_row_map, &C_diag_size, &C_offd_size, num_rows_diag_A, num_cols_offd_A, num_rows_A_ext, first_col_diag_A, first_row_index_A ); #if 0 /* debugging output: */ hypre_printf("A_ext_row_map (%i):",num_rows_A_ext); for ( i1=0; i1<num_rows_A_ext; ++i1 ) hypre_printf(" %i",A_ext_row_map[i1] ); hypre_printf("\nC_diag_i (%i):",C_diag_size); for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_diag_i[i1] ); hypre_printf("\nC_offd_i (%i):",C_offd_size); for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_offd_i[i1] ); hypre_printf("\n"); #endif /*----------------------------------------------------------------------- * Allocate C_diag_data and C_diag_j arrays. * Allocate C_offd_data and C_offd_j arrays. *-----------------------------------------------------------------------*/ last_col_diag_C = first_row_index_A + num_rows_diag_A - 1; C_diag_data = hypre_CTAlloc(HYPRE_Complex, C_diag_size); C_diag_j = hypre_CTAlloc(HYPRE_Int, C_diag_size); if (C_offd_size) { C_offd_data = hypre_CTAlloc(HYPRE_Complex, C_offd_size); C_offd_j = hypre_CTAlloc(HYPRE_Int, C_offd_size); } /*----------------------------------------------------------------------- * Second Pass: Fill in C_diag_data and C_diag_j. * Second Pass: Fill in C_offd_data and C_offd_j. *-----------------------------------------------------------------------*/ /*----------------------------------------------------------------------- * Initialize some stuff. *-----------------------------------------------------------------------*/ jj_count_diag = start_indexing; jj_count_offd = start_indexing; for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 ) { B_marker[i1] = -1; } /*----------------------------------------------------------------------- * Loop over interior c-points. *-----------------------------------------------------------------------*/ for (i1 = 0; i1 < num_rows_diag_A; i1++) { /*-------------------------------------------------------------------- * Create diagonal entry, C_{i1,i1} *--------------------------------------------------------------------*/ B_marker[i1] = jj_count_diag; jj_row_begin_diag = jj_count_diag; jj_row_begin_offd = jj_count_offd; C_diag_data[jj_count_diag] = zero; C_diag_j[jj_count_diag] = i1; jj_count_diag++; /*----------------------------------------------------------------- * Loop over entries in row i1 of A_offd. *-----------------------------------------------------------------*/ /* There are 3 CSRMatrix or CSRBooleanMatrix objects here: ext*ext, ext*diag, and ext*offd belong to another processor. diag*offd and offd*diag don't count - never share a column by definition. So we have to do 4 cases: diag*ext, offd*ext, diag*diag, and offd*offd. */ for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) { i2 = A_diag_j[jj2]; a_entry = A_diag_data[jj2]; /* diag*ext */ /*----------------------------------------------------------- * Loop over entries (columns) i3 in row i2 of (A_ext)^T * That is, rows i3 having a column i2 of A_ext. * For now, for each row i3 of A_ext we crudely check _all_ * columns to see whether one matches i2. * For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2) * to C(i1,i3) . This contributes to both the diag and offd * blocks of C. *-----------------------------------------------------------*/ for ( i3=0; i3<num_rows_A_ext; i3++ ) { for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) { if ( A_ext_j[jj3]==i2+first_col_diag_A ) { /* row i3, column i2 of A_ext; or, row i2, column i3 of (A_ext)^T */ a_b_product = a_entry * A_ext_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if ( A_ext_row_map[i3] < first_row_index_A || A_ext_row_map[i3] > last_col_diag_C ) { /* offd */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) { B_marker[i3+num_rows_diag_A] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3; jj_count_offd++; } else C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } else { /* diag */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) { B_marker[i3+num_rows_diag_A] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3-first_col_diag_A; jj_count_diag++; } else C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } } } } } if (num_cols_offd_A) { for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) { i2 = A_offd_j[jj2]; a_entry = A_offd_data[jj2]; /* offd * ext */ /*----------------------------------------------------------- * Loop over entries (columns) i3 in row i2 of (A_ext)^T * That is, rows i3 having a column i2 of A_ext. * For now, for each row i3 of A_ext we crudely check _all_ * columns to see whether one matches i2. * For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2) * to C(i1,i3) . This contributes to both the diag and offd * blocks of C. *-----------------------------------------------------------*/ for ( i3=0; i3<num_rows_A_ext; i3++ ) { for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) { if ( A_ext_j[jj3]==A_col_map_offd[i2] ) { /* row i3, column i2 of A_ext; or, row i2, column i3 of (A_ext)^T */ a_b_product = a_entry * A_ext_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if ( A_ext_row_map[i3] < first_row_index_A || A_ext_row_map[i3] > last_col_diag_C ) { /* offd */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) { B_marker[i3+num_rows_diag_A] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3; jj_count_offd++; } else C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } else { /* diag */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) { B_marker[i3+num_rows_diag_A] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3-first_row_index_A; jj_count_diag++; } else C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } } } } } } /* diag * diag */ /*----------------------------------------------------------------- * Loop over entries (columns) i2 in row i1 of A_diag. * For each such column we will find the contributions of the * corresponding rows i2 of A^T to C=A*A^T . Now we only look * at the local part of A^T - with columns (rows of A) living * on this processor. *-----------------------------------------------------------------*/ for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) { i2 = A_diag_j[jj2]; a_entry = A_diag_data[jj2]; /*----------------------------------------------------------- * Loop over entries (columns) i3 in row i2 of A^T * That is, rows i3 having a column i2 of A (local part). * For now, for each row i3 of A we crudely check _all_ * columns to see whether one matches i2. * This i3-loop is for the diagonal block of A. * It contributes to the diagonal block of C. * For each entry (i2,i3) of A^T, add A(i1,i2)*A(i3,i2) * to C(i1,i3) *-----------------------------------------------------------*/ for ( i3=0; i3<num_rows_diag_A; i3++ ) { for ( jj3=A_diag_i[i3]; jj3<A_diag_i[i3+1]; jj3++ ) { if ( A_diag_j[jj3]==i2 ) { /* row i3, column i2 of A; or, row i2, column i3 of A^T */ a_b_product = a_entry * A_diag_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, mark it and increment * counter. *--------------------------------------------------------*/ if (B_marker[i3] < jj_row_begin_diag) { B_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else { C_diag_data[B_marker[i3]] += a_b_product; } } } } /* end of i3 loop */ } /* end of third i2 loop */ /* offd * offd */ /*----------------------------------------------------------- * Loop over offd columns i2 of A in A*A^T. Then * loop over offd entries (columns) i3 in row i2 of A^T * That is, rows i3 having a column i2 of A (local part). * For now, for each row i3 of A we crudely check _all_ * columns to see whether one matches i2. * This i3-loop is for the off-diagonal block of A. * It contributes to the diag block of C. * For each entry (i2,i3) of A^T, add A*A^T to C *-----------------------------------------------------------*/ if (num_cols_offd_A) { for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) { i2 = A_offd_j[jj2]; a_entry = A_offd_data[jj2]; for ( i3=0; i3<num_rows_diag_A; i3++ ) { /* ... note that num_rows_diag_A == num_rows_offd_A */ for ( jj3=A_offd_i[i3]; jj3<A_offd_i[i3+1]; jj3++ ) { if ( A_offd_j[jj3]==i2 ) { /* row i3, column i2 of A; or, row i2, column i3 of A^T */ a_b_product = a_entry * A_offd_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution *--------------------------------------------------------*/ if (B_marker[i3] < jj_row_begin_diag) { B_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else { C_diag_data[B_marker[i3]] += a_b_product; } } } } /* end of last i3 loop */ } /* end of if (num_cols_offd_A) */ } /* end of fourth and last i2 loop */ #if 0 /* debugging printout */ hypre_printf("end of i1 loop: i1=%i jj_count_diag=%i\n", i1, jj_count_diag ); hypre_printf(" C_diag_j="); for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%i ",C_diag_j[jj3]); hypre_printf(" C_diag_data="); for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%f ",C_diag_data[jj3]); hypre_printf("\n"); hypre_printf(" C_offd_j="); for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%i ",C_offd_j[jj3]); hypre_printf(" C_offd_data="); for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%f ",C_offd_data[jj3]); hypre_printf("\n"); hypre_printf( " B_marker =" ); for ( it=0; it<num_rows_diag_A+num_rows_A_ext; ++it ) hypre_printf(" %i", B_marker[it] ); hypre_printf( "\n" ); #endif } /* end of i1 loop */ /*----------------------------------------------------------------------- * Delete 0-columns in C_offd, i.e. generate col_map_offd and reset * C_offd_j. Note that (with the indexing we have coming into this * block) col_map_offd_C[i3]==A_ext_row_map[i3]. *-----------------------------------------------------------------------*/ for ( i=0; i<num_rows_diag_A+num_rows_A_ext; ++i ) B_marker[i] = -1; for ( i=0; i<C_offd_size; i++ ) B_marker[ C_offd_j[i] ] = -2; count = 0; for (i=0; i < num_rows_diag_A + num_rows_A_ext; i++) { if (B_marker[i] == -2) { B_marker[i] = count; count++; } } num_cols_offd_C = count; if (num_cols_offd_C) { col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C); new_C_offd_j = hypre_CTAlloc(HYPRE_Int,C_offd_size); /* ... a bit big, but num_cols_offd_C is too small. It might be worth computing the correct size, which is sum( no. columns in row i, over all rows i ) */ for (i=0; i < C_offd_size; i++) { new_C_offd_j[i] = B_marker[C_offd_j[i]]; col_map_offd_C[ new_C_offd_j[i] ] = A_ext_row_map[ C_offd_j[i] ]; } hypre_TFree(C_offd_j); C_offd_j = new_C_offd_j; } /*---------------------------------------------------------------- * Create C *----------------------------------------------------------------*/ C = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_rows_A, row_starts_A, row_starts_A, num_cols_offd_C, C_diag_size, C_offd_size); /* Note that C does not own the partitionings */ hypre_ParCSRMatrixSetRowStartsOwner(C,0); hypre_ParCSRMatrixSetColStartsOwner(C,0); C_diag = hypre_ParCSRMatrixDiag(C); hypre_CSRMatrixData(C_diag) = C_diag_data; hypre_CSRMatrixI(C_diag) = C_diag_i; hypre_CSRMatrixJ(C_diag) = C_diag_j; if (num_cols_offd_C) { C_offd = hypre_ParCSRMatrixOffd(C); hypre_CSRMatrixData(C_offd) = C_offd_data; hypre_CSRMatrixI(C_offd) = C_offd_i; hypre_CSRMatrixJ(C_offd) = C_offd_j; hypre_ParCSRMatrixOffd(C) = C_offd; hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C; } else hypre_TFree(C_offd_i); /*----------------------------------------------------------------------- * Free B_ext and marker array. *-----------------------------------------------------------------------*/ if (num_cols_offd_A) { hypre_CSRMatrixDestroy(A_ext); A_ext = NULL; } hypre_TFree(B_marker); if ( num_rows_diag_A != n_rows_A ) hypre_TFree(A_ext_row_map); return C; }
int main(int argc, char *argv[]) { GRID *g; DOF *u_h; MAT *A, *A0, *B; MAP *map; INT i; size_t nnz, mem, mem_peak; VEC *x, *y0, *y1, *y2; double t0, t1, dnz, dnz1, mflops, mop; char *fn = "../test/cube.dat"; FLOAT mem_max = 300; INT refine = 0; phgOptionsRegisterFilename("-mesh_file", "Mesh file", (char **)&fn); phgOptionsRegisterInt("-loop_count", "Loop count", &loop_count); phgOptionsRegisterInt("-refine", "Refinement level", &refine); phgOptionsRegisterFloat("-mem_max", "Maximum memory", &mem_max); phgInit(&argc, &argv); g = phgNewGrid(-1); if (!phgImport(g, fn, FALSE)) phgError(1, "can't read file \"%s\".\n", fn); phgRefineAllElements(g, refine); u_h = phgDofNew(g, DOF_DEFAULT, 1, "u_h", DofNoAction); while (TRUE) { phgPrintf("\n"); if (phgBalanceGrid(g, 1.2, 1, NULL, 0.)) phgPrintf("Repartition mesh, %d submeshes, load imbalance: %lg\n", g->nprocs, (double)g->lif); map = phgMapCreate(u_h, NULL); A = phgMapCreateMat(map, map); A->handle_bdry_eqns = TRUE; build_matrix(A, u_h); phgMatAssemble(A); /* Note: A is unsymmetric (A' != A) if boundary entries not removed */ phgMatRemoveBoundaryEntries(A); #if 0 /* test block matrix operation */ A0 = phgMatCreateBlockMatrix(g->comm, 1, 1, &A, NULL); #else A0 = A; #endif phgPrintf("%d DOF, %d elems, %d submeshes, matrix size: %d, LIF: %lg\n", DofGetDataCountGlobal(u_h), g->nleaf_global, g->nprocs, A->rmap->nglobal, (double)g->lif); /* test PHG mat-vec multiply */ x = phgMapCreateVec(A->cmap, 1); y1 = phgMapCreateVec(A->rmap, 1); phgVecRandomize(x, 123); phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); y0 = phgVecCopy(y1, NULL); nnz = A->nnz_d + A->nnz_o; #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif mop = loop_count * (dnz + dnz - A->rmap->nlocal) * 1e-6; phgPrintf("\n"); t1 -= t0; phgPrintf(" PHG: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF)\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops); /* test trans(A)*x */ phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { phgMatVec(MAT_OP_T, 1.0, A0, x, 0.0, &y1); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); t1 -= t0; phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); /* time A * trans(A) */ phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); B = phgMatMat(MAT_OP_N, MAT_OP_N, 1.0, A, A, 0.0, NULL); t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); nnz = B->nnz_d + B->nnz_o; #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif /* compare B*x <--> A*A*x */ y2 = phgMatVec(MAT_OP_N, 1.0, B, x, 0.0, NULL); phgMatVec(MAT_OP_N, 1.0, A0, y0, 0.0, &y1); phgMatDestroy(&B); t1 -= t0; phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n", t1, dnz, mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL)); #if USE_PETSC { Mat ma, mb; MatInfo info; Vec va, vb, vc; PetscScalar *vec; ma = phgPetscCreateMatAIJ(A); MatGetVecs(ma, PETSC_NULL, &va); VecDuplicate(va, &vb); VecGetArray(va, &vec); memcpy(vec, x->data, x->map->nlocal * sizeof(*vec)); VecRestoreArray(va, &vec); MatMult(ma, va, vb); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { MatMult(ma, va, vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); VecGetArray(vb, &vec); memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vb, &vec); MatGetInfo(ma, MAT_GLOBAL_SUM, &info); /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/ phgPrintf("\n"); t1 -= t0; dnz = info.nz_used; phgPrintf(" PETSc: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { MatMultTranspose(ma, va, vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); VecGetArray(vb, &vec); memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vb, &vec); t1 -= t0; phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); MatMatMult(ma, ma, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &mb); t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); t1 -= t0; MatGetInfo(mb, MAT_GLOBAL_SUM, &info); dnz = info.nz_used; VecDuplicate(va, &vc); /* compare B*x <--> A*A*x */ MatMult(ma, vb, vc); MatMult(mb, va, vb); VecGetArray(vb, &vec); memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vb, &vec); VecGetArray(vc, &vec); memcpy(y2->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vc, &vec); phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n", t1, dnz, mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL)); phgPetscMatDestroy(&mb); phgPetscMatDestroy(&ma); phgPetscVecDestroy(&va); phgPetscVecDestroy(&vb); phgPetscVecDestroy(&vc); } #endif /* USE_PETSC */ #if USE_HYPRE { HYPRE_IJMatrix ma; HYPRE_IJVector va, vb, vc; HYPRE_ParCSRMatrix par_ma; hypre_ParCSRMatrix *par_mb; HYPRE_ParVector par_va, par_vb, par_vc; HYPRE_Int offset, *ni, start, end; assert(sizeof(INT)==sizeof(int) && sizeof(FLOAT)==sizeof(double)); setup_hypre_mat(A, &ma); ni = phgAlloc(2 * A->rmap->nlocal * sizeof(*ni)); offset = A->cmap->partition[A->cmap->rank]; for (i = 0; i < A->rmap->nlocal; i++) ni[i] = i + offset; HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &va); HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &vb); HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &vc); HYPRE_IJVectorSetObjectType(va, HYPRE_PARCSR); HYPRE_IJVectorSetObjectType(vb, HYPRE_PARCSR); HYPRE_IJVectorSetObjectType(vc, HYPRE_PARCSR); HYPRE_IJVectorSetMaxOffProcElmts(va, 0); HYPRE_IJVectorSetMaxOffProcElmts(vb, 0); HYPRE_IJVectorSetMaxOffProcElmts(vc, 0); HYPRE_IJVectorInitialize(va); HYPRE_IJVectorInitialize(vb); HYPRE_IJVectorInitialize(vc); HYPRE_IJMatrixGetObject(ma, (void **)(void *)&par_ma); HYPRE_IJVectorGetObject(va, (void **)(void *)&par_va); HYPRE_IJVectorGetObject(vb, (void **)(void *)&par_vb); HYPRE_IJVectorGetObject(vc, (void **)(void *)&par_vc); HYPRE_IJVectorSetValues(va, A->cmap->nlocal, ni, (double *)x->data); HYPRE_IJVectorAssemble(va); HYPRE_IJVectorAssemble(vb); HYPRE_IJVectorAssemble(vc); HYPRE_IJMatrixGetRowCounts(ma, A->cmap->nlocal, ni, ni + A->rmap->nlocal); for (i = 0, nnz = 0; i < A->rmap->nlocal; i++) nnz += ni[A->rmap->nlocal + i]; #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data); /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/ phgPrintf("\n"); t1 -= t0; phgPrintf(" HYPRE: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { HYPRE_ParCSRMatrixMatvecT(1.0, par_ma, par_va, 0.0, par_vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data); t1 -= t0; phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); /* Note: 'HYPRE_ParCSRMatrix' is currently typedef'ed to * 'hypre_ParCSRMatrix *' */ par_mb = hypre_ParMatmul((hypre_ParCSRMatrix *)par_ma, (hypre_ParCSRMatrix *)par_ma); t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); start = hypre_ParCSRMatrixFirstRowIndex(par_mb); end = hypre_ParCSRMatrixLastRowIndex(par_mb) + 1; for (i = start, nnz = 0; i < end; i++) { HYPRE_Int ncols; hypre_ParCSRMatrixGetRow(par_mb, i, &ncols, NULL, NULL); hypre_ParCSRMatrixRestoreRow(par_mb, i, &ncols, NULL, NULL); nnz += ncols; } #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif /* compare B*x <--> A*A*x */ HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_vb, 0.0, par_vc); HYPRE_ParCSRMatrixMatvec(1.0, (void *)par_mb, par_va, 0.0, par_vb); HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data); HYPRE_IJVectorGetValues(vc, A->rmap->nlocal, ni, (double*)y2->data); hypre_ParCSRMatrixDestroy((par_mb)); t1 -= t0; phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n", t1, dnz, mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL)); phgFree(ni); HYPRE_IJMatrixDestroy(ma); HYPRE_IJVectorDestroy(va); HYPRE_IJVectorDestroy(vb); HYPRE_IJVectorDestroy(vc); } #endif /* USE_HYPRE */ if (A0 != A) phgMatDestroy(&A0); #if 0 if (A->rmap->nglobal > 1000) { VEC *v = phgMapCreateVec(A->rmap, 3); for (i = 0; i < v->map->nlocal; i++) { v->data[i + 0 * v->map->nlocal] = 1 * (i + v->map->partition[g->rank]); v->data[i + 1 * v->map->nlocal] = 2 * (i + v->map->partition[g->rank]); v->data[i + 2 * v->map->nlocal] = 3 * (i + v->map->partition[g->rank]); } phgMatDumpMATLAB(A, "A", "A.m"); phgVecDumpMATLAB(v, "v", "v.m"); phgFinalize(); exit(0); } #endif phgMatDestroy(&A); phgVecDestroy(&x); phgVecDestroy(&y0); phgVecDestroy(&y1); phgVecDestroy(&y2); phgMapDestroy(&map); mem = phgMemoryUsage(g, &mem_peak); dnz = mem / (1024.0 * 1024.0); dnz1 = mem_peak / (1024.0 * 1024.0); /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/ phgPrintf("\n"); phgPrintf(" Memory: current %0.4lgMB, peak %0.4lgMB\n", dnz, dnz1); #if 0 { static int loop_count = 0; if (++loop_count == 4) break; } #endif if (mem_peak > 1024 * (size_t)1024 * mem_max) break; phgRefineAllElements(g, 1); } phgDofFree(&u_h); phgFreeGrid(&g); phgFinalize(); return 0; }
HYPRE_Int hypre_ParCSRMatrixToParChordMatrix( hypre_ParCSRMatrix *Ap, MPI_Comm comm, hypre_ParChordMatrix **pAc ) { HYPRE_Int * row_starts = hypre_ParCSRMatrixRowStarts(Ap); HYPRE_Int * col_starts = hypre_ParCSRMatrixColStarts(Ap); hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(Ap); hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(Ap); HYPRE_Int * offd_j = hypre_CSRMatrixJ(offd); HYPRE_Int * diag_j = hypre_CSRMatrixJ(diag); HYPRE_Int * col_map_offd = hypre_ParCSRMatrixColMapOffd(Ap); HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(Ap); hypre_ParChordMatrix * Ac; hypre_NumbersNode * rdofs, * offd_cols_me; hypre_NumbersNode ** offd_cols; HYPRE_Int ** offd_col_array; HYPRE_Int * len_offd_col_array, * offd_col_array_me; HYPRE_Int len_offd_col_array_me; HYPRE_Int num_idofs, num_rdofs, j_local, j_global, row_global; HYPRE_Int i, j, jj, p, pto, q, qto, my_id, my_q, row, ireq; HYPRE_Int num_inprocessors, num_toprocessors, num_procs, len_num_rdofs_toprocessor; HYPRE_Int *inprocessor, *toprocessor, *pcr, *qcr, *num_inchords, *chord, *chordto; HYPRE_Int *inproc, *toproc, *num_rdofs_toprocessor; HYPRE_Int **inchord_idof, **inchord_rdof, **rdof_toprocessor; double **inchord_data; double data; HYPRE_Int *first_index_idof, *first_index_rdof; hypre_MPI_Request * request; hypre_MPI_Status * status; hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); num_idofs = row_starts[my_id+1] - row_starts[my_id]; num_rdofs = col_starts[my_id+1] - col_starts[my_id]; hypre_ParChordMatrixCreate( pAc, comm, num_idofs, num_rdofs ); Ac = *pAc; /* The following block sets Inprocessor: On each proc. my_id, we find the columns in the offd and diag blocks (global no.s). The columns are rdofs (contrary to what I wrote in ChordMatrix.txt). For each such col/rdof r, find the proc. p which owns row/idof r. We set the temporary array pcr[p]=1 for such p. An MPI all-to-all will exchange such arrays so my_id's array qcr has qcr[q]=1 iff, on proc. q, pcr[my_id]=1. In other words, qcr[q]=1 if my_id owns a row/idof i which is the same as a col/rdof owned by q. Collect all such q's into in the array Inprocessor. While constructing pcr, we also construct pj such that for any index jj into offd_j,offd_data, pj[jj] is the processor which owns jj as a row (idof) (the number jj is local to this processor). */ pcr = hypre_CTAlloc( HYPRE_Int, num_procs ); qcr = hypre_CTAlloc( HYPRE_Int, num_procs ); for ( p=0; p<num_procs; ++p ) pcr[p]=0; for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(offd); ++jj ) { j_local = offd_j[jj]; j_global = col_map_offd[j_local]; for ( p=0; p<num_procs; ++p ) { if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) { pcr[p]=1; /* not used yet... pj[jj] = p;*/ break; } } } /* jjd = jj; ...not used yet */ /* pcr[my_id] = 1; ...for square matrices (with nonzero diag block) this one line would do the job of the following nested loop. For non-square matrices, the data distribution is too arbitrary. */ for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(diag); ++jj ) { j_local = diag_j[jj]; j_global = j_local + first_col_diag; for ( p=0; p<num_procs; ++p ) { if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) { pcr[p]=1; /* not used yet... pj[jj+jjd] = p;*/ break; } } } /* Now pcr[p]=1 iff my_id owns a col/rdof r which proc. p owns as a row/idof */ hypre_MPI_Alltoall( pcr, 1, HYPRE_MPI_INT, qcr, 1, HYPRE_MPI_INT, comm ); /* Now qcr[q]==1 if my_id owns a row/idof i which is a col/rdof of proc. q The array of such q's is the array Inprocessor. */ num_inprocessors = 0; for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) ++num_inprocessors; inprocessor = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); p = 0; for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) inprocessor[ p++ ] = q; num_toprocessors = 0; for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) ++num_toprocessors; toprocessor = hypre_CTAlloc( HYPRE_Int, num_toprocessors ); p = 0; for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) toprocessor[ p++ ] = q; hypre_ParChordMatrixNumInprocessors(Ac) = num_inprocessors; hypre_ParChordMatrixInprocessor(Ac) = inprocessor; hypre_ParChordMatrixNumToprocessors(Ac) = num_toprocessors; hypre_ParChordMatrixToprocessor(Ac) = toprocessor; hypre_TFree( qcr ); /* FirstIndexIdof[p] is the global index of proc. p's row 0 */ /* FirstIndexRdof[p] is the global index of proc. p's col 0 */ /* Fir FirstIndexIdof, we copy the array row_starts rather than its pointers, because the chord matrix will think it's free to delete FirstIndexIdof */ /* col_starts[p] contains the global index of the first column in the diag block of p. But for first_index_rdof we want the global index of the first column in p (whether that's in the diag or offd block). So it's more involved than row/idof: we also check the offd block, and have to do a gather to get first_index_rdof for every proc. on every proc. */ first_index_idof = hypre_CTAlloc( HYPRE_Int, num_procs+1 ); first_index_rdof = hypre_CTAlloc( HYPRE_Int, num_procs+1 ); for ( p=0; p<=num_procs; ++p ) { first_index_idof[p] = row_starts[p]; first_index_rdof[p] = col_starts[p]; }; if ( hypre_CSRMatrixNumRows(offd) > 0 && hypre_CSRMatrixNumCols(offd) > 0 ) first_index_rdof[my_id] = col_starts[my_id]<col_map_offd[0] ? col_starts[my_id] : col_map_offd[0]; hypre_MPI_Allgather( &first_index_rdof[my_id], 1, HYPRE_MPI_INT, first_index_rdof, 1, HYPRE_MPI_INT, comm ); /* Set num_inchords: num_inchords[p] is no. chords on my_id connected to p. Set each chord (idof,jdof,data). We go through each matrix element in the diag block, find what processor owns its column no. as a row, then update num_inchords[p], inchord_idof[p], inchord_rdof[p], inchord_data[p]. */ inchord_idof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); inchord_rdof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); inchord_data = hypre_CTAlloc( double*, num_inprocessors ); num_inchords = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); chord = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); chordto = hypre_CTAlloc( HYPRE_Int, num_toprocessors ); num_rdofs = 0; for ( q=0; q<num_inprocessors; ++q ) num_inchords[q] = 0; my_q = -1; for ( q=0; q<num_inprocessors; ++q ) if ( inprocessor[q]==my_id ) my_q = q; hypre_assert( my_q>=0 ); /* diag block: first count chords (from my_id to my_id), then set them from diag block's CSR data structure */ num_idofs = hypre_CSRMatrixNumRows(diag); rdofs = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) { for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(diag)[i]; hypre_NumbersEnter( rdofs, j_local ); ++num_inchords[my_q]; } }; num_rdofs = hypre_NumbersNEntered( rdofs ); inchord_idof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] ); inchord_rdof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] ); inchord_data[my_q] = hypre_CTAlloc( double, num_inchords[my_q] ); chord[0] = 0; for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) { for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(diag)[i]; data = hypre_CSRMatrixData(diag)[i]; inchord_idof[my_q][chord[0]] = row; /* Here We need to convert from j_local - a column local to the diag of this proc., to a j which is local only to this processor - a column (rdof) numbering scheme to be shared by the diag and offd blocks... */ j_global = j_local + hypre_ParCSRMatrixColStarts(Ap)[my_q]; j = j_global - first_index_rdof[my_q]; inchord_rdof[my_q][chord[0]] = j; inchord_data[my_q][chord[0]] = data; hypre_assert( chord[0] < num_inchords[my_q] ); ++chord[0]; } }; hypre_NumbersDeleteNode(rdofs); /* offd block: */ /* >>> offd_cols_me duplicates rdofs */ offd_cols_me = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) { for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(offd)[i]; j_global = col_map_offd[j_local]; hypre_NumbersEnter( offd_cols_me, j_global ); } } offd_col_array = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); len_offd_col_array = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); offd_col_array_me = hypre_NumbersArray( offd_cols_me ); len_offd_col_array_me = hypre_NumbersNEntered( offd_cols_me ); request = hypre_CTAlloc(hypre_MPI_Request, 2*num_procs ); ireq = 0; for ( q=0; q<num_inprocessors; ++q ) hypre_MPI_Irecv( &len_offd_col_array[q], 1, HYPRE_MPI_INT, inprocessor[q], 0, comm, &request[ireq++] ); for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) { hypre_MPI_Isend( &len_offd_col_array_me, 1, HYPRE_MPI_INT, p, 0, comm, &request[ireq++] ); } status = hypre_CTAlloc(hypre_MPI_Status, ireq ); hypre_MPI_Waitall( ireq, request, status ); hypre_TFree(status); ireq = 0; for ( q=0; q<num_inprocessors; ++q ) offd_col_array[q] = hypre_CTAlloc( HYPRE_Int, len_offd_col_array[q] ); for ( q=0; q<num_inprocessors; ++q ) hypre_MPI_Irecv( offd_col_array[q], len_offd_col_array[q], HYPRE_MPI_INT, inprocessor[q], 0, comm, &request[ireq++] ); for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) { hypre_MPI_Isend( offd_col_array_me, len_offd_col_array_me, HYPRE_MPI_INT, p, 0, comm, &request[ireq++] ); } status = hypre_CTAlloc(hypre_MPI_Status, ireq ); hypre_MPI_Waitall( ireq, request, status ); hypre_TFree(request); hypre_TFree(status); offd_cols = hypre_CTAlloc( hypre_NumbersNode *, num_inprocessors ); for ( q=0; q<num_inprocessors; ++q ) { offd_cols[q] = hypre_NumbersNewNode(); for ( i=0; i<len_offd_col_array[q]; ++i ) hypre_NumbersEnter( offd_cols[q], offd_col_array[q][i] ); } len_num_rdofs_toprocessor = 1 + hypre_CSRMatrixI(offd) [hypre_CSRMatrixNumRows(offd)]; inproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); toproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); num_rdofs_toprocessor = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); for ( qto=0; qto<len_num_rdofs_toprocessor; ++qto ) { inproc[qto] = -1; toproc[qto] = -1; num_rdofs_toprocessor[qto] = 0; }; rdofs = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) { for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(offd)[i]; j_global = col_map_offd[j_local]; hypre_NumbersEnter( rdofs, j_local ); /* TO DO: find faster ways to do the two processor lookups below.*/ /* Find a processor p (local index q) from the inprocessor list, which owns the column(rdof) whichis the same as this processor's row(idof) row. Update num_inchords for p. Save q as inproc[i] for quick recall later. It represents an inprocessor (not unique) connected to a chord i. */ inproc[i] = -1; for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { p = inprocessor[q]; if ( hypre_NumbersQuery( offd_cols[q], row+hypre_ParCSRMatrixFirstRowIndex(Ap) ) == 1 ) { /* row is one of the offd columns of p */ ++num_inchords[q]; inproc[i] = q; break; } } if ( inproc[i]<0 ) { /* For square matrices, we would have found the column in some other processor's offd. But for non-square matrices it could exist only in some other processor's diag...*/ /* Note that all data in a diag block is stored. We don't check whether the value of a data entry is zero. */ for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { p = inprocessor[q]; row_global = row+hypre_ParCSRMatrixFirstRowIndex(Ap); if ( row_global>=col_starts[p] && row_global< col_starts[p+1] ) { /* row is one of the diag columns of p */ ++num_inchords[q]; inproc[i] = q; break; } } } hypre_assert( inproc[i]>=0 ); /* Find the processor pto (local index qto) from the toprocessor list, which owns the row(idof) which is the same as this processor's column(rdof) j_global. Update num_rdofs_toprocessor for pto. Save pto as toproc[i] for quick recall later. It represents the toprocessor connected to a chord i. */ for ( qto=0; qto<num_toprocessors; ++qto ) { pto = toprocessor[qto]; if ( j_global >= row_starts[pto] && j_global<row_starts[pto+1] ) { hypre_assert( qto < len_num_rdofs_toprocessor ); ++num_rdofs_toprocessor[qto]; /* ... an overestimate, as if two chords share an rdof, that rdof will be counted twice in num_rdofs_toprocessor. It can be fixed up later.*/ toproc[i] = qto; break; } } } }; num_rdofs += hypre_NumbersNEntered(rdofs); hypre_NumbersDeleteNode(rdofs); for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { inchord_idof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] ); inchord_rdof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] ); inchord_data[q] = hypre_CTAlloc( double, num_inchords[q] ); chord[q] = 0; };