Example #1
0
HYPRE_Int
hypre_MatTCommPkgCreate ( hypre_ParCSRMatrix *A)
{
   hypre_ParCSRCommPkg	*comm_pkg;
   
   MPI_Comm             comm = hypre_ParCSRMatrixComm(A);
/*   hypre_MPI_Datatype         *recv_mpi_types;
   hypre_MPI_Datatype         *send_mpi_types;
*/
   HYPRE_Int			num_sends;
   HYPRE_Int			*send_procs;
   HYPRE_Int			*send_map_starts;
   HYPRE_Int			*send_map_elmts;
   HYPRE_Int			num_recvs;
   HYPRE_Int			*recv_procs;
   HYPRE_Int			*recv_vec_starts;
   
   HYPRE_Int  *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int  first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);
   HYPRE_Int  *col_starts = hypre_ParCSRMatrixColStarts(A);

   HYPRE_Int	ierr = 0;
   HYPRE_Int	num_rows_diag = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
   HYPRE_Int	num_cols_diag = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(A));
   HYPRE_Int	num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
   HYPRE_Int * row_starts = hypre_ParCSRMatrixRowStarts(A);

   hypre_MatTCommPkgCreate_core (
      comm, col_map_offd, first_col_diag, col_starts,
      num_rows_diag, num_cols_diag, num_cols_offd, row_starts,
                                  hypre_ParCSRMatrixFirstColDiag(A),
                                  hypre_ParCSRMatrixColMapOffd(A),
                                  hypre_CSRMatrixI( hypre_ParCSRMatrixDiag(A) ),
                                  hypre_CSRMatrixJ( hypre_ParCSRMatrixDiag(A) ),
                                  hypre_CSRMatrixI( hypre_ParCSRMatrixOffd(A) ),
      hypre_CSRMatrixJ( hypre_ParCSRMatrixOffd(A) ),
      1,
      &num_recvs, &recv_procs, &recv_vec_starts,
      &num_sends, &send_procs, &send_map_starts,
      &send_map_elmts
      );

   comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1);

   hypre_ParCSRCommPkgComm(comm_pkg) = comm;

   hypre_ParCSRCommPkgNumRecvs(comm_pkg) = num_recvs;
   hypre_ParCSRCommPkgRecvProcs(comm_pkg) = recv_procs;
   hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) = recv_vec_starts;
   hypre_ParCSRCommPkgNumSends(comm_pkg) = num_sends;
   hypre_ParCSRCommPkgSendProcs(comm_pkg) = send_procs;
   hypre_ParCSRCommPkgSendMapStarts(comm_pkg) = send_map_starts;
   hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = send_map_elmts;

   hypre_ParCSRMatrixCommPkgT(A) = comm_pkg;

   return ierr;
}
Example #2
0
void RowsWithColumn_original
( HYPRE_Int * rowmin, HYPRE_Int * rowmax, HYPRE_Int column, hypre_ParCSRMatrix * A )
/* Finds rows of A which have a nonzero at the given (global) column number.
   Sets rowmin to the minimum (local) row number of such rows, and rowmax
   to the max.  If there are no such rows, will return rowmax<0<=rowmin */
{
   hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int * mat_i, * mat_j;
   HYPRE_Int i, j, num_rows;
   HYPRE_Int firstColDiag;
   HYPRE_Int * colMapOffd;

   mat_i = hypre_CSRMatrixI(diag);
   mat_j = hypre_CSRMatrixJ(diag);
   num_rows = hypre_CSRMatrixNumRows(diag);
   firstColDiag = hypre_ParCSRMatrixFirstColDiag(A);
   *rowmin = num_rows;
   *rowmax = -1;

   for ( i=0; i<num_rows; ++i ) {
      /* global number: row = i + firstRowIndex;*/
      for ( j=mat_i[i]; j<mat_i[i+1]; ++j ) {
         if ( mat_j[j]+firstColDiag==column ) {
            /* row i (local row number) has column mat_j[j] (local column number) */
            *rowmin = i<*rowmin ? i : *rowmin;
            *rowmax = i>*rowmax ? i : *rowmax;
            break;
         }
      }
   }
   mat_i = hypre_CSRMatrixI(offd);
   mat_j = hypre_CSRMatrixJ(offd);
   num_rows = hypre_CSRMatrixNumRows(offd);
   colMapOffd = hypre_ParCSRMatrixColMapOffd(A);
   for ( i=0; i<num_rows; ++i ) {
      /* global number: row = i + firstRowIndex;*/
      for ( j=mat_i[i]; j<mat_i[i+1]; ++j ) {
         if ( colMapOffd[ mat_j[j] ]==column ) {
            /* row i (local row number) has column mat_j[j] (local column number) */
            *rowmin = i<*rowmin ? i : *rowmin;
            *rowmax = i>*rowmax ? i : *rowmax;
            break;
         }
      }
   }

/*      global col no.:  mat_j[j]+hypre_ParCSRMatrixFirstColDiag(A)
                      or hypre_ParCSRMatrixColMapOffd(A)[ mat_j[j] ]
        global row no.: i + hypre_ParCSRMatrixFirstRowIndex(A)
*/

}
Example #3
0
hypre_ParCSRMatrix * hypre_ParMatmul_FC(
   hypre_ParCSRMatrix * A, hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker,
   HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd )
/* hypre_parMatmul_FC creates and returns the "Fine"-designated rows of the
   matrix product A*P.  A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC
   where nC is the number of coarse rows/columns, nF the number of fine
   rows/columns.  The size of C=A*P is (nC+nF)*nC, even though not all rows
   of C are actually computed.  If we were to construct a matrix consisting
   only of the computed rows of C, its size would be nF*nC.
   "Fine" is defined solely by the marker array, and for example could be
   a proper subset of the fine points of a multigrid hierarchy.
*/
{
   /* To compute a submatrix of C containing only the computed data, i.e.
      only "Fine" rows, we would have to do a lot of computational work,
      with a lot of communication.  The communication is because such a
      matrix would need global information that depends on which rows are
      "Fine".
   */

   MPI_Comm 	   comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   
   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int             *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int             *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   
   double          *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int             *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int             *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int	num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int	num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int	num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
   
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   
   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);

   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int		   *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
   
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);

   HYPRE_Int	first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_Int	last_col_diag_P;
   HYPRE_Int *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int	num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int	num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *C;
   HYPRE_Int		      *col_map_offd_C;
   HYPRE_Int		      *map_P_to_C;

   hypre_CSRMatrix *C_diag;

   double          *C_diag_data;
   HYPRE_Int             *C_diag_i;
   HYPRE_Int             *C_diag_j;

   hypre_CSRMatrix *C_offd;

   double          *C_offd_data=NULL;
   HYPRE_Int             *C_offd_i=NULL;
   HYPRE_Int             *C_offd_j=NULL;

   HYPRE_Int              C_diag_size;
   HYPRE_Int              C_offd_size;
   HYPRE_Int		    num_cols_offd_C = 0;
   
   hypre_CSRMatrix *Ps_ext;
   
   double          *Ps_ext_data;
   HYPRE_Int             *Ps_ext_i;
   HYPRE_Int             *Ps_ext_j;

   double          *P_ext_diag_data;
   HYPRE_Int             *P_ext_diag_i;
   HYPRE_Int             *P_ext_diag_j;
   HYPRE_Int              P_ext_diag_size;

   double          *P_ext_offd_data;
   HYPRE_Int             *P_ext_offd_i;
   HYPRE_Int             *P_ext_offd_j;
   HYPRE_Int              P_ext_offd_size;

   HYPRE_Int		   *P_marker;
   HYPRE_Int		   *temp;

   HYPRE_Int              i, j;
   HYPRE_Int              i1, i2, i3;
   HYPRE_Int              jj2, jj3;
   
   HYPRE_Int              jj_count_diag, jj_count_offd;
   HYPRE_Int              jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int              start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int		    n_rows_A_global, n_cols_A_global;
   HYPRE_Int		    n_rows_P_global, n_cols_P_global;
   HYPRE_Int              allsquare = 0;
   HYPRE_Int              cnt, cnt_offd, cnt_diag;
   HYPRE_Int 		    num_procs;
   HYPRE_Int 		    value;

   double           a_entry;
   double           a_b_product;
   
   n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P);

   if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P)
   {
	hypre_printf(" Error! Incompatible matrix dimensions!\n");
	return NULL;
   }
   /* if (num_rows_A==num_cols_P) allsquare = 1; */

   /*-----------------------------------------------------------------------
    *  Extract P_ext, i.e. portion of P that is stored on neighbor procs
    *  and needed locally for matrix matrix product 
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);

   if (num_procs > 1)
   {
       /*---------------------------------------------------------------------
    	* If there exists no CommPkg for A, a CommPkg is generated using
    	* equally load balanced partitionings within 
	* hypre_ParCSRMatrixExtractBExt
    	*--------------------------------------------------------------------*/
   	Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1);
   	Ps_ext_data = hypre_CSRMatrixData(Ps_ext);
   	Ps_ext_i    = hypre_CSRMatrixI(Ps_ext);
   	Ps_ext_j    = hypre_CSRMatrixJ(Ps_ext);
   }
   P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_diag_size = 0;
   P_ext_offd_size = 0;
   last_col_diag_P = first_col_diag_P + num_cols_diag_P -1;

   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
            P_ext_offd_size++;
         else
            P_ext_diag_size++;
      P_ext_diag_i[i+1] = P_ext_diag_size;
      P_ext_offd_i[i+1] = P_ext_offd_size;
   }

   if (P_ext_diag_size)
   {
      P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size);
      P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size);
   }
   if (P_ext_offd_size)
   {
      P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size);
      P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size);
   }

   cnt_offd = 0;
   cnt_diag = 0;
   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_j[cnt_offd] = Ps_ext_j[j];
            P_ext_offd_data[cnt_offd++] = Ps_ext_data[j];
         }
         else
         {
            P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P;
            P_ext_diag_data[cnt_diag++] = Ps_ext_data[j];
         }
   }

   if (num_procs > 1)
   {
      hypre_CSRMatrixDestroy(Ps_ext);
      Ps_ext = NULL;
   }

   cnt = 0;
   if (P_ext_offd_size || num_cols_offd_P)
   {
      temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P);
      for (i=0; i < P_ext_offd_size; i++)
         temp[i] = P_ext_offd_j[i];
      cnt = P_ext_offd_size;
      for (i=0; i < num_cols_offd_P; i++)
         temp[cnt++] = col_map_offd_P[i];
   }
   if (cnt)
   {
      qsort0(temp, 0, cnt-1);

      num_cols_offd_C = 1;
      value = temp[0];
      for (i=1; i < cnt; i++)
      {
         if (temp[i] > value)
         {
            value = temp[i];
            temp[num_cols_offd_C++] = value;
         }
      }
   }

   if (num_cols_offd_C)
        col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);

   for (i=0; i < num_cols_offd_C; i++)
      col_map_offd_C[i] = temp[i];

   if (P_ext_offd_size || num_cols_offd_P)
      hypre_TFree(temp);

   for (i=0 ; i < P_ext_offd_size; i++)
      P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C,
                                           P_ext_offd_j[i],
                                           num_cols_offd_C);
   if (num_cols_offd_P)
   {
      map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P);

      cnt = 0;
      for (i=0; i < num_cols_offd_C; i++)
         if (col_map_offd_C[i] == col_map_offd_P[cnt])
         {
            map_P_to_C[cnt++] = i;
            if (cnt == num_cols_offd_P) break;
         }
   }

   /*-----------------------------------------------------------------------
   *  Allocate marker array.
    *-----------------------------------------------------------------------*/

   P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C);

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }


/* no changes for the marked version above this point */
   /* This function call is the first pass: */
   hypre_ParMatmul_RowSizes_Marked(
      &C_diag_i, &C_offd_i, &P_marker,
      A_diag_i, A_diag_j, A_offd_i, A_offd_j,
      P_diag_i, P_diag_j, P_offd_i, P_offd_j,
      P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j,
      map_P_to_C,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A, allsquare,
      num_cols_diag_P, num_cols_offd_P,
      num_cols_offd_C, CF_marker, dof_func, dof_func_offd
      );

   /* The above call of hypre_ParMatmul_RowSizes_Marked computed
      two scalars: C_diag_size, C_offd_size,
      and two arrays: C_diag_i, C_offd_i
      ( P_marker is also computed, but only used internally )
   */

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/
 
   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;
   C_diag_data = hypre_CTAlloc(double, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   { 
   	C_offd_data = hypre_CTAlloc(double, C_offd_size);
   	C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   } 


   /*-----------------------------------------------------------------------
    *  Second Pass: Fill in C_diag_data and C_diag_j.
    *  Second Pass: Fill in C_offd_data and C_offd_j.
    *-----------------------------------------------------------------------*/

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }
   
   /*-----------------------------------------------------------------------
    *  Loop over interior c-points.
    *-----------------------------------------------------------------------*/
    
   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {

      if ( CF_marker[i1] < 0 )  /* i1 is a fine row */
         /* ... This and the coarse row code are the only parts between first pass
            and near the end where
            hypre_ParMatmul_FC is different from the regular hypre_ParMatmul */
      {

         /*--------------------------------------------------------------------
          *  Create diagonal entry, C_{i1,i1} 
          *--------------------------------------------------------------------*/

         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_offd.
          *-----------------------------------------------------------------*/
         
	 if (num_cols_offd_A)
	 {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];
               if( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] )
               {  /* interpolate only like "functions" */
                  a_entry = A_offd_data[jj2];
            
                  /*-----------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext.
                   *-----------------------------------------------------------*/

                  for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+P_ext_offd_j[jj3];
                     a_b_product = a_entry * P_ext_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                        C_offd_data[P_marker[i3]] += a_b_product;
                  }
                  for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
                  {
                     i3 = P_ext_diag_j[jj3];
                     a_b_product = a_entry * P_ext_diag_data[jj3];

                     if (P_marker[i3] < jj_row_begin_diag)
                     {
                        P_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                        C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               else
               {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                     different "functions".  As we haven't created an entry for
                     C(i1,i2), nothing needs to be done. */
               }

            }
         }

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];
            if( dof_func==NULL || dof_func[i1] == dof_func[i2] )
            {  /* interpolate only like "functions" */
               a_entry = A_diag_data[jj2];
            
               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_diag.
                *-----------------------------------------------------------*/

               for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
               {
                  i3 = P_diag_j[jj3];
                  a_b_product = a_entry * P_diag_data[jj3];
                  
                  /*--------------------------------------------------------
                   *  Check P_marker to see that C_{i1,i3} has not already
                   *  been accounted for. If it has not, create a new entry.
                   *  If it has, add new contribution.
                   *--------------------------------------------------------*/

                  if (P_marker[i3] < jj_row_begin_diag)
                  {
                     P_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               if (num_cols_offd_P)
	       {
                  for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]];
                     a_b_product = a_entry * P_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/

                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
            else
            {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                  different "functions".  As we haven't created an entry for
                  C(i1,i2), nothing needs to be done. */
            }
         }
      }
      else  /* i1 is a coarse row.*/
         /* Copy P coarse-row values to C.  This is useful if C is meant to
            become a replacement for P */
      {
	 if (num_cols_offd_P)
	 {
            for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++)
            {
               C_offd_j[jj_count_offd] = P_offd_j[jj_count_offd];
               C_offd_data[jj_count_offd] = P_offd_data[jj_count_offd];
               ++jj_count_offd;
            }
         }
         for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++)
         {
            C_diag_j[jj_count_diag] = P_diag_j[jj2];
            C_diag_data[jj_count_diag] = P_diag_data[jj2];
            ++jj_count_diag;
         }
      }
   }

   C = hypre_ParCSRMatrixCreate(
      comm, n_rows_A_global, n_cols_P_global,
      row_starts_A, col_starts_P, num_cols_offd_C, C_diag_size, C_offd_size );

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C,0);
   hypre_ParCSRMatrixSetColStartsOwner(C,0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data; 
   hypre_CSRMatrixI(C_diag) = C_diag_i; 
   hypre_CSRMatrixJ(C_diag) = C_diag_j; 

   C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrixI(C_offd) = C_offd_i; 
   hypre_ParCSRMatrixOffd(C) = C_offd;

   if (num_cols_offd_C)
   {
      hypre_CSRMatrixData(C_offd) = C_offd_data; 
      hypre_CSRMatrixJ(C_offd) = C_offd_j; 
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

   }

   /*-----------------------------------------------------------------------
    *  Free various arrays
    *-----------------------------------------------------------------------*/

   hypre_TFree(P_marker);   
   hypre_TFree(P_ext_diag_i);
   if (P_ext_diag_size)
   {
      hypre_TFree(P_ext_diag_j);
      hypre_TFree(P_ext_diag_data);
   }
   hypre_TFree(P_ext_offd_i);
   if (P_ext_offd_size)
   {
      hypre_TFree(P_ext_offd_j);
      hypre_TFree(P_ext_offd_data);
   }
   if (num_cols_offd_P) hypre_TFree(map_P_to_C);

   return C;
   
}
Example #4
0
/**************************************************************
 *
 *      CGC Coarsening routine
 *
 **************************************************************/
HYPRE_Int
hypre_BoomerAMGCoarsenCGCb( hypre_ParCSRMatrix    *S,
                            hypre_ParCSRMatrix    *A,
                            HYPRE_Int                    measure_type,
                            HYPRE_Int                    coarsen_type,
			    HYPRE_Int                    cgc_its,
                            HYPRE_Int                    debug_flag,
                            HYPRE_Int                  **CF_marker_ptr)
{
   MPI_Comm         comm          = hypre_ParCSRMatrixComm(S);
   hypre_ParCSRCommPkg   *comm_pkg      = hypre_ParCSRMatrixCommPkg(S);
   hypre_ParCSRCommHandle *comm_handle;
   hypre_CSRMatrix *S_diag        = hypre_ParCSRMatrixDiag(S);
   hypre_CSRMatrix *S_offd        = hypre_ParCSRMatrixOffd(S);
   HYPRE_Int             *S_i           = hypre_CSRMatrixI(S_diag);
   HYPRE_Int             *S_j           = hypre_CSRMatrixJ(S_diag);
   HYPRE_Int             *S_offd_i      = hypre_CSRMatrixI(S_offd);
   HYPRE_Int             *S_offd_j;
   HYPRE_Int              num_variables = hypre_CSRMatrixNumRows(S_diag);
   HYPRE_Int              num_cols_offd = hypre_CSRMatrixNumCols(S_offd);
                  
   hypre_CSRMatrix *S_ext;
   HYPRE_Int             *S_ext_i;
   HYPRE_Int             *S_ext_j;
                 
   hypre_CSRMatrix *ST;
   HYPRE_Int             *ST_i;
   HYPRE_Int             *ST_j;
                 
   HYPRE_Int             *CF_marker;
   HYPRE_Int             *CF_marker_offd=NULL;
   HYPRE_Int              ci_tilde = -1;
   HYPRE_Int              ci_tilde_mark = -1;

   HYPRE_Int             *measure_array;
   HYPRE_Int             *measure_array_master;
   HYPRE_Int             *graph_array;
   HYPRE_Int 	           *int_buf_data=NULL;
   /*HYPRE_Int 	           *ci_array=NULL;*/

   HYPRE_Int              i, j, k, l, jS;
   HYPRE_Int		    ji, jj, index;
   HYPRE_Int		    set_empty = 1;
   HYPRE_Int		    C_i_nonempty = 0;
   HYPRE_Int		    num_nonzeros;
   HYPRE_Int		    num_procs, my_id;
   HYPRE_Int		    num_sends = 0;
   HYPRE_Int		    first_col, start;
   HYPRE_Int		    col_0, col_n;

   hypre_LinkList   LoL_head;
   hypre_LinkList   LoL_tail;

   HYPRE_Int             *lists, *where;
   HYPRE_Int              measure, new_meas;
   HYPRE_Int              num_left;
   HYPRE_Int              nabor, nabor_two;

   HYPRE_Int              ierr = 0;
   HYPRE_Int              use_commpkg_A = 0;
   HYPRE_Real	    wall_time;

   HYPRE_Int              measure_max; /* BM Aug 30, 2006: maximal measure, needed for CGC */

   if (coarsen_type < 0) coarsen_type = -coarsen_type;

   /*-------------------------------------------------------
    * Initialize the C/F marker, LoL_head, LoL_tail  arrays
    *-------------------------------------------------------*/

   LoL_head = NULL;
   LoL_tail = NULL;
   lists = hypre_CTAlloc(HYPRE_Int, num_variables);
   where = hypre_CTAlloc(HYPRE_Int, num_variables);

#if 0 /* debugging */
   char  filename[256];
   FILE *fp;
   HYPRE_Int   iter = 0;
#endif

   /*--------------------------------------------------------------
    * Compute a CSR strength matrix, S.
    *
    * For now, the "strength" of dependence/influence is defined in
    * the following way: i depends on j if
    *     aij > hypre_max (k != i) aik,    aii < 0
    * or
    *     aij < hypre_min (k != i) aik,    aii >= 0
    * Then S_ij = 1, else S_ij = 0.
    *
    * NOTE: the entries are negative initially, corresponding
    * to "unaccounted-for" dependence.
    *----------------------------------------------------------------*/

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   if (!comm_pkg)
   {
        use_commpkg_A = 1;
        comm_pkg = hypre_ParCSRMatrixCommPkg(A); 
   }

   if (!comm_pkg)
   {
        hypre_MatvecCommPkgCreate(A);
        comm_pkg = hypre_ParCSRMatrixCommPkg(A); 
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);

   if (num_cols_offd) S_offd_j = hypre_CSRMatrixJ(S_offd);

   jS = S_i[num_variables];

   ST = hypre_CSRMatrixCreate(num_variables, num_variables, jS);
   ST_i = hypre_CTAlloc(HYPRE_Int,num_variables+1);
   ST_j = hypre_CTAlloc(HYPRE_Int,jS);
   hypre_CSRMatrixI(ST) = ST_i;
   hypre_CSRMatrixJ(ST) = ST_j;

   /*----------------------------------------------------------
    * generate transpose of S, ST
    *----------------------------------------------------------*/

   for (i=0; i <= num_variables; i++)
      ST_i[i] = 0;
 
   for (i=0; i < jS; i++)
   {
	 ST_i[S_j[i]+1]++;
   }
   for (i=0; i < num_variables; i++)
   {
      ST_i[i+1] += ST_i[i];
   }
   for (i=0; i < num_variables; i++)
   {
      for (j=S_i[i]; j < S_i[i+1]; j++)
      {
	 index = S_j[j];
       	 ST_j[ST_i[index]] = i;
       	 ST_i[index]++;
      }
   }      
   for (i = num_variables; i > 0; i--)
   {
      ST_i[i] = ST_i[i-1];
   }
   ST_i[0] = 0;

   /*----------------------------------------------------------
    * Compute the measures
    *
    * The measures are given by the row sums of ST.
    * Hence, measure_array[i] is the number of influences
    * of variable i.
    * correct actual measures through adding influences from
    * neighbor processors
    *----------------------------------------------------------*/

   measure_array_master = hypre_CTAlloc(HYPRE_Int, num_variables);
   measure_array = hypre_CTAlloc(HYPRE_Int, num_variables);

   for (i = 0; i < num_variables; i++)
   {
      measure_array_master[i] = ST_i[i+1]-ST_i[i];
   }

   if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) 
		&& num_procs > 1)
   {
      if (use_commpkg_A)
         S_ext      = hypre_ParCSRMatrixExtractBExt(S,A,0);
      else
         S_ext      = hypre_ParCSRMatrixExtractBExt(S,S,0);
      S_ext_i    = hypre_CSRMatrixI(S_ext);
      S_ext_j    = hypre_CSRMatrixJ(S_ext);
      num_nonzeros = S_ext_i[num_cols_offd];
      first_col = hypre_ParCSRMatrixFirstColDiag(S);
      col_0 = first_col-1;
      col_n = col_0+num_variables;
      if (measure_type)
      {
	 for (i=0; i < num_nonzeros; i++)
         {
	    index = S_ext_j[i] - first_col;
	    if (index > -1 && index < num_variables)
		measure_array_master[index]++;
         } 
      } 
   }

   /*---------------------------------------------------
    * Loop until all points are either fine or coarse.
    *---------------------------------------------------*/

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

   /* first coarsening phase */

  /*************************************************************
   *
   *   Initialize the lists
   *
   *************************************************************/

   CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables);
   
   num_left = 0;
   for (j = 0; j < num_variables; j++)
   {
     if ((S_i[j+1]-S_i[j])== 0 &&
	 (S_offd_i[j+1]-S_offd_i[j]) == 0)
     {
       CF_marker[j] = SF_PT;
       measure_array_master[j] = 0;
     }
     else
     {
       CF_marker[j] = UNDECIDED; 
       /*        num_left++; */ /* BM May 19, 2006: see below*/
     }
   } 

   if (coarsen_type==22) {
     /* BM Sep 8, 2006: allow_emptygrids only if the following holds for all points j: 
        (a) the point has no strong connections at all, OR
        (b) the point has a strong connection across a boundary */
     for (j=0;j<num_variables;j++)
       if (S_i[j+1]>S_i[j] && S_offd_i[j+1] == S_offd_i[j]) {coarsen_type=21;break;}
   }

   for (l = 1; l <= cgc_its; l++)
   {
     LoL_head = NULL;
     LoL_tail = NULL;
     num_left = 0;  /* compute num_left before each RS coarsening loop */
     memcpy (measure_array,measure_array_master,num_variables*sizeof(HYPRE_Int));
     memset (lists,0,sizeof(HYPRE_Int)*num_variables);
     memset (where,0,sizeof(HYPRE_Int)*num_variables);

     for (j = 0; j < num_variables; j++) 
     {    
       measure = measure_array[j];
       if (CF_marker[j] != SF_PT)  
       {
	 if (measure > 0)
	 {
	   enter_on_lists(&LoL_head, &LoL_tail, measure, j, lists, where);
	   num_left++; /* compute num_left before each RS coarsening loop */
	 }
	 else if (CF_marker[j] == 0) /* increase weight of strongly coupled neighbors only 
					if j is not conained in a previously constructed coarse grid.
					Reason: these neighbors should start with the same initial weight
					in each CGC iteration.                    BM Aug 30, 2006 */
					
	 {
	   if (measure < 0) hypre_printf("negative measure!\n");
/* 	   CF_marker[j] = f_pnt; */
	   for (k = S_i[j]; k < S_i[j+1]; k++)
	   {
	     nabor = S_j[k];
/*  	     if (CF_marker[nabor] != SF_PT)  */
 	     if (CF_marker[nabor] == 0)  /* BM Aug 30, 2006: don't alter weights of points 
 					    contained in other candidate coarse grids */ 
	     {
	       if (nabor < j)
	       {
		 new_meas = measure_array[nabor];
		 if (new_meas > 0)
		   remove_point(&LoL_head, &LoL_tail, new_meas, 
				nabor, lists, where);
		 else num_left++; /* BM Aug 29, 2006 */
		 
		 new_meas = ++(measure_array[nabor]);
		 enter_on_lists(&LoL_head, &LoL_tail, new_meas,
				nabor, lists, where);
	       }
	       else
	       {
		 new_meas = ++(measure_array[nabor]);
	       }
	     }
	   }
	   /* 	   --num_left; */ /* BM May 19, 2006 */
         }
       }
     }

     /* BM Aug 30, 2006: first iteration: determine maximal weight */
     if (num_left && l==1) measure_max = measure_array[LoL_head->head]; 
     /* BM Aug 30, 2006: break CGC iteration if no suitable 
	starting point is available any more */
     if (!num_left || measure_array[LoL_head->head]<measure_max) {
       while (LoL_head) {
	 hypre_LinkList list_ptr = LoL_head;
	 LoL_head = LoL_head->next_elt;
	 dispose_elt (list_ptr);
       }
       break;
     }

   /****************************************************************
    *
    *  Main loop of Ruge-Stueben first coloring pass.
    *
    *  WHILE there are still points to classify DO:
    *        1) find first point, i,  on list with max_measure
    *           make i a C-point, remove it from the lists
    *        2) For each point, j,  in S_i^T,
    *           a) Set j to be an F-point
    *           b) For each point, k, in S_j
    *                  move k to the list in LoL with measure one
    *                  greater than it occupies (creating new LoL
    *                  entry if necessary)
    *        3) For each point, j,  in S_i,
    *                  move j to the list in LoL with measure one
    *                  smaller than it occupies (creating new LoL
    *                  entry if necessary)
    *
    ****************************************************************/

     while (num_left > 0)
     {
       index = LoL_head -> head;
/*         index = LoL_head -> tail;  */

/*        CF_marker[index] = C_PT; */
       CF_marker[index] = l;  /* BM Aug 18, 2006 */
       measure = measure_array[index];
       measure_array[index] = 0;
       measure_array_master[index] = 0; /* BM May 19: for CGC */
       --num_left;
      
       remove_point(&LoL_head, &LoL_tail, measure, index, lists, where);
  
       for (j = ST_i[index]; j < ST_i[index+1]; j++)
       {
         nabor = ST_j[j];
/*          if (CF_marker[nabor] == UNDECIDED) */
	 if (measure_array[nabor]>0) /* undecided point */
	 {
	   /* 	   CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */
	   measure = measure_array[nabor];
	   measure_array[nabor]=0;

	   remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where);
	   --num_left;
	   
	   for (k = S_i[nabor]; k < S_i[nabor+1]; k++)
           {
	     nabor_two = S_j[k];
/* 	     if (CF_marker[nabor_two] == UNDECIDED) */
	     if (measure_array[nabor_two]>0) /* undecided point */
             {
	       measure = measure_array[nabor_two];
	       remove_point(&LoL_head, &LoL_tail, measure, 
			    nabor_two, lists, where);
	       
	       new_meas = ++(measure_array[nabor_two]);
	       
	       enter_on_lists(&LoL_head, &LoL_tail, new_meas,
			      nabor_two, lists, where);
	     }
	   }
         }
       }
       for (j = S_i[index]; j < S_i[index+1]; j++)
       {
         nabor = S_j[j];
/*          if (CF_marker[nabor] == UNDECIDED) */
	 if (measure_array[nabor]>0) /* undecided point */
         {
	   measure = measure_array[nabor];
	   
	   remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where);
	   
	   measure_array[nabor] = --measure;
	   
	   if (measure > 0)
	     enter_on_lists(&LoL_head, &LoL_tail, measure, nabor, 
			    lists, where);
	   else
	   {
/* 	     CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */
	     --num_left;

	     for (k = S_i[nabor]; k < S_i[nabor+1]; k++)
             {
	       nabor_two = S_j[k];
/* 	       if (CF_marker[nabor_two] == UNDECIDED) */
	       if (measure_array[nabor_two]>0)
               {
		 new_meas = measure_array[nabor_two];
		 remove_point(&LoL_head, &LoL_tail, new_meas, 
			      nabor_two, lists, where);
		 
		 new_meas = ++(measure_array[nabor_two]);
                 
		 enter_on_lists(&LoL_head, &LoL_tail, new_meas,
				nabor_two, lists, where);
	       }
	     }
	   }
         }
       }
     }
     if (LoL_head) hypre_printf ("Linked list not empty! head: %d\n",LoL_head->head);
   }
   l--; /* BM Aug 15, 2006 */

   hypre_TFree(measure_array);
   hypre_TFree(measure_array_master);
   hypre_CSRMatrixDestroy(ST);

   if (debug_flag == 3)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d    Coarsen 1st pass = %f\n",
                     my_id, wall_time); 
   }

   hypre_TFree(lists);
   hypre_TFree(where);
   
     if (num_procs>1) {
       if (debug_flag == 3)  wall_time = time_getWallclockSeconds();
       hypre_BoomerAMGCoarsenCGC (S,l,coarsen_type,CF_marker);
       
       if (debug_flag == 3)  { 
	 wall_time = time_getWallclockSeconds() - wall_time; 
	 hypre_printf("Proc = %d    Coarsen CGC = %f\n", 
		my_id, wall_time);  
       } 
     }
     else {
       /* the first candiate coarse grid is the coarse grid */ 
       for (j=0;j<num_variables;j++) {
	 if (CF_marker[j]==1) CF_marker[j]=C_PT;
	 else CF_marker[j]=F_PT;
       }
     }

   /* BM May 19, 2006:
      Set all undecided points to be fine grid points. */
   for (j=0;j<num_variables;j++)
     if (!CF_marker[j]) CF_marker[j]=F_PT;

   /*---------------------------------------------------
    * Initialize the graph array
    *---------------------------------------------------*/

   graph_array = hypre_CTAlloc(HYPRE_Int, num_variables);

   for (i = 0; i < num_variables; i++)
   {
      graph_array[i] = -1;
   }

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

      for (i=0; i < num_variables; i++)
      {
	 if (ci_tilde_mark != i) ci_tilde = -1;
         if (CF_marker[i] == -1)
         {
   	    for (ji = S_i[i]; ji < S_i[i+1]; ji++)
   	    {
   	       j = S_j[ji];
   	       if (CF_marker[j] > 0)
   	          graph_array[j] = i;
    	    }
   	    for (ji = S_i[i]; ji < S_i[i+1]; ji++)
   	    {
   	       j = S_j[ji];
   	       if (CF_marker[j] == -1)
   	       {
   	          set_empty = 1;
   	          for (jj = S_i[j]; jj < S_i[j+1]; jj++)
   	          {
   		     index = S_j[jj];
   		     if (graph_array[index] == i)
   		     {
   		        set_empty = 0;
   		        break;
   		     }
   	          }
   	          if (set_empty)
   	          {
   		     if (C_i_nonempty)
   		     {
   		        CF_marker[i] = 1;
   		        if (ci_tilde > -1)
   		        {
   			   CF_marker[ci_tilde] = -1;
   		           ci_tilde = -1;
   		        }
   	    		C_i_nonempty = 0;
   		        break;
   		     }
   		     else
   		     {
   		        ci_tilde = j;
   		        ci_tilde_mark = i;
   		        CF_marker[j] = 1;
   		        C_i_nonempty = 1;
		        i--;
		        break;
		     }
	          }
	       }
	    }
	 }
      }

   if (debug_flag == 3 && coarsen_type != 2)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d    Coarsen 2nd pass = %f\n",
                       my_id, wall_time); 
   }

   /* third pass, check boundary fine points for coarse neighbors */

      /*------------------------------------------------
       * Exchange boundary data for CF_marker
       *------------------------------------------------*/

      if (debug_flag == 3) wall_time = time_getWallclockSeconds();
    
      CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
                                                   num_sends));
    
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
        start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
        for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
                int_buf_data[index++]
                 = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
    
      if (num_procs > 1)
      {
      comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
        CF_marker_offd);
    
      hypre_ParCSRCommHandleDestroy(comm_handle);
      }
      AmgCGCBoundaryFix (S,CF_marker,CF_marker_offd);
      if (debug_flag == 3)
      {
         wall_time = time_getWallclockSeconds() - wall_time;
         hypre_printf("Proc = %d    CGC boundary fix = %f\n",
                       my_id, wall_time); 
      }

   /*---------------------------------------------------
    * Clean up and return
    *---------------------------------------------------*/

   /*if (coarsen_type != 1)
   { */  
     if (CF_marker_offd) hypre_TFree(CF_marker_offd);  /* BM Aug 21, 2006 */
     if (int_buf_data) hypre_TFree(int_buf_data); /* BM Aug 21, 2006 */
     /*if (ci_array) hypre_TFree(ci_array);*/ /* BM Aug 21, 2006 */
   /*} */   
   hypre_TFree(graph_array);
   if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) 
		&& num_procs > 1)
   	hypre_CSRMatrixDestroy(S_ext); 
   
   *CF_marker_ptr   = CF_marker;
   
   return (ierr);
}
Example #5
0
void hypre_ParCSRMatrixSplit(hypre_ParCSRMatrix *A,
                             HYPRE_Int nr, HYPRE_Int nc,
                             hypre_ParCSRMatrix **blocks,
                             int interleaved_rows, int interleaved_cols)
{
    HYPRE_Int i, j, k;

    MPI_Comm comm = hypre_ParCSRMatrixComm(A);

    hypre_CSRMatrix *Adiag = hypre_ParCSRMatrixDiag(A);
    hypre_CSRMatrix *Aoffd = hypre_ParCSRMatrixOffd(A);

    HYPRE_Int global_rows = hypre_ParCSRMatrixGlobalNumRows(A);
    HYPRE_Int global_cols = hypre_ParCSRMatrixGlobalNumCols(A);

    HYPRE_Int local_rows = hypre_CSRMatrixNumRows(Adiag);
    HYPRE_Int local_cols = hypre_CSRMatrixNumCols(Adiag);
    HYPRE_Int offd_cols = hypre_CSRMatrixNumCols(Aoffd);

    hypre_assert(local_rows % nr == 0 && local_cols % nc == 0);
    hypre_assert(global_rows % nr == 0 && global_cols % nc == 0);

    HYPRE_Int block_rows = local_rows / nr;
    HYPRE_Int block_cols = local_cols / nc;
    HYPRE_Int num_blocks = nr * nc;

    /* mark local rows and columns with block number */
    HYPRE_Int *row_block_num = hypre_TAlloc(HYPRE_Int, local_rows);
    HYPRE_Int *col_block_num = hypre_TAlloc(HYPRE_Int, local_cols);

    for (i = 0; i < local_rows; i++)
    {
        row_block_num[i] = interleaved_rows ? (i % nr) : (i / block_rows);
    }
    for (i = 0; i < local_cols; i++)
    {
        col_block_num[i] = interleaved_cols ? (i % nc) : (i / block_cols);
    }

    /* determine the block numbers for offd columns */
    HYPRE_Int* offd_col_block_num = hypre_TAlloc(HYPRE_Int, offd_cols);
    hypre_ParCSRCommHandle *comm_handle;
    HYPRE_Int *int_buf_data;
    {
        /* make sure A has a communication package */
        hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
        if (!comm_pkg)
        {
            hypre_MatvecCommPkgCreate(A);
            comm_pkg = hypre_ParCSRMatrixCommPkg(A);
        }

        /* calculate the final global column numbers for each block */
        HYPRE_Int *count = hypre_CTAlloc(HYPRE_Int, nc);
        HYPRE_Int *block_global_col = hypre_TAlloc(HYPRE_Int, local_cols);
        HYPRE_Int first_col = hypre_ParCSRMatrixFirstColDiag(A) / nc;
        for (i = 0; i < local_cols; i++)
        {
            block_global_col[i] = first_col + count[col_block_num[i]]++;
        }
        hypre_TFree(count);

        /* use a Matvec communication pattern to determine offd_col_block_num */
        HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
        int_buf_data = hypre_CTAlloc(HYPRE_Int,
                                     hypre_ParCSRCommPkgSendMapStart(comm_pkg,
                                             num_sends));
        HYPRE_Int start, index = 0;
        for (i = 0; i < num_sends; i++)
        {
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            {
                k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j);
                int_buf_data[index++] = col_block_num[k] + nc*block_global_col[k];
            }
        }
        hypre_TFree(block_global_col);

        comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
                      offd_col_block_num);
    }

    /* create the block matrices */
    HYPRE_Int num_procs = 1;
    if (!hypre_ParCSRMatrixAssumedPartition(A))
    {
        hypre_MPI_Comm_size(comm, &num_procs);
    }

    HYPRE_Int *row_starts = hypre_TAlloc(HYPRE_Int, num_procs+1);
    HYPRE_Int *col_starts = hypre_TAlloc(HYPRE_Int, num_procs+1);
    for (i = 0; i <= num_procs; i++)
    {
        row_starts[i] = hypre_ParCSRMatrixRowStarts(A)[i] / nr;
        col_starts[i] = hypre_ParCSRMatrixColStarts(A)[i] / nc;
    }

    for (i = 0; i < num_blocks; i++)
    {
        blocks[i] = hypre_ParCSRMatrixCreate(comm,
                                             global_rows/nr, global_cols/nc,
                                             row_starts, col_starts, 0, 0, 0);
    }

    /* split diag part */
    hypre_CSRMatrix **csr_blocks = hypre_TAlloc(hypre_CSRMatrix*, nr*nc);
    hypre_CSRMatrixSplit(Adiag, nr, nc, row_block_num, col_block_num,
                         csr_blocks);

    for (i = 0; i < num_blocks; i++)
    {
        hypre_TFree(hypre_ParCSRMatrixDiag(blocks[i]));
        hypre_ParCSRMatrixDiag(blocks[i]) = csr_blocks[i];
    }

    /* finish communication, receive offd_col_block_num */
    hypre_ParCSRCommHandleDestroy(comm_handle);
    hypre_TFree(int_buf_data);

    /* decode global offd column numbers */
    HYPRE_Int* offd_global_col = hypre_TAlloc(HYPRE_Int, offd_cols);
    for (i = 0; i < offd_cols; i++)
    {
        offd_global_col[i] = offd_col_block_num[i] / nc;
        offd_col_block_num[i] %= nc;
    }

    /* split offd part */
    hypre_CSRMatrixSplit(Aoffd, nr, nc, row_block_num, offd_col_block_num,
                         csr_blocks);

    for (i = 0; i < num_blocks; i++)
    {
        hypre_TFree(hypre_ParCSRMatrixOffd(blocks[i]));
        hypre_ParCSRMatrixOffd(blocks[i]) = csr_blocks[i];
    }

    hypre_TFree(csr_blocks);
    hypre_TFree(col_block_num);
    hypre_TFree(row_block_num);

    /* update block col-maps */
    for (int bi = 0; bi < nr; bi++)
    {
        for (int bj = 0; bj < nc; bj++)
        {
            hypre_ParCSRMatrix *block = blocks[bi*nc + bj];
            hypre_CSRMatrix *block_offd = hypre_ParCSRMatrixOffd(block);
            HYPRE_Int block_offd_cols = hypre_CSRMatrixNumCols(block_offd);

            HYPRE_Int *block_col_map = hypre_TAlloc(HYPRE_Int, block_offd_cols);
            for (i = j = 0; i < offd_cols; i++)
            {
                HYPRE_Int bn = offd_col_block_num[i];
                if (bn == bj) {
                    block_col_map[j++] = offd_global_col[i];
                }
            }
            hypre_assert(j == block_offd_cols);

            hypre_ParCSRMatrixColMapOffd(block) = block_col_map;
        }
    }

    hypre_TFree(offd_global_col);
    hypre_TFree(offd_col_block_num);

    /* finish the new matrices, make them own all the stuff */
    for (i = 0; i < num_blocks; i++)
    {
        hypre_ParCSRMatrixSetNumNonzeros(blocks[i]);
        hypre_MatvecCommPkgCreate(blocks[i]);

        hypre_ParCSRMatrixOwnsData(blocks[i]) = 1;

        /* only the first block will own the row/col_starts */
        hypre_ParCSRMatrixOwnsRowStarts(blocks[i]) = !i;
        hypre_ParCSRMatrixOwnsColStarts(blocks[i]) = !i;
    }
}
Example #6
0
/******************************************************************************
 *
 * hypre_IJMatrixInsertRowPETSc
 *
 * inserts a row into an IJMatrix, 
 * if diag_i and offd_i are known, those values are inserted directly
 * into the ParCSRMatrix,
 * if they are not known, an auxiliary structure, AuxParCSRMatrix is used
 *
 *****************************************************************************/
HYPRE_Int
hypre_IJMatrixInsertRowPETSc(hypre_IJMatrix *matrix,
		              HYPRE_Int	      n,
		              HYPRE_Int	      row,
		              HYPRE_Int	     *indices,
		              double         *coeffs)
{
   HYPRE_Int ierr = 0;
   hypre_ParCSRMatrix *par_matrix;
   hypre_AuxParCSRMatrix *aux_matrix;
   HYPRE_Int *row_starts;
   HYPRE_Int *col_starts;
   MPI_Comm comm = hypre_IJMatrixContext(matrix);
   HYPRE_Int num_procs, my_id;
   HYPRE_Int row_local;
   HYPRE_Int col_0, col_n;
   HYPRE_Int i, temp;
   HYPRE_Int *indx_diag, *indx_offd;
   HYPRE_Int **aux_j;
   HYPRE_Int *local_j;
   double **aux_data;
   double *local_data;
   HYPRE_Int diag_space, offd_space;
   HYPRE_Int *row_length, *row_space;
   HYPRE_Int need_aux;
   HYPRE_Int indx_0;
   HYPRE_Int diag_indx, offd_indx;

   hypre_CSRMatrix *diag;
   HYPRE_Int *diag_i;
   HYPRE_Int *diag_j;
   double *diag_data;

   hypre_CSRMatrix *offd;
   HYPRE_Int *offd_i;
   HYPRE_Int *offd_j;
   double *offd_data;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);
   par_matrix = hypre_IJMatrixLocalStorage( matrix );
   aux_matrix = hypre_IJMatrixTranslator(matrix);
   row_space = hypre_AuxParCSRMatrixRowSpace(aux_matrix);
   row_length = hypre_AuxParCSRMatrixRowLength(aux_matrix);
   col_n = hypre_ParCSRMatrixFirstColDiag(par_matrix);
   row_starts = hypre_ParCSRMatrixRowStarts(par_matrix);
   col_starts = hypre_ParCSRMatrixColStarts(par_matrix);
   col_0 = col_starts[my_id];
   col_n = col_starts[my_id+1]-1;
   need_aux = hypre_AuxParCSRMatrixNeedAux(aux_matrix);

   if (row >= row_starts[my_id] && row < row_starts[my_id+1])
   {
      if (need_aux)
      {
         row_local = row - row_starts[my_id]; /* compute local row number */
         aux_j = hypre_AuxParCSRMatrixAuxJ(aux_matrix);
         aux_data = hypre_AuxParCSRMatrixAuxData(aux_matrix);
         local_j = aux_j[row_local];
         local_data = aux_data[row_local];
            
         row_length[row_local] = n;
         
         if ( row_space[row_local] < n)
         {
   	    hypre_TFree(local_j);
   	    hypre_TFree(local_data);
   	    local_j = hypre_CTAlloc(HYPRE_Int,n);
   	    local_data = hypre_CTAlloc(double,n);
            row_space[row_local] = n;
         }
         
         for (i=0; i < n; i++)
         {
   	    local_j[i] = indices[i];
   	    local_data[i] = coeffs[i];
         }
   
   /* make sure first element is diagonal element, if not, find it and
      exchange it with first element */
         if (local_j[0] != row_local)
         {
            for (i=1; i < n; i++)
     	    {
   	       if (local_j[i] == row_local)
   	       {
   		   local_j[i] = local_j[0];
   		   local_j[0] = row_local;
   		   temp = local_data[0];
   		   local_data[0] = local_data[i];
   		   local_data[i] = temp;
   		   break;
   	       }
     	    }
         }
      /* sort data according to column indices, except for first element */

         qsort1(local_j,local_data,1,n-1);
 
      }
      else /* insert immediately into data into ParCSRMatrix structure */
      {
Example #7
0
hypre_CSRMatrix *
hypre_ParCSRMatrixExtractAExt( hypre_ParCSRMatrix *A,
                               HYPRE_Int data,
                               HYPRE_Int ** pA_ext_row_map )
{
    /* Note that A's role as the first factor in A*A^T is used only
       through ...CommPkgT(A), which basically says which rows of A
       (columns of A^T) are needed.  In all the other places where A
       serves as an input, it is through its role as A^T, the matrix
       whose data needs to be passed between processors. */
    MPI_Comm comm = hypre_ParCSRMatrixComm(A);
    HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);
    HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);
    HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);

    hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkgT(A);
    /* ... CommPkgT(A) should identify all rows of A^T needed for A*A^T (that is
     * generally a bigger set than ...CommPkg(A), the rows of B needed for A*B) */
    HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
    HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
    HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
    HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
    HYPRE_Int *send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);

    hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A);

    HYPRE_Int *diag_i = hypre_CSRMatrixI(diag);
    HYPRE_Int *diag_j = hypre_CSRMatrixJ(diag);
    HYPRE_Complex *diag_data = hypre_CSRMatrixData(diag);

    hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A);

    HYPRE_Int *offd_i = hypre_CSRMatrixI(offd);
    HYPRE_Int *offd_j = hypre_CSRMatrixJ(offd);
    HYPRE_Complex *offd_data = hypre_CSRMatrixData(offd);

    HYPRE_Int num_cols_A, num_nonzeros;
    HYPRE_Int num_rows_A_ext;

    hypre_CSRMatrix *A_ext;

    HYPRE_Int *A_ext_i;
    HYPRE_Int *A_ext_j;
    HYPRE_Complex *A_ext_data;

    num_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);
    num_rows_A_ext = recv_vec_starts[num_recvs];

    hypre_ParCSRMatrixExtractBExt_Arrays
    ( &A_ext_i, &A_ext_j, &A_ext_data, pA_ext_row_map,
      &num_nonzeros,
      data, 1, comm, comm_pkg,
      num_cols_A, num_recvs, num_sends,
      first_col_diag, first_row_index,
      recv_vec_starts, send_map_starts, send_map_elmts,
      diag_i, diag_j, offd_i, offd_j, col_map_offd,
      diag_data, offd_data
    );

    A_ext = hypre_CSRMatrixCreate(num_rows_A_ext,num_cols_A,num_nonzeros);
    hypre_CSRMatrixI(A_ext) = A_ext_i;
    hypre_CSRMatrixJ(A_ext) = A_ext_j;
    if (data) hypre_CSRMatrixData(A_ext) = A_ext_data;

    return A_ext;
}
Example #8
0
hypre_ParCSRMatrix *hypre_ParCSRAAt( hypre_ParCSRMatrix  *A )
{
    MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);

    HYPRE_Complex   *A_diag_data = hypre_CSRMatrixData(A_diag);
    HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
    HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);

    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);

    HYPRE_Complex   *A_offd_data = hypre_CSRMatrixData(A_offd);
    HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
    HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);
    HYPRE_Int       *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
    HYPRE_Int       *A_ext_row_map;

    HYPRE_Int       *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
    HYPRE_Int        num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
    HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

    hypre_ParCSRMatrix *C;
    HYPRE_Int          *col_map_offd_C;

    hypre_CSRMatrix *C_diag;

    HYPRE_Complex   *C_diag_data;
    HYPRE_Int       *C_diag_i;
    HYPRE_Int       *C_diag_j;

    hypre_CSRMatrix *C_offd;

    HYPRE_Complex   *C_offd_data=NULL;
    HYPRE_Int       *C_offd_i=NULL;
    HYPRE_Int       *C_offd_j=NULL;
    HYPRE_Int       *new_C_offd_j;

    HYPRE_Int        C_diag_size;
    HYPRE_Int        C_offd_size;
    HYPRE_Int        last_col_diag_C;
    HYPRE_Int        num_cols_offd_C;

    hypre_CSRMatrix *A_ext;

    HYPRE_Complex   *A_ext_data;
    HYPRE_Int       *A_ext_i;
    HYPRE_Int       *A_ext_j;
    HYPRE_Int        num_rows_A_ext=0;

    HYPRE_Int        first_row_index_A = hypre_ParCSRMatrixFirstRowIndex(A);
    HYPRE_Int        first_col_diag_A = hypre_ParCSRMatrixFirstColDiag(A);
    HYPRE_Int       *B_marker;

    HYPRE_Int        i;
    HYPRE_Int        i1, i2, i3;
    HYPRE_Int        jj2, jj3;

    HYPRE_Int        jj_count_diag, jj_count_offd;
    HYPRE_Int        jj_row_begin_diag, jj_row_begin_offd;
    HYPRE_Int        start_indexing = 0; /* start indexing for C_data at 0 */
    HYPRE_Int        count;
    HYPRE_Int        n_rows_A, n_cols_A;

    HYPRE_Complex    a_entry;
    HYPRE_Complex    a_b_product;

    HYPRE_Complex    zero = 0.0;

    n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
    n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);

    if (n_cols_A != n_rows_A)
    {
        hypre_printf(" Error! Incompatible matrix dimensions!\n");
        return NULL;
    }
    /*-----------------------------------------------------------------------
     *  Extract A_ext, i.e. portion of A that is stored on neighbor procs
     *  and needed locally for A^T in the matrix matrix product A*A^T
     *-----------------------------------------------------------------------*/

    if (num_rows_diag_A != n_rows_A)
    {
        /*---------------------------------------------------------------------
         * If there exists no CommPkg for A, a CommPkg is generated using
         * equally load balanced partitionings
         *--------------------------------------------------------------------*/
        if (!hypre_ParCSRMatrixCommPkg(A))
        {
            hypre_MatTCommPkgCreate(A);
        }

        A_ext = hypre_ParCSRMatrixExtractAExt( A, 1, &A_ext_row_map );
        A_ext_data = hypre_CSRMatrixData(A_ext);
        A_ext_i    = hypre_CSRMatrixI(A_ext);
        A_ext_j    = hypre_CSRMatrixJ(A_ext);
        num_rows_A_ext = hypre_CSRMatrixNumRows(A_ext);
    }
    /*-----------------------------------------------------------------------
     *  Allocate marker array.
     *-----------------------------------------------------------------------*/

    B_marker = hypre_CTAlloc(HYPRE_Int, num_rows_diag_A+num_rows_A_ext );

    /*-----------------------------------------------------------------------
     *  Initialize some stuff.
     *-----------------------------------------------------------------------*/

    for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 )
    {
        B_marker[i1] = -1;
    }


    hypre_ParAat_RowSizes(
        &C_diag_i, &C_offd_i, B_marker,
        A_diag_i, A_diag_j,
        A_offd_i, A_offd_j, A_col_map_offd,
        A_ext_i, A_ext_j, A_ext_row_map,
        &C_diag_size, &C_offd_size,
        num_rows_diag_A, num_cols_offd_A,
        num_rows_A_ext,
        first_col_diag_A, first_row_index_A
    );

#if 0
    /* debugging output: */
    hypre_printf("A_ext_row_map (%i):",num_rows_A_ext);
    for ( i1=0; i1<num_rows_A_ext; ++i1 ) hypre_printf(" %i",A_ext_row_map[i1] );
    hypre_printf("\nC_diag_i (%i):",C_diag_size);
    for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_diag_i[i1] );
    hypre_printf("\nC_offd_i (%i):",C_offd_size);
    for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_offd_i[i1] );
    hypre_printf("\n");
#endif

    /*-----------------------------------------------------------------------
     *  Allocate C_diag_data and C_diag_j arrays.
     *  Allocate C_offd_data and C_offd_j arrays.
     *-----------------------------------------------------------------------*/

    last_col_diag_C = first_row_index_A + num_rows_diag_A - 1;
    C_diag_data = hypre_CTAlloc(HYPRE_Complex, C_diag_size);
    C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
    if (C_offd_size)
    {
        C_offd_data = hypre_CTAlloc(HYPRE_Complex, C_offd_size);
        C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
    }

    /*-----------------------------------------------------------------------
     *  Second Pass: Fill in C_diag_data and C_diag_j.
     *  Second Pass: Fill in C_offd_data and C_offd_j.
     *-----------------------------------------------------------------------*/

    /*-----------------------------------------------------------------------
     *  Initialize some stuff.
     *-----------------------------------------------------------------------*/

    jj_count_diag = start_indexing;
    jj_count_offd = start_indexing;
    for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 )
    {
        B_marker[i1] = -1;
    }

    /*-----------------------------------------------------------------------
     *  Loop over interior c-points.
     *-----------------------------------------------------------------------*/

    for (i1 = 0; i1 < num_rows_diag_A; i1++)
    {

        /*--------------------------------------------------------------------
         *  Create diagonal entry, C_{i1,i1}
         *--------------------------------------------------------------------*/

        B_marker[i1] = jj_count_diag;
        jj_row_begin_diag = jj_count_diag;
        jj_row_begin_offd = jj_count_offd;
        C_diag_data[jj_count_diag] = zero;
        C_diag_j[jj_count_diag] = i1;
        jj_count_diag++;

        /*-----------------------------------------------------------------
         *  Loop over entries in row i1 of A_offd.
         *-----------------------------------------------------------------*/

        /* There are 3 CSRMatrix or CSRBooleanMatrix objects here:
           ext*ext, ext*diag, and ext*offd belong to another processor.
           diag*offd and offd*diag don't count - never share a column by definition.
           So we have to do 4 cases:
           diag*ext, offd*ext, diag*diag, and offd*offd.
        */

        for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
        {
            i2 = A_diag_j[jj2];
            a_entry = A_diag_data[jj2];

            /* diag*ext */
            /*-----------------------------------------------------------
             *  Loop over entries (columns) i3 in row i2 of (A_ext)^T
             *  That is, rows i3 having a column i2 of A_ext.
             *  For now, for each row i3 of A_ext we crudely check _all_
             *  columns to see whether one matches i2.
             *  For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2)
             *  to C(i1,i3) .  This contributes to both the diag and offd
             *  blocks of C.
             *-----------------------------------------------------------*/

            for ( i3=0; i3<num_rows_A_ext; i3++ ) {
                for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) {
                    if ( A_ext_j[jj3]==i2+first_col_diag_A ) {
                        /* row i3, column i2 of A_ext; or,
                           row i2, column i3 of (A_ext)^T */

                        a_b_product = a_entry * A_ext_data[jj3];

                        /*--------------------------------------------------------
                         *  Check B_marker to see that C_{i1,i3} has not already
                         *  been accounted for. If it has not, create a new entry.
                         *  If it has, add new contribution.
                         *--------------------------------------------------------*/

                        if ( A_ext_row_map[i3] < first_row_index_A ||
                                A_ext_row_map[i3] > last_col_diag_C ) { /* offd */
                            if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) {
                                B_marker[i3+num_rows_diag_A] = jj_count_offd;
                                C_offd_data[jj_count_offd] = a_b_product;
                                C_offd_j[jj_count_offd] = i3;
                                jj_count_offd++;
                            }
                            else
                                C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                        }
                        else {                                              /* diag */
                            if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) {
                                B_marker[i3+num_rows_diag_A] = jj_count_diag;
                                C_diag_data[jj_count_diag] = a_b_product;
                                C_diag_j[jj_count_diag] = i3-first_col_diag_A;
                                jj_count_diag++;
                            }
                            else
                                C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                        }
                    }
                }
            }
        }

        if (num_cols_offd_A)
        {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
                i2 = A_offd_j[jj2];
                a_entry = A_offd_data[jj2];

                /* offd * ext */
                /*-----------------------------------------------------------
                 *  Loop over entries (columns) i3 in row i2 of (A_ext)^T
                 *  That is, rows i3 having a column i2 of A_ext.
                 *  For now, for each row i3 of A_ext we crudely check _all_
                 *  columns to see whether one matches i2.
                 *  For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2)
                 *  to C(i1,i3) .  This contributes to both the diag and offd
                 *  blocks of C.
                 *-----------------------------------------------------------*/

                for ( i3=0; i3<num_rows_A_ext; i3++ ) {
                    for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) {
                        if ( A_ext_j[jj3]==A_col_map_offd[i2] ) {
                            /* row i3, column i2 of A_ext; or,
                               row i2, column i3 of (A_ext)^T */

                            a_b_product = a_entry * A_ext_data[jj3];

                            /*--------------------------------------------------------
                             *  Check B_marker to see that C_{i1,i3} has not already
                             *  been accounted for. If it has not, create a new entry.
                             *  If it has, add new contribution.
                             *--------------------------------------------------------*/

                            if ( A_ext_row_map[i3] < first_row_index_A ||
                                    A_ext_row_map[i3] > last_col_diag_C ) { /* offd */
                                if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) {
                                    B_marker[i3+num_rows_diag_A] = jj_count_offd;
                                    C_offd_data[jj_count_offd] = a_b_product;
                                    C_offd_j[jj_count_offd] = i3;
                                    jj_count_offd++;
                                }
                                else
                                    C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                            }
                            else {                                              /* diag */
                                if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) {
                                    B_marker[i3+num_rows_diag_A] = jj_count_diag;
                                    C_diag_data[jj_count_diag] = a_b_product;
                                    C_diag_j[jj_count_diag] = i3-first_row_index_A;
                                    jj_count_diag++;
                                }
                                else
                                    C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                            }
                        }
                    }
                }
            }
        }

        /* diag * diag */
        /*-----------------------------------------------------------------
         *  Loop over entries (columns) i2 in row i1 of A_diag.
         *  For each such column we will find the contributions of the
         *  corresponding rows i2 of A^T to C=A*A^T .  Now we only look
         *  at the local part of A^T - with columns (rows of A) living
         *  on this processor.
         *-----------------------------------------------------------------*/

        for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
        {
            i2 = A_diag_j[jj2];
            a_entry = A_diag_data[jj2];

            /*-----------------------------------------------------------
             *  Loop over entries (columns) i3 in row i2 of A^T
             *  That is, rows i3 having a column i2 of A (local part).
             *  For now, for each row i3 of A we crudely check _all_
             *  columns to see whether one matches i2.
             *  This i3-loop is for the diagonal block of A.
             *  It contributes to the diagonal block of C.
             *  For each entry (i2,i3) of A^T,  add A(i1,i2)*A(i3,i2)
             *  to C(i1,i3)
             *-----------------------------------------------------------*/
            for ( i3=0; i3<num_rows_diag_A; i3++ ) {
                for ( jj3=A_diag_i[i3]; jj3<A_diag_i[i3+1]; jj3++ ) {
                    if ( A_diag_j[jj3]==i2 ) {
                        /* row i3, column i2 of A; or,
                           row i2, column i3 of A^T */
                        a_b_product = a_entry * A_diag_data[jj3];

                        /*--------------------------------------------------------
                         *  Check B_marker to see that C_{i1,i3} has not already
                         *  been accounted for. If it has not, mark it and increment
                         *  counter.
                         *--------------------------------------------------------*/
                        if (B_marker[i3] < jj_row_begin_diag)
                        {
                            B_marker[i3] = jj_count_diag;
                            C_diag_data[jj_count_diag] = a_b_product;
                            C_diag_j[jj_count_diag] = i3;
                            jj_count_diag++;
                        }
                        else
                        {
                            C_diag_data[B_marker[i3]] += a_b_product;
                        }
                    }
                }
            } /* end of i3 loop */
        } /* end of third i2 loop */

        /* offd * offd */
        /*-----------------------------------------------------------
         *  Loop over offd columns i2 of A in A*A^T.  Then
         *  loop over offd entries (columns) i3 in row i2 of A^T
         *  That is, rows i3 having a column i2 of A (local part).
         *  For now, for each row i3 of A we crudely check _all_
         *  columns to see whether one matches i2.
         *  This i3-loop is for the off-diagonal block of A.
         *  It contributes to the diag block of C.
         *  For each entry (i2,i3) of A^T, add A*A^T to C
         *-----------------------------------------------------------*/
        if (num_cols_offd_A) {

            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
                i2 = A_offd_j[jj2];
                a_entry = A_offd_data[jj2];

                for ( i3=0; i3<num_rows_diag_A; i3++ ) {
                    /* ... note that num_rows_diag_A == num_rows_offd_A */
                    for ( jj3=A_offd_i[i3]; jj3<A_offd_i[i3+1]; jj3++ ) {
                        if ( A_offd_j[jj3]==i2 ) {
                            /* row i3, column i2 of A; or,
                               row i2, column i3 of A^T */
                            a_b_product = a_entry * A_offd_data[jj3];

                            /*--------------------------------------------------------
                             *  Check B_marker to see that C_{i1,i3} has not already
                             *  been accounted for. If it has not, create a new entry.
                             *  If it has, add new contribution
                             *--------------------------------------------------------*/

                            if (B_marker[i3] < jj_row_begin_diag)
                            {
                                B_marker[i3] = jj_count_diag;
                                C_diag_data[jj_count_diag] = a_b_product;
                                C_diag_j[jj_count_diag] = i3;
                                jj_count_diag++;
                            }
                            else
                            {
                                C_diag_data[B_marker[i3]] += a_b_product;
                            }
                        }
                    }
                }  /* end of last i3 loop */
            }     /* end of if (num_cols_offd_A) */

        }        /* end of fourth and last i2 loop */
#if 0          /* debugging printout */
        hypre_printf("end of i1 loop: i1=%i jj_count_diag=%i\n", i1, jj_count_diag );
        hypre_printf("  C_diag_j=");
        for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%i ",C_diag_j[jj3]);
        hypre_printf("  C_diag_data=");
        for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%f ",C_diag_data[jj3]);
        hypre_printf("\n");
        hypre_printf("  C_offd_j=");
        for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%i ",C_offd_j[jj3]);
        hypre_printf("  C_offd_data=");
        for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%f ",C_offd_data[jj3]);
        hypre_printf("\n");
        hypre_printf( "  B_marker =" );
        for ( it=0; it<num_rows_diag_A+num_rows_A_ext; ++it )
            hypre_printf(" %i", B_marker[it] );
        hypre_printf( "\n" );
#endif
    }           /* end of i1 loop */

    /*-----------------------------------------------------------------------
     *  Delete 0-columns in C_offd, i.e. generate col_map_offd and reset
     *  C_offd_j.  Note that (with the indexing we have coming into this
     *  block) col_map_offd_C[i3]==A_ext_row_map[i3].
     *-----------------------------------------------------------------------*/

    for ( i=0; i<num_rows_diag_A+num_rows_A_ext; ++i )
        B_marker[i] = -1;
    for ( i=0; i<C_offd_size; i++ )
        B_marker[ C_offd_j[i] ] = -2;

    count = 0;
    for (i=0; i < num_rows_diag_A + num_rows_A_ext; i++) {
        if (B_marker[i] == -2) {
            B_marker[i] = count;
            count++;
        }
    }
    num_cols_offd_C = count;

    if (num_cols_offd_C) {
        col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);
        new_C_offd_j = hypre_CTAlloc(HYPRE_Int,C_offd_size);
        /* ... a bit big, but num_cols_offd_C is too small.  It might be worth
           computing the correct size, which is sum( no. columns in row i, over all rows i )
        */

        for (i=0; i < C_offd_size; i++) {
            new_C_offd_j[i] = B_marker[C_offd_j[i]];
            col_map_offd_C[ new_C_offd_j[i] ] = A_ext_row_map[ C_offd_j[i] ];
        }

        hypre_TFree(C_offd_j);
        C_offd_j = new_C_offd_j;

    }

    /*----------------------------------------------------------------
     * Create C
     *----------------------------------------------------------------*/

    C = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_rows_A, row_starts_A,
                                 row_starts_A, num_cols_offd_C,
                                 C_diag_size, C_offd_size);

    /* Note that C does not own the partitionings */
    hypre_ParCSRMatrixSetRowStartsOwner(C,0);
    hypre_ParCSRMatrixSetColStartsOwner(C,0);

    C_diag = hypre_ParCSRMatrixDiag(C);
    hypre_CSRMatrixData(C_diag) = C_diag_data;
    hypre_CSRMatrixI(C_diag) = C_diag_i;
    hypre_CSRMatrixJ(C_diag) = C_diag_j;

    if (num_cols_offd_C)
    {
        C_offd = hypre_ParCSRMatrixOffd(C);
        hypre_CSRMatrixData(C_offd) = C_offd_data;
        hypre_CSRMatrixI(C_offd) = C_offd_i;
        hypre_CSRMatrixJ(C_offd) = C_offd_j;
        hypre_ParCSRMatrixOffd(C) = C_offd;
        hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

    }
    else
        hypre_TFree(C_offd_i);

    /*-----------------------------------------------------------------------
     *  Free B_ext and marker array.
     *-----------------------------------------------------------------------*/

    if (num_cols_offd_A)
    {
        hypre_CSRMatrixDestroy(A_ext);
        A_ext = NULL;
    }
    hypre_TFree(B_marker);
    if ( num_rows_diag_A != n_rows_A )
        hypre_TFree(A_ext_row_map);

    return C;

}
HYPRE_Int
hypre_GenerateSendMapAndCommPkg(MPI_Comm comm, HYPRE_Int num_sends, HYPRE_Int num_recvs,
				HYPRE_Int *recv_procs, HYPRE_Int *send_procs,
				HYPRE_Int *recv_vec_starts, hypre_ParCSRMatrix *A)
{
   HYPRE_Int *send_map_starts;
   HYPRE_Int *send_map_elmts;
   HYPRE_Int i, j;
   HYPRE_Int num_requests = num_sends+num_recvs;
   hypre_MPI_Request *requests;
   hypre_MPI_Status *status;
   HYPRE_Int vec_len, vec_start;
   hypre_ParCSRCommPkg *comm_pkg;
   HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);

/*--------------------------------------------------------------------------
 * generate send_map_starts and send_map_elmts
 *--------------------------------------------------------------------------*/
   requests = hypre_CTAlloc(hypre_MPI_Request,num_requests);
   status = hypre_CTAlloc(hypre_MPI_Status,num_requests);
   send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1);
   j = 0;
   for (i=0; i < num_sends; i++)
	hypre_MPI_Irecv(&send_map_starts[i+1],1,HYPRE_MPI_INT,send_procs[i],0,comm,
		&requests[j++]);

   for (i=0; i < num_recvs; i++)
   {
	vec_len = recv_vec_starts[i+1] - recv_vec_starts[i];
	hypre_MPI_Isend(&vec_len,1,HYPRE_MPI_INT, recv_procs[i],0,comm,&requests[j++]);
   }
   
   hypre_MPI_Waitall(j,requests,status);
 
   send_map_starts[0] = 0; 
   for (i=0; i < num_sends; i++)
	send_map_starts[i+1] += send_map_starts[i]; 

   send_map_elmts = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]);

   j = 0;
   for (i=0; i < num_sends; i++)
   {
	vec_start = send_map_starts[i];
	vec_len = send_map_starts[i+1]-vec_start;
	hypre_MPI_Irecv(&send_map_elmts[vec_start],vec_len,HYPRE_MPI_INT,
		send_procs[i],0,comm,&requests[j++]);
   }

   for (i=0; i < num_recvs; i++)
   {
	vec_start = recv_vec_starts[i];
	vec_len = recv_vec_starts[i+1] - vec_start;
	hypre_MPI_Isend(&col_map_offd[vec_start],vec_len,HYPRE_MPI_INT, 
		recv_procs[i],0,comm,&requests[j++]);
   }
   
   hypre_MPI_Waitall(j,requests,status);

   for (i=0; i < send_map_starts[num_sends]; i++)
	send_map_elmts[i] -= first_col_diag; 
	
   comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1);

   hypre_ParCSRCommPkgComm(comm_pkg) = comm;
   hypre_ParCSRCommPkgNumSends(comm_pkg) = num_sends;
   hypre_ParCSRCommPkgNumRecvs(comm_pkg) = num_recvs;
   hypre_ParCSRCommPkgSendProcs(comm_pkg) = send_procs;
   hypre_ParCSRCommPkgRecvProcs(comm_pkg) = recv_procs;
   hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) = recv_vec_starts;
   hypre_ParCSRCommPkgSendMapStarts(comm_pkg) = send_map_starts;
   hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = send_map_elmts;

   hypre_TFree(status);
   hypre_TFree(requests);

   hypre_ParCSRMatrixCommPkg(A) = comm_pkg;
   return 0;
}
HYPRE_Int
hypre_GetCommPkgRTFromCommPkgA( hypre_ParCSRMatrix *RT,
			       	hypre_ParCSRMatrix *A,
			       	HYPRE_Int *fine_to_coarse_offd)
{
   MPI_Comm comm = hypre_ParCSRMatrixComm(RT);
   hypre_ParCSRCommPkg *comm_pkg_A = hypre_ParCSRMatrixCommPkg(A);
   HYPRE_Int num_recvs_A = hypre_ParCSRCommPkgNumRecvs(comm_pkg_A);
   HYPRE_Int *recv_procs_A = hypre_ParCSRCommPkgRecvProcs(comm_pkg_A);
   HYPRE_Int *recv_vec_starts_A = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_A);
   HYPRE_Int num_sends_A = hypre_ParCSRCommPkgNumSends(comm_pkg_A);
   HYPRE_Int *send_procs_A = hypre_ParCSRCommPkgSendProcs(comm_pkg_A);

   hypre_ParCSRCommPkg *comm_pkg;
   HYPRE_Int num_recvs_RT;
   HYPRE_Int *recv_procs_RT;   
   HYPRE_Int *recv_vec_starts_RT;   
   HYPRE_Int num_sends_RT;
   HYPRE_Int *send_procs_RT;   
   HYPRE_Int *send_map_starts_RT;   
   HYPRE_Int *send_map_elmts_RT;   

   HYPRE_Int *col_map_offd_RT = hypre_ParCSRMatrixColMapOffd(RT);
   HYPRE_Int num_cols_offd_RT = hypre_CSRMatrixNumCols( hypre_ParCSRMatrixOffd(RT));
   HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(RT);

   HYPRE_Int i, j;
   HYPRE_Int vec_len, vec_start;
   HYPRE_Int num_procs, my_id;
   HYPRE_Int ierr = 0;
   HYPRE_Int num_requests;
   HYPRE_Int offd_col, proc_num;
 
   HYPRE_Int *proc_mark;
   HYPRE_Int *change_array;

   hypre_MPI_Request *requests;
   hypre_MPI_Status *status;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

/*--------------------------------------------------------------------------
 * determine num_recvs, recv_procs and recv_vec_starts for RT
 *--------------------------------------------------------------------------*/

   proc_mark = hypre_CTAlloc(HYPRE_Int, num_recvs_A);

   for (i=0; i < num_recvs_A; i++)
                proc_mark[i] = 0;

   proc_num = 0;
   num_recvs_RT = 0;
   if (num_cols_offd_RT)
   {
      for (i=0; i < num_recvs_A; i++)
      {
         for (j=recv_vec_starts_A[i]; j<recv_vec_starts_A[i+1]; j++)
         {
            offd_col = col_map_offd_RT[proc_num];
            if (offd_col == j)
            {
                proc_mark[i]++;
                proc_num++;
                if (proc_num == num_cols_offd_RT) break;
            }
         }
         if (proc_mark[i]) num_recvs_RT++;
         if (proc_num == num_cols_offd_RT) break;
      }
   }

   for (i=0; i < num_cols_offd_RT; i++)
      col_map_offd_RT[i] = fine_to_coarse_offd[col_map_offd_RT[i]];
 
   recv_procs_RT = hypre_CTAlloc(HYPRE_Int,num_recvs_RT);
   recv_vec_starts_RT = hypre_CTAlloc(HYPRE_Int, num_recvs_RT+1);
 
   j = 0;
   recv_vec_starts_RT[0] = 0;
   for (i=0; i < num_recvs_A; i++)
        if (proc_mark[i])
        {
                recv_procs_RT[j] = recv_procs_A[i];
                recv_vec_starts_RT[j+1] = recv_vec_starts_RT[j]+proc_mark[i];
                j++;
        }

/*--------------------------------------------------------------------------
 * send num_changes to recv_procs_A and receive change_array from send_procs_A
 *--------------------------------------------------------------------------*/

   num_requests = num_recvs_A+num_sends_A;
   requests = hypre_CTAlloc(hypre_MPI_Request, num_requests);
   status = hypre_CTAlloc(hypre_MPI_Status, num_requests);

   change_array = hypre_CTAlloc(HYPRE_Int, num_sends_A);

   j = 0;
   for (i=0; i < num_sends_A; i++)
	hypre_MPI_Irecv(&change_array[i],1,HYPRE_MPI_INT,send_procs_A[i],0,comm,
		&requests[j++]);

   for (i=0; i < num_recvs_A; i++)
	hypre_MPI_Isend(&proc_mark[i],1,HYPRE_MPI_INT,recv_procs_A[i],0,comm,
		&requests[j++]);
   
   hypre_MPI_Waitall(num_requests,requests,status);

   hypre_TFree(proc_mark);
   
/*--------------------------------------------------------------------------
 * if change_array[i] is 0 , omit send_procs_A[i] in send_procs_RT
 *--------------------------------------------------------------------------*/

   num_sends_RT = 0;
   for (i=0; i < num_sends_A; i++)
      if (change_array[i]) 
      {
	 num_sends_RT++;
      }

   send_procs_RT = hypre_CTAlloc(HYPRE_Int, num_sends_RT);
   send_map_starts_RT = hypre_CTAlloc(HYPRE_Int, num_sends_RT+1);

   j = 0;
   send_map_starts_RT[0] = 0;
   for (i=0; i < num_sends_A; i++)
      if (change_array[i]) 
      {
	 send_procs_RT[j] = send_procs_A[i];
	 send_map_starts_RT[j+1] = send_map_starts_RT[j]+change_array[i];
	 j++;
      }

/*--------------------------------------------------------------------------
 * generate send_map_elmts
 *--------------------------------------------------------------------------*/

   send_map_elmts_RT = hypre_CTAlloc(HYPRE_Int,send_map_starts_RT[num_sends_RT]);

   j = 0;
   for (i=0; i < num_sends_RT; i++)
   {
	vec_start = send_map_starts_RT[i];
	vec_len = send_map_starts_RT[i+1]-vec_start;
	hypre_MPI_Irecv(&send_map_elmts_RT[vec_start],vec_len,HYPRE_MPI_INT,
		send_procs_RT[i],0,comm,&requests[j++]);
   }

   for (i=0; i < num_recvs_RT; i++)
   {
	vec_start = recv_vec_starts_RT[i];
	vec_len = recv_vec_starts_RT[i+1] - vec_start;
	hypre_MPI_Isend(&col_map_offd_RT[vec_start],vec_len,HYPRE_MPI_INT, 
		recv_procs_RT[i],0,comm,&requests[j++]);
   }
   
   hypre_MPI_Waitall(j,requests,status);

   for (i=0; i < send_map_starts_RT[num_sends_RT]; i++)
	send_map_elmts_RT[i] -= first_col_diag; 
	
   comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1);

   hypre_ParCSRCommPkgComm(comm_pkg) = comm;
   hypre_ParCSRCommPkgNumSends(comm_pkg) = num_sends_RT;
   hypre_ParCSRCommPkgNumRecvs(comm_pkg) = num_recvs_RT;
   hypre_ParCSRCommPkgSendProcs(comm_pkg) = send_procs_RT;
   hypre_ParCSRCommPkgRecvProcs(comm_pkg) = recv_procs_RT;
   hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) = recv_vec_starts_RT;
   hypre_ParCSRCommPkgSendMapStarts(comm_pkg) = send_map_starts_RT;
   hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = send_map_elmts_RT;

   hypre_TFree(status);
   hypre_TFree(requests);

   hypre_ParCSRMatrixCommPkg(RT) = comm_pkg;
   hypre_TFree(change_array);

   return ierr;
}
Example #11
0
HYPRE_Int
hypre_ParCSRMatrixToParChordMatrix(
   hypre_ParCSRMatrix *Ap,
   MPI_Comm comm,
   hypre_ParChordMatrix **pAc )
{
   HYPRE_Int * row_starts = hypre_ParCSRMatrixRowStarts(Ap);
   HYPRE_Int * col_starts = hypre_ParCSRMatrixColStarts(Ap);
   hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(Ap);
   hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(Ap);
   HYPRE_Int * offd_j = hypre_CSRMatrixJ(offd);
   HYPRE_Int * diag_j = hypre_CSRMatrixJ(diag);
   HYPRE_Int * col_map_offd = hypre_ParCSRMatrixColMapOffd(Ap);
   HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(Ap);

   hypre_ParChordMatrix * Ac;
   hypre_NumbersNode * rdofs, * offd_cols_me;
   hypre_NumbersNode ** offd_cols;
   HYPRE_Int ** offd_col_array;
   HYPRE_Int * len_offd_col_array, * offd_col_array_me;
   HYPRE_Int len_offd_col_array_me;
   HYPRE_Int num_idofs, num_rdofs, j_local, j_global, row_global;
   HYPRE_Int i, j, jj, p, pto, q, qto, my_id, my_q, row, ireq;
   HYPRE_Int num_inprocessors, num_toprocessors, num_procs, len_num_rdofs_toprocessor;
   HYPRE_Int *inprocessor, *toprocessor, *pcr, *qcr, *num_inchords, *chord, *chordto;
   HYPRE_Int *inproc, *toproc, *num_rdofs_toprocessor;
   HYPRE_Int **inchord_idof, **inchord_rdof, **rdof_toprocessor;
   double **inchord_data;
   double data;
   HYPRE_Int *first_index_idof, *first_index_rdof;
   hypre_MPI_Request * request;
   hypre_MPI_Status * status;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);
   num_idofs = row_starts[my_id+1] - row_starts[my_id];
   num_rdofs = col_starts[my_id+1] - col_starts[my_id];

   hypre_ParChordMatrixCreate( pAc, comm, num_idofs, num_rdofs );
   Ac = *pAc;

/* The following block sets Inprocessor:
   On each proc. my_id, we find the columns in the offd and diag blocks
   (global no.s).  The columns are rdofs (contrary to what I wrote in
   ChordMatrix.txt).
   For each such col/rdof r, find the proc. p which owns row/idof r.
   We set the temporary array pcr[p]=1 for such p.
   An MPI all-to-all will exchange such arrays so my_id's array qcr has
   qcr[q]=1 iff, on proc. q, pcr[my_id]=1.  In other words, qcr[q]=1 if
   my_id owns a row/idof i which is the same as a col/rdof owned by q.
   Collect all such q's into in the array Inprocessor.
   While constructing pcr, we also construct pj such that for any index jj
   into offd_j,offd_data, pj[jj] is the processor which owns jj as a row (idof)
   (the number jj is local to this processor).
   */
   pcr = hypre_CTAlloc( HYPRE_Int, num_procs );
   qcr = hypre_CTAlloc( HYPRE_Int, num_procs );
   for ( p=0; p<num_procs; ++p ) pcr[p]=0;
   for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(offd); ++jj ) {
      j_local = offd_j[jj];
      j_global =  col_map_offd[j_local];
      for ( p=0; p<num_procs; ++p ) {
         if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) {
            pcr[p]=1;
/* not used yet...            pj[jj] = p;*/
            break;
         }
      }
   }
   /*   jjd = jj; ...not used yet */

   /* pcr[my_id] = 1; ...for square matrices (with nonzero diag block)
      this one line  would do the job of the following nested loop.
      For non-square matrices, the data distribution is too arbitrary. */
   for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(diag); ++jj ) {
      j_local = diag_j[jj];
      j_global = j_local + first_col_diag;
      for ( p=0; p<num_procs; ++p ) {
         if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) {
            pcr[p]=1;
/* not used yet...            pj[jj+jjd] = p;*/
            break;
         }
      }
   }


   /* Now pcr[p]=1 iff my_id owns a col/rdof r which proc. p owns as a row/idof */
   hypre_MPI_Alltoall( pcr, 1, HYPRE_MPI_INT, qcr, 1, HYPRE_MPI_INT, comm );
   /* Now qcr[q]==1 if my_id owns a row/idof i which is a col/rdof of proc. q
    The array of such q's is the array Inprocessor. */

   num_inprocessors = 0;
   for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) ++num_inprocessors;
   inprocessor = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   p = 0;
   for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) inprocessor[ p++ ] = q;
   num_toprocessors = 0;
   for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) ++num_toprocessors;
   toprocessor = hypre_CTAlloc( HYPRE_Int, num_toprocessors );
   p = 0;
   for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) toprocessor[ p++ ] = q;

   hypre_ParChordMatrixNumInprocessors(Ac) = num_inprocessors;
   hypre_ParChordMatrixInprocessor(Ac) = inprocessor;
   hypre_ParChordMatrixNumToprocessors(Ac) = num_toprocessors;
   hypre_ParChordMatrixToprocessor(Ac) = toprocessor;
   hypre_TFree( qcr );

   /* FirstIndexIdof[p] is the global index of proc. p's row 0 */
   /* FirstIndexRdof[p] is the global index of proc. p's col 0 */
   /* Fir FirstIndexIdof, we copy the array row_starts rather than its pointers,
      because the chord matrix will think it's free to delete FirstIndexIdof */
   /* col_starts[p] contains the global index of the first column
      in the diag block of p.  But for first_index_rdof we want the global
      index of the first column in p (whether that's in the diag or offd block).
      So it's more involved than row/idof: we also check the offd block, and
      have to do a gather to get first_index_rdof for every proc. on every proc. */
   first_index_idof = hypre_CTAlloc( HYPRE_Int, num_procs+1 );
   first_index_rdof = hypre_CTAlloc( HYPRE_Int, num_procs+1 );
   for ( p=0; p<=num_procs; ++p ) {
      first_index_idof[p] = row_starts[p];
      first_index_rdof[p] = col_starts[p];
   };
   if ( hypre_CSRMatrixNumRows(offd) > 0  && hypre_CSRMatrixNumCols(offd) > 0 )
      first_index_rdof[my_id] =
         col_starts[my_id]<col_map_offd[0] ? col_starts[my_id] : col_map_offd[0];
   hypre_MPI_Allgather( &first_index_rdof[my_id], 1, HYPRE_MPI_INT,
                  first_index_rdof, 1, HYPRE_MPI_INT, comm );

   /* Set num_inchords: num_inchords[p] is no. chords on my_id connected to p.
      Set each chord (idof,jdof,data).
      We go through each matrix element in the diag block, find what processor
      owns its column no. as a row, then update num_inchords[p], inchord_idof[p],
      inchord_rdof[p], inchord_data[p].
   */

   inchord_idof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   inchord_rdof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   inchord_data = hypre_CTAlloc( double*, num_inprocessors );
   num_inchords = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   chord = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   chordto = hypre_CTAlloc( HYPRE_Int, num_toprocessors );
   num_rdofs = 0;
   for ( q=0; q<num_inprocessors; ++q ) num_inchords[q] = 0;
   my_q = -1;
   for ( q=0; q<num_inprocessors; ++q ) if ( inprocessor[q]==my_id ) my_q = q;
   hypre_assert( my_q>=0 );

   /* diag block: first count chords (from my_id to my_id),
      then set them from diag block's CSR data structure */
   num_idofs = hypre_CSRMatrixNumRows(diag);
   rdofs = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) {
      for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(diag)[i];
         hypre_NumbersEnter( rdofs, j_local );
         ++num_inchords[my_q];
      }
   };
   num_rdofs = hypre_NumbersNEntered( rdofs );
   inchord_idof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] );
   inchord_rdof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] );
   inchord_data[my_q] = hypre_CTAlloc( double, num_inchords[my_q] );
   chord[0] = 0;
   for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) {
      for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(diag)[i];
         data = hypre_CSRMatrixData(diag)[i];
         inchord_idof[my_q][chord[0]] = row;
         /* Here We need to convert from j_local - a column local to
            the diag of this proc., to a j which is local only to this
            processor - a column (rdof) numbering scheme to be shared by the
            diag and offd blocks...  */
         j_global = j_local + hypre_ParCSRMatrixColStarts(Ap)[my_q];
         j = j_global - first_index_rdof[my_q];
         inchord_rdof[my_q][chord[0]] = j;
         inchord_data[my_q][chord[0]] = data;
         hypre_assert( chord[0] < num_inchords[my_q] );
         ++chord[0];
      }
   };
   hypre_NumbersDeleteNode(rdofs);


   /* offd block: */

   /* >>> offd_cols_me duplicates rdofs */
   offd_cols_me = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) {
      for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(offd)[i];
         j_global =  col_map_offd[j_local];
         hypre_NumbersEnter( offd_cols_me, j_global );
      }
   }
   offd_col_array = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   len_offd_col_array = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   offd_col_array_me = hypre_NumbersArray( offd_cols_me );
   len_offd_col_array_me = hypre_NumbersNEntered( offd_cols_me );
   request = hypre_CTAlloc(hypre_MPI_Request, 2*num_procs );
   ireq = 0;
   for ( q=0; q<num_inprocessors; ++q )
      hypre_MPI_Irecv( &len_offd_col_array[q], 1, HYPRE_MPI_INT,
                 inprocessor[q], 0, comm, &request[ireq++] );
   for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) {
      hypre_MPI_Isend( &len_offd_col_array_me, 1, HYPRE_MPI_INT, p, 0, comm, &request[ireq++] );
   }
   status = hypre_CTAlloc(hypre_MPI_Status, ireq );
   hypre_MPI_Waitall( ireq, request, status );
   hypre_TFree(status);
   ireq = 0;
   for ( q=0; q<num_inprocessors; ++q )
      offd_col_array[q] = hypre_CTAlloc( HYPRE_Int, len_offd_col_array[q] );
   for ( q=0; q<num_inprocessors; ++q )
      hypre_MPI_Irecv( offd_col_array[q], len_offd_col_array[q], HYPRE_MPI_INT,
                 inprocessor[q], 0, comm, &request[ireq++] );
   for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) {
      hypre_MPI_Isend( offd_col_array_me, len_offd_col_array_me,
                 HYPRE_MPI_INT, p, 0, comm, &request[ireq++] );
   }
   status = hypre_CTAlloc(hypre_MPI_Status, ireq );
   hypre_MPI_Waitall( ireq, request, status );
   hypre_TFree(request);
   hypre_TFree(status);
   offd_cols = hypre_CTAlloc( hypre_NumbersNode *, num_inprocessors );
   for ( q=0; q<num_inprocessors; ++q ) {
      offd_cols[q] = hypre_NumbersNewNode();
      for ( i=0; i<len_offd_col_array[q]; ++i )
         hypre_NumbersEnter( offd_cols[q], offd_col_array[q][i] );
   }

   len_num_rdofs_toprocessor = 1 + hypre_CSRMatrixI(offd)
      [hypre_CSRMatrixNumRows(offd)];
   inproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   toproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   num_rdofs_toprocessor = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   for ( qto=0; qto<len_num_rdofs_toprocessor; ++qto ) {
      inproc[qto] = -1;
      toproc[qto] = -1;
      num_rdofs_toprocessor[qto] = 0;
   };
   rdofs = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) {
      for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(offd)[i];
         j_global =  col_map_offd[j_local];
         hypre_NumbersEnter( rdofs, j_local );
         
         /* TO DO: find faster ways to do the two processor lookups below.*/
         /* Find a processor p (local index q) from the inprocessor list,
            which owns the column(rdof) whichis the same as this processor's
            row(idof) row. Update num_inchords for p.
            Save q as inproc[i] for quick recall later.  It represents
            an inprocessor (not unique) connected to a chord i.
         */
         inproc[i] = -1;
         for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) {
            p = inprocessor[q];
            if ( hypre_NumbersQuery( offd_cols[q],
                                     row+hypre_ParCSRMatrixFirstRowIndex(Ap) )
                 == 1 ) {
               /* row is one of the offd columns of p */
               ++num_inchords[q];
               inproc[i] = q;
               break;
            }
         }
         if ( inproc[i]<0 ) {
            /* For square matrices, we would have found the column in some
               other processor's offd.  But for non-square matrices it could
               exist only in some other processor's diag...*/
            /* Note that all data in a diag block is stored.  We don't check
               whether the value of a data entry is zero. */
            for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) {
               p = inprocessor[q];
               row_global = row+hypre_ParCSRMatrixFirstRowIndex(Ap);
               if ( row_global>=col_starts[p] &&
                    row_global< col_starts[p+1] ) {
                  /* row is one of the diag columns of p */
                  ++num_inchords[q];
                  inproc[i] = q;
                  break;
               }
            }  
         }
         hypre_assert( inproc[i]>=0 );

         /* Find the processor pto (local index qto) from the toprocessor list,
            which owns the row(idof) which is the  same as this processor's
            column(rdof) j_global. Update num_rdofs_toprocessor for pto.
            Save pto as toproc[i] for quick recall later. It represents
            the toprocessor connected to a chord i. */
         for ( qto=0; qto<num_toprocessors; ++qto ) {
            pto = toprocessor[qto];
            if ( j_global >= row_starts[pto] && j_global<row_starts[pto+1] ) {
               hypre_assert( qto < len_num_rdofs_toprocessor );
               ++num_rdofs_toprocessor[qto];
               /* ... an overestimate, as if two chords share an rdof, that
                  rdof will be counted twice in num_rdofs_toprocessor.
                  It can be fixed up later.*/
               toproc[i] = qto;
               break;
            }
         }
      }
   };
   num_rdofs += hypre_NumbersNEntered(rdofs);
   hypre_NumbersDeleteNode(rdofs);

   for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) {
      inchord_idof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] );
      inchord_rdof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] );
      inchord_data[q] = hypre_CTAlloc( double, num_inchords[q] );
      chord[q] = 0;
   };