Beispiel #1
0
HYPRE_Int
hypre_BoomerAMGCreateScalarCFS(hypre_ParCSRMatrix    *SN,
                       HYPRE_Int                   *CFN_marker,
                       HYPRE_Int                   *col_offd_SN_to_AN,
                       HYPRE_Int                    num_functions,
                       HYPRE_Int                    nodal,
                       HYPRE_Int                    data,
                       HYPRE_Int                  **dof_func_ptr,
                       HYPRE_Int                  **CF_marker_ptr,
                       HYPRE_Int                  **col_offd_S_to_A_ptr,
                       hypre_ParCSRMatrix   **S_ptr)
{
   MPI_Comm	       comm = hypre_ParCSRMatrixComm(SN);
   hypre_ParCSRMatrix *S;
   hypre_CSRMatrix    *S_diag;
   HYPRE_Int		      *S_diag_i;
   HYPRE_Int		      *S_diag_j;
   double	      *S_diag_data;
   hypre_CSRMatrix    *S_offd;
   HYPRE_Int		      *S_offd_i;
   HYPRE_Int		      *S_offd_j;
   double	      *S_offd_data;
   HYPRE_Int		      *row_starts_S;
   HYPRE_Int		      *col_starts_S;
   HYPRE_Int		      *row_starts_SN = hypre_ParCSRMatrixRowStarts(SN);
   HYPRE_Int		      *col_starts_SN = hypre_ParCSRMatrixColStarts(SN);
   hypre_CSRMatrix    *SN_diag = hypre_ParCSRMatrixDiag(SN);
   HYPRE_Int		      *SN_diag_i = hypre_CSRMatrixI(SN_diag);
   HYPRE_Int		      *SN_diag_j = hypre_CSRMatrixJ(SN_diag);
   double	      *SN_diag_data;
   hypre_CSRMatrix    *SN_offd = hypre_ParCSRMatrixOffd(SN);
   HYPRE_Int		      *SN_offd_i = hypre_CSRMatrixI(SN_offd);
   HYPRE_Int		      *SN_offd_j = hypre_CSRMatrixJ(SN_offd);
   double	      *SN_offd_data;
   HYPRE_Int		      *CF_marker;
   HYPRE_Int		      *col_map_offd_SN = hypre_ParCSRMatrixColMapOffd(SN);
   HYPRE_Int		      *col_map_offd_S;
   HYPRE_Int		      *dof_func;
   HYPRE_Int		       num_nodes = hypre_CSRMatrixNumRows(SN_diag);
   HYPRE_Int		       num_variables;
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(SN);
   HYPRE_Int		       num_sends;
   HYPRE_Int		       num_recvs;
   HYPRE_Int		      *send_procs;
   HYPRE_Int		      *send_map_starts;
   HYPRE_Int		      *send_map_elmts;
   HYPRE_Int		      *recv_procs;
   HYPRE_Int		      *recv_vec_starts;
   hypre_ParCSRCommPkg *comm_pkg_S;
   HYPRE_Int		      *send_procs_S;
   HYPRE_Int		      *send_map_starts_S;
   HYPRE_Int		      *send_map_elmts_S;
   HYPRE_Int		      *recv_procs_S;
   HYPRE_Int		      *recv_vec_starts_S;
   HYPRE_Int		      *col_offd_S_to_A = NULL;
   
   HYPRE_Int		       num_coarse_nodes;
   HYPRE_Int		       i,j,k,k1,jj,cnt;
   HYPRE_Int		       row, start, end;
   HYPRE_Int		       num_procs;
   HYPRE_Int		       num_cols_offd_SN = hypre_CSRMatrixNumCols(SN_offd);
   HYPRE_Int		       num_cols_offd_S;
   HYPRE_Int		       SN_num_nonzeros_diag;
   HYPRE_Int		       SN_num_nonzeros_offd;
   HYPRE_Int		       S_num_nonzeros_diag;
   HYPRE_Int		       S_num_nonzeros_offd;
   HYPRE_Int		       global_num_vars;
   HYPRE_Int		       global_num_cols;
   HYPRE_Int		       global_num_nodes;
   HYPRE_Int		       ierr = 0;
 
   hypre_MPI_Comm_size(comm, &num_procs);
 
   num_variables = num_functions*num_nodes;
   CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables);

   if (nodal < 0)
   {
      cnt = 0;
      num_coarse_nodes = 0;
      for (i=0; i < num_nodes; i++)
      {
	 if (CFN_marker[i] == 1) num_coarse_nodes++;
         for (j=0; j < num_functions; j++)
	    CF_marker[cnt++] = CFN_marker[i];
      }

      dof_func = hypre_CTAlloc(HYPRE_Int,num_coarse_nodes*num_functions);
      cnt = 0;
      for (i=0; i < num_nodes; i++)
      {
	 if (CFN_marker[i] == 1)
	 {
	    for (k=0; k < num_functions; k++)
	       dof_func[cnt++] = k;
	 }
      }
      *dof_func_ptr = dof_func;
   }
   else
   {
      cnt = 0;
      for (i=0; i < num_nodes; i++)
         for (j=0; j < num_functions; j++)
	    CF_marker[cnt++] = CFN_marker[i];
   }

   *CF_marker_ptr = CF_marker;


#ifdef HYPRE_NO_GLOBAL_PARTITION
   row_starts_S = hypre_CTAlloc(HYPRE_Int,2);
   for (i=0; i < 2; i++)
      row_starts_S[i] = num_functions*row_starts_SN[i];

   if (row_starts_SN != col_starts_SN)
   {
      col_starts_S = hypre_CTAlloc(HYPRE_Int,2);
      for (i=0; i < 2; i++)
         col_starts_S[i] = num_functions*col_starts_SN[i];
   }
   else
   {
      col_starts_S = row_starts_S;
   }
#else
   row_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1);
   for (i=0; i < num_procs+1; i++)
      row_starts_S[i] = num_functions*row_starts_SN[i];

   if (row_starts_SN != col_starts_SN)
   {
      col_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1);
      for (i=0; i < num_procs+1; i++)
         col_starts_S[i] = num_functions*col_starts_SN[i];
   }
   else
   {
      col_starts_S = row_starts_S;
   }
#endif


   SN_num_nonzeros_diag = SN_diag_i[num_nodes];
   SN_num_nonzeros_offd = SN_offd_i[num_nodes];
 
   global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(SN);
   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(SN)*num_functions;
 
   global_num_vars = global_num_nodes*num_functions;
   S_num_nonzeros_diag = num_functions*SN_num_nonzeros_diag;
   S_num_nonzeros_offd = num_functions*SN_num_nonzeros_offd;
   num_cols_offd_S = num_functions*num_cols_offd_SN;
   S = hypre_ParCSRMatrixCreate(comm, global_num_vars, global_num_cols,
		row_starts_S, col_starts_S, num_cols_offd_S,
		S_num_nonzeros_diag, S_num_nonzeros_offd);

   S_diag = hypre_ParCSRMatrixDiag(S);
   S_offd = hypre_ParCSRMatrixOffd(S);
   S_diag_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_offd_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_diag_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_diag);
   hypre_CSRMatrixI(S_diag) = S_diag_i;
   hypre_CSRMatrixJ(S_diag) = S_diag_j;
   if (data) 
   {
      SN_diag_data = hypre_CSRMatrixData(SN_diag);
      S_diag_data = hypre_CTAlloc(double, S_num_nonzeros_diag);
      hypre_CSRMatrixData(S_diag) = S_diag_data;
      if (num_cols_offd_S)
      {
         SN_offd_data = hypre_CSRMatrixData(SN_offd);
         S_offd_data = hypre_CTAlloc(double, S_num_nonzeros_offd);
         hypre_CSRMatrixData(S_offd) = S_offd_data;
      }

   }
   hypre_CSRMatrixI(S_offd) = S_offd_i;

   if (comm_pkg)
   {
      comm_pkg_S = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
      hypre_ParCSRCommPkgComm(comm_pkg_S) = comm;
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      hypre_ParCSRCommPkgNumSends(comm_pkg_S) = num_sends;
      num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_S) = num_recvs;
      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      send_procs_S = NULL;
      send_map_elmts_S = NULL;
      if (num_sends) 
      {
         send_procs_S = hypre_CTAlloc(HYPRE_Int,num_sends);
         send_map_elmts_S = hypre_CTAlloc(HYPRE_Int,
		num_functions*send_map_starts[num_sends]);
      }
      send_map_starts_S = hypre_CTAlloc(HYPRE_Int,num_sends+1);
      recv_vec_starts_S = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
      recv_procs_S = NULL;
      if (num_recvs) recv_procs_S = hypre_CTAlloc(HYPRE_Int,num_recvs);
      send_map_starts_S[0] = 0;
      for (i=0; i < num_sends; i++)
      {
         send_procs_S[i] = send_procs[i];
         send_map_starts_S[i+1] = num_functions*send_map_starts[i+1];
      }
      recv_vec_starts_S[0] = 0;
      for (i=0; i < num_recvs; i++)
      {
         recv_procs_S[i] = recv_procs[i];
         recv_vec_starts_S[i+1] = num_functions*recv_vec_starts[i+1];
      }

      cnt = 0;
      for (i=0; i < send_map_starts[num_sends]; i++)
      {
	 k1 = num_functions*send_map_elmts[i];
         for (j=0; j < num_functions; j++)
         {
	    send_map_elmts_S[cnt++] = k1+j;
         }
      }
      hypre_ParCSRCommPkgSendProcs(comm_pkg_S) = send_procs_S;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_S) = send_map_starts_S;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_S) = send_map_elmts_S;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_S) = recv_procs_S;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_S) = recv_vec_starts_S;
      hypre_ParCSRMatrixCommPkg(S) = comm_pkg_S;
   }

   if (num_cols_offd_S)
   {
      S_offd_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_offd);
      hypre_CSRMatrixJ(S_offd) = S_offd_j;

      col_map_offd_S = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);

      cnt = 0;
      for (i=0; i < num_cols_offd_SN; i++)
      {
         k1 = col_map_offd_SN[i]*num_functions;
         for (j=0; j < num_functions; j++)
            col_map_offd_S[cnt++] = k1+j;
      }
      hypre_ParCSRMatrixColMapOffd(S) = col_map_offd_S;
   }
   
   if (col_offd_SN_to_AN)
   {
      col_offd_S_to_A = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);

      cnt = 0;
      for (i=0; i < num_cols_offd_SN; i++)
      {
         k1 = col_offd_SN_to_AN[i]*num_functions;
         for (j=0; j < num_functions; j++)
            col_offd_S_to_A[cnt++] = k1+j;
      }
      *col_offd_S_to_A_ptr = col_offd_S_to_A;
   }
   


   cnt = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      row++;
      start = cnt;
      for (j=SN_diag_i[i]; j < SN_diag_i[i+1]; j++)
      {
         jj = SN_diag_j[j];
	 if (data) S_diag_data[cnt] = SN_diag_data[j];
	 S_diag_j[cnt++] = jj*num_functions;
      }
      end = cnt;
      S_diag_i[row] = cnt;
      for (k1=1; k1 < num_functions; k1++)
      {
         row++;
	 for (k=start; k < end; k++)
	 {
	    if (data) S_diag_data[cnt] = S_diag_data[k];
	    S_diag_j[cnt++] = S_diag_j[k]+k1;
	 }
         S_diag_i[row] = cnt;
      }
   } 

   cnt = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      row++;
      start = cnt;
      for (j=SN_offd_i[i]; j < SN_offd_i[i+1]; j++)
      {
         jj = SN_offd_j[j];
	 if (data) S_offd_data[cnt] = SN_offd_data[j];
	 S_offd_j[cnt++] = jj*num_functions;
      }
      end = cnt;
      S_offd_i[row] = cnt;
      for (k1=1; k1 < num_functions; k1++)
      {
         row++;
	 for (k=start; k < end; k++)
	 {
	    if (data) S_offd_data[cnt] = S_offd_data[k];
	    S_offd_j[cnt++] = S_offd_j[k]+k1;
	 }
         S_offd_i[row] = cnt;
      }
   } 

   *S_ptr = S; 

   return (ierr);
}
Beispiel #2
0
hypre_ParCSRMatrix * hypre_ParMatmul_FC(
   hypre_ParCSRMatrix * A, hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker,
   HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd )
/* hypre_parMatmul_FC creates and returns the "Fine"-designated rows of the
   matrix product A*P.  A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC
   where nC is the number of coarse rows/columns, nF the number of fine
   rows/columns.  The size of C=A*P is (nC+nF)*nC, even though not all rows
   of C are actually computed.  If we were to construct a matrix consisting
   only of the computed rows of C, its size would be nF*nC.
   "Fine" is defined solely by the marker array, and for example could be
   a proper subset of the fine points of a multigrid hierarchy.
*/
{
   /* To compute a submatrix of C containing only the computed data, i.e.
      only "Fine" rows, we would have to do a lot of computational work,
      with a lot of communication.  The communication is because such a
      matrix would need global information that depends on which rows are
      "Fine".
   */

   MPI_Comm 	   comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   
   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int             *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int             *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   
   double          *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int             *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int             *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int	num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int	num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int	num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
   
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   
   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);

   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int		   *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
   
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);

   HYPRE_Int	first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_Int	last_col_diag_P;
   HYPRE_Int *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int	num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int	num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *C;
   HYPRE_Int		      *col_map_offd_C;
   HYPRE_Int		      *map_P_to_C;

   hypre_CSRMatrix *C_diag;

   double          *C_diag_data;
   HYPRE_Int             *C_diag_i;
   HYPRE_Int             *C_diag_j;

   hypre_CSRMatrix *C_offd;

   double          *C_offd_data=NULL;
   HYPRE_Int             *C_offd_i=NULL;
   HYPRE_Int             *C_offd_j=NULL;

   HYPRE_Int              C_diag_size;
   HYPRE_Int              C_offd_size;
   HYPRE_Int		    num_cols_offd_C = 0;
   
   hypre_CSRMatrix *Ps_ext;
   
   double          *Ps_ext_data;
   HYPRE_Int             *Ps_ext_i;
   HYPRE_Int             *Ps_ext_j;

   double          *P_ext_diag_data;
   HYPRE_Int             *P_ext_diag_i;
   HYPRE_Int             *P_ext_diag_j;
   HYPRE_Int              P_ext_diag_size;

   double          *P_ext_offd_data;
   HYPRE_Int             *P_ext_offd_i;
   HYPRE_Int             *P_ext_offd_j;
   HYPRE_Int              P_ext_offd_size;

   HYPRE_Int		   *P_marker;
   HYPRE_Int		   *temp;

   HYPRE_Int              i, j;
   HYPRE_Int              i1, i2, i3;
   HYPRE_Int              jj2, jj3;
   
   HYPRE_Int              jj_count_diag, jj_count_offd;
   HYPRE_Int              jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int              start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int		    n_rows_A_global, n_cols_A_global;
   HYPRE_Int		    n_rows_P_global, n_cols_P_global;
   HYPRE_Int              allsquare = 0;
   HYPRE_Int              cnt, cnt_offd, cnt_diag;
   HYPRE_Int 		    num_procs;
   HYPRE_Int 		    value;

   double           a_entry;
   double           a_b_product;
   
   n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P);

   if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P)
   {
	hypre_printf(" Error! Incompatible matrix dimensions!\n");
	return NULL;
   }
   /* if (num_rows_A==num_cols_P) allsquare = 1; */

   /*-----------------------------------------------------------------------
    *  Extract P_ext, i.e. portion of P that is stored on neighbor procs
    *  and needed locally for matrix matrix product 
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);

   if (num_procs > 1)
   {
       /*---------------------------------------------------------------------
    	* If there exists no CommPkg for A, a CommPkg is generated using
    	* equally load balanced partitionings within 
	* hypre_ParCSRMatrixExtractBExt
    	*--------------------------------------------------------------------*/
   	Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1);
   	Ps_ext_data = hypre_CSRMatrixData(Ps_ext);
   	Ps_ext_i    = hypre_CSRMatrixI(Ps_ext);
   	Ps_ext_j    = hypre_CSRMatrixJ(Ps_ext);
   }
   P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_diag_size = 0;
   P_ext_offd_size = 0;
   last_col_diag_P = first_col_diag_P + num_cols_diag_P -1;

   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
            P_ext_offd_size++;
         else
            P_ext_diag_size++;
      P_ext_diag_i[i+1] = P_ext_diag_size;
      P_ext_offd_i[i+1] = P_ext_offd_size;
   }

   if (P_ext_diag_size)
   {
      P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size);
      P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size);
   }
   if (P_ext_offd_size)
   {
      P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size);
      P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size);
   }

   cnt_offd = 0;
   cnt_diag = 0;
   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_j[cnt_offd] = Ps_ext_j[j];
            P_ext_offd_data[cnt_offd++] = Ps_ext_data[j];
         }
         else
         {
            P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P;
            P_ext_diag_data[cnt_diag++] = Ps_ext_data[j];
         }
   }

   if (num_procs > 1)
   {
      hypre_CSRMatrixDestroy(Ps_ext);
      Ps_ext = NULL;
   }

   cnt = 0;
   if (P_ext_offd_size || num_cols_offd_P)
   {
      temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P);
      for (i=0; i < P_ext_offd_size; i++)
         temp[i] = P_ext_offd_j[i];
      cnt = P_ext_offd_size;
      for (i=0; i < num_cols_offd_P; i++)
         temp[cnt++] = col_map_offd_P[i];
   }
   if (cnt)
   {
      qsort0(temp, 0, cnt-1);

      num_cols_offd_C = 1;
      value = temp[0];
      for (i=1; i < cnt; i++)
      {
         if (temp[i] > value)
         {
            value = temp[i];
            temp[num_cols_offd_C++] = value;
         }
      }
   }

   if (num_cols_offd_C)
        col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);

   for (i=0; i < num_cols_offd_C; i++)
      col_map_offd_C[i] = temp[i];

   if (P_ext_offd_size || num_cols_offd_P)
      hypre_TFree(temp);

   for (i=0 ; i < P_ext_offd_size; i++)
      P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C,
                                           P_ext_offd_j[i],
                                           num_cols_offd_C);
   if (num_cols_offd_P)
   {
      map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P);

      cnt = 0;
      for (i=0; i < num_cols_offd_C; i++)
         if (col_map_offd_C[i] == col_map_offd_P[cnt])
         {
            map_P_to_C[cnt++] = i;
            if (cnt == num_cols_offd_P) break;
         }
   }

   /*-----------------------------------------------------------------------
   *  Allocate marker array.
    *-----------------------------------------------------------------------*/

   P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C);

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }


/* no changes for the marked version above this point */
   /* This function call is the first pass: */
   hypre_ParMatmul_RowSizes_Marked(
      &C_diag_i, &C_offd_i, &P_marker,
      A_diag_i, A_diag_j, A_offd_i, A_offd_j,
      P_diag_i, P_diag_j, P_offd_i, P_offd_j,
      P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j,
      map_P_to_C,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A, allsquare,
      num_cols_diag_P, num_cols_offd_P,
      num_cols_offd_C, CF_marker, dof_func, dof_func_offd
      );

   /* The above call of hypre_ParMatmul_RowSizes_Marked computed
      two scalars: C_diag_size, C_offd_size,
      and two arrays: C_diag_i, C_offd_i
      ( P_marker is also computed, but only used internally )
   */

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/
 
   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;
   C_diag_data = hypre_CTAlloc(double, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   { 
   	C_offd_data = hypre_CTAlloc(double, C_offd_size);
   	C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   } 


   /*-----------------------------------------------------------------------
    *  Second Pass: Fill in C_diag_data and C_diag_j.
    *  Second Pass: Fill in C_offd_data and C_offd_j.
    *-----------------------------------------------------------------------*/

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }
   
   /*-----------------------------------------------------------------------
    *  Loop over interior c-points.
    *-----------------------------------------------------------------------*/
    
   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {

      if ( CF_marker[i1] < 0 )  /* i1 is a fine row */
         /* ... This and the coarse row code are the only parts between first pass
            and near the end where
            hypre_ParMatmul_FC is different from the regular hypre_ParMatmul */
      {

         /*--------------------------------------------------------------------
          *  Create diagonal entry, C_{i1,i1} 
          *--------------------------------------------------------------------*/

         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_offd.
          *-----------------------------------------------------------------*/
         
	 if (num_cols_offd_A)
	 {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];
               if( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] )
               {  /* interpolate only like "functions" */
                  a_entry = A_offd_data[jj2];
            
                  /*-----------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext.
                   *-----------------------------------------------------------*/

                  for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+P_ext_offd_j[jj3];
                     a_b_product = a_entry * P_ext_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                        C_offd_data[P_marker[i3]] += a_b_product;
                  }
                  for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
                  {
                     i3 = P_ext_diag_j[jj3];
                     a_b_product = a_entry * P_ext_diag_data[jj3];

                     if (P_marker[i3] < jj_row_begin_diag)
                     {
                        P_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                        C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               else
               {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                     different "functions".  As we haven't created an entry for
                     C(i1,i2), nothing needs to be done. */
               }

            }
         }

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];
            if( dof_func==NULL || dof_func[i1] == dof_func[i2] )
            {  /* interpolate only like "functions" */
               a_entry = A_diag_data[jj2];
            
               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_diag.
                *-----------------------------------------------------------*/

               for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
               {
                  i3 = P_diag_j[jj3];
                  a_b_product = a_entry * P_diag_data[jj3];
                  
                  /*--------------------------------------------------------
                   *  Check P_marker to see that C_{i1,i3} has not already
                   *  been accounted for. If it has not, create a new entry.
                   *  If it has, add new contribution.
                   *--------------------------------------------------------*/

                  if (P_marker[i3] < jj_row_begin_diag)
                  {
                     P_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               if (num_cols_offd_P)
	       {
                  for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]];
                     a_b_product = a_entry * P_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/

                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
            else
            {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                  different "functions".  As we haven't created an entry for
                  C(i1,i2), nothing needs to be done. */
            }
         }
      }
      else  /* i1 is a coarse row.*/
         /* Copy P coarse-row values to C.  This is useful if C is meant to
            become a replacement for P */
      {
	 if (num_cols_offd_P)
	 {
            for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++)
            {
               C_offd_j[jj_count_offd] = P_offd_j[jj_count_offd];
               C_offd_data[jj_count_offd] = P_offd_data[jj_count_offd];
               ++jj_count_offd;
            }
         }
         for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++)
         {
            C_diag_j[jj_count_diag] = P_diag_j[jj2];
            C_diag_data[jj_count_diag] = P_diag_data[jj2];
            ++jj_count_diag;
         }
      }
   }

   C = hypre_ParCSRMatrixCreate(
      comm, n_rows_A_global, n_cols_P_global,
      row_starts_A, col_starts_P, num_cols_offd_C, C_diag_size, C_offd_size );

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C,0);
   hypre_ParCSRMatrixSetColStartsOwner(C,0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data; 
   hypre_CSRMatrixI(C_diag) = C_diag_i; 
   hypre_CSRMatrixJ(C_diag) = C_diag_j; 

   C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrixI(C_offd) = C_offd_i; 
   hypre_ParCSRMatrixOffd(C) = C_offd;

   if (num_cols_offd_C)
   {
      hypre_CSRMatrixData(C_offd) = C_offd_data; 
      hypre_CSRMatrixJ(C_offd) = C_offd_j; 
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

   }

   /*-----------------------------------------------------------------------
    *  Free various arrays
    *-----------------------------------------------------------------------*/

   hypre_TFree(P_marker);   
   hypre_TFree(P_ext_diag_i);
   if (P_ext_diag_size)
   {
      hypre_TFree(P_ext_diag_j);
      hypre_TFree(P_ext_diag_data);
   }
   hypre_TFree(P_ext_offd_i);
   if (P_ext_offd_size)
   {
      hypre_TFree(P_ext_offd_j);
      hypre_TFree(P_ext_offd_data);
   }
   if (num_cols_offd_P) hypre_TFree(map_P_to_C);

   return C;
   
}
Beispiel #3
0
HYPRE_Int main( HYPRE_Int   argc, char *argv[] )
{
   hypre_ParCSRMatrix      *par_matrix, *g_matrix, **submatrices;
   hypre_CSRMatrix         *A_diag, *A_offd;
   hypre_CSRBlockMatrix    *diag;
   hypre_CSRBlockMatrix    *offd;
   hypre_ParCSRBlockMatrix *par_blk_matrix, *par_blk_matrixT, *rap_matrix;
   hypre_Vector        *x_local;
   hypre_Vector        *y_local;
   hypre_ParVector     *x;
   hypre_ParVector     *y;
   HYPRE_Solver        gmres_solver, precon;
   HYPRE_Int                 *diag_i, *diag_j, *offd_i, *offd_j;
   HYPRE_Int                 *diag_i2, *diag_j2, *offd_i2, *offd_j2;
   double              *diag_d, *diag_d2, *offd_d, *offd_d2;
   HYPRE_Int		       mypid, local_size, nprocs;
   HYPRE_Int		       global_num_rows, global_num_cols, num_cols_offd;
   HYPRE_Int		       num_nonzeros_diag, num_nonzeros_offd, *colMap;
   HYPRE_Int 		       ii, jj, kk, row, col, nnz, *indices, *colMap2;
   double 	       *data, ddata, *y_data;
   HYPRE_Int 		       *row_starts, *col_starts, *rstarts, *cstarts;
   HYPRE_Int 		       *row_starts2, *col_starts2;
   HYPRE_Int                 block_size=2, bnnz=4, *index_set;
   FILE                *fp;

   /* --------------------------------------------- */
   /* Initialize MPI                                */
   /* --------------------------------------------- */

   hypre_MPI_Init(&argc, &argv);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mypid);
   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs);

   /* build and fetch matrix */
   MyBuildParLaplacian9pt((HYPRE_ParCSRMatrix *) &par_matrix);
   global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix);
   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix);
   row_starts = hypre_ParCSRMatrixRowStarts(par_matrix);
   col_starts = hypre_ParCSRMatrixColStarts(par_matrix);
   A_diag = hypre_ParCSRMatrixDiag(par_matrix);
   A_offd = hypre_ParCSRMatrixOffd(par_matrix);
   num_cols_offd     = hypre_CSRMatrixNumCols(A_offd);
   num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(A_diag);
   num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(A_offd);

   /* --------------------------------------------- */
   /* build vector and apply matvec                 */
   /* --------------------------------------------- */

   x = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_cols,col_starts);
   hypre_ParVectorSetPartitioningOwner(x,0);
   hypre_ParVectorInitialize(x);
   x_local = hypre_ParVectorLocalVector(x);
   data    = hypre_VectorData(x_local);
   local_size = col_starts[mypid+1] - col_starts[mypid];
   for (ii = 0; ii < local_size; ii++) data[ii] = 1.0;
   y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_rows,row_starts);
   hypre_ParVectorSetPartitioningOwner(y,0);
   hypre_ParVectorInitialize(y);
   hypre_ParCSRMatrixMatvec (1.0, par_matrix, x, 0.0, y);
   ddata = hypre_ParVectorInnerProd(y, y);
   if (mypid == 0) hypre_printf("y inner product = %e\n", ddata);
   hypre_ParVectorDestroy(x);
   hypre_ParVectorDestroy(y);

   /* --------------------------------------------- */
   /* build block matrix                            */
   /* --------------------------------------------- */

   rstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1);
   for (ii = 0; ii <= nprocs; ii++) rstarts[ii] = row_starts[ii];
   cstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1);
   for (ii = 0; ii <= nprocs; ii++) cstarts[ii] = col_starts[ii];

   par_blk_matrix = hypre_ParCSRBlockMatrixCreate(hypre_MPI_COMM_WORLD,block_size,
                          global_num_rows, global_num_cols, rstarts,
                          cstarts, num_cols_offd, num_nonzeros_diag,
                          num_nonzeros_offd);
   colMap  = hypre_ParCSRMatrixColMapOffd(par_matrix);
   if (num_cols_offd > 0) colMap2 = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
   else                   colMap2 = NULL;
   for (ii = 0; ii < num_cols_offd; ii++) colMap2[ii] = colMap[ii];
   hypre_ParCSRBlockMatrixColMapOffd(par_blk_matrix) = colMap2;
   diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix));
   diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix));
   diag_d = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix));
   diag = hypre_ParCSRBlockMatrixDiag(par_blk_matrix);
   diag_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1);
   diag_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag);
   diag_d2 = hypre_CTAlloc(double, num_nonzeros_diag*bnnz);
   for (ii = 0; ii <= local_size; ii++) diag_i2[ii] = diag_i[ii];
   for (ii = 0; ii < num_nonzeros_diag; ii++) diag_j2[ii] = diag_j[ii];
   hypre_CSRBlockMatrixI(diag) = diag_i2;
   hypre_CSRBlockMatrixJ(diag) = diag_j2;
   for (ii = 0; ii < num_nonzeros_diag; ii++)
   {
      for (jj = 0; jj < block_size; jj++)
         for (kk = 0; kk < block_size; kk++)
         {
            if (jj <= kk)
               diag_d2[ii*bnnz+jj*block_size+kk] = diag_d[ii];
            else
               diag_d2[ii*bnnz+jj*block_size+kk] = 0.0;
         }
   }
   hypre_CSRBlockMatrixData(diag) = diag_d2;

   offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix));
   offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix));
   offd_d = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix));
   offd   = hypre_ParCSRBlockMatrixOffd(par_blk_matrix);
   offd_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1);
   for (ii = 0; ii <= local_size; ii++) offd_i2[ii] = offd_i[ii];
   hypre_CSRBlockMatrixI(offd) = offd_i2;
   if (num_cols_offd)
   {
      offd_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd);
      for (ii = 0; ii < num_nonzeros_offd; ii++) offd_j2[ii] = offd_j[ii];
      hypre_CSRBlockMatrixJ(offd) = offd_j2;
      offd_d2 = hypre_CTAlloc(double, num_nonzeros_offd*bnnz);
      for (ii = 0; ii < num_nonzeros_offd; ii++)
      {
         for (jj = 0; jj < block_size; jj++)
            for (kk = 0; kk < block_size; kk++)
            {
               if (jj <= kk)
                  offd_d2[ii*bnnz+jj*block_size+kk] = offd_d[ii];
               else
                  offd_d2[ii*bnnz+jj*block_size+kk] = 0.0;
            }
      }
      hypre_CSRBlockMatrixData(offd) = offd_d2;
   }
   else
   {
Beispiel #4
0
HYPRE_Int
hypre_ParCSRMatrixMatvec( HYPRE_Complex       alpha,
                          hypre_ParCSRMatrix *A,
                          hypre_ParVector    *x,
                          HYPRE_Complex       beta,
                          hypre_ParVector    *y )
{
   hypre_ParCSRCommHandle **comm_handle;
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix   *diag   = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix   *offd   = hypre_ParCSRMatrixOffd(A);
   hypre_Vector      *x_local  = hypre_ParVectorLocalVector(x);   
   hypre_Vector      *y_local  = hypre_ParVectorLocalVector(y);   
   HYPRE_Int          num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int          num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   hypre_Vector      *x_tmp;
   HYPRE_Int          x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int          y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int          num_vectors = hypre_VectorNumVectors(x_local);
   HYPRE_Int          num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int          ierr = 0;
   HYPRE_Int          num_sends, i, j, jv, index, start;

   HYPRE_Int          vecstride = hypre_VectorVectorStride( x_local );
   HYPRE_Int          idxstride = hypre_VectorIndexStride( x_local );

   HYPRE_Complex     *x_tmp_data, **x_buf_data;
   HYPRE_Complex     *x_local_data = hypre_VectorData(x_local);

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  ParMatvec returns ierr = 11 if
    *  length of X doesn't equal the number of columns of A,
    *  ierr = 12 if the length of Y doesn't equal the number of rows
    *  of A, and ierr = 13 if both are true.
    *
    *  Because temporary vectors are often used in ParMatvec, none of 
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/
 
   hypre_assert( idxstride>0 );

   if (num_cols != x_size)
      ierr = 11;

   if (num_rows != y_size)
      ierr = 12;

   if (num_cols != x_size && num_rows != y_size)
      ierr = 13;

   hypre_assert( hypre_VectorNumVectors(y_local)==num_vectors );

   if ( num_vectors==1 )
      x_tmp = hypre_SeqVectorCreate( num_cols_offd );
   else
   {
      hypre_assert( num_vectors>1 );
      x_tmp = hypre_SeqMultiVectorCreate( num_cols_offd, num_vectors );
   }
   hypre_SeqVectorInitialize(x_tmp);
   x_tmp_data = hypre_VectorData(x_tmp);
   
   comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors);

   /*---------------------------------------------------------------------
    * If there exists no CommPkg for A, a CommPkg is generated using
    * equally load balanced partitionings
    *--------------------------------------------------------------------*/
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A); 
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   x_buf_data = hypre_CTAlloc( HYPRE_Complex*, num_vectors );
   for ( jv=0; jv<num_vectors; ++jv )
      x_buf_data[jv] = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart
                                     (comm_pkg, num_sends));

   if ( num_vectors==1 )
   {
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            x_buf_data[0][index++] 
               = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
   }
   else
      for ( jv=0; jv<num_vectors; ++jv )
      {
         index = 0;
         for (i = 0; i < num_sends; i++)
         {
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
               x_buf_data[jv][index++] 
                  = x_local_data[
                     jv*vecstride +
                     idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j) ];
         }
      }

   hypre_assert( idxstride==1 );
   /* ... The assert is because the following loop only works for 'column'
      storage of a multivector. This needs to be fixed to work more generally,
      at least for 'row' storage. This in turn, means either change CommPkg so
      num_sends is no.zones*no.vectors (not no.zones) or, less dangerously, put
      a stride in the logic of CommHandleCreate (stride either from a new arg or
      a new variable inside CommPkg).  Or put the num_vector iteration inside
      CommHandleCreate (perhaps a new multivector variant of it).
   */
   for ( jv=0; jv<num_vectors; ++jv )
   {
      comm_handle[jv] = hypre_ParCSRCommHandleCreate
         ( 1, comm_pkg, x_buf_data[jv], &(x_tmp_data[jv*num_cols_offd]) );
   }

   hypre_CSRMatrixMatvec( alpha, diag, x_local, beta, y_local);
   
   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_ParCSRCommHandleDestroy(comm_handle[jv]);
      comm_handle[jv] = NULL;
   }
   hypre_TFree(comm_handle);

   if (num_cols_offd) hypre_CSRMatrixMatvec( alpha, offd, x_tmp, 1.0, y_local);    

   hypre_SeqVectorDestroy(x_tmp);
   x_tmp = NULL;
   for ( jv=0; jv<num_vectors; ++jv ) hypre_TFree(x_buf_data[jv]);
   hypre_TFree(x_buf_data);
  
   return ierr;
}
Beispiel #5
0
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvec_FF
 *--------------------------------------------------------------------------*/
                                                                                    HYPRE_Int
hypre_ParCSRMatrixMatvec_FF( HYPRE_Complex       alpha,
                             hypre_ParCSRMatrix *A,
                             hypre_ParVector    *x,
                             HYPRE_Complex       beta,
                             hypre_ParVector    *y,
                             HYPRE_Int          *CF_marker,
                             HYPRE_Int           fpt )
{
   MPI_Comm                comm = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommHandle *comm_handle;
   hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix        *diag   = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix        *offd   = hypre_ParCSRMatrixOffd(A);
   hypre_Vector           *x_local  = hypre_ParVectorLocalVector(x);
   hypre_Vector           *y_local  = hypre_ParVectorLocalVector(y);
   HYPRE_Int               num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int               num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   hypre_Vector      *x_tmp;
   HYPRE_Int          x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int          y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int          num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int          ierr = 0;
   HYPRE_Int          num_sends, i, j, index, start, num_procs;
   HYPRE_Int         *int_buf_data = NULL;
   HYPRE_Int         *CF_marker_offd = NULL;

   HYPRE_Complex     *x_tmp_data = NULL;
   HYPRE_Complex     *x_buf_data = NULL;
   HYPRE_Complex     *x_local_data = hypre_VectorData(x_local);
   /*---------------------------------------------------------------------
    *  Check for size compatibility.  ParMatvec returns ierr = 11 if
    *  length of X doesn't equal the number of columns of A,
    *  ierr = 12 if the length of Y doesn't equal the number of rows
    *  of A, and ierr = 13 if both are true.
    *
    *  Because temporary vectors are often used in ParMatvec, none of
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm,&num_procs);

   if (num_cols != x_size)
      ierr = 11;

   if (num_rows != y_size)
      ierr = 12;

   if (num_cols != x_size && num_rows != y_size)
      ierr = 13;

   if (num_procs > 1)
   {
      if (num_cols_offd)
      {
         x_tmp = hypre_SeqVectorCreate( num_cols_offd );
         hypre_SeqVectorInitialize(x_tmp);
         x_tmp_data = hypre_VectorData(x_tmp);
      }

      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings
       *--------------------------------------------------------------------*/
      if (!comm_pkg)
      {
         hypre_MatvecCommPkgCreate(A);
         comm_pkg = hypre_ParCSRMatrixCommPkg(A);
      }

      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      if (num_sends)
         x_buf_data = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart
                                    (comm_pkg, num_sends));

      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            x_buf_data[index++]
               = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data, x_tmp_data );
   }
   hypre_CSRMatrixMatvec_FF( alpha, diag, x_local, beta, y_local, CF_marker,
                             CF_marker, fpt);

   if (num_procs > 1)
   {
      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      if (num_sends)
         int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart
                                      (comm_pkg, num_sends));
      if (num_cols_offd) CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            int_buf_data[index++]
               = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate(11,comm_pkg,int_buf_data,CF_marker_offd );

      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      if (num_cols_offd) hypre_CSRMatrixMatvec_FF( alpha, offd, x_tmp, 1.0, y_local,
                                                   CF_marker, CF_marker_offd, fpt);

      hypre_SeqVectorDestroy(x_tmp);
      x_tmp = NULL;
      hypre_TFree(x_buf_data);
      hypre_TFree(int_buf_data);
      hypre_TFree(CF_marker_offd);
   }

   return ierr;
}
Beispiel #6
0
int
hypre_BoomerAMGSetupStats( void               *amg_vdata,
                        hypre_ParCSRMatrix *A         )
{
   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(A);   

   hypre_ParAMGData *amg_data = (hypre_ParAMGData*)amg_vdata;

   /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/

   /* Data Structure variables */

   hypre_ParCSRMatrix **A_array;
   hypre_ParCSRMatrix **P_array;

   hypre_CSRMatrix *A_diag;
   double          *A_diag_data;
   int             *A_diag_i;

   hypre_CSRMatrix *A_offd;   
   double          *A_offd_data;
   int             *A_offd_i;

   hypre_CSRMatrix *P_diag;
   double          *P_diag_data;
   int             *P_diag_i;

   hypre_CSRMatrix *P_offd;   
   double          *P_offd_data;
   int             *P_offd_i;


   int	    numrows;

   HYPRE_BigInt	    *row_starts;

 
   int      num_levels; 
   int      coarsen_type;
   int      interp_type;
   int      measure_type;
   double   global_nonzeros;

   double  *send_buff;
   double  *gather_buff;
 
   /* Local variables */

   int       level;
   int       j;
   HYPRE_BigInt fine_size;
 
   int       min_entries;
   int       max_entries;

   int       num_procs,my_id, num_threads;


   double    min_rowsum;
   double    max_rowsum;
   double    sparse;


   int       i;
   

   HYPRE_BigInt coarse_size;
   int       entries;

   double    avg_entries;
   double    rowsum;

   double    min_weight;
   double    max_weight;

   int       global_min_e;
   int       global_max_e;
   double    global_min_rsum;
   double    global_max_rsum;
   double    global_min_wt;
   double    global_max_wt;

   double  *num_coeffs;
   double  *num_variables;
   double   total_variables; 
   double   operat_cmplxty;
   double   grid_cmplxty;

   /* amg solve params */
   int      max_iter;
   int      cycle_type;    
   int     *num_grid_sweeps;  
   int     *grid_relax_type;   
   int      relax_order;
   int    **grid_relax_points; 
   double  *relax_weight;
   double  *omega;
   double   tol;


   int one = 1;
   int minus_one = -1;
   int zero = 0;
   int smooth_type;
   int smooth_num_levels;
   int agg_num_levels;
   /*int seq_cg = 0;*/
   
   /*if (seq_data)
      seq_cg = 1;*/


   MPI_Comm_size(comm, &num_procs);   
   MPI_Comm_rank(comm,&my_id);
   num_threads = hypre_NumThreads();

   if (my_id == 0)
      printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads);
   A_array = hypre_ParAMGDataAArray(amg_data);
   P_array = hypre_ParAMGDataPArray(amg_data);
   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   coarsen_type = hypre_ParAMGDataCoarsenType(amg_data);
   interp_type = hypre_ParAMGDataInterpType(amg_data);
   measure_type = hypre_ParAMGDataMeasureType(amg_data);
   smooth_type = hypre_ParAMGDataSmoothType(amg_data);
   smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data);
   agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data);


   /*----------------------------------------------------------
    * Get the amg_data data
    *----------------------------------------------------------*/

   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   max_iter   = hypre_ParAMGDataMaxIter(amg_data);
   cycle_type = hypre_ParAMGDataCycleType(amg_data);    
   num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data);  
   grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); 
   relax_order = hypre_ParAMGDataRelaxOrder(amg_data); 
   omega = hypre_ParAMGDataOmega(amg_data); 
   tol = hypre_ParAMGDataTol(amg_data);

   /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/

   send_buff     = hypre_CTAlloc(double, 6);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   gather_buff = hypre_CTAlloc(double,6);    
#else
   gather_buff = hypre_CTAlloc(double,6*num_procs);    
#endif

   if (my_id==0)
   {
      printf("\nBoomerAMG SETUP PARAMETERS:\n\n");
      printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data));
      printf(" Num levels = %d\n\n",num_levels);
      printf(" Strength Threshold = %f\n", 
                         hypre_ParAMGDataStrongThreshold(amg_data));
      printf(" Interpolation Truncation Factor = %f\n", 
                         hypre_ParAMGDataTruncFactor(amg_data));
      printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n", 
                         hypre_ParAMGDataMaxRowSum(amg_data));

      if (coarsen_type == 0)
      {
	printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n");
      }
      else if (abs(coarsen_type) == 1) 
      {
	printf(" Coarsening Type = Ruge\n");
      }
      else if (abs(coarsen_type) == 2) 
      {
	printf(" Coarsening Type = Ruge2B\n");
      }
      else if (abs(coarsen_type) == 3) 
      {
	printf(" Coarsening Type = Ruge3\n");
      }
      else if (abs(coarsen_type) == 4) 
      {
	printf(" Coarsening Type = Ruge 3c \n");
      }
      else if (abs(coarsen_type) == 5) 
      {
	printf(" Coarsening Type = Ruge relax special points \n");
      }
      else if (abs(coarsen_type) == 6) 
      {
	printf(" Coarsening Type = Falgout-CLJP \n");
      }
      else if (abs(coarsen_type) == 8) 
      {
	printf(" Coarsening Type = PMIS \n");
      }
      else if (abs(coarsen_type) == 10) 
      {
	printf(" Coarsening Type = HMIS \n");
      }
      else if (abs(coarsen_type) == 11) 
      {
	printf(" Coarsening Type = Ruge 1st pass only \n");
      }
      else if (abs(coarsen_type) == 9) 
      {
	printf(" Coarsening Type = PMIS fixed random \n");
      }
      else if (abs(coarsen_type) == 7) 
      {
	printf(" Coarsening Type = CLJP, fixed random \n");
      }
      if (coarsen_type > 0) 
      {
	printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n");
      }
      

      if (coarsen_type)
      	printf(" measures are determined %s\n\n", 
                  (measure_type ? "globally" : "locally"));

      if (agg_num_levels)
	printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels);

#ifdef HYPRE_NO_GLOBAL_PARTITION
      printf( "\n No global partition option chosen.\n\n");
#endif

      if (interp_type == 0)
      {
	printf(" Interpolation = modified classical interpolation\n");
      }
      else if (interp_type == 1) 
      {
	printf(" Interpolation = LS interpolation \n");
      }
      else if (interp_type == 2) 
      {
	printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n");
      }
      else if (interp_type == 3) 
      {
	printf(" Interpolation = direct interpolation with separation of weights\n");
      }
      else if (interp_type == 4) 
      {
	printf(" Interpolation = multipass interpolation\n");
      }
      else if (interp_type == 5) 
      {
	printf(" Interpolation = multipass interpolation with separation of weights\n");
      }
      else if (interp_type == 6) 
      {
	printf(" Interpolation = extended+i interpolation\n");
      }
      else if (interp_type == 7) 
      {
	printf(" Interpolation = extended+i interpolation (only when needed)\n");
      }
      else if (interp_type == 8) 
      {
	printf(" Interpolation = standard interpolation\n");
      }
      else if (interp_type == 9) 
      {
	printf(" Interpolation = standard interpolation with separation of weights\n");
      }
      else if (interp_type == 12) 
      {
	printf(" FF interpolation \n");
      }
      else if (interp_type == 13) 
      {
	printf(" FF1 interpolation \n");
      }

      {
         printf( "\nOperator Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                  nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev        rows   entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("==================================\n");
#else      
      printf("            nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev   rows  entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("============================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   num_coeffs = hypre_CTAlloc(double,num_levels);

   num_variables = hypre_CTAlloc(double,num_levels);

   for (level = 0; level < num_levels; level++)
   { 

      {
         A_diag = hypre_ParCSRMatrixDiag(A_array[level]);
         A_diag_data = hypre_CSRMatrixData(A_diag);
         A_diag_i = hypre_CSRMatrixI(A_diag);
         
         A_offd = hypre_ParCSRMatrixOffd(A_array[level]);   
         A_offd_data = hypre_CSRMatrixData(A_offd);
         A_offd_i = hypre_CSRMatrixI(A_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]);
         global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]);
         num_coeffs[level] = global_nonzeros;
         num_variables[level] = (double) fine_size;
         
         sparse = global_nonzeros /((double) fine_size * (double) fine_size);

         min_entries = 0;
         max_entries = 0;
         min_rowsum = 0.0;
         max_rowsum = 0.0;
         
         if (hypre_CSRMatrixNumRows(A_diag))
         {
            min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]);
            for (j = A_diag_i[0]; j < A_diag_i[1]; j++)
               min_rowsum += A_diag_data[j];
            for (j = A_offd_i[0]; j < A_offd_i[1]; j++)
               min_rowsum += A_offd_data[j];
            
            max_rowsum = min_rowsum;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++)
            {
               entries = (A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++)
                  rowsum += A_diag_data[i];
               
               for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++)
                  rowsum += A_offd_data[i];
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         }
         avg_entries = global_nonzeros / ((double) fine_size);
      }
      
#ifdef HYPRE_NO_GLOBAL_PARTITION       

       numrows = (int)(row_starts[1]-row_starts[0]);
       if (!numrows) /* if we don't have any rows, then don't have this count toward
                         min row sum or min num entries */
       {
          min_entries = 1000000;
          min_rowsum =  1.0e7;
       }
       
       send_buff[0] = - (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = - min_rowsum;
       send_buff[3] = max_rowsum;

       MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm);
       
       if (my_id ==0)
       {
          global_min_e = - gather_buff[0];
          global_max_e = gather_buff[1];
          global_min_rsum = - gather_buff[2];
          global_max_rsum = gather_buff[3];
#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }
       
#else

       send_buff[0] = (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = min_rowsum;
       send_buff[3] = max_rowsum;
       
       MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm);

       if (my_id == 0)
       {
          global_min_e = 1000000;
          global_max_e = 0;
          global_min_rsum = 1.0e7;
          global_max_rsum = 0.0;
          for (j = 0; j < num_procs; j++)
          {
             numrows = row_starts[j+1]-row_starts[j];
             if (numrows)
             {
                global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]);
                global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]);
             }
             global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]);
             global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]);
          }

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }

#endif

        
   }

       
   if (my_id == 0)
   {
      {
         printf( "\n\nInterpolation Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                             entries/row    min     max");
      printf("         row sums\n");
      printf("lev        rows x cols          min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("======================================\n");
#else      
      printf("                 entries/row    min     max");
      printf("         row sums\n");
      printf("lev  rows cols    min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("==========================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/


   for (level = 0; level < num_levels-1; level++)
   {
    
      {
         P_diag = hypre_ParCSRMatrixDiag(P_array[level]);
         P_diag_data = hypre_CSRMatrixData(P_diag);
         P_diag_i = hypre_CSRMatrixI(P_diag);
         
         P_offd = hypre_ParCSRMatrixOffd(P_array[level]);   
         P_offd_data = hypre_CSRMatrixData(P_offd);
         P_offd_i = hypre_CSRMatrixI(P_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]);
         coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]);
         global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]);
         
         min_weight = 1.0;
         max_weight = 0.0;
         max_rowsum = 0.0;
         min_rowsum = 0.0;
         min_entries = 0;
         max_entries = 0;
         
         if (hypre_CSRMatrixNumRows(P_diag))
         {
            if (hypre_CSRMatrixNumCols(P_diag)) min_weight = P_diag_data[0];
            for (j = P_diag_i[0]; j < P_diag_i[1]; j++)
            {
               min_weight = hypre_min(min_weight, P_diag_data[j]);
               if (P_diag_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_diag_data[j]);
               min_rowsum += P_diag_data[j];
            }
            for (j = P_offd_i[0]; j < P_offd_i[1]; j++)
            {        
               min_weight = hypre_min(min_weight, P_offd_data[j]); 
               if (P_offd_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_offd_data[j]);     
               min_rowsum += P_offd_data[j];
            }
            
            max_rowsum = min_rowsum;
            
            min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]); 
            max_entries = 0;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++)
            {
               entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_diag_data[i]);
                  if (P_diag_data[i] != 1.0)
                     max_weight = hypre_max(max_weight, P_diag_data[i]);
                  rowsum += P_diag_data[i];
               }
               
               for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_offd_data[i]);
                  if (P_offd_data[i] != 1.0) 
                     max_weight = hypre_max(max_weight, P_offd_data[i]);
                  rowsum += P_offd_data[i];
               }
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         
         }
         avg_entries = ((double) global_nonzeros) / ((double) fine_size);
      }

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum =  1.0e7;
         min_weight = 1.0e7;
       }
       
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = - min_weight;
      send_buff[5] = max_weight;

      MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id == 0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = -gather_buff[2];
         global_max_rsum = gather_buff[3];
         global_min_wt = -gather_buff[4];
         global_max_wt = gather_buff[5];

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
          printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }


#else
      
      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = min_weight;
      send_buff[5] = max_weight;
      
      MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm);
      
      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         global_min_wt = 1.0e7;
         global_max_wt = 0.0;
         
         for (j = 0; j < num_procs; j++)
         {
            numrows = row_starts[j+1] - row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]);
               global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]);
            global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]);
         }
         
#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }

#endif

   }


   total_variables = 0;
   operat_cmplxty = 0;
   for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++)
   {
      operat_cmplxty +=  num_coeffs[j] / num_coeffs[0];
      total_variables += num_variables[j];
   }
   if (num_variables[0] != 0)
      grid_cmplxty = total_variables / num_variables[0];
 
   if (my_id == 0 )
   {
      printf("\n\n     Complexity:    grid = %f\n",grid_cmplxty);
      printf("                operator = %f\n",operat_cmplxty);
   }

   if (my_id == 0) printf("\n\n");

   if (my_id == 0)
   { 
      printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n");
      printf( "  Maximum number of cycles:         %d \n",max_iter);
      printf( "  Stopping Tolerance:               %e \n",tol); 
      printf( "  Cycle type (1 = V, 2 = W, etc.):  %d\n\n", cycle_type);
      printf( "  Relaxation Parameters:\n");
      printf( "   Visiting Grid:                     down   up  coarse\n");
      printf( "            Number of partial sweeps: %4d   %2d  %4d \n",
              num_grid_sweeps[1],
              num_grid_sweeps[2],num_grid_sweeps[3]);
      printf( "   Type 0=Jac, 3=hGS, 6=hSGS, 9=GE:   %4d   %2d  %4d \n",
              grid_relax_type[1],
              grid_relax_type[2],grid_relax_type[3]);
#if 1 /* TO DO: may not want this to print if CG in the coarse grid */
      printf( "   Point types, partial sweeps (1=C, -1=F):\n");
      if (grid_relax_points)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", grid_relax_points[1][j]);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", grid_relax_points[2][j]);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", grid_relax_points[3][j]);
         printf( "\n\n");
      }
      else if (relax_order == 1)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d  %2d", one, minus_one);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d  %2d", minus_one, one);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
      else 
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
#endif
      if (smooth_type == 6)
         for (j=0; j < smooth_num_levels; j++)
            printf( " Schwarz Relaxation Weight %f level %d\n",
			hypre_ParAMGDataSchwarzRlxWeight(amg_data),j);
      for (j=0; j < num_levels; j++)
         if (relax_weight[j] != 1)
	       printf( " Relaxation Weight %f level %d\n",relax_weight[j],j);
      for (j=0; j < num_levels; j++)
         if (omega[j] != 1)
               printf( " Outer relaxation weight %f level %d\n",omega[j],j);
   }


   /*if (seq_cg) 
   {
      hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0], 
                             operat_cmplxty, grid_cmplxty );
   }*/
   




   hypre_TFree(num_coeffs);
   hypre_TFree(num_variables);
   hypre_TFree(send_buff);
   hypre_TFree(gather_buff);
   
   return(0);
}  
Beispiel #7
0
HYPRE_Int
hypre_ParCSRMatrixMatvecT( HYPRE_Complex       alpha,
                           hypre_ParCSRMatrix *A,
                           hypre_ParVector    *x,
                           HYPRE_Complex       beta,
                           hypre_ParVector    *y )
{
   hypre_ParCSRCommHandle **comm_handle;
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix     *diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix     *offd = hypre_ParCSRMatrixOffd(A);
   hypre_Vector        *x_local = hypre_ParVectorLocalVector(x);
   hypre_Vector        *y_local = hypre_ParVectorLocalVector(y);
   hypre_Vector        *y_tmp;
   HYPRE_Int            vecstride = hypre_VectorVectorStride( y_local );
   HYPRE_Int            idxstride = hypre_VectorIndexStride( y_local );
   HYPRE_Complex       *y_tmp_data, **y_buf_data;
   HYPRE_Complex       *y_local_data = hypre_VectorData(y_local);

   HYPRE_Int         num_rows  = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int         num_cols  = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int         num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int         x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int         y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int         num_vectors = hypre_VectorNumVectors(y_local);

   HYPRE_Int         i, j, jv, index, start, num_sends;

   HYPRE_Int         ierr  = 0;

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  MatvecT returns ierr = 1 if
    *  length of X doesn't equal the number of rows of A,
    *  ierr = 2 if the length of Y doesn't equal the number of 
    *  columns of A, and ierr = 3 if both are true.
    *
    *  Because temporary vectors are often used in MatvecT, none of 
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/
 
   if (num_rows != x_size)
      ierr = 1;

   if (num_cols != y_size)
      ierr = 2;

   if (num_rows != x_size && num_cols != y_size)
      ierr = 3;
   /*-----------------------------------------------------------------------
    *-----------------------------------------------------------------------*/

   comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors);

   if ( num_vectors==1 )
   {
      y_tmp = hypre_SeqVectorCreate(num_cols_offd);
   }
   else
   {
      y_tmp = hypre_SeqMultiVectorCreate(num_cols_offd,num_vectors);
   }
   hypre_SeqVectorInitialize(y_tmp);

   /*---------------------------------------------------------------------
    * If there exists no CommPkg for A, a CommPkg is generated using
    * equally load balanced partitionings
    *--------------------------------------------------------------------*/
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A); 
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   y_buf_data = hypre_CTAlloc( HYPRE_Complex*, num_vectors );
   for ( jv=0; jv<num_vectors; ++jv )
      y_buf_data[jv] = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart
                                     (comm_pkg, num_sends));
   y_tmp_data = hypre_VectorData(y_tmp);
   y_local_data = hypre_VectorData(y_local);

   hypre_assert( idxstride==1 ); /* only 'column' storage of multivectors
                                  * implemented so far */

   if (num_cols_offd) hypre_CSRMatrixMatvecT(alpha, offd, x_local, 0.0, y_tmp);

   for ( jv=0; jv<num_vectors; ++jv )
   {
      /* this is where we assume multivectors are 'column' storage */
      comm_handle[jv] = hypre_ParCSRCommHandleCreate
         ( 2, comm_pkg, &(y_tmp_data[jv*num_cols_offd]), y_buf_data[jv] );
   }

   hypre_CSRMatrixMatvecT(alpha, diag, x_local, beta, y_local);

   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_ParCSRCommHandleDestroy(comm_handle[jv]);
      comm_handle[jv] = NULL;
   }
   hypre_TFree(comm_handle);

   if ( num_vectors==1 )
   {
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            y_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]
               += y_buf_data[0][index++];
      }
   }
   else
      for ( jv=0; jv<num_vectors; ++jv )
      {
         index = 0;
         for (i = 0; i < num_sends; i++)
         {
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
               y_local_data[ jv*vecstride +
                             idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j) ]
                  += y_buf_data[jv][index++];
         }
      }
        
   hypre_SeqVectorDestroy(y_tmp);
   y_tmp = NULL;
   for ( jv=0; jv<num_vectors; ++jv ) hypre_TFree(y_buf_data[jv]);
   hypre_TFree(y_buf_data);

   return ierr;
}
Beispiel #8
0
hypre_ParCSRBlockMatrix *
hypre_ParCSRBlockMatrixConvertFromParCSRMatrix(hypre_ParCSRMatrix *matrix,
                                               HYPRE_Int matrix_C_block_size )
{
   MPI_Comm comm = hypre_ParCSRMatrixComm(matrix);
   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(matrix);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix);
   HYPRE_Int global_num_rows = hypre_ParCSRMatrixGlobalNumRows(matrix);
   HYPRE_Int global_num_cols = hypre_ParCSRMatrixGlobalNumCols(matrix);
   HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(matrix);
   HYPRE_Int *col_starts = hypre_ParCSRMatrixColStarts(matrix);
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(matrix);
   HYPRE_Int *map_to_node=NULL, *counter=NULL, *col_in_j_map=NULL;
   HYPRE_Int *matrix_C_col_map_offd = NULL;
   
   HYPRE_Int matrix_C_num_cols_offd;
   HYPRE_Int matrix_C_num_nonzeros_offd;
   HYPRE_Int num_rows, num_nodes;
   
   HYPRE_Int *offd_i        = hypre_CSRMatrixI(offd);
   HYPRE_Int *offd_j        = hypre_CSRMatrixJ(offd);
   HYPRE_Complex * offd_data = hypre_CSRMatrixData(offd);

   hypre_ParCSRBlockMatrix *matrix_C;
   HYPRE_Int *matrix_C_row_starts;
   HYPRE_Int *matrix_C_col_starts;
   hypre_CSRBlockMatrix *matrix_C_diag;
   hypre_CSRBlockMatrix *matrix_C_offd;

   HYPRE_Int *matrix_C_offd_i=NULL, *matrix_C_offd_j = NULL;
   HYPRE_Complex *matrix_C_offd_data = NULL;
   
   HYPRE_Int num_procs, i, j, k, k_map, count, index, start_index, pos, row;
   
   hypre_MPI_Comm_size(comm,&num_procs);

#ifdef HYPRE_NO_GLOBAL_PARTITION
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, 2);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, 2);
   for(i = 0; i < 2; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]/matrix_C_block_size;
      matrix_C_col_starts[i] = col_starts[i]/matrix_C_block_size;
   }
#else
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   for(i = 0; i < num_procs + 1; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]/matrix_C_block_size;
      matrix_C_col_starts[i] = col_starts[i]/matrix_C_block_size;
   }
#endif

   /************* create the diagonal part ************/
   matrix_C_diag = hypre_CSRBlockMatrixConvertFromCSRMatrix(diag, 
                                                            matrix_C_block_size);
 
   /*******  the offd part *******************/

   /* can't use the same function for the offd part - because this isn't square
      and the offd j entries aren't global numbering (have to consider the offd
      map) - need to look at col_map_offd first */

   /* figure out the new number of offd columns (num rows is same as diag) */
   num_cols_offd = hypre_CSRMatrixNumCols(offd);
   num_rows = hypre_CSRMatrixNumRows(diag);
   num_nodes =  num_rows/matrix_C_block_size;
   
   matrix_C_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes + 1);

   matrix_C_num_cols_offd = 0;
   matrix_C_offd_i[0] = 0;
   matrix_C_num_nonzeros_offd = 0;

   if (num_cols_offd)
   {
      map_to_node = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      matrix_C_num_cols_offd = 1;
      map_to_node[0] = col_map_offd[0]/matrix_C_block_size;
      for (i=1; i < num_cols_offd; i++)
      {
         map_to_node[i] = col_map_offd[i]/matrix_C_block_size;
         if (map_to_node[i] > map_to_node[i-1]) matrix_C_num_cols_offd++;
      }

      matrix_C_col_map_offd = hypre_CTAlloc(HYPRE_Int, matrix_C_num_cols_offd);
      col_in_j_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

      matrix_C_col_map_offd[0] = map_to_node[0];
      col_in_j_map[0] = 0;
      count = 1;
      j = 1;
       
      /* fill in the col_map_off_d - these are global numbers.  Then we need to
         map these to j entries (these have local numbers) */
      for (i=1; i < num_cols_offd; i++)
      {
         if (map_to_node[i] > map_to_node[i-1])
         {
            matrix_C_col_map_offd[count++] = map_to_node[i];
         }
         col_in_j_map[j++] = count - 1;
      }
      
      /* now figure the nonzeros */   
      matrix_C_num_nonzeros_offd = 0;
      counter = hypre_CTAlloc(HYPRE_Int, matrix_C_num_cols_offd);
      for (i=0; i < matrix_C_num_cols_offd; i++)
         counter[i] = -1;

      for (i=0; i < num_nodes; i++) /* for each block row */
      {
         matrix_C_offd_i[i] = matrix_C_num_nonzeros_offd;
         for (j=0; j < matrix_C_block_size; j++)
         {
            row = i*matrix_C_block_size+j;
            for (k=offd_i[row]; k < offd_i[row+1]; k++) /* go through single row */
            {
               k_map = col_in_j_map[offd_j[k]]; /*nodal col - see if this has
                                                  been in this block row (i)
                                                  already*/
               
               if (counter[k_map] < i) /* not yet counted for this nodal row */
               {
                  counter[k_map] = i;
                  matrix_C_num_nonzeros_offd++;
               }
            }
         }
      }
      /* fill in final i entry */
      matrix_C_offd_i[num_nodes] = matrix_C_num_nonzeros_offd;
   }

   /* create offd matrix */
   matrix_C_offd = hypre_CSRBlockMatrixCreate(matrix_C_block_size, num_nodes,
                                              matrix_C_num_cols_offd,   
                                              matrix_C_num_nonzeros_offd);

   /* assign i */
   hypre_CSRBlockMatrixI(matrix_C_offd) = matrix_C_offd_i;
   

   /* create (and allocate j and data) */
   if (matrix_C_num_nonzeros_offd)
   {
      matrix_C_offd_j = hypre_CTAlloc(HYPRE_Int, matrix_C_num_nonzeros_offd);   
      matrix_C_offd_data =
         hypre_CTAlloc(HYPRE_Complex,
                       matrix_C_num_nonzeros_offd*matrix_C_block_size*
                       matrix_C_block_size);  
      hypre_CSRBlockMatrixJ(matrix_C_offd) = matrix_C_offd_j;
      hypre_CSRMatrixData(matrix_C_offd) = matrix_C_offd_data;
   
      for (i=0; i < matrix_C_num_cols_offd; i++)
         counter[i] = -1;

      index = 0; /*keep track of entry in matrix_C_offd_j*/
      start_index = 0;
      for (i=0; i < num_nodes; i++) /* for each block row */
      {
         
         for (j=0; j < matrix_C_block_size; j++) /* for each row in block */
         {
            row = i*matrix_C_block_size+j;
            for (k=offd_i[row]; k < offd_i[row+1]; k++) /* go through single row's cols */
            {
               k_map = col_in_j_map[offd_j[k]]; /*nodal col  for off_d */
               if (counter[k_map] < start_index) /* not yet counted for this nodal row */
               {
                  counter[k_map] = index;
                  matrix_C_offd_j[index] = k_map;
                  /*copy the data: which position (corresponds to j array) + which row + which col */                
                  pos =  (index * matrix_C_block_size * matrix_C_block_size) + (j * matrix_C_block_size) + 
                     col_map_offd[offd_j[k]]%matrix_C_block_size;
                  matrix_C_offd_data[pos] = offd_data[k];
                  index ++;
               }
               else  /* this col has already been listed for this row */
               {

                  /*copy the data: which position (corresponds to j array) + which row + which col */                
                  pos =  (counter[k_map]* matrix_C_block_size * matrix_C_block_size) + (j * matrix_C_block_size) + 
                     col_map_offd[offd_j[k]]%matrix_C_block_size;
                  matrix_C_offd_data[pos] = offd_data[k];
               }
            }
         }
         start_index = index; /* first index for current nodal row */
      }
   }

   /* *********create the new matrix  *************/
   matrix_C = hypre_ParCSRBlockMatrixCreate(comm, matrix_C_block_size, 
                                            global_num_rows/matrix_C_block_size, 
                                            global_num_cols/matrix_C_block_size, matrix_C_row_starts, 
                                            matrix_C_col_starts, matrix_C_num_cols_offd, 
                                            hypre_CSRBlockMatrixNumNonzeros(matrix_C_diag), 
                                            matrix_C_num_nonzeros_offd);

   /* use the diag and off diag matrices we have already created */
   hypre_CSRBlockMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
   hypre_ParCSRBlockMatrixDiag(matrix_C) = matrix_C_diag;
   hypre_CSRBlockMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
   hypre_ParCSRBlockMatrixOffd(matrix_C) = matrix_C_offd;

   hypre_ParCSRMatrixColMapOffd(matrix_C) = matrix_C_col_map_offd;

   /* *********don't bother to copy the comm_pkg *************/
   
   hypre_ParCSRBlockMatrixCommPkg(matrix_C) = NULL;
 
   /* CLEAN UP !!!! */
   hypre_TFree(map_to_node);
   hypre_TFree(col_in_j_map);
   hypre_TFree(counter);

   return matrix_C;
}
Beispiel #9
0
hypre_CSRBlockMatrix * 
hypre_ParCSRBlockMatrixExtractBExt(hypre_ParCSRBlockMatrix *B, 
                                   hypre_ParCSRBlockMatrix *A, HYPRE_Int data)
{
   MPI_Comm comm = hypre_ParCSRBlockMatrixComm(B);
   HYPRE_Int first_col_diag = hypre_ParCSRBlockMatrixFirstColDiag(B);
   HYPRE_Int *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(B);
   HYPRE_Int block_size = hypre_ParCSRBlockMatrixBlockSize(B);

   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
   HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
   HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
   HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
   HYPRE_Int *send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
 
   hypre_ParCSRCommHandle *comm_handle;
   hypre_ParCSRCommPkg *tmp_comm_pkg;

   hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(B);

   HYPRE_Int *diag_i = hypre_CSRBlockMatrixI(diag);
   HYPRE_Int *diag_j = hypre_CSRBlockMatrixJ(diag);
   HYPRE_Complex *diag_data = hypre_CSRBlockMatrixData(diag);

   hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(B);

   HYPRE_Int *offd_i = hypre_CSRBlockMatrixI(offd);
   HYPRE_Int *offd_j = hypre_CSRBlockMatrixJ(offd);
   HYPRE_Complex *offd_data = hypre_CSRBlockMatrixData(offd);

   HYPRE_Int *B_int_i;
   HYPRE_Int *B_int_j;
   HYPRE_Complex *B_int_data;

   HYPRE_Int num_cols_B, num_nonzeros;
   HYPRE_Int num_rows_B_ext;
   HYPRE_Int num_procs, my_id;

   hypre_CSRBlockMatrix *B_ext;

   HYPRE_Int *B_ext_i;
   HYPRE_Int *B_ext_j;
   HYPRE_Complex *B_ext_data;
 
   HYPRE_Int *jdata_recv_vec_starts;
   HYPRE_Int *jdata_send_map_starts;
 
   HYPRE_Int i, j, k, l, counter, bnnz;
   HYPRE_Int start_index;
   HYPRE_Int j_cnt, jrow;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   bnnz = block_size * block_size;
   num_cols_B = hypre_ParCSRMatrixGlobalNumCols(B);
   num_rows_B_ext = recv_vec_starts[num_recvs];
   B_int_i = hypre_CTAlloc(HYPRE_Int, send_map_starts[num_sends]+1);
   B_ext_i = hypre_CTAlloc(HYPRE_Int, num_rows_B_ext+1);
/*--------------------------------------------------------------------------
 * generate B_int_i through adding number of row-elements of offd and diag
 * for corresponding rows. B_int_i[j+1] contains the number of elements of
 * a row j (which is determined through send_map_elmts) 
 *--------------------------------------------------------------------------*/
   B_int_i[0] = 0;
   j_cnt = 0;
   num_nonzeros = 0;
   for (i=0; i < num_sends; i++)
   {
      for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
      {
         jrow = send_map_elmts[j];
         B_int_i[++j_cnt] = offd_i[jrow+1] - offd_i[jrow]
            + diag_i[jrow+1] - diag_i[jrow];
         num_nonzeros += B_int_i[j_cnt];
      }
   }

/*--------------------------------------------------------------------------
 * initialize communication 
 *--------------------------------------------------------------------------*/
   comm_handle = hypre_ParCSRCommHandleCreate(11,comm_pkg,
                                              &B_int_i[1],&B_ext_i[1]);

   B_int_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
   if (data) B_int_data = hypre_CTAlloc(HYPRE_Complex, num_nonzeros*bnnz);

   jdata_send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1);
   jdata_recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1);
   start_index = B_int_i[0];
   jdata_send_map_starts[0] = start_index;
   counter = 0;
   for (i=0; i < num_sends; i++)
   {
      num_nonzeros = counter;
      for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
      {
         jrow = send_map_elmts[j];
         for (k=diag_i[jrow]; k < diag_i[jrow+1]; k++) 
         {
            B_int_j[counter] = diag_j[k]+first_col_diag;
            if (data) {
               for(l = 0; l < bnnz; l++) 
                  B_int_data[counter*bnnz+ l] = diag_data[k*bnnz+ l];
            }
            counter++;
         }
         for (k=offd_i[jrow]; k < offd_i[jrow+1]; k++) 
         {
            B_int_j[counter] = col_map_offd[offd_j[k]];
            if (data) {
               for(l = 0; l < bnnz; l++)
                  B_int_data[counter*bnnz+ l] = 
                     offd_data[k*bnnz+ l];
            }
            counter++;
         }
      }
      num_nonzeros = counter - num_nonzeros;
      start_index += num_nonzeros;
      jdata_send_map_starts[i+1] = start_index;
   }

   tmp_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
   hypre_ParCSRCommPkgComm(tmp_comm_pkg) = comm;
   hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
   hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
   hypre_ParCSRCommPkgSendProcs(tmp_comm_pkg) = hypre_ParCSRCommPkgSendProcs(comm_pkg);
   hypre_ParCSRCommPkgRecvProcs(tmp_comm_pkg) = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
   hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = jdata_send_map_starts; 

   hypre_ParCSRCommHandleDestroy(comm_handle);
   comm_handle = NULL;

/*--------------------------------------------------------------------------
 * after communication exchange B_ext_i[j+1] contains the number of elements
 * of a row j ! 
 * evaluate B_ext_i and compute num_nonzeros for B_ext 
 *--------------------------------------------------------------------------*/

   for (i=0; i < num_recvs; i++)
      for (j = recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         B_ext_i[j+1] += B_ext_i[j];

   num_nonzeros = B_ext_i[num_rows_B_ext];

   B_ext = hypre_CSRBlockMatrixCreate(block_size, num_rows_B_ext, num_cols_B, 
                                      num_nonzeros);
   B_ext_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
   if (data) B_ext_data = hypre_CTAlloc(HYPRE_Complex, num_nonzeros*bnnz);

   for (i=0; i < num_recvs; i++)
   {
      start_index = B_ext_i[recv_vec_starts[i]];
      num_nonzeros = B_ext_i[recv_vec_starts[i+1]]-start_index;
      jdata_recv_vec_starts[i+1] = B_ext_i[recv_vec_starts[i+1]];
   }

   hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = jdata_recv_vec_starts;

   comm_handle = hypre_ParCSRCommHandleCreate(11,tmp_comm_pkg,B_int_j,B_ext_j);
   hypre_ParCSRCommHandleDestroy(comm_handle);
   comm_handle = NULL;

   if (data)
   {
      comm_handle = hypre_ParCSRBlockCommHandleCreate(1, bnnz,tmp_comm_pkg,
                                                      B_int_data, B_ext_data);
      hypre_ParCSRBlockCommHandleDestroy(comm_handle);
      comm_handle = NULL;
   }

   hypre_CSRBlockMatrixI(B_ext) = B_ext_i;
   hypre_CSRBlockMatrixJ(B_ext) = B_ext_j;
   if (data) hypre_CSRBlockMatrixData(B_ext) = B_ext_data;

   hypre_TFree(B_int_i);
   hypre_TFree(B_int_j);
   if (data) hypre_TFree(B_int_data);
   hypre_TFree(jdata_send_map_starts);
   hypre_TFree(jdata_recv_vec_starts);
   hypre_TFree(tmp_comm_pkg);

   return B_ext;
}
Beispiel #10
0
void hypre_ParCSRMatrixSplit(hypre_ParCSRMatrix *A,
                             HYPRE_Int nr, HYPRE_Int nc,
                             hypre_ParCSRMatrix **blocks,
                             int interleaved_rows, int interleaved_cols)
{
    HYPRE_Int i, j, k;

    MPI_Comm comm = hypre_ParCSRMatrixComm(A);

    hypre_CSRMatrix *Adiag = hypre_ParCSRMatrixDiag(A);
    hypre_CSRMatrix *Aoffd = hypre_ParCSRMatrixOffd(A);

    HYPRE_Int global_rows = hypre_ParCSRMatrixGlobalNumRows(A);
    HYPRE_Int global_cols = hypre_ParCSRMatrixGlobalNumCols(A);

    HYPRE_Int local_rows = hypre_CSRMatrixNumRows(Adiag);
    HYPRE_Int local_cols = hypre_CSRMatrixNumCols(Adiag);
    HYPRE_Int offd_cols = hypre_CSRMatrixNumCols(Aoffd);

    hypre_assert(local_rows % nr == 0 && local_cols % nc == 0);
    hypre_assert(global_rows % nr == 0 && global_cols % nc == 0);

    HYPRE_Int block_rows = local_rows / nr;
    HYPRE_Int block_cols = local_cols / nc;
    HYPRE_Int num_blocks = nr * nc;

    /* mark local rows and columns with block number */
    HYPRE_Int *row_block_num = hypre_TAlloc(HYPRE_Int, local_rows);
    HYPRE_Int *col_block_num = hypre_TAlloc(HYPRE_Int, local_cols);

    for (i = 0; i < local_rows; i++)
    {
        row_block_num[i] = interleaved_rows ? (i % nr) : (i / block_rows);
    }
    for (i = 0; i < local_cols; i++)
    {
        col_block_num[i] = interleaved_cols ? (i % nc) : (i / block_cols);
    }

    /* determine the block numbers for offd columns */
    HYPRE_Int* offd_col_block_num = hypre_TAlloc(HYPRE_Int, offd_cols);
    hypre_ParCSRCommHandle *comm_handle;
    HYPRE_Int *int_buf_data;
    {
        /* make sure A has a communication package */
        hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
        if (!comm_pkg)
        {
            hypre_MatvecCommPkgCreate(A);
            comm_pkg = hypre_ParCSRMatrixCommPkg(A);
        }

        /* calculate the final global column numbers for each block */
        HYPRE_Int *count = hypre_CTAlloc(HYPRE_Int, nc);
        HYPRE_Int *block_global_col = hypre_TAlloc(HYPRE_Int, local_cols);
        HYPRE_Int first_col = hypre_ParCSRMatrixFirstColDiag(A) / nc;
        for (i = 0; i < local_cols; i++)
        {
            block_global_col[i] = first_col + count[col_block_num[i]]++;
        }
        hypre_TFree(count);

        /* use a Matvec communication pattern to determine offd_col_block_num */
        HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
        int_buf_data = hypre_CTAlloc(HYPRE_Int,
                                     hypre_ParCSRCommPkgSendMapStart(comm_pkg,
                                             num_sends));
        HYPRE_Int start, index = 0;
        for (i = 0; i < num_sends; i++)
        {
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            {
                k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j);
                int_buf_data[index++] = col_block_num[k] + nc*block_global_col[k];
            }
        }
        hypre_TFree(block_global_col);

        comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
                      offd_col_block_num);
    }

    /* create the block matrices */
    HYPRE_Int num_procs = 1;
    if (!hypre_ParCSRMatrixAssumedPartition(A))
    {
        hypre_MPI_Comm_size(comm, &num_procs);
    }

    HYPRE_Int *row_starts = hypre_TAlloc(HYPRE_Int, num_procs+1);
    HYPRE_Int *col_starts = hypre_TAlloc(HYPRE_Int, num_procs+1);
    for (i = 0; i <= num_procs; i++)
    {
        row_starts[i] = hypre_ParCSRMatrixRowStarts(A)[i] / nr;
        col_starts[i] = hypre_ParCSRMatrixColStarts(A)[i] / nc;
    }

    for (i = 0; i < num_blocks; i++)
    {
        blocks[i] = hypre_ParCSRMatrixCreate(comm,
                                             global_rows/nr, global_cols/nc,
                                             row_starts, col_starts, 0, 0, 0);
    }

    /* split diag part */
    hypre_CSRMatrix **csr_blocks = hypre_TAlloc(hypre_CSRMatrix*, nr*nc);
    hypre_CSRMatrixSplit(Adiag, nr, nc, row_block_num, col_block_num,
                         csr_blocks);

    for (i = 0; i < num_blocks; i++)
    {
        hypre_TFree(hypre_ParCSRMatrixDiag(blocks[i]));
        hypre_ParCSRMatrixDiag(blocks[i]) = csr_blocks[i];
    }

    /* finish communication, receive offd_col_block_num */
    hypre_ParCSRCommHandleDestroy(comm_handle);
    hypre_TFree(int_buf_data);

    /* decode global offd column numbers */
    HYPRE_Int* offd_global_col = hypre_TAlloc(HYPRE_Int, offd_cols);
    for (i = 0; i < offd_cols; i++)
    {
        offd_global_col[i] = offd_col_block_num[i] / nc;
        offd_col_block_num[i] %= nc;
    }

    /* split offd part */
    hypre_CSRMatrixSplit(Aoffd, nr, nc, row_block_num, offd_col_block_num,
                         csr_blocks);

    for (i = 0; i < num_blocks; i++)
    {
        hypre_TFree(hypre_ParCSRMatrixOffd(blocks[i]));
        hypre_ParCSRMatrixOffd(blocks[i]) = csr_blocks[i];
    }

    hypre_TFree(csr_blocks);
    hypre_TFree(col_block_num);
    hypre_TFree(row_block_num);

    /* update block col-maps */
    for (int bi = 0; bi < nr; bi++)
    {
        for (int bj = 0; bj < nc; bj++)
        {
            hypre_ParCSRMatrix *block = blocks[bi*nc + bj];
            hypre_CSRMatrix *block_offd = hypre_ParCSRMatrixOffd(block);
            HYPRE_Int block_offd_cols = hypre_CSRMatrixNumCols(block_offd);

            HYPRE_Int *block_col_map = hypre_TAlloc(HYPRE_Int, block_offd_cols);
            for (i = j = 0; i < offd_cols; i++)
            {
                HYPRE_Int bn = offd_col_block_num[i];
                if (bn == bj) {
                    block_col_map[j++] = offd_global_col[i];
                }
            }
            hypre_assert(j == block_offd_cols);

            hypre_ParCSRMatrixColMapOffd(block) = block_col_map;
        }
    }

    hypre_TFree(offd_global_col);
    hypre_TFree(offd_col_block_num);

    /* finish the new matrices, make them own all the stuff */
    for (i = 0; i < num_blocks; i++)
    {
        hypre_ParCSRMatrixSetNumNonzeros(blocks[i]);
        hypre_MatvecCommPkgCreate(blocks[i]);

        hypre_ParCSRMatrixOwnsData(blocks[i]) = 1;

        /* only the first block will own the row/col_starts */
        hypre_ParCSRMatrixOwnsRowStarts(blocks[i]) = !i;
        hypre_ParCSRMatrixOwnsColStarts(blocks[i]) = !i;
    }
}
Beispiel #11
0
/*
  Function:  hypre_ParCSRMatrixEliminateAAe

                    (input)                  (output)

                / A_ii | A_ib \          / A_ii |  0   \
            A = | -----+----- |   --->   | -----+----- |
                \ A_bi | A_bb /          \   0  |  I   /


                                         /   0  |   A_ib   \
                                    Ae = | -----+--------- |
                                         \ A_bi | A_bb - I /

*/
void hypre_ParCSRMatrixEliminateAAe(hypre_ParCSRMatrix *A,
                                    hypre_ParCSRMatrix **Ae,
                                    HYPRE_Int num_rowscols_to_elim,
                                    HYPRE_Int *rowscols_to_elim)
{
    HYPRE_Int i, j, k;

    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
    HYPRE_Int A_diag_nrows  = hypre_CSRMatrixNumRows(A_diag);
    HYPRE_Int A_offd_ncols  = hypre_CSRMatrixNumCols(A_offd);

    *Ae = hypre_ParCSRMatrixCreate(hypre_ParCSRMatrixComm(A),
                                   hypre_ParCSRMatrixGlobalNumRows(A),
                                   hypre_ParCSRMatrixGlobalNumCols(A),
                                   hypre_ParCSRMatrixRowStarts(A),
                                   hypre_ParCSRMatrixColStarts(A),
                                   0, 0, 0);

    hypre_ParCSRMatrixSetRowStartsOwner(*Ae, 0);
    hypre_ParCSRMatrixSetColStartsOwner(*Ae, 0);

    hypre_CSRMatrix *Ae_diag = hypre_ParCSRMatrixDiag(*Ae);
    hypre_CSRMatrix *Ae_offd = hypre_ParCSRMatrixOffd(*Ae);
    HYPRE_Int Ae_offd_ncols;

    HYPRE_Int  num_offd_cols_to_elim;
    HYPRE_Int  *offd_cols_to_elim;

    HYPRE_Int  *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
    HYPRE_Int  *Ae_col_map_offd;

    HYPRE_Int  *col_mark;
    HYPRE_Int  *col_remap;

    /* figure out which offd cols should be eliminated */
    {
        hypre_ParCSRCommHandle *comm_handle;
        hypre_ParCSRCommPkg *comm_pkg;
        HYPRE_Int num_sends, *int_buf_data;
        HYPRE_Int index, start;

        HYPRE_Int *eliminate_row = hypre_CTAlloc(HYPRE_Int, A_diag_nrows);
        HYPRE_Int *eliminate_col = hypre_CTAlloc(HYPRE_Int, A_offd_ncols);

        /* make sure A has a communication package */
        comm_pkg = hypre_ParCSRMatrixCommPkg(A);
        if (!comm_pkg)
        {
            hypre_MatvecCommPkgCreate(A);
            comm_pkg = hypre_ParCSRMatrixCommPkg(A);
        }

        /* which of the local rows are to be eliminated */
        for (i = 0; i < A_diag_nrows; i++)
        {
            eliminate_row[i] = 0;
        }
        for (i = 0; i < num_rowscols_to_elim; i++)
        {
            eliminate_row[rowscols_to_elim[i]] = 1;
        }

        /* use a Matvec communication pattern to find (in eliminate_col)
           which of the local offd columns are to be eliminated */
        num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
        int_buf_data = hypre_CTAlloc(HYPRE_Int,
                                     hypre_ParCSRCommPkgSendMapStart(comm_pkg,
                                             num_sends));
        index = 0;
        for (i = 0; i < num_sends; i++)
        {
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            {
                k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j);
                int_buf_data[index++] = eliminate_row[k];
            }
        }
        comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg,
                      int_buf_data, eliminate_col);

        /* eliminate diagonal part, overlapping it with communication */
        hypre_CSRMatrixElimCreate(A_diag, Ae_diag,
                                  num_rowscols_to_elim, rowscols_to_elim,
                                  num_rowscols_to_elim, rowscols_to_elim,
                                  NULL);

        hypre_CSRMatrixEliminateRowsCols(A_diag, Ae_diag,
                                         num_rowscols_to_elim, rowscols_to_elim,
                                         num_rowscols_to_elim, rowscols_to_elim,
                                         1, NULL);
        hypre_CSRMatrixReorder(Ae_diag);

        /* finish the communication */
        hypre_ParCSRCommHandleDestroy(comm_handle);

        /* received eliminate_col[], count offd columns to eliminate */
        num_offd_cols_to_elim = 0;
        for (i = 0; i < A_offd_ncols; i++)
        {
            if (eliminate_col[i]) {
                num_offd_cols_to_elim++;
            }
        }

        offd_cols_to_elim = hypre_CTAlloc(HYPRE_Int, num_offd_cols_to_elim);

        /* get a list of offd column indices and coefs */
        num_offd_cols_to_elim = 0;
        for (i = 0; i < A_offd_ncols; i++)
        {
            if (eliminate_col[i])
            {
                offd_cols_to_elim[num_offd_cols_to_elim++] = i;
            }
        }

        hypre_TFree(int_buf_data);
        hypre_TFree(eliminate_row);
        hypre_TFree(eliminate_col);
    }

    /* eliminate the off-diagonal part */
    col_mark = hypre_CTAlloc(HYPRE_Int, A_offd_ncols);
    col_remap = hypre_CTAlloc(HYPRE_Int, A_offd_ncols);

    hypre_CSRMatrixElimCreate(A_offd, Ae_offd,
                              num_rowscols_to_elim, rowscols_to_elim,
                              num_offd_cols_to_elim, offd_cols_to_elim,
                              col_mark);

    for (i = k = 0; i < A_offd_ncols; i++)
    {
        if (col_mark[i]) {
            col_remap[i] = k++;
        }
    }

    hypre_CSRMatrixEliminateRowsCols(A_offd, Ae_offd,
                                     num_rowscols_to_elim, rowscols_to_elim,
                                     num_offd_cols_to_elim, offd_cols_to_elim,
                                     0, col_remap);

    /* create col_map_offd for Ae */
    Ae_offd_ncols = 0;
    for (i = 0; i < A_offd_ncols; i++)
    {
        if (col_mark[i]) {
            Ae_offd_ncols++;
        }
    }

    Ae_col_map_offd  = hypre_CTAlloc(HYPRE_Int, Ae_offd_ncols);

    Ae_offd_ncols = 0;
    for (i = 0; i < A_offd_ncols; i++)
    {
        if (col_mark[i])
        {
            Ae_col_map_offd[Ae_offd_ncols++] = A_col_map_offd[i];
        }
    }

    hypre_ParCSRMatrixColMapOffd(*Ae) = Ae_col_map_offd;
    hypre_CSRMatrixNumCols(Ae_offd) = Ae_offd_ncols;

    hypre_TFree(col_remap);
    hypre_TFree(col_mark);
    hypre_TFree(offd_cols_to_elim);

    hypre_ParCSRMatrixSetNumNonzeros(*Ae);
    hypre_MatvecCommPkgCreate(*Ae);
}
Beispiel #12
0
hypre_CSRMatrix *
hypre_ParCSRMatrixExtractAExt( hypre_ParCSRMatrix *A,
                               HYPRE_Int data,
                               HYPRE_Int ** pA_ext_row_map )
{
    /* Note that A's role as the first factor in A*A^T is used only
       through ...CommPkgT(A), which basically says which rows of A
       (columns of A^T) are needed.  In all the other places where A
       serves as an input, it is through its role as A^T, the matrix
       whose data needs to be passed between processors. */
    MPI_Comm comm = hypre_ParCSRMatrixComm(A);
    HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);
    HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);
    HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);

    hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkgT(A);
    /* ... CommPkgT(A) should identify all rows of A^T needed for A*A^T (that is
     * generally a bigger set than ...CommPkg(A), the rows of B needed for A*B) */
    HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
    HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
    HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
    HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
    HYPRE_Int *send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);

    hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A);

    HYPRE_Int *diag_i = hypre_CSRMatrixI(diag);
    HYPRE_Int *diag_j = hypre_CSRMatrixJ(diag);
    HYPRE_Complex *diag_data = hypre_CSRMatrixData(diag);

    hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A);

    HYPRE_Int *offd_i = hypre_CSRMatrixI(offd);
    HYPRE_Int *offd_j = hypre_CSRMatrixJ(offd);
    HYPRE_Complex *offd_data = hypre_CSRMatrixData(offd);

    HYPRE_Int num_cols_A, num_nonzeros;
    HYPRE_Int num_rows_A_ext;

    hypre_CSRMatrix *A_ext;

    HYPRE_Int *A_ext_i;
    HYPRE_Int *A_ext_j;
    HYPRE_Complex *A_ext_data;

    num_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);
    num_rows_A_ext = recv_vec_starts[num_recvs];

    hypre_ParCSRMatrixExtractBExt_Arrays
    ( &A_ext_i, &A_ext_j, &A_ext_data, pA_ext_row_map,
      &num_nonzeros,
      data, 1, comm, comm_pkg,
      num_cols_A, num_recvs, num_sends,
      first_col_diag, first_row_index,
      recv_vec_starts, send_map_starts, send_map_elmts,
      diag_i, diag_j, offd_i, offd_j, col_map_offd,
      diag_data, offd_data
    );

    A_ext = hypre_CSRMatrixCreate(num_rows_A_ext,num_cols_A,num_nonzeros);
    hypre_CSRMatrixI(A_ext) = A_ext_i;
    hypre_CSRMatrixJ(A_ext) = A_ext_j;
    if (data) hypre_CSRMatrixData(A_ext) = A_ext_data;

    return A_ext;
}
Beispiel #13
0
hypre_ParCSRMatrix *hypre_ParCSRAAt( hypre_ParCSRMatrix  *A )
{
    MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);

    HYPRE_Complex   *A_diag_data = hypre_CSRMatrixData(A_diag);
    HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
    HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);

    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);

    HYPRE_Complex   *A_offd_data = hypre_CSRMatrixData(A_offd);
    HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
    HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);
    HYPRE_Int       *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
    HYPRE_Int       *A_ext_row_map;

    HYPRE_Int       *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
    HYPRE_Int        num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
    HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

    hypre_ParCSRMatrix *C;
    HYPRE_Int          *col_map_offd_C;

    hypre_CSRMatrix *C_diag;

    HYPRE_Complex   *C_diag_data;
    HYPRE_Int       *C_diag_i;
    HYPRE_Int       *C_diag_j;

    hypre_CSRMatrix *C_offd;

    HYPRE_Complex   *C_offd_data=NULL;
    HYPRE_Int       *C_offd_i=NULL;
    HYPRE_Int       *C_offd_j=NULL;
    HYPRE_Int       *new_C_offd_j;

    HYPRE_Int        C_diag_size;
    HYPRE_Int        C_offd_size;
    HYPRE_Int        last_col_diag_C;
    HYPRE_Int        num_cols_offd_C;

    hypre_CSRMatrix *A_ext;

    HYPRE_Complex   *A_ext_data;
    HYPRE_Int       *A_ext_i;
    HYPRE_Int       *A_ext_j;
    HYPRE_Int        num_rows_A_ext=0;

    HYPRE_Int        first_row_index_A = hypre_ParCSRMatrixFirstRowIndex(A);
    HYPRE_Int        first_col_diag_A = hypre_ParCSRMatrixFirstColDiag(A);
    HYPRE_Int       *B_marker;

    HYPRE_Int        i;
    HYPRE_Int        i1, i2, i3;
    HYPRE_Int        jj2, jj3;

    HYPRE_Int        jj_count_diag, jj_count_offd;
    HYPRE_Int        jj_row_begin_diag, jj_row_begin_offd;
    HYPRE_Int        start_indexing = 0; /* start indexing for C_data at 0 */
    HYPRE_Int        count;
    HYPRE_Int        n_rows_A, n_cols_A;

    HYPRE_Complex    a_entry;
    HYPRE_Complex    a_b_product;

    HYPRE_Complex    zero = 0.0;

    n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
    n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);

    if (n_cols_A != n_rows_A)
    {
        hypre_printf(" Error! Incompatible matrix dimensions!\n");
        return NULL;
    }
    /*-----------------------------------------------------------------------
     *  Extract A_ext, i.e. portion of A that is stored on neighbor procs
     *  and needed locally for A^T in the matrix matrix product A*A^T
     *-----------------------------------------------------------------------*/

    if (num_rows_diag_A != n_rows_A)
    {
        /*---------------------------------------------------------------------
         * If there exists no CommPkg for A, a CommPkg is generated using
         * equally load balanced partitionings
         *--------------------------------------------------------------------*/
        if (!hypre_ParCSRMatrixCommPkg(A))
        {
            hypre_MatTCommPkgCreate(A);
        }

        A_ext = hypre_ParCSRMatrixExtractAExt( A, 1, &A_ext_row_map );
        A_ext_data = hypre_CSRMatrixData(A_ext);
        A_ext_i    = hypre_CSRMatrixI(A_ext);
        A_ext_j    = hypre_CSRMatrixJ(A_ext);
        num_rows_A_ext = hypre_CSRMatrixNumRows(A_ext);
    }
    /*-----------------------------------------------------------------------
     *  Allocate marker array.
     *-----------------------------------------------------------------------*/

    B_marker = hypre_CTAlloc(HYPRE_Int, num_rows_diag_A+num_rows_A_ext );

    /*-----------------------------------------------------------------------
     *  Initialize some stuff.
     *-----------------------------------------------------------------------*/

    for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 )
    {
        B_marker[i1] = -1;
    }


    hypre_ParAat_RowSizes(
        &C_diag_i, &C_offd_i, B_marker,
        A_diag_i, A_diag_j,
        A_offd_i, A_offd_j, A_col_map_offd,
        A_ext_i, A_ext_j, A_ext_row_map,
        &C_diag_size, &C_offd_size,
        num_rows_diag_A, num_cols_offd_A,
        num_rows_A_ext,
        first_col_diag_A, first_row_index_A
    );

#if 0
    /* debugging output: */
    hypre_printf("A_ext_row_map (%i):",num_rows_A_ext);
    for ( i1=0; i1<num_rows_A_ext; ++i1 ) hypre_printf(" %i",A_ext_row_map[i1] );
    hypre_printf("\nC_diag_i (%i):",C_diag_size);
    for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_diag_i[i1] );
    hypre_printf("\nC_offd_i (%i):",C_offd_size);
    for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_offd_i[i1] );
    hypre_printf("\n");
#endif

    /*-----------------------------------------------------------------------
     *  Allocate C_diag_data and C_diag_j arrays.
     *  Allocate C_offd_data and C_offd_j arrays.
     *-----------------------------------------------------------------------*/

    last_col_diag_C = first_row_index_A + num_rows_diag_A - 1;
    C_diag_data = hypre_CTAlloc(HYPRE_Complex, C_diag_size);
    C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
    if (C_offd_size)
    {
        C_offd_data = hypre_CTAlloc(HYPRE_Complex, C_offd_size);
        C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
    }

    /*-----------------------------------------------------------------------
     *  Second Pass: Fill in C_diag_data and C_diag_j.
     *  Second Pass: Fill in C_offd_data and C_offd_j.
     *-----------------------------------------------------------------------*/

    /*-----------------------------------------------------------------------
     *  Initialize some stuff.
     *-----------------------------------------------------------------------*/

    jj_count_diag = start_indexing;
    jj_count_offd = start_indexing;
    for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 )
    {
        B_marker[i1] = -1;
    }

    /*-----------------------------------------------------------------------
     *  Loop over interior c-points.
     *-----------------------------------------------------------------------*/

    for (i1 = 0; i1 < num_rows_diag_A; i1++)
    {

        /*--------------------------------------------------------------------
         *  Create diagonal entry, C_{i1,i1}
         *--------------------------------------------------------------------*/

        B_marker[i1] = jj_count_diag;
        jj_row_begin_diag = jj_count_diag;
        jj_row_begin_offd = jj_count_offd;
        C_diag_data[jj_count_diag] = zero;
        C_diag_j[jj_count_diag] = i1;
        jj_count_diag++;

        /*-----------------------------------------------------------------
         *  Loop over entries in row i1 of A_offd.
         *-----------------------------------------------------------------*/

        /* There are 3 CSRMatrix or CSRBooleanMatrix objects here:
           ext*ext, ext*diag, and ext*offd belong to another processor.
           diag*offd and offd*diag don't count - never share a column by definition.
           So we have to do 4 cases:
           diag*ext, offd*ext, diag*diag, and offd*offd.
        */

        for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
        {
            i2 = A_diag_j[jj2];
            a_entry = A_diag_data[jj2];

            /* diag*ext */
            /*-----------------------------------------------------------
             *  Loop over entries (columns) i3 in row i2 of (A_ext)^T
             *  That is, rows i3 having a column i2 of A_ext.
             *  For now, for each row i3 of A_ext we crudely check _all_
             *  columns to see whether one matches i2.
             *  For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2)
             *  to C(i1,i3) .  This contributes to both the diag and offd
             *  blocks of C.
             *-----------------------------------------------------------*/

            for ( i3=0; i3<num_rows_A_ext; i3++ ) {
                for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) {
                    if ( A_ext_j[jj3]==i2+first_col_diag_A ) {
                        /* row i3, column i2 of A_ext; or,
                           row i2, column i3 of (A_ext)^T */

                        a_b_product = a_entry * A_ext_data[jj3];

                        /*--------------------------------------------------------
                         *  Check B_marker to see that C_{i1,i3} has not already
                         *  been accounted for. If it has not, create a new entry.
                         *  If it has, add new contribution.
                         *--------------------------------------------------------*/

                        if ( A_ext_row_map[i3] < first_row_index_A ||
                                A_ext_row_map[i3] > last_col_diag_C ) { /* offd */
                            if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) {
                                B_marker[i3+num_rows_diag_A] = jj_count_offd;
                                C_offd_data[jj_count_offd] = a_b_product;
                                C_offd_j[jj_count_offd] = i3;
                                jj_count_offd++;
                            }
                            else
                                C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                        }
                        else {                                              /* diag */
                            if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) {
                                B_marker[i3+num_rows_diag_A] = jj_count_diag;
                                C_diag_data[jj_count_diag] = a_b_product;
                                C_diag_j[jj_count_diag] = i3-first_col_diag_A;
                                jj_count_diag++;
                            }
                            else
                                C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                        }
                    }
                }
            }
        }

        if (num_cols_offd_A)
        {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
                i2 = A_offd_j[jj2];
                a_entry = A_offd_data[jj2];

                /* offd * ext */
                /*-----------------------------------------------------------
                 *  Loop over entries (columns) i3 in row i2 of (A_ext)^T
                 *  That is, rows i3 having a column i2 of A_ext.
                 *  For now, for each row i3 of A_ext we crudely check _all_
                 *  columns to see whether one matches i2.
                 *  For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2)
                 *  to C(i1,i3) .  This contributes to both the diag and offd
                 *  blocks of C.
                 *-----------------------------------------------------------*/

                for ( i3=0; i3<num_rows_A_ext; i3++ ) {
                    for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) {
                        if ( A_ext_j[jj3]==A_col_map_offd[i2] ) {
                            /* row i3, column i2 of A_ext; or,
                               row i2, column i3 of (A_ext)^T */

                            a_b_product = a_entry * A_ext_data[jj3];

                            /*--------------------------------------------------------
                             *  Check B_marker to see that C_{i1,i3} has not already
                             *  been accounted for. If it has not, create a new entry.
                             *  If it has, add new contribution.
                             *--------------------------------------------------------*/

                            if ( A_ext_row_map[i3] < first_row_index_A ||
                                    A_ext_row_map[i3] > last_col_diag_C ) { /* offd */
                                if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) {
                                    B_marker[i3+num_rows_diag_A] = jj_count_offd;
                                    C_offd_data[jj_count_offd] = a_b_product;
                                    C_offd_j[jj_count_offd] = i3;
                                    jj_count_offd++;
                                }
                                else
                                    C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                            }
                            else {                                              /* diag */
                                if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) {
                                    B_marker[i3+num_rows_diag_A] = jj_count_diag;
                                    C_diag_data[jj_count_diag] = a_b_product;
                                    C_diag_j[jj_count_diag] = i3-first_row_index_A;
                                    jj_count_diag++;
                                }
                                else
                                    C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                            }
                        }
                    }
                }
            }
        }

        /* diag * diag */
        /*-----------------------------------------------------------------
         *  Loop over entries (columns) i2 in row i1 of A_diag.
         *  For each such column we will find the contributions of the
         *  corresponding rows i2 of A^T to C=A*A^T .  Now we only look
         *  at the local part of A^T - with columns (rows of A) living
         *  on this processor.
         *-----------------------------------------------------------------*/

        for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
        {
            i2 = A_diag_j[jj2];
            a_entry = A_diag_data[jj2];

            /*-----------------------------------------------------------
             *  Loop over entries (columns) i3 in row i2 of A^T
             *  That is, rows i3 having a column i2 of A (local part).
             *  For now, for each row i3 of A we crudely check _all_
             *  columns to see whether one matches i2.
             *  This i3-loop is for the diagonal block of A.
             *  It contributes to the diagonal block of C.
             *  For each entry (i2,i3) of A^T,  add A(i1,i2)*A(i3,i2)
             *  to C(i1,i3)
             *-----------------------------------------------------------*/
            for ( i3=0; i3<num_rows_diag_A; i3++ ) {
                for ( jj3=A_diag_i[i3]; jj3<A_diag_i[i3+1]; jj3++ ) {
                    if ( A_diag_j[jj3]==i2 ) {
                        /* row i3, column i2 of A; or,
                           row i2, column i3 of A^T */
                        a_b_product = a_entry * A_diag_data[jj3];

                        /*--------------------------------------------------------
                         *  Check B_marker to see that C_{i1,i3} has not already
                         *  been accounted for. If it has not, mark it and increment
                         *  counter.
                         *--------------------------------------------------------*/
                        if (B_marker[i3] < jj_row_begin_diag)
                        {
                            B_marker[i3] = jj_count_diag;
                            C_diag_data[jj_count_diag] = a_b_product;
                            C_diag_j[jj_count_diag] = i3;
                            jj_count_diag++;
                        }
                        else
                        {
                            C_diag_data[B_marker[i3]] += a_b_product;
                        }
                    }
                }
            } /* end of i3 loop */
        } /* end of third i2 loop */

        /* offd * offd */
        /*-----------------------------------------------------------
         *  Loop over offd columns i2 of A in A*A^T.  Then
         *  loop over offd entries (columns) i3 in row i2 of A^T
         *  That is, rows i3 having a column i2 of A (local part).
         *  For now, for each row i3 of A we crudely check _all_
         *  columns to see whether one matches i2.
         *  This i3-loop is for the off-diagonal block of A.
         *  It contributes to the diag block of C.
         *  For each entry (i2,i3) of A^T, add A*A^T to C
         *-----------------------------------------------------------*/
        if (num_cols_offd_A) {

            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
                i2 = A_offd_j[jj2];
                a_entry = A_offd_data[jj2];

                for ( i3=0; i3<num_rows_diag_A; i3++ ) {
                    /* ... note that num_rows_diag_A == num_rows_offd_A */
                    for ( jj3=A_offd_i[i3]; jj3<A_offd_i[i3+1]; jj3++ ) {
                        if ( A_offd_j[jj3]==i2 ) {
                            /* row i3, column i2 of A; or,
                               row i2, column i3 of A^T */
                            a_b_product = a_entry * A_offd_data[jj3];

                            /*--------------------------------------------------------
                             *  Check B_marker to see that C_{i1,i3} has not already
                             *  been accounted for. If it has not, create a new entry.
                             *  If it has, add new contribution
                             *--------------------------------------------------------*/

                            if (B_marker[i3] < jj_row_begin_diag)
                            {
                                B_marker[i3] = jj_count_diag;
                                C_diag_data[jj_count_diag] = a_b_product;
                                C_diag_j[jj_count_diag] = i3;
                                jj_count_diag++;
                            }
                            else
                            {
                                C_diag_data[B_marker[i3]] += a_b_product;
                            }
                        }
                    }
                }  /* end of last i3 loop */
            }     /* end of if (num_cols_offd_A) */

        }        /* end of fourth and last i2 loop */
#if 0          /* debugging printout */
        hypre_printf("end of i1 loop: i1=%i jj_count_diag=%i\n", i1, jj_count_diag );
        hypre_printf("  C_diag_j=");
        for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%i ",C_diag_j[jj3]);
        hypre_printf("  C_diag_data=");
        for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%f ",C_diag_data[jj3]);
        hypre_printf("\n");
        hypre_printf("  C_offd_j=");
        for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%i ",C_offd_j[jj3]);
        hypre_printf("  C_offd_data=");
        for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%f ",C_offd_data[jj3]);
        hypre_printf("\n");
        hypre_printf( "  B_marker =" );
        for ( it=0; it<num_rows_diag_A+num_rows_A_ext; ++it )
            hypre_printf(" %i", B_marker[it] );
        hypre_printf( "\n" );
#endif
    }           /* end of i1 loop */

    /*-----------------------------------------------------------------------
     *  Delete 0-columns in C_offd, i.e. generate col_map_offd and reset
     *  C_offd_j.  Note that (with the indexing we have coming into this
     *  block) col_map_offd_C[i3]==A_ext_row_map[i3].
     *-----------------------------------------------------------------------*/

    for ( i=0; i<num_rows_diag_A+num_rows_A_ext; ++i )
        B_marker[i] = -1;
    for ( i=0; i<C_offd_size; i++ )
        B_marker[ C_offd_j[i] ] = -2;

    count = 0;
    for (i=0; i < num_rows_diag_A + num_rows_A_ext; i++) {
        if (B_marker[i] == -2) {
            B_marker[i] = count;
            count++;
        }
    }
    num_cols_offd_C = count;

    if (num_cols_offd_C) {
        col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);
        new_C_offd_j = hypre_CTAlloc(HYPRE_Int,C_offd_size);
        /* ... a bit big, but num_cols_offd_C is too small.  It might be worth
           computing the correct size, which is sum( no. columns in row i, over all rows i )
        */

        for (i=0; i < C_offd_size; i++) {
            new_C_offd_j[i] = B_marker[C_offd_j[i]];
            col_map_offd_C[ new_C_offd_j[i] ] = A_ext_row_map[ C_offd_j[i] ];
        }

        hypre_TFree(C_offd_j);
        C_offd_j = new_C_offd_j;

    }

    /*----------------------------------------------------------------
     * Create C
     *----------------------------------------------------------------*/

    C = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_rows_A, row_starts_A,
                                 row_starts_A, num_cols_offd_C,
                                 C_diag_size, C_offd_size);

    /* Note that C does not own the partitionings */
    hypre_ParCSRMatrixSetRowStartsOwner(C,0);
    hypre_ParCSRMatrixSetColStartsOwner(C,0);

    C_diag = hypre_ParCSRMatrixDiag(C);
    hypre_CSRMatrixData(C_diag) = C_diag_data;
    hypre_CSRMatrixI(C_diag) = C_diag_i;
    hypre_CSRMatrixJ(C_diag) = C_diag_j;

    if (num_cols_offd_C)
    {
        C_offd = hypre_ParCSRMatrixOffd(C);
        hypre_CSRMatrixData(C_offd) = C_offd_data;
        hypre_CSRMatrixI(C_offd) = C_offd_i;
        hypre_CSRMatrixJ(C_offd) = C_offd_j;
        hypre_ParCSRMatrixOffd(C) = C_offd;
        hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

    }
    else
        hypre_TFree(C_offd_i);

    /*-----------------------------------------------------------------------
     *  Free B_ext and marker array.
     *-----------------------------------------------------------------------*/

    if (num_cols_offd_A)
    {
        hypre_CSRMatrixDestroy(A_ext);
        A_ext = NULL;
    }
    hypre_TFree(B_marker);
    if ( num_rows_diag_A != n_rows_A )
        hypre_TFree(A_ext_row_map);

    return C;

}
HYPRE_Int
main( HYPRE_Int   argc,
      char *argv[] )
{
   hypre_CSRMatrix     *matrix;
   hypre_CSRMatrix     *matrix1;
   hypre_ParCSRMatrix  *par_matrix;
   hypre_Vector        *x_local;
   hypre_Vector        *y_local;
   hypre_Vector        *y2_local;
   hypre_ParVector     *x;
   hypre_ParVector     *x2;
   hypre_ParVector     *y;
   hypre_ParVector     *y2;

   HYPRE_Int          vecstride_x, idxstride_x, vecstride_y, idxstride_y;
   HYPRE_Int          num_procs, my_id;
   HYPRE_Int		local_size;
   HYPRE_Int          num_vectors;
   HYPRE_Int		global_num_rows, global_num_cols;
   HYPRE_Int		first_index;
   HYPRE_Int 		i, j, ierr=0;
   double 	*data, *data2;
   HYPRE_Int 		*row_starts, *col_starts;
   char		file_name[80];
   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &my_id);

   hypre_printf(" my_id: %d num_procs: %d\n", my_id, num_procs);
 
   if (my_id == 0) 
   {
	matrix = hypre_CSRMatrixRead("input");
   	hypre_printf(" read input\n");
   }
   row_starts = NULL;
   col_starts = NULL; 
   par_matrix = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, matrix, 
		row_starts, col_starts);
   hypre_printf(" converted\n");

   matrix1 = hypre_ParCSRMatrixToCSRMatrixAll(par_matrix);

   hypre_sprintf(file_name,"matrix1.%d",my_id);

   if (matrix1) hypre_CSRMatrixPrint(matrix1, file_name);

   hypre_ParCSRMatrixPrint(par_matrix,"matrix");
   hypre_ParCSRMatrixPrintIJ(par_matrix,0,0,"matrixIJ");

   par_matrix = hypre_ParCSRMatrixRead(hypre_MPI_COMM_WORLD,"matrix");

   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix);
   hypre_printf(" global_num_cols %d\n", global_num_cols);
   global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix);
 
   col_starts = hypre_ParCSRMatrixColStarts(par_matrix);
   first_index = col_starts[my_id];
   local_size = col_starts[my_id+1] - first_index;

   num_vectors = 3;

   x = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols,
                                         col_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(x,0);
   hypre_ParVectorInitialize(x);
   x_local = hypre_ParVectorLocalVector(x);
   data = hypre_VectorData(x_local);
   vecstride_x = hypre_VectorVectorStride(x_local);
   idxstride_x = hypre_VectorIndexStride(x_local);
   for ( j=0; j<num_vectors; ++j )
      for (i=0; i < local_size; i++)
         data[i*idxstride_x + j*vecstride_x] = first_index+i+1 + 100*j;

   x2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols,
                                    col_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(x2,0);
   hypre_ParVectorInitialize(x2);
   hypre_ParVectorSetConstantValues(x2,2.0);

   row_starts = hypre_ParCSRMatrixRowStarts(par_matrix);
   first_index = row_starts[my_id];
   local_size = row_starts[my_id+1] - first_index;
   y = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows,
                                   row_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(y,0);
   hypre_ParVectorInitialize(y);
   y_local = hypre_ParVectorLocalVector(y);

   y2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows,
                                    row_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(y2,0);
   hypre_ParVectorInitialize(y2);
   y2_local = hypre_ParVectorLocalVector(y2);
   data2 = hypre_VectorData(y2_local);
   vecstride_y = hypre_VectorVectorStride(y2_local);
   idxstride_y = hypre_VectorIndexStride(y2_local);
 
   for ( j=0; j<num_vectors; ++j )
      for (i=0; i < local_size; i++)
         data2[i*idxstride_y+j*vecstride_y] = first_index+i+1 + 100*j;

   hypre_ParVectorSetConstantValues(y,1.0);
   hypre_printf(" initialized vectors, first_index=%i\n", first_index);

   hypre_ParVectorPrint(x, "vectorx");
   hypre_ParVectorPrint(y, "vectory");

   hypre_MatvecCommPkgCreate(par_matrix);

   hypre_ParCSRMatrixMatvec ( 1.0, par_matrix, x, 1.0, y);
   hypre_printf(" did matvec\n");

   hypre_ParVectorPrint(y, "result");

   ierr = hypre_ParCSRMatrixMatvecT ( 1.0, par_matrix, y2, 1.0, x2);
   hypre_printf(" did matvecT %d\n", ierr);

   hypre_ParVectorPrint(x2, "transp"); 

   hypre_ParCSRMatrixDestroy(par_matrix);
   hypre_ParVectorDestroy(x);
   hypre_ParVectorDestroy(x2);
   hypre_ParVectorDestroy(y);
   hypre_ParVectorDestroy(y2);
   if (my_id == 0) hypre_CSRMatrixDestroy(matrix);
   if (matrix1) hypre_CSRMatrixDestroy(matrix1);

   /* Finalize MPI */
   hypre_MPI_Finalize();

   return 0;
}