Example #1
0
HYPRE_Int
hypre_IJMatrixInitializePETSc(hypre_IJMatrix *matrix)
{
   HYPRE_Int ierr = 0;
   hypre_ParCSRMatrix *par_matrix = hypre_IJMatrixLocalStorage(matrix);
   hypre_AuxParCSRMatrix *aux_matrix = hypre_IJMatrixTranslator(matrix);
   HYPRE_Int local_num_rows = hypre_AuxParCSRMatrixLocalNumRows(aux_matrix);
   HYPRE_Int local_num_cols = hypre_AuxParCSRMatrixLocalNumCols(aux_matrix);
   HYPRE_Int *row_space = hypre_AuxParCSRMatrixRowSpace(aux_matrix);
   HYPRE_Int num_nonzeros = hypre_ParCSRMatrixNumNonzeros(par_matrix);
   HYPRE_Int local_nnz;
   HYPRE_Int num_procs, my_id;
   MPI_Comm  comm = hypre_IJMatrixContext(matrix);
   HYPRE_Int global_num_rows = hypre_IJMatrixM(matrix);

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);
   
   local_nnz = (num_nonzeros/global_num_rows+1)*local_num_rows;
   if (local_num_rows < 0)
      hypre_AuxParCSRMatrixLocalNumRows(aux_matrix) = 
		hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(par_matrix));
   if (local_num_cols < 0)
      hypre_AuxParCSRMatrixLocalNumCols(aux_matrix) = 
		hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(par_matrix));
   ierr = hypre_AuxParCSRMatrixInitialize(aux_matrix);
   ierr += hypre_ParCSRMatrixInitialize(par_matrix);
   return ierr;
}
Example #2
0
hypre_ParMultiVector  *
hypre_ParMultiVectorCreate(MPI_Comm comm, HYPRE_Int global_size, HYPRE_Int *partitioning,
                           HYPRE_Int num_vectors)
{
    hypre_ParMultiVector *vector;
    HYPRE_Int num_procs, my_id;

    vector = hypre_CTAlloc(hypre_ParMultiVector, 1);

    hypre_MPI_Comm_rank(comm, &my_id);

    if (! partitioning)
    {
        hypre_MPI_Comm_size(comm, &num_procs);
        hypre_GeneratePartitioning(global_size, num_procs, &partitioning);
    }

    hypre_ParMultiVectorComm(vector) = comm;
    hypre_ParMultiVectorGlobalSize(vector) = global_size;
    hypre_ParMultiVectorPartitioning(vector) = partitioning;
    hypre_ParMultiVectorNumVectors(vector) = num_vectors;

    hypre_ParMultiVectorLocalVector(vector) =
        hypre_SeqMultivectorCreate((partitioning[my_id+1]-partitioning[my_id]), num_vectors);

    hypre_ParMultiVectorFirstIndex(vector) = partitioning[my_id];

    /* we set these 2 defaults exactly as in par_vector.c, although it's questionable */
    hypre_ParMultiVectorOwnsData(vector) = 1;
    hypre_ParMultiVectorOwnsPartitioning(vector) = 1;

    return vector;
}
Example #3
0
HYPRE_Int
hypre_ParKrylovCommInfo( void   *A, HYPRE_Int *my_id, HYPRE_Int *num_procs)
{
   MPI_Comm comm = hypre_ParCSRMatrixComm ( (hypre_ParCSRMatrix *) A);
   hypre_MPI_Comm_size(comm,num_procs);
   hypre_MPI_Comm_rank(comm,my_id);
   return 0;
}
Example #4
0
HYPRE_Int
hypre_StructKrylovCommInfo( void  *A,
                            HYPRE_Int   *my_id,
                            HYPRE_Int   *num_procs )
{
   MPI_Comm comm = hypre_StructMatrixComm((hypre_StructMatrix *) A);
   hypre_MPI_Comm_size(comm,num_procs);
   hypre_MPI_Comm_rank(comm,my_id);
   return hypre_error_flag;
}
Example #5
0
hypre_ParVector
*hypre_ParVectorRead( MPI_Comm    comm,
                      const char *file_name )
{
   char 	new_file_name[80];
   hypre_ParVector *par_vector;
   HYPRE_Int  	my_id, num_procs;
   HYPRE_Int		*partitioning;
   HYPRE_Int		global_size, i;
   FILE		*fp;

   hypre_MPI_Comm_rank(comm,&my_id); 
   hypre_MPI_Comm_size(comm,&num_procs); 

   partitioning = hypre_CTAlloc(HYPRE_Int,num_procs+1);

   hypre_sprintf(new_file_name,"%s.INFO.%d",file_name,my_id); 
   fp = fopen(new_file_name, "r");
   hypre_fscanf(fp, "%d\n", &global_size);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   for (i=0; i < 2; i++)
	hypre_fscanf(fp, "%d\n", &partitioning[i]);
   fclose (fp);
#else
   for (i=0; i < num_procs; i++)
	hypre_fscanf(fp, "%d\n", &partitioning[i]);
   fclose (fp);
   partitioning[num_procs] = global_size; 
#endif
   par_vector = hypre_CTAlloc(hypre_ParVector, 1);
	
   hypre_ParVectorComm(par_vector) = comm;
   hypre_ParVectorGlobalSize(par_vector) = global_size;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   hypre_ParVectorFirstIndex(par_vector) = partitioning[0];
   hypre_ParVectorLastIndex(par_vector) = partitioning[1]-1;
#else
   hypre_ParVectorFirstIndex(par_vector) = partitioning[my_id];
   hypre_ParVectorLastIndex(par_vector) = partitioning[my_id+1]-1;
#endif

   hypre_ParVectorPartitioning(par_vector) = partitioning;

   hypre_ParVectorOwnsData(par_vector) = 1;
   hypre_ParVectorOwnsPartitioning(par_vector) = 1;

   hypre_sprintf(new_file_name,"%s.%d",file_name,my_id); 
   hypre_ParVectorLocalVector(par_vector) = hypre_SeqVectorRead(new_file_name);

   /* multivector code not written yet >>> */
   hypre_assert( hypre_ParVectorNumVectors(par_vector) == 1 );

   return par_vector;
}
Example #6
0
hypre_ParVector *
hypre_ParVectorCreate(  MPI_Comm comm,
			HYPRE_Int global_size, 
			HYPRE_Int *partitioning)
{
   hypre_ParVector  *vector;
   HYPRE_Int num_procs, my_id;

   if (global_size < 0)
   {
      hypre_error_in_arg(2);
      return NULL;
   }
   vector = hypre_CTAlloc(hypre_ParVector, 1);
   hypre_MPI_Comm_rank(comm,&my_id);

   if (!partitioning)
   {
     hypre_MPI_Comm_size(comm,&num_procs);
#ifdef HYPRE_NO_GLOBAL_PARTITION
     hypre_GenerateLocalPartitioning(global_size, num_procs, my_id, &partitioning);
#else
     hypre_GeneratePartitioning(global_size, num_procs, &partitioning);
#endif
   }


   hypre_ParVectorAssumedPartition(vector) = NULL;
   

   hypre_ParVectorComm(vector) = comm;
   hypre_ParVectorGlobalSize(vector) = global_size;
#ifdef HYPRE_NO_GLOBAL_PARTITION
   hypre_ParVectorFirstIndex(vector) = partitioning[0];
   hypre_ParVectorLastIndex(vector) = partitioning[1]-1;
   hypre_ParVectorPartitioning(vector) = partitioning;
   hypre_ParVectorLocalVector(vector) = 
		hypre_SeqVectorCreate(partitioning[1]-partitioning[0]);
#else
   hypre_ParVectorFirstIndex(vector) = partitioning[my_id];
   hypre_ParVectorLastIndex(vector) = partitioning[my_id+1] -1;
   hypre_ParVectorPartitioning(vector) = partitioning;
   hypre_ParVectorLocalVector(vector) = 
		hypre_SeqVectorCreate(partitioning[my_id+1]-partitioning[my_id]);
#endif

   /* set defaults */
   hypre_ParVectorOwnsData(vector) = 1;
   hypre_ParVectorOwnsPartitioning(vector) = 1;

   return vector;
}
Example #7
0
HYPRE_Int
main( HYPRE_Int   argc,
      char *argv[] )
{
   hypre_ParCSRBooleanMatrix     *A;
   hypre_ParCSRBooleanMatrix     *C;
   hypre_CSRBooleanMatrix *As;
   HYPRE_Int *row_starts, *col_starts;
   HYPRE_Int num_procs, my_id;

   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD,&num_procs);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD,&my_id);
   row_starts = NULL;
   col_starts = NULL;

   if (my_id == 0)
   {
   	As = hypre_CSRBooleanMatrixRead("inpr");
   	hypre_printf(" read input A\n");
   }
   A = hypre_CSRBooleanMatrixToParCSRBooleanMatrix(hypre_MPI_COMM_WORLD, As, row_starts,
                                                 col_starts);
   row_starts = hypre_ParCSRBooleanMatrix_Get_RowStarts(A);
   col_starts = hypre_ParCSRBooleanMatrix_Get_ColStarts(A);

   hypre_ParCSRBooleanMatrixPrint(A, "echo_A" );
   hypre_ParCSRBooleanMatrixPrintIJ(A, "echo_AIJ" );
   C = hypre_ParBooleanAAt( A );
   hypre_ParCSRBooleanMatrixPrint(C, "result");
   hypre_ParCSRBooleanMatrixPrintIJ(C, "resultIJ");

   if (my_id == 0)
   {
	hypre_CSRBooleanMatrixDestroy(As);
   }
   hypre_ParCSRBooleanMatrixDestroy(A);
   hypre_ParCSRBooleanMatrixDestroy(C);

   hypre_MPI_Finalize();

   return 0;
}
HYPRE_Int hypre_ParCSRBooleanMatrixPrint( hypre_ParCSRBooleanMatrix *matrix, 
                                    const char                *file_name )
{
   MPI_Comm comm = hypre_ParCSRBooleanMatrix_Get_Comm(matrix);
   HYPRE_Int global_num_rows = hypre_ParCSRBooleanMatrix_Get_GlobalNRows(matrix);
   HYPRE_Int global_num_cols = hypre_ParCSRBooleanMatrix_Get_GlobalNCols(matrix);
   HYPRE_Int *col_map_offd = hypre_ParCSRBooleanMatrix_Get_ColMapOffd(matrix);
   HYPRE_Int *row_starts = hypre_ParCSRBooleanMatrix_Get_RowStarts(matrix);
   HYPRE_Int *col_starts = hypre_ParCSRBooleanMatrix_Get_ColStarts(matrix);
   HYPRE_Int  my_id, i, num_procs;
   char new_file_d[80], new_file_o[80], new_file_info[80];
   HYPRE_Int  ierr = 0;
   FILE *fp;
   HYPRE_Int  num_cols_offd = 0;

   if (hypre_ParCSRBooleanMatrix_Get_Offd(matrix)) num_cols_offd = 
      hypre_CSRBooleanMatrix_Get_NCols(hypre_ParCSRBooleanMatrix_Get_Offd(matrix));

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);
   
   hypre_sprintf(new_file_d,"%s.D.%d",file_name,my_id);
   hypre_sprintf(new_file_o,"%s.O.%d",file_name,my_id);
   hypre_sprintf(new_file_info,"%s.INFO.%d",file_name,my_id);
   hypre_CSRBooleanMatrixPrint(hypre_ParCSRBooleanMatrix_Get_Diag(matrix),new_file_d);
   if (num_cols_offd != 0)
      hypre_CSRBooleanMatrixPrint(hypre_ParCSRBooleanMatrix_Get_Offd(matrix),
                                new_file_o);
  
   fp = fopen(new_file_info, "w");
   hypre_fprintf(fp, "%d\n", global_num_rows);
   hypre_fprintf(fp, "%d\n", global_num_cols);
   hypre_fprintf(fp, "%d\n", num_cols_offd);
   for (i=0; i < num_procs; i++)
      hypre_fprintf(fp, "%d %d\n", row_starts[i], col_starts[i]);
   for (i=0; i < num_cols_offd; i++)
      hypre_fprintf(fp, "%d\n", col_map_offd[i]);
   fclose(fp);

   return ierr;
}
Example #9
0
HYPRE_Int
hypre_ParVectorPrint( hypre_ParVector  *vector, 
                      const char       *file_name )
{
   char 	new_file_name[80];
   hypre_Vector *local_vector;
   MPI_Comm 	comm;
   HYPRE_Int  	my_id, num_procs, i;
   HYPRE_Int		*partitioning;
   HYPRE_Int		global_size;
   FILE		*fp;
   if (!vector)
   {
      hypre_error_in_arg(1);
      return hypre_error_flag;
   }
   local_vector = hypre_ParVectorLocalVector(vector); 
   comm = hypre_ParVectorComm(vector);
   partitioning = hypre_ParVectorPartitioning(vector); 
   global_size = hypre_ParVectorGlobalSize(vector); 

   hypre_MPI_Comm_rank(comm,&my_id); 
   hypre_MPI_Comm_size(comm,&num_procs); 
   hypre_sprintf(new_file_name,"%s.%d",file_name,my_id); 
   hypre_SeqVectorPrint(local_vector,new_file_name);
   hypre_sprintf(new_file_name,"%s.INFO.%d",file_name,my_id); 
   fp = fopen(new_file_name, "w");
   hypre_fprintf(fp, "%d\n", global_size);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   for (i=0; i < 2; i++)
	hypre_fprintf(fp, "%d\n", partitioning[i]);
#else
  for (i=0; i < num_procs; i++)
	hypre_fprintf(fp, "%d\n", partitioning[i]);
#endif

   fclose (fp);
   return hypre_error_flag;
}
Example #10
0
/******************************************************************************
 *
 * hypre_IJVectorCreatePar
 *
 * creates ParVector if necessary, and leaves a pointer to it as the
 * hypre_IJVector object
 *
 *****************************************************************************/
HYPRE_Int
hypre_IJVectorCreatePar(hypre_IJVector *vector, HYPRE_Int *IJpartitioning)
{
   MPI_Comm comm = hypre_IJVectorComm(vector);


   HYPRE_Int num_procs, jmin, global_n, *partitioning, j;
   hypre_MPI_Comm_size(comm, &num_procs);

#ifdef HYPRE_NO_GLOBAL_PARTITION
   jmin = hypre_IJVectorGlobalFirstRow(vector);
   global_n = hypre_IJVectorGlobalNumRows(vector);

   partitioning = hypre_CTAlloc(HYPRE_Int, 2); 

/* Shift to zero-based partitioning for ParVector object */
   for (j = 0; j < 2; j++) 
      partitioning[j] = IJpartitioning[j] - jmin;

#else
   jmin = IJpartitioning[0];
   global_n = IJpartitioning[num_procs] - jmin;

   partitioning = hypre_CTAlloc(HYPRE_Int, num_procs+1); 

/* Shift to zero-based partitioning for ParVector object */
   for (j = 0; j < num_procs+1; j++) 
      partitioning[j] = IJpartitioning[j] - jmin;

#endif
  

   hypre_IJVectorObject(vector) = hypre_ParVectorCreate(comm,
            global_n, (HYPRE_Int *) partitioning); 

   return hypre_error_flag;
}
Example #11
0
HYPRE_Int
hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix    *A,
                            HYPRE_Int                    num_functions,
                            HYPRE_Int                   *dof_func,
                            HYPRE_Int                    option,
                            HYPRE_Int                    diag_option,     
                            hypre_ParCSRMatrix   **AN_ptr)
{
   MPI_Comm 	       comm            = hypre_ParCSRMatrixComm(A);
   hypre_CSRMatrix    *A_diag          = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int                *A_diag_i        = hypre_CSRMatrixI(A_diag);
   double             *A_diag_data     = hypre_CSRMatrixData(A_diag);


   hypre_CSRMatrix    *A_offd          = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int                *A_offd_i        = hypre_CSRMatrixI(A_offd);
   double             *A_offd_data     = hypre_CSRMatrixData(A_offd);
   HYPRE_Int                *A_diag_j        = hypre_CSRMatrixJ(A_diag);
   HYPRE_Int                *A_offd_j        = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int 		      *row_starts      = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int 		      *col_map_offd    = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int                 num_variables   = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int 		       num_nonzeros_offd = 0;
   HYPRE_Int 		       num_cols_offd = 0;
                  
   hypre_ParCSRMatrix *AN;
   hypre_CSRMatrix    *AN_diag;
   HYPRE_Int                *AN_diag_i;
   HYPRE_Int                *AN_diag_j;
   double             *AN_diag_data; 
   hypre_CSRMatrix    *AN_offd;
   HYPRE_Int                *AN_offd_i;
   HYPRE_Int                *AN_offd_j;
   double             *AN_offd_data; 
   HYPRE_Int		      *col_map_offd_AN;
   HYPRE_Int		      *new_col_map_offd;
   HYPRE_Int		      *row_starts_AN;
   HYPRE_Int		       AN_num_nonzeros_diag = 0;
   HYPRE_Int		       AN_num_nonzeros_offd = 0;
   HYPRE_Int		       num_cols_offd_AN;
   HYPRE_Int		       new_num_cols_offd;
                 
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   HYPRE_Int		       num_sends;
   HYPRE_Int		       num_recvs;
   HYPRE_Int		      *send_procs;
   HYPRE_Int		      *send_map_starts;
   HYPRE_Int		      *send_map_elmts;
   HYPRE_Int		      *new_send_map_elmts;
   HYPRE_Int		      *recv_procs;
   HYPRE_Int		      *recv_vec_starts;

   hypre_ParCSRCommPkg *comm_pkg_AN;
   HYPRE_Int		      *send_procs_AN;
   HYPRE_Int		      *send_map_starts_AN;
   HYPRE_Int		      *send_map_elmts_AN;
   HYPRE_Int		      *recv_procs_AN;
   HYPRE_Int		      *recv_vec_starts_AN;

   HYPRE_Int                 i, j, k, k_map;
                      
   HYPRE_Int                 ierr = 0;

   HYPRE_Int		       index, row;
   HYPRE_Int		       start_index;
   HYPRE_Int		       num_procs;
   HYPRE_Int		       node, cnt;
   HYPRE_Int		       mode;
   HYPRE_Int		       new_send_elmts_size;

   HYPRE_Int		       global_num_nodes;
   HYPRE_Int		       num_nodes;
   HYPRE_Int		       num_fun2;
   HYPRE_Int		      *map_to_node;
   HYPRE_Int		      *map_to_map;
   HYPRE_Int		      *counter;

   double sum;
   double *data;
   

   hypre_MPI_Comm_size(comm,&num_procs);

   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   mode = fabs(option);

   comm_pkg_AN = NULL;
   col_map_offd_AN = NULL;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2);

   for (i=0; i < 2; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
	  hypre_printf("nodes not properly aligned or incomplete info!\n");
	  return (87);
      }
   }
   
   global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions;


#else
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1);

  for (i=0; i < num_procs+1; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
	  hypre_printf("nodes not properly aligned or incomplete info!\n");
	  return (87);
      }
   }
   
   global_num_nodes = row_starts_AN[num_procs];

#endif

 
   num_nodes =  num_variables/num_functions;
   num_fun2 = num_functions*num_functions;

   map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables);
   AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);
   counter = hypre_CTAlloc(HYPRE_Int, num_nodes);
   for (i=0; i < num_variables; i++)
      map_to_node[i] = i/num_functions;
   for (i=0; i < num_nodes; i++)
      counter[i] = -1;

   AN_num_nonzeros_diag = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      AN_diag_i[i] = AN_num_nonzeros_diag;
      for (j=0; j < num_functions; j++)
      {
	 for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	 {
	    k_map = map_to_node[A_diag_j[k]];
	    if (counter[k_map] < i)
	    {
	       counter[k_map] = i;
	       AN_num_nonzeros_diag++;
	    }
	 }
	 row++;
      }
   }
   AN_diag_i[num_nodes] = AN_num_nonzeros_diag;

   AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag);	
   AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag);	

   AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag);
   hypre_CSRMatrixI(AN_diag) = AN_diag_i;
   hypre_CSRMatrixJ(AN_diag) = AN_diag_j;
   hypre_CSRMatrixData(AN_diag) = AN_diag_data;
       
   for (i=0; i < num_nodes; i++)
      counter[i] = -1;
   index = 0;
   start_index = 0;
   row = 0;

   switch (mode)
   {
      case 1:  /* frobenius norm */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = A_diag_data[k]*A_diag_data[k];
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += 
				A_diag_data[k]*A_diag_data[k];
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
            AN_diag_data[i] = sqrt(AN_diag_data[i]);

      }
      break;
      
      case 2:  /* sum of abs. value of all elements in each block */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = fabs(A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += fabs(A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
            AN_diag_data[i] /= num_fun2;
      }
      break;

      case 3:  /* largest element of each block (sets true value - not abs. value) */
      {

         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
      	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
      	       {
      	          k_map = map_to_node[A_diag_j[k]];
      	          if (counter[k_map] < start_index)
      	          {
      	             counter[k_map] = index;
      	             AN_diag_j[index] = k_map;
      	             AN_diag_data[index] = A_diag_data[k];
      	             index++;
      	          }
      	          else
      	          {
      	             if (fabs(A_diag_data[k]) > 
				fabs(AN_diag_data[counter[k_map]]))
      	                AN_diag_data[counter[k_map]] = A_diag_data[k];
      	          }
      	       }
      	       row++;
            }
            start_index = index;
         }
      }
      break;

      case 4:  /* inf. norm (row-sum)  */
      {

         data = hypre_CTAlloc(double, AN_num_nonzeros_diag*num_functions);

         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             data[index*num_functions + j] = fabs(A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             data[(counter[k_map])*num_functions + j] += fabs(A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
         {
            AN_diag_data[i]  = data[i*num_functions];
            
            for (j=1; j< num_functions; j++)
            {
               AN_diag_data[i]  = hypre_max( AN_diag_data[i],data[i*num_functions+j]);
            }
         }
         hypre_TFree(data);
      
      }
      break;

      case 6:  /* sum of all elements in each block */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = (A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += (A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
      }
      break;

   }

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (DO MORE BELOW) */
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i]; 
         sum = 0.0;
         for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++)
         {
            sum += AN_diag_data[k];
            
         }
         AN_diag_data[index] = -sum;
      }
      
   }
   else if (diag_option == 2)
   {
      
      /*  make all diagonal entries negative */
      /* the diagonal is the first element listed in each row - */
      
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i];
         AN_diag_data[index] = - AN_diag_data[index];
      }
   }






   num_nonzeros_offd = A_offd_i[num_variables];
   AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);

   num_cols_offd_AN = 0;

   if (comm_pkg)
   {
      comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
      hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm;
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends;
      num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs;
      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      send_procs_AN = NULL;
      send_map_elmts_AN = NULL;
      if (num_sends) 
      {
         send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends);
         send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]);
      }
      send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1);
      recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
      recv_procs_AN = NULL;
      if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs);
      for (i=0; i < num_sends; i++)
         send_procs_AN[i] = send_procs[i];
      for (i=0; i < num_recvs; i++)
         recv_procs_AN[i] = recv_procs[i];

      send_map_starts_AN[0] = 0;
      cnt = 0;
      for (i=0; i < num_sends; i++)
      {
	 k_map = send_map_starts[i];
	 if (send_map_starts[i+1]-k_map)
            send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions;
         for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++)
         {
            node = send_map_elmts[j]/num_functions;
            if (node > send_map_elmts_AN[cnt-1])
	       send_map_elmts_AN[cnt++] = node; 
         }
         send_map_starts_AN[i+1] = cnt;
      }
      hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN;
   }

   num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
   if (num_cols_offd)
   {
      if (num_cols_offd > num_variables)
      {
         hypre_TFree(map_to_node);
         map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd);
      }

      num_cols_offd_AN = 1;
      map_to_node[0] = col_map_offd[0]/num_functions;
      for (i=1; i < num_cols_offd; i++)
      {
         map_to_node[i] = col_map_offd[i]/num_functions;
         if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++;
      }
      
      if (num_cols_offd_AN > num_nodes)
      {
         hypre_TFree(counter);
         counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      }

      map_to_map = NULL;
      col_map_offd_AN = NULL;
      map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      col_map_offd_AN[0] = map_to_node[0];
      recv_vec_starts_AN[0] = 0;
      cnt = 1;
      for (i=0; i < num_recvs; i++)
      {
         for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         {
            node = map_to_node[j];
	    if (node > col_map_offd_AN[cnt-1])
	    {
	       col_map_offd_AN[cnt++] = node; 
	    }
	    map_to_map[j] = cnt-1;
         }
         recv_vec_starts_AN[i+1] = cnt;
      }

      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;

      AN_num_nonzeros_offd = 0;
      row = 0;
      for (i=0; i < num_nodes; i++)
      {
         AN_offd_i[i] = AN_num_nonzeros_offd;
         for (j=0; j < num_functions; j++)
         {
	    for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	    {
	       k_map = map_to_map[A_offd_j[k]];
	       if (counter[k_map] < i)
	       {
	          counter[k_map] = i;
	          AN_num_nonzeros_offd++;
	       }
	    }
	    row++;
         }
      }
      AN_offd_i[num_nodes] = AN_num_nonzeros_offd;
   }

       
   AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN,	
		AN_num_nonzeros_offd);
   hypre_CSRMatrixI(AN_offd) = AN_offd_i;
   if (AN_num_nonzeros_offd)
   {
      AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd);	
      AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd);	
      hypre_CSRMatrixJ(AN_offd) = AN_offd_j;
      hypre_CSRMatrixData(AN_offd) = AN_offd_data;
   
      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;
      index = 0;
      row = 0;
      AN_offd_i[0] = 0;
      start_index = 0;
      switch (mode)
      {
         case 1: /* frobenius norm */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	          {
	             k_map = map_to_map[A_offd_j[k]];
	             if (counter[k_map] < start_index)
	             {
	                counter[k_map] = index;
	                AN_offd_j[index] = k_map;
	                AN_offd_data[index] = A_offd_data[k]*A_offd_data[k];
	                index++;
	             }
	             else
	             {
	                AN_offd_data[counter[k_map]] += 
				A_offd_data[k]*A_offd_data[k];
	             }
	          }
	          row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
	       AN_offd_data[i] = sqrt(AN_offd_data[i]);
         }
         break;
      
         case 2:  /* sum of abs. value of all elements in block */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	          {
	             k_map = map_to_map[A_offd_j[k]];
	             if (counter[k_map] < start_index)
	             {
	                counter[k_map] = index;
	                AN_offd_j[index] = k_map;
	                AN_offd_data[index] = fabs(A_offd_data[k]);
	                index++;
	             }
	             else
	             {
	                AN_offd_data[counter[k_map]] += fabs(A_offd_data[k]);
	             }
	          }
	          row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
               AN_offd_data[i] /= num_fun2;
         }
         break;

         case 3: /* largest element in each block (not abs. value ) */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
      	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
      	          {
      	             k_map = map_to_map[A_offd_j[k]];
      	             if (counter[k_map] < start_index)
      	             {
      	                counter[k_map] = index;
      	                AN_offd_j[index] = k_map;
      	                AN_offd_data[index] = A_offd_data[k];
      	                index++;
      	             }
      	             else
      	             {
      	                if (fabs(A_offd_data[k]) > 
				fabs(AN_offd_data[counter[k_map]]))
      	                   AN_offd_data[counter[k_map]] = A_offd_data[k];
      	             }
      	          }
      	          row++;
               }
               start_index = index;
            }
         }
         break;
         
         case 4:  /* inf. norm (row-sum)  */
         {
            
            data = hypre_CTAlloc(double, AN_num_nonzeros_offd*num_functions);
            
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
                  for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
                  {
                     k_map = map_to_map[A_offd_j[k]];
                     if (counter[k_map] < start_index)
                     {
                        counter[k_map] = index;
                        AN_offd_j[index] = k_map;
                        data[index*num_functions + j] = fabs(A_offd_data[k]);
                        index++;
                     }
                     else
                     {
                        data[(counter[k_map])*num_functions + j] += fabs(A_offd_data[k]);
                     }
                  }
                  row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
            {
               AN_offd_data[i]  = data[i*num_functions];
               
               for (j=1; j< num_functions; j++)
               {
                  AN_offd_data[i]  = hypre_max( AN_offd_data[i],data[i*num_functions+j]);
               }
            }
            hypre_TFree(data);
            
         }
         break;
         
         case 6:  /* sum of value of all elements in block */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
                  for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
                  {
                     k_map = map_to_map[A_offd_j[k]];
                     if (counter[k_map] < start_index)
                     {
                        counter[k_map] = index;
                        AN_offd_j[index] = k_map;
                        AN_offd_data[index] = (A_offd_data[k]);
                        index++;
                     }
                     else
                     {
                        AN_offd_data[counter[k_map]] += (A_offd_data[k]);
                     }
                  }
                  row++;
               }
               start_index = index;
            }
            
         }
         break;
      }
   
      hypre_TFree(map_to_map);
   }

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (here we are adding the 
         off_diag contribution)*/
      /* the diagonal is the first element listed in each row of AN_diag_data - */
      for (i=0; i < num_nodes; i++)
      {
         sum = 0.0;
         for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++)
         {
            sum += AN_offd_data[k];
            
         }
         index = AN_diag_i[i];/* location of diag entry in data */ 
         AN_diag_data[index] -= sum; /* subtract from current value */
      }
      
   }

    
   AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes,
		row_starts_AN, row_starts_AN, num_cols_offd_AN,
		AN_num_nonzeros_diag, AN_num_nonzeros_offd);

   /* we already created the diag and offd matrices - so we don't need the ones
      created above */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN));
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN));
   hypre_ParCSRMatrixDiag(AN) = AN_diag;
   hypre_ParCSRMatrixOffd(AN) = AN_offd;


   hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN;
   hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN;

   new_num_cols_offd = num_functions*num_cols_offd_AN;

   if (new_num_cols_offd > num_cols_offd)
   {
      new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd);
      cnt = 0;
      for (i=0; i < num_cols_offd_AN; i++)
      {
	 for (j=0; j < num_functions; j++)
         {
 	    new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j;
         }
      }
      cnt = 0;
      for (i=0; i < num_cols_offd; i++)
      {
         while (col_map_offd[i] >  new_col_map_offd[cnt])
            cnt++;
         col_map_offd[i] = cnt++;
      }
      for (i=0; i < num_recvs+1; i++)
      {
         recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i];
      }

      for (i=0; i < num_nonzeros_offd; i++)
      {
         j = A_offd_j[i];
	 A_offd_j[i] = col_map_offd[j];
      }
      hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd;
      hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd;
      hypre_TFree(col_map_offd);
   }
 
   hypre_TFree(map_to_node);
   new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions;

   if (new_send_elmts_size > send_map_starts[num_sends])
   {
      new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size);
      cnt = 0;
      send_map_starts[0] = 0;
      for (i=0; i < num_sends; i++)
      {
         send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions;
         for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++)
	 {
            for (k=0; k < num_functions; k++)
	       new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k;
	 }
      }
      hypre_TFree(send_map_elmts);
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts;
   }
 
   *AN_ptr        = AN;

   hypre_TFree(counter);

   return (ierr);
}
Example #12
0
HYPRE_ParCSRMatrix
GenerateRotate7pt( MPI_Comm comm,
                   HYPRE_Int      nx,
                   HYPRE_Int      ny,
                   HYPRE_Int      P,
                   HYPRE_Int      Q,
                   HYPRE_Int      p,
                   HYPRE_Int      q,
                   HYPRE_Real   alpha,
                   HYPRE_Real   eps )
{
    hypre_ParCSRMatrix *A;
    hypre_CSRMatrix *diag;
    hypre_CSRMatrix *offd;

    HYPRE_Int    *diag_i;
    HYPRE_Int    *diag_j;
    HYPRE_Real *diag_data;

    HYPRE_Int    *offd_i;
    HYPRE_Int    *offd_j;
    HYPRE_Real *offd_data;

    HYPRE_Real *value;
    HYPRE_Real ac, bc, cc, s, c, pi, x;
    HYPRE_Int *global_part;
    HYPRE_Int ix, iy;
    HYPRE_Int cnt, o_cnt;
    HYPRE_Int local_num_rows;
    HYPRE_Int *col_map_offd;
    HYPRE_Int *work;
    HYPRE_Int row_index;
    HYPRE_Int i,j;

    HYPRE_Int nx_local, ny_local;
    HYPRE_Int nx_size, ny_size;
    HYPRE_Int num_cols_offd;
    HYPRE_Int grid_size;

    HYPRE_Int *nx_part;
    HYPRE_Int *ny_part;

    HYPRE_Int num_procs, my_id;
    HYPRE_Int P_busy, Q_busy;

    hypre_MPI_Comm_size(comm,&num_procs);
    hypre_MPI_Comm_rank(comm,&my_id);

    grid_size = nx*ny;

    value = hypre_CTAlloc(HYPRE_Real,4);
    pi = 4.0*atan(1.0);
    x = pi*alpha/180.0;
    s = sin(x);
    c = cos(x);
    ac = -(c*c + eps*s*s);
    bc = 2.0*(1.0 - eps)*s*c;
    cc = -(s*s + eps*c*c);
    value[0] = -2*(2*ac+bc+2*cc);
    value[1] = 2*ac+bc;
    value[2] = bc+2*cc;
    value[3] = -bc;

    hypre_GeneratePartitioning(nx,P,&nx_part);
    hypre_GeneratePartitioning(ny,Q,&ny_part);

    global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1);

    global_part[0] = 0;
    cnt = 1;
    for (iy = 0; iy < Q; iy++)
    {
        ny_size = ny_part[iy+1]-ny_part[iy];
        for (ix = 0; ix < P; ix++)
        {
            nx_size = nx_part[ix+1] - nx_part[ix];
            global_part[cnt] = global_part[cnt-1];
            global_part[cnt++] += nx_size*ny_size;
        }
    }

    nx_local = nx_part[p+1] - nx_part[p];
    ny_local = ny_part[q+1] - ny_part[q];

    my_id = q*P + p;
    num_procs = P*Q;

    local_num_rows = nx_local*ny_local;
    diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
    offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

    P_busy = hypre_min(nx,P);
    Q_busy = hypre_min(ny,Q);

    num_cols_offd = 0;
    if (p) num_cols_offd += ny_local;
    if (p < P_busy-1) num_cols_offd += ny_local;
    if (q) num_cols_offd += nx_local;
    if (q < Q_busy-1) num_cols_offd += nx_local;
    if (p && q) num_cols_offd++;
    if (p && q < Q_busy-1 ) num_cols_offd++;
    if (p < P_busy-1 && q ) num_cols_offd++;
    if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++;

    if (!local_num_rows) num_cols_offd = 0;

    col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

    cnt = 0;
    o_cnt = 0;
    diag_i[0] = 0;
    offd_i[0] = 0;
    for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
    {
        for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
        {
            cnt++;
            o_cnt++;
            diag_i[cnt] = diag_i[cnt-1];
            offd_i[o_cnt] = offd_i[o_cnt-1];
            diag_i[cnt]++;
            if (iy > ny_part[q])
            {
                diag_i[cnt]++;
                if (ix > nx_part[p])
                {
                    diag_i[cnt]++;
                }
                else
                {
                    if (ix)
                        offd_i[o_cnt]++;
                }
            }
            else
            {
                if (iy)
                {
                    offd_i[o_cnt]++;
                    if (ix > nx_part[p])
                    {
                        offd_i[o_cnt]++;
                    }
                    else if (ix)
                    {
                        offd_i[o_cnt]++;
                    }
                }
            }
            if (ix > nx_part[p])
                diag_i[cnt]++;
            else
            {
                if (ix)
                {
                    offd_i[o_cnt]++;
                }
            }
            if (ix+1 < nx_part[p+1])
                diag_i[cnt]++;
            else
            {
                if (ix+1 < nx)
                {
                    offd_i[o_cnt]++;
                }
            }
            if (iy+1 < ny_part[q+1])
            {
                diag_i[cnt]++;
                if (ix < nx_part[p+1]-1)
                {
                    diag_i[cnt]++;
                }
                else
                {
                    if (ix+1 < nx)
                        offd_i[o_cnt]++;
                }
            }
            else
            {
                if (iy+1 < ny)
                {
                    offd_i[o_cnt]++;
                    if (ix < nx_part[p+1]-1)
                    {
                        offd_i[o_cnt]++;
                    }
                    else if (ix < nx-1)
                    {
                        offd_i[o_cnt]++;
                    }
                }
            }
        }
    }

    diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
    diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]);

    if (num_procs > 1)
    {
        offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
        offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]);
    }

    row_index = 0;
    cnt = 0;
    o_cnt = 0;
    for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
    {
        for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
        {
            diag_j[cnt] = row_index;
            diag_data[cnt++] = value[0];
            if (iy > ny_part[q])
            {
                if (ix > nx_part[p])
                {
                    diag_j[cnt] = row_index-nx_local-1 ;
                    diag_data[cnt++] = value[3];
                }
                else
                {
                    if (ix)
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
                diag_j[cnt] = row_index-nx_local;
                diag_data[cnt++] = value[2];
            }
            else
            {
                if (iy)
                {
                    if (ix > nx_part[p])
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p,q-1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    else if (ix)
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q-1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    offd_j[o_cnt] = hypre_map2(ix,iy-1,p,q-1,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[2];
                }
            }
            if (ix > nx_part[p])
            {
                diag_j[cnt] = row_index-1;
                diag_data[cnt++] = value[1];
            }
            else
            {
                if (ix)
                {
                    offd_j[o_cnt] = hypre_map2(ix-1,iy,p-1,q,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[1];
                }
            }
            if (ix+1 < nx_part[p+1])
            {
                diag_j[cnt] = row_index+1;
                diag_data[cnt++] = value[1];
            }
            else
            {
                if (ix+1 < nx)
                {
                    offd_j[o_cnt] = hypre_map2(ix+1,iy,p+1,q,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[1];
                }
            }
            if (iy+1 < ny_part[q+1])
            {
                diag_j[cnt] = row_index+nx_local;
                diag_data[cnt++] = value[2];
                if (ix < nx_part[p+1]-1)
                {
                    diag_j[cnt] = row_index+nx_local+1 ;
                    diag_data[cnt++] = value[3];
                }
                else
                {
                    if (ix+1 < nx)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
            }
            else
            {
                if (iy+1 < ny)
                {
                    offd_j[o_cnt] = hypre_map2(ix,iy+1,p,q+1,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[2];
                    if (ix < nx_part[p+1]-1)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p,q+1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    else if (ix < nx-1)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q+1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
            }
            row_index++;
        }
    }

    if (num_procs > 1)
    {
        work = hypre_CTAlloc(HYPRE_Int,o_cnt);

        for (i=0; i < o_cnt; i++)
            work[i] = offd_j[i];

        qsort0(work, 0, o_cnt-1);

        col_map_offd[0] = work[0];
        cnt = 0;
        for (i=0; i < o_cnt; i++)
        {
            if (work[i] > col_map_offd[cnt])
            {
                cnt++;
                col_map_offd[cnt] = work[i];
            }
        }

        for (i=0; i < o_cnt; i++)
        {
            for (j=0; j < num_cols_offd; j++)
            {
                if (offd_j[i] == col_map_offd[j])
                {
                    offd_j[i] = j;
                    break;
                }
            }
        }

        hypre_TFree(work);
    }

    A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size,
                                 global_part, global_part, num_cols_offd,
                                 diag_i[local_num_rows],
                                 offd_i[local_num_rows]);

    hypre_ParCSRMatrixColMapOffd(A) = col_map_offd;

    diag = hypre_ParCSRMatrixDiag(A);
    hypre_CSRMatrixI(diag) = diag_i;
    hypre_CSRMatrixJ(diag) = diag_j;
    hypre_CSRMatrixData(diag) = diag_data;

    offd = hypre_ParCSRMatrixOffd(A);
    hypre_CSRMatrixI(offd) = offd_i;
    if (num_cols_offd)
    {
        hypre_CSRMatrixJ(offd) = offd_j;
        hypre_CSRMatrixData(offd) = offd_data;
    }

    hypre_TFree(nx_part);
    hypre_TFree(ny_part);
    hypre_TFree(value);

    return (HYPRE_ParCSRMatrix) A;
}
Example #13
0
HYPRE_Int main( HYPRE_Int   argc, char *argv[] )
{
   hypre_ParCSRMatrix      *par_matrix, *g_matrix, **submatrices;
   hypre_CSRMatrix         *A_diag, *A_offd;
   hypre_CSRBlockMatrix    *diag;
   hypre_CSRBlockMatrix    *offd;
   hypre_ParCSRBlockMatrix *par_blk_matrix, *par_blk_matrixT, *rap_matrix;
   hypre_Vector        *x_local;
   hypre_Vector        *y_local;
   hypre_ParVector     *x;
   hypre_ParVector     *y;
   HYPRE_Solver        gmres_solver, precon;
   HYPRE_Int                 *diag_i, *diag_j, *offd_i, *offd_j;
   HYPRE_Int                 *diag_i2, *diag_j2, *offd_i2, *offd_j2;
   double              *diag_d, *diag_d2, *offd_d, *offd_d2;
   HYPRE_Int		       mypid, local_size, nprocs;
   HYPRE_Int		       global_num_rows, global_num_cols, num_cols_offd;
   HYPRE_Int		       num_nonzeros_diag, num_nonzeros_offd, *colMap;
   HYPRE_Int 		       ii, jj, kk, row, col, nnz, *indices, *colMap2;
   double 	       *data, ddata, *y_data;
   HYPRE_Int 		       *row_starts, *col_starts, *rstarts, *cstarts;
   HYPRE_Int 		       *row_starts2, *col_starts2;
   HYPRE_Int                 block_size=2, bnnz=4, *index_set;
   FILE                *fp;

   /* --------------------------------------------- */
   /* Initialize MPI                                */
   /* --------------------------------------------- */

   hypre_MPI_Init(&argc, &argv);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mypid);
   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs);

   /* build and fetch matrix */
   MyBuildParLaplacian9pt((HYPRE_ParCSRMatrix *) &par_matrix);
   global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix);
   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix);
   row_starts = hypre_ParCSRMatrixRowStarts(par_matrix);
   col_starts = hypre_ParCSRMatrixColStarts(par_matrix);
   A_diag = hypre_ParCSRMatrixDiag(par_matrix);
   A_offd = hypre_ParCSRMatrixOffd(par_matrix);
   num_cols_offd     = hypre_CSRMatrixNumCols(A_offd);
   num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(A_diag);
   num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(A_offd);

   /* --------------------------------------------- */
   /* build vector and apply matvec                 */
   /* --------------------------------------------- */

   x = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_cols,col_starts);
   hypre_ParVectorSetPartitioningOwner(x,0);
   hypre_ParVectorInitialize(x);
   x_local = hypre_ParVectorLocalVector(x);
   data    = hypre_VectorData(x_local);
   local_size = col_starts[mypid+1] - col_starts[mypid];
   for (ii = 0; ii < local_size; ii++) data[ii] = 1.0;
   y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_rows,row_starts);
   hypre_ParVectorSetPartitioningOwner(y,0);
   hypre_ParVectorInitialize(y);
   hypre_ParCSRMatrixMatvec (1.0, par_matrix, x, 0.0, y);
   ddata = hypre_ParVectorInnerProd(y, y);
   if (mypid == 0) hypre_printf("y inner product = %e\n", ddata);
   hypre_ParVectorDestroy(x);
   hypre_ParVectorDestroy(y);

   /* --------------------------------------------- */
   /* build block matrix                            */
   /* --------------------------------------------- */

   rstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1);
   for (ii = 0; ii <= nprocs; ii++) rstarts[ii] = row_starts[ii];
   cstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1);
   for (ii = 0; ii <= nprocs; ii++) cstarts[ii] = col_starts[ii];

   par_blk_matrix = hypre_ParCSRBlockMatrixCreate(hypre_MPI_COMM_WORLD,block_size,
                          global_num_rows, global_num_cols, rstarts,
                          cstarts, num_cols_offd, num_nonzeros_diag,
                          num_nonzeros_offd);
   colMap  = hypre_ParCSRMatrixColMapOffd(par_matrix);
   if (num_cols_offd > 0) colMap2 = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
   else                   colMap2 = NULL;
   for (ii = 0; ii < num_cols_offd; ii++) colMap2[ii] = colMap[ii];
   hypre_ParCSRBlockMatrixColMapOffd(par_blk_matrix) = colMap2;
   diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix));
   diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix));
   diag_d = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix));
   diag = hypre_ParCSRBlockMatrixDiag(par_blk_matrix);
   diag_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1);
   diag_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag);
   diag_d2 = hypre_CTAlloc(double, num_nonzeros_diag*bnnz);
   for (ii = 0; ii <= local_size; ii++) diag_i2[ii] = diag_i[ii];
   for (ii = 0; ii < num_nonzeros_diag; ii++) diag_j2[ii] = diag_j[ii];
   hypre_CSRBlockMatrixI(diag) = diag_i2;
   hypre_CSRBlockMatrixJ(diag) = diag_j2;
   for (ii = 0; ii < num_nonzeros_diag; ii++)
   {
      for (jj = 0; jj < block_size; jj++)
         for (kk = 0; kk < block_size; kk++)
         {
            if (jj <= kk)
               diag_d2[ii*bnnz+jj*block_size+kk] = diag_d[ii];
            else
               diag_d2[ii*bnnz+jj*block_size+kk] = 0.0;
         }
   }
   hypre_CSRBlockMatrixData(diag) = diag_d2;

   offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix));
   offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix));
   offd_d = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix));
   offd   = hypre_ParCSRBlockMatrixOffd(par_blk_matrix);
   offd_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1);
   for (ii = 0; ii <= local_size; ii++) offd_i2[ii] = offd_i[ii];
   hypre_CSRBlockMatrixI(offd) = offd_i2;
   if (num_cols_offd)
   {
      offd_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd);
      for (ii = 0; ii < num_nonzeros_offd; ii++) offd_j2[ii] = offd_j[ii];
      hypre_CSRBlockMatrixJ(offd) = offd_j2;
      offd_d2 = hypre_CTAlloc(double, num_nonzeros_offd*bnnz);
      for (ii = 0; ii < num_nonzeros_offd; ii++)
      {
         for (jj = 0; jj < block_size; jj++)
            for (kk = 0; kk < block_size; kk++)
            {
               if (jj <= kk)
                  offd_d2[ii*bnnz+jj*block_size+kk] = offd_d[ii];
               else
                  offd_d2[ii*bnnz+jj*block_size+kk] = 0.0;
            }
      }
      hypre_CSRBlockMatrixData(offd) = offd_d2;
   }
   else
   {
Example #14
0
HYPRE_Int hypre_seqAMGSetup( hypre_ParAMGData *amg_data,
                      HYPRE_Int p_level,
                      HYPRE_Int coarse_threshold)


{

   /* Par Data Structure variables */
   hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data);

   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(Par_A_array[0]); 
   MPI_Comm 	      new_comm, seq_comm;

   hypre_ParCSRMatrix   *A_seq = NULL;
   hypre_CSRMatrix  *A_seq_diag;
   hypre_CSRMatrix  *A_seq_offd;
   hypre_ParVector   *F_seq = NULL;
   hypre_ParVector   *U_seq = NULL;

   hypre_ParCSRMatrix *A;

   HYPRE_Int               **dof_func_array;   
   HYPRE_Int                num_procs, my_id;

   HYPRE_Int                not_finished_coarsening;
   HYPRE_Int                level;

   HYPRE_Solver  coarse_solver;

   /* misc */
   dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data);

   /*MPI Stuff */
   hypre_MPI_Comm_size(comm, &num_procs);   
   hypre_MPI_Comm_rank(comm,&my_id);
  
   /*initial */
   level = p_level;
   
   not_finished_coarsening = 1;
  
   /* convert A at this level to sequential */
   A = Par_A_array[level];

   {
      double *A_seq_data = NULL;
      HYPRE_Int *A_seq_i = NULL;
      HYPRE_Int *A_seq_offd_i = NULL;
      HYPRE_Int *A_seq_j = NULL;

      double *A_tmp_data = NULL;
      HYPRE_Int *A_tmp_i = NULL;
      HYPRE_Int *A_tmp_j = NULL;

      HYPRE_Int *info, *displs, *displs2;
      HYPRE_Int i, j, size, num_nonzeros, total_nnz, cnt;
  
      hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
      hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
      HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
      HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
      HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
      HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
      HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
      double *A_diag_data = hypre_CSRMatrixData(A_diag);
      double *A_offd_data = hypre_CSRMatrixData(A_offd);
      HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
      HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);

      hypre_MPI_Group orig_group, new_group; 
      HYPRE_Int *ranks, new_num_procs, *row_starts;

      info = hypre_CTAlloc(HYPRE_Int, num_procs);

      hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

      ranks = hypre_CTAlloc(HYPRE_Int, num_procs);

      new_num_procs = 0;
      for (i=0; i < num_procs; i++)
         if (info[i]) 
         {
            ranks[new_num_procs] = i;
            info[new_num_procs++] = info[i];
         }

      MPI_Comm_group(comm, &orig_group);
      hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group);
      MPI_Comm_create(comm, new_group, &new_comm);
      hypre_MPI_Group_free(&new_group);
      hypre_MPI_Group_free(&orig_group);

      if (num_rows)
      {
         /* alloc space in seq data structure only for participating procs*/
         HYPRE_BoomerAMGCreate(&coarse_solver);
         HYPRE_BoomerAMGSetMaxRowSum(coarse_solver,
		hypre_ParAMGDataMaxRowSum(amg_data)); 
         HYPRE_BoomerAMGSetStrongThreshold(coarse_solver,
		hypre_ParAMGDataStrongThreshold(amg_data)); 
         HYPRE_BoomerAMGSetCoarsenType(coarse_solver,
		hypre_ParAMGDataCoarsenType(amg_data)); 
         HYPRE_BoomerAMGSetInterpType(coarse_solver,
		hypre_ParAMGDataInterpType(amg_data)); 
         HYPRE_BoomerAMGSetTruncFactor(coarse_solver, 
		hypre_ParAMGDataTruncFactor(amg_data)); 
         HYPRE_BoomerAMGSetPMaxElmts(coarse_solver, 
		hypre_ParAMGDataPMaxElmts(amg_data)); 
	 if (hypre_ParAMGDataUserRelaxType(amg_data) > -1) 
            HYPRE_BoomerAMGSetRelaxType(coarse_solver, 
		hypre_ParAMGDataUserRelaxType(amg_data)); 
         HYPRE_BoomerAMGSetRelaxOrder(coarse_solver, 
		hypre_ParAMGDataRelaxOrder(amg_data)); 
         HYPRE_BoomerAMGSetRelaxWt(coarse_solver, 
		hypre_ParAMGDataUserRelaxWeight(amg_data)); 
	 if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1) 
            HYPRE_BoomerAMGSetNumSweeps(coarse_solver, 
		hypre_ParAMGDataUserNumSweeps(amg_data)); 
         HYPRE_BoomerAMGSetNumFunctions(coarse_solver, 
		hypre_ParAMGDataNumFunctions(amg_data)); 
         HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1); 
         HYPRE_BoomerAMGSetTol(coarse_solver, 0); 

         /* Create CSR Matrix, will be Diag part of new matrix */
         A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows+1);

         A_tmp_i[0] = 0;
         for (i=1; i < num_rows+1; i++)
            A_tmp_i[i] = A_diag_i[i]-A_diag_i[i-1]+A_offd_i[i]-A_offd_i[i-1];

         num_nonzeros = A_offd_i[num_rows]+A_diag_i[num_rows];

         A_tmp_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
         A_tmp_data = hypre_CTAlloc(double, num_nonzeros);

         cnt = 0;
         for (i=0; i < num_rows; i++)
         {
            for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++)
	    {
	       A_tmp_j[cnt] = A_diag_j[j]+first_row_index;
	       A_tmp_data[cnt++] = A_diag_data[j];
	    }
            for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++)
	    {
	       A_tmp_j[cnt] = col_map_offd[A_offd_j[j]];
	       A_tmp_data[cnt++] = A_offd_data[j];
	    }
         }

         displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
         displs[0] = 0;
         for (i=1; i < new_num_procs+1; i++)
            displs[i] = displs[i-1]+info[i-1];
         size = displs[new_num_procs];
  
         A_seq_i = hypre_CTAlloc(HYPRE_Int, size+1);
         A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1);

         hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, 
			displs, HYPRE_MPI_INT, new_comm );

         displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);

         A_seq_i[0] = 0;
         displs2[0] = 0;
         for (j=1; j < displs[1]; j++)
            A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
         for (i=1; i < new_num_procs; i++)
         {
            for (j=displs[i]; j < displs[i+1]; j++)
            {
               A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
            }
         }
         A_seq_i[size] = A_seq_i[size]+A_seq_i[size-1];
         displs2[new_num_procs] = A_seq_i[size];
         for (i=1; i < new_num_procs+1; i++)
         {
            displs2[i] = A_seq_i[displs[i]];
            info[i-1] = displs2[i] - displs2[i-1];
         }

         total_nnz = displs2[new_num_procs];
         A_seq_j = hypre_CTAlloc(HYPRE_Int, total_nnz);
         A_seq_data = hypre_CTAlloc(double, total_nnz);

         hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT,
                       A_seq_j, info, displs2,
                       HYPRE_MPI_INT, new_comm );

         hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, hypre_MPI_DOUBLE,
                       A_seq_data, info, displs2,
                       hypre_MPI_DOUBLE, new_comm );

         hypre_TFree(displs);
         hypre_TFree(displs2);
         hypre_TFree(A_tmp_i);
         hypre_TFree(A_tmp_j);
         hypre_TFree(A_tmp_data);
   
         row_starts = hypre_CTAlloc(HYPRE_Int,2);
         row_starts[0] = 0; 
         row_starts[1] = size;
 
         /* Create 1 proc communicator */
         seq_comm = hypre_MPI_COMM_SELF;

         A_seq = hypre_ParCSRMatrixCreate(seq_comm,size,size,
					  row_starts, row_starts,
						0,total_nnz,0); 

         A_seq_diag = hypre_ParCSRMatrixDiag(A_seq);
         A_seq_offd = hypre_ParCSRMatrixOffd(A_seq);

         hypre_CSRMatrixData(A_seq_diag) = A_seq_data;
         hypre_CSRMatrixI(A_seq_diag) = A_seq_i;
         hypre_CSRMatrixJ(A_seq_diag) = A_seq_j;
         hypre_CSRMatrixI(A_seq_offd) = A_seq_offd_i;

         F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         hypre_ParVectorOwnsPartitioning(F_seq) = 0;
         hypre_ParVectorOwnsPartitioning(U_seq) = 0;
         hypre_ParVectorInitialize(F_seq);
         hypre_ParVectorInitialize(U_seq);

         hypre_BoomerAMGSetup(coarse_solver,A_seq,F_seq,U_seq);

         hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver;
         hypre_ParAMGDataACoarse(amg_data) = A_seq;
         hypre_ParAMGDataFCoarse(amg_data) = F_seq;
         hypre_ParAMGDataUCoarse(amg_data) = U_seq;
         hypre_ParAMGDataNewComm(amg_data) = new_comm;
      }
      hypre_TFree(info);
      hypre_TFree(ranks);
   }
 
   return 0;
   
   
}
hypre_ParCSRBooleanMatrix *
hypre_CSRBooleanMatrixToParCSRBooleanMatrix
( MPI_Comm comm, hypre_CSRBooleanMatrix *A,
  HYPRE_Int *row_starts, HYPRE_Int *col_starts )
{
   HYPRE_Int          global_data[2];
   HYPRE_Int          global_num_rows;
   HYPRE_Int          global_num_cols;
   HYPRE_Int          *local_num_rows;

   HYPRE_Int          num_procs, my_id;
   HYPRE_Int          *local_num_nonzeros;
   HYPRE_Int          num_nonzeros;
  
   HYPRE_Int          *a_i;
   HYPRE_Int          *a_j;
  
   hypre_CSRBooleanMatrix *local_A;

   hypre_MPI_Request  *requests;
   hypre_MPI_Status   *status, status0;
   hypre_MPI_Datatype *csr_matrix_datatypes;

   hypre_ParCSRBooleanMatrix *par_matrix;

   HYPRE_Int          first_col_diag;
   HYPRE_Int          last_col_diag;
 
   HYPRE_Int i, j, ind;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   if (my_id == 0) 
   {
        global_data[0] = hypre_CSRBooleanMatrix_Get_NRows(A);
        global_data[1] = hypre_CSRBooleanMatrix_Get_NCols(A);
        a_i = hypre_CSRBooleanMatrix_Get_I(A);
        a_j = hypre_CSRBooleanMatrix_Get_J(A);
   }
   hypre_MPI_Bcast(global_data,2,HYPRE_MPI_INT,0,comm);
   global_num_rows = global_data[0];
   global_num_cols = global_data[1];

   local_num_rows = hypre_CTAlloc(HYPRE_Int, num_procs);
   csr_matrix_datatypes = hypre_CTAlloc(hypre_MPI_Datatype, num_procs);

   par_matrix = hypre_ParCSRBooleanMatrixCreate (comm, global_num_rows,
        global_num_cols,row_starts,col_starts,0,0,0);

   row_starts = hypre_ParCSRBooleanMatrix_Get_RowStarts(par_matrix);
   col_starts = hypre_ParCSRBooleanMatrix_Get_ColStarts(par_matrix);

   for (i=0; i < num_procs; i++)
         local_num_rows[i] = row_starts[i+1] - row_starts[i];

   if (my_id == 0)
   {
        local_num_nonzeros = hypre_CTAlloc(HYPRE_Int, num_procs);
        for (i=0; i < num_procs-1; i++)
                local_num_nonzeros[i] = a_i[row_starts[i+1]] 
                                - a_i[row_starts[i]];
        local_num_nonzeros[num_procs-1] = a_i[global_num_rows] 
                                - a_i[row_starts[num_procs-1]];
   }
   hypre_MPI_Scatter(local_num_nonzeros,1,HYPRE_MPI_INT,&num_nonzeros,1,HYPRE_MPI_INT,0,comm);

   if (my_id == 0) num_nonzeros = local_num_nonzeros[0];

   local_A = hypre_CSRBooleanMatrixCreate(local_num_rows[my_id], global_num_cols,
                num_nonzeros);
   if (my_id == 0)
   {
        requests = hypre_CTAlloc (hypre_MPI_Request, num_procs-1);
        status = hypre_CTAlloc(hypre_MPI_Status, num_procs-1);
        j=0;
        for (i=1; i < num_procs; i++)
        {
                ind = a_i[row_starts[i]];
                hypre_BuildCSRBooleanMatrixMPIDataType(local_num_nonzeros[i], 
                        local_num_rows[i],
                        &a_i[row_starts[i]],
                        &a_j[ind],
                        &csr_matrix_datatypes[i]);
                hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, comm,
                        &requests[j++]);
                hypre_MPI_Type_free(&csr_matrix_datatypes[i]);
        }
        hypre_CSRBooleanMatrix_Get_I(local_A) = a_i;
        hypre_CSRBooleanMatrix_Get_J(local_A) = a_j;
        hypre_MPI_Waitall(num_procs-1,requests,status);
        hypre_TFree(requests);
        hypre_TFree(status);
        hypre_TFree(local_num_nonzeros);
    }
   else
   {
        hypre_CSRBooleanMatrixInitialize(local_A);
        hypre_BuildCSRBooleanMatrixMPIDataType(num_nonzeros, 
                        local_num_rows[my_id],
                        hypre_CSRBooleanMatrix_Get_I(local_A),
                        hypre_CSRBooleanMatrix_Get_J(local_A),
                        csr_matrix_datatypes);
        hypre_MPI_Recv(hypre_MPI_BOTTOM,1,csr_matrix_datatypes[0],0,0,comm,&status0);
        hypre_MPI_Type_free(csr_matrix_datatypes);
   }

   first_col_diag = col_starts[my_id];
   last_col_diag = col_starts[my_id+1]-1;

   BooleanGenerateDiagAndOffd(local_A, par_matrix, first_col_diag, last_col_diag);

   /* set pointers back to NULL before destroying */
   if (my_id == 0)
   {      
      hypre_CSRBooleanMatrix_Get_I(local_A) = NULL;
      hypre_CSRBooleanMatrix_Get_J(local_A) = NULL; 
   }      
   hypre_CSRBooleanMatrixDestroy(local_A);
   hypre_TFree(local_num_rows);
   hypre_TFree(csr_matrix_datatypes);

   return par_matrix;
}
hypre_ParCSRBooleanMatrix *hypre_ParCSRBooleanMatrixCreate( MPI_Comm comm,
                               HYPRE_Int global_num_rows, HYPRE_Int global_num_cols,
                               HYPRE_Int *row_starts, HYPRE_Int *col_starts,
                               HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag,
                               HYPRE_Int num_nonzeros_offd)
{
   hypre_ParCSRBooleanMatrix *matrix;
   HYPRE_Int                     num_procs, my_id;
   HYPRE_Int                     local_num_rows, local_num_cols;
   HYPRE_Int                     first_row_index, first_col_diag;
   
   matrix = hypre_CTAlloc(hypre_ParCSRBooleanMatrix, 1);

   hypre_MPI_Comm_rank(comm,&my_id);
   hypre_MPI_Comm_size(comm,&num_procs);

   if (!row_starts)
   {
      hypre_GeneratePartitioning(global_num_rows,num_procs,&row_starts);
   }

   if (!col_starts)
   {
      if (global_num_rows == global_num_cols)
      {
        col_starts = row_starts;
      }
      else
      {
        hypre_GeneratePartitioning(global_num_cols,num_procs,&col_starts);
      }
   }

   first_row_index = row_starts[my_id];
   local_num_rows = row_starts[my_id+1]-first_row_index;
   first_col_diag = col_starts[my_id];
   local_num_cols = col_starts[my_id+1]-first_col_diag;
   hypre_ParCSRBooleanMatrix_Get_Comm(matrix) = comm;
   hypre_ParCSRBooleanMatrix_Get_Diag(matrix) = 
          hypre_CSRBooleanMatrixCreate(local_num_rows, local_num_cols,
                                     num_nonzeros_diag);
   hypre_ParCSRBooleanMatrix_Get_Offd(matrix) = 
          hypre_CSRBooleanMatrixCreate(local_num_rows, num_cols_offd,
                                     num_nonzeros_offd);
   hypre_ParCSRBooleanMatrix_Get_GlobalNRows(matrix) = global_num_rows;
   hypre_ParCSRBooleanMatrix_Get_GlobalNCols(matrix) = global_num_cols;
   hypre_ParCSRBooleanMatrix_Get_StartRow(matrix) = first_row_index;
   hypre_ParCSRBooleanMatrix_Get_FirstColDiag(matrix) = first_col_diag;
   hypre_ParCSRBooleanMatrix_Get_ColMapOffd(matrix) = NULL;
   hypre_ParCSRBooleanMatrix_Get_RowStarts(matrix) = row_starts;
   hypre_ParCSRBooleanMatrix_Get_ColStarts(matrix) = col_starts;
   hypre_ParCSRBooleanMatrix_Get_CommPkg(matrix) = NULL;

   hypre_ParCSRBooleanMatrix_Get_OwnsData(matrix)      = 1;
   hypre_ParCSRBooleanMatrix_Get_OwnsRowStarts(matrix) = 1;
   hypre_ParCSRBooleanMatrix_Get_OwnsColStarts(matrix) = 1;
   if (row_starts == col_starts)
      hypre_ParCSRBooleanMatrix_Get_OwnsColStarts(matrix) = 0;

   hypre_ParCSRBooleanMatrix_Get_Rowindices(matrix)   = NULL;
   hypre_ParCSRBooleanMatrix_Get_Getrowactive(matrix) = 0;

   return matrix;
}
hypre_ParCSRBooleanMatrix *
hypre_ParCSRBooleanMatrixRead( MPI_Comm comm, const char *file_name )
{
   hypre_ParCSRBooleanMatrix  *matrix;
   hypre_CSRBooleanMatrix  *diag;
   hypre_CSRBooleanMatrix  *offd;
   HYPRE_Int  my_id, i, num_procs;
   char new_file_d[80], new_file_o[80], new_file_info[80];
   HYPRE_Int  global_num_rows, global_num_cols, num_cols_offd;
   HYPRE_Int  local_num_rows;
   HYPRE_Int  *row_starts;
   HYPRE_Int  *col_starts;
   HYPRE_Int  *col_map_offd;
   FILE *fp;
   HYPRE_Int equal = 1;

   hypre_MPI_Comm_rank(comm,&my_id);
   hypre_MPI_Comm_size(comm,&num_procs);
   row_starts = hypre_CTAlloc(HYPRE_Int, num_procs+1);
   col_starts = hypre_CTAlloc(HYPRE_Int, num_procs+1);
   hypre_sprintf(new_file_d,"%s.D.%d",file_name,my_id);
   hypre_sprintf(new_file_o,"%s.O.%d",file_name,my_id);
   hypre_sprintf(new_file_info,"%s.INFO.%d",file_name,my_id);
   fp = fopen(new_file_info, "r");
   hypre_fscanf(fp, "%d", &global_num_rows);
   hypre_fscanf(fp, "%d", &global_num_cols);
   hypre_fscanf(fp, "%d", &num_cols_offd);
   for (i=0; i < num_procs; i++)
           hypre_fscanf(fp, "%d %d", &row_starts[i], &col_starts[i]);
   row_starts[num_procs] = global_num_rows;
   col_starts[num_procs] = global_num_cols;
   col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
   for (i=0; i < num_cols_offd; i++)
        hypre_fscanf(fp, "%d", &col_map_offd[i]);
        
   fclose(fp);

   for (i=num_procs; i >= 0; i--)
        if (row_starts[i] != col_starts[i])
        {
                equal = 0;
                break;
        }

   if (equal)
   {
        hypre_TFree(col_starts);
        col_starts = row_starts;
   }
   
   diag = hypre_CSRBooleanMatrixRead(new_file_d);
   local_num_rows = hypre_CSRBooleanMatrix_Get_NRows(diag);

   if (num_cols_offd)
   {
        offd = hypre_CSRBooleanMatrixRead(new_file_o);
   }
   else
        offd = hypre_CSRBooleanMatrixCreate(local_num_rows,0,0);

        
   matrix = hypre_CTAlloc(hypre_ParCSRBooleanMatrix, 1);
   
   hypre_ParCSRBooleanMatrix_Get_Comm(matrix) = comm;
   hypre_ParCSRBooleanMatrix_Get_GlobalNRows(matrix) = global_num_rows;
   hypre_ParCSRBooleanMatrix_Get_GlobalNCols(matrix) = global_num_cols;
   hypre_ParCSRBooleanMatrix_Get_StartRow(matrix) = row_starts[my_id];
   hypre_ParCSRBooleanMatrix_Get_FirstColDiag(matrix) = col_starts[my_id];
   hypre_ParCSRBooleanMatrix_Get_RowStarts(matrix) = row_starts;
   hypre_ParCSRBooleanMatrix_Get_ColStarts(matrix) = col_starts;
   hypre_ParCSRBooleanMatrix_Get_CommPkg(matrix) = NULL;

   /* set defaults */
   hypre_ParCSRBooleanMatrix_Get_OwnsData(matrix) = 1;
   hypre_ParCSRBooleanMatrix_Get_OwnsRowStarts(matrix) = 1;
   hypre_ParCSRBooleanMatrix_Get_OwnsColStarts(matrix) = 1;
   if (row_starts == col_starts)
        hypre_ParCSRBooleanMatrix_Get_OwnsColStarts(matrix) = 0;

   hypre_ParCSRBooleanMatrix_Get_Diag(matrix) = diag;
   hypre_ParCSRBooleanMatrix_Get_Offd(matrix) = offd;
   if (num_cols_offd)
        hypre_ParCSRBooleanMatrix_Get_ColMapOffd(matrix) = col_map_offd;
   else
        hypre_ParCSRBooleanMatrix_Get_ColMapOffd(matrix) = NULL;

   return matrix;
}
Example #18
0
hypre_ParCSRMatrix * hypre_ParMatmul_FC(
   hypre_ParCSRMatrix * A, hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker,
   HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd )
/* hypre_parMatmul_FC creates and returns the "Fine"-designated rows of the
   matrix product A*P.  A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC
   where nC is the number of coarse rows/columns, nF the number of fine
   rows/columns.  The size of C=A*P is (nC+nF)*nC, even though not all rows
   of C are actually computed.  If we were to construct a matrix consisting
   only of the computed rows of C, its size would be nF*nC.
   "Fine" is defined solely by the marker array, and for example could be
   a proper subset of the fine points of a multigrid hierarchy.
*/
{
   /* To compute a submatrix of C containing only the computed data, i.e.
      only "Fine" rows, we would have to do a lot of computational work,
      with a lot of communication.  The communication is because such a
      matrix would need global information that depends on which rows are
      "Fine".
   */

   MPI_Comm 	   comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   
   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int             *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int             *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   
   double          *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int             *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int             *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int	num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int	num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int	num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
   
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   
   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);

   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int		   *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
   
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);

   HYPRE_Int	first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_Int	last_col_diag_P;
   HYPRE_Int *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int	num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int	num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *C;
   HYPRE_Int		      *col_map_offd_C;
   HYPRE_Int		      *map_P_to_C;

   hypre_CSRMatrix *C_diag;

   double          *C_diag_data;
   HYPRE_Int             *C_diag_i;
   HYPRE_Int             *C_diag_j;

   hypre_CSRMatrix *C_offd;

   double          *C_offd_data=NULL;
   HYPRE_Int             *C_offd_i=NULL;
   HYPRE_Int             *C_offd_j=NULL;

   HYPRE_Int              C_diag_size;
   HYPRE_Int              C_offd_size;
   HYPRE_Int		    num_cols_offd_C = 0;
   
   hypre_CSRMatrix *Ps_ext;
   
   double          *Ps_ext_data;
   HYPRE_Int             *Ps_ext_i;
   HYPRE_Int             *Ps_ext_j;

   double          *P_ext_diag_data;
   HYPRE_Int             *P_ext_diag_i;
   HYPRE_Int             *P_ext_diag_j;
   HYPRE_Int              P_ext_diag_size;

   double          *P_ext_offd_data;
   HYPRE_Int             *P_ext_offd_i;
   HYPRE_Int             *P_ext_offd_j;
   HYPRE_Int              P_ext_offd_size;

   HYPRE_Int		   *P_marker;
   HYPRE_Int		   *temp;

   HYPRE_Int              i, j;
   HYPRE_Int              i1, i2, i3;
   HYPRE_Int              jj2, jj3;
   
   HYPRE_Int              jj_count_diag, jj_count_offd;
   HYPRE_Int              jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int              start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int		    n_rows_A_global, n_cols_A_global;
   HYPRE_Int		    n_rows_P_global, n_cols_P_global;
   HYPRE_Int              allsquare = 0;
   HYPRE_Int              cnt, cnt_offd, cnt_diag;
   HYPRE_Int 		    num_procs;
   HYPRE_Int 		    value;

   double           a_entry;
   double           a_b_product;
   
   n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P);

   if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P)
   {
	hypre_printf(" Error! Incompatible matrix dimensions!\n");
	return NULL;
   }
   /* if (num_rows_A==num_cols_P) allsquare = 1; */

   /*-----------------------------------------------------------------------
    *  Extract P_ext, i.e. portion of P that is stored on neighbor procs
    *  and needed locally for matrix matrix product 
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);

   if (num_procs > 1)
   {
       /*---------------------------------------------------------------------
    	* If there exists no CommPkg for A, a CommPkg is generated using
    	* equally load balanced partitionings within 
	* hypre_ParCSRMatrixExtractBExt
    	*--------------------------------------------------------------------*/
   	Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1);
   	Ps_ext_data = hypre_CSRMatrixData(Ps_ext);
   	Ps_ext_i    = hypre_CSRMatrixI(Ps_ext);
   	Ps_ext_j    = hypre_CSRMatrixJ(Ps_ext);
   }
   P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_diag_size = 0;
   P_ext_offd_size = 0;
   last_col_diag_P = first_col_diag_P + num_cols_diag_P -1;

   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
            P_ext_offd_size++;
         else
            P_ext_diag_size++;
      P_ext_diag_i[i+1] = P_ext_diag_size;
      P_ext_offd_i[i+1] = P_ext_offd_size;
   }

   if (P_ext_diag_size)
   {
      P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size);
      P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size);
   }
   if (P_ext_offd_size)
   {
      P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size);
      P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size);
   }

   cnt_offd = 0;
   cnt_diag = 0;
   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_j[cnt_offd] = Ps_ext_j[j];
            P_ext_offd_data[cnt_offd++] = Ps_ext_data[j];
         }
         else
         {
            P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P;
            P_ext_diag_data[cnt_diag++] = Ps_ext_data[j];
         }
   }

   if (num_procs > 1)
   {
      hypre_CSRMatrixDestroy(Ps_ext);
      Ps_ext = NULL;
   }

   cnt = 0;
   if (P_ext_offd_size || num_cols_offd_P)
   {
      temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P);
      for (i=0; i < P_ext_offd_size; i++)
         temp[i] = P_ext_offd_j[i];
      cnt = P_ext_offd_size;
      for (i=0; i < num_cols_offd_P; i++)
         temp[cnt++] = col_map_offd_P[i];
   }
   if (cnt)
   {
      qsort0(temp, 0, cnt-1);

      num_cols_offd_C = 1;
      value = temp[0];
      for (i=1; i < cnt; i++)
      {
         if (temp[i] > value)
         {
            value = temp[i];
            temp[num_cols_offd_C++] = value;
         }
      }
   }

   if (num_cols_offd_C)
        col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);

   for (i=0; i < num_cols_offd_C; i++)
      col_map_offd_C[i] = temp[i];

   if (P_ext_offd_size || num_cols_offd_P)
      hypre_TFree(temp);

   for (i=0 ; i < P_ext_offd_size; i++)
      P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C,
                                           P_ext_offd_j[i],
                                           num_cols_offd_C);
   if (num_cols_offd_P)
   {
      map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P);

      cnt = 0;
      for (i=0; i < num_cols_offd_C; i++)
         if (col_map_offd_C[i] == col_map_offd_P[cnt])
         {
            map_P_to_C[cnt++] = i;
            if (cnt == num_cols_offd_P) break;
         }
   }

   /*-----------------------------------------------------------------------
   *  Allocate marker array.
    *-----------------------------------------------------------------------*/

   P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C);

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }


/* no changes for the marked version above this point */
   /* This function call is the first pass: */
   hypre_ParMatmul_RowSizes_Marked(
      &C_diag_i, &C_offd_i, &P_marker,
      A_diag_i, A_diag_j, A_offd_i, A_offd_j,
      P_diag_i, P_diag_j, P_offd_i, P_offd_j,
      P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j,
      map_P_to_C,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A, allsquare,
      num_cols_diag_P, num_cols_offd_P,
      num_cols_offd_C, CF_marker, dof_func, dof_func_offd
      );

   /* The above call of hypre_ParMatmul_RowSizes_Marked computed
      two scalars: C_diag_size, C_offd_size,
      and two arrays: C_diag_i, C_offd_i
      ( P_marker is also computed, but only used internally )
   */

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/
 
   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;
   C_diag_data = hypre_CTAlloc(double, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   { 
   	C_offd_data = hypre_CTAlloc(double, C_offd_size);
   	C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   } 


   /*-----------------------------------------------------------------------
    *  Second Pass: Fill in C_diag_data and C_diag_j.
    *  Second Pass: Fill in C_offd_data and C_offd_j.
    *-----------------------------------------------------------------------*/

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }
   
   /*-----------------------------------------------------------------------
    *  Loop over interior c-points.
    *-----------------------------------------------------------------------*/
    
   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {

      if ( CF_marker[i1] < 0 )  /* i1 is a fine row */
         /* ... This and the coarse row code are the only parts between first pass
            and near the end where
            hypre_ParMatmul_FC is different from the regular hypre_ParMatmul */
      {

         /*--------------------------------------------------------------------
          *  Create diagonal entry, C_{i1,i1} 
          *--------------------------------------------------------------------*/

         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_offd.
          *-----------------------------------------------------------------*/
         
	 if (num_cols_offd_A)
	 {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];
               if( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] )
               {  /* interpolate only like "functions" */
                  a_entry = A_offd_data[jj2];
            
                  /*-----------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext.
                   *-----------------------------------------------------------*/

                  for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+P_ext_offd_j[jj3];
                     a_b_product = a_entry * P_ext_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                        C_offd_data[P_marker[i3]] += a_b_product;
                  }
                  for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
                  {
                     i3 = P_ext_diag_j[jj3];
                     a_b_product = a_entry * P_ext_diag_data[jj3];

                     if (P_marker[i3] < jj_row_begin_diag)
                     {
                        P_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                        C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               else
               {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                     different "functions".  As we haven't created an entry for
                     C(i1,i2), nothing needs to be done. */
               }

            }
         }

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];
            if( dof_func==NULL || dof_func[i1] == dof_func[i2] )
            {  /* interpolate only like "functions" */
               a_entry = A_diag_data[jj2];
            
               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_diag.
                *-----------------------------------------------------------*/

               for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
               {
                  i3 = P_diag_j[jj3];
                  a_b_product = a_entry * P_diag_data[jj3];
                  
                  /*--------------------------------------------------------
                   *  Check P_marker to see that C_{i1,i3} has not already
                   *  been accounted for. If it has not, create a new entry.
                   *  If it has, add new contribution.
                   *--------------------------------------------------------*/

                  if (P_marker[i3] < jj_row_begin_diag)
                  {
                     P_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               if (num_cols_offd_P)
	       {
                  for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]];
                     a_b_product = a_entry * P_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/

                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
            else
            {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                  different "functions".  As we haven't created an entry for
                  C(i1,i2), nothing needs to be done. */
            }
         }
      }
      else  /* i1 is a coarse row.*/
         /* Copy P coarse-row values to C.  This is useful if C is meant to
            become a replacement for P */
      {
	 if (num_cols_offd_P)
	 {
            for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++)
            {
               C_offd_j[jj_count_offd] = P_offd_j[jj_count_offd];
               C_offd_data[jj_count_offd] = P_offd_data[jj_count_offd];
               ++jj_count_offd;
            }
         }
         for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++)
         {
            C_diag_j[jj_count_diag] = P_diag_j[jj2];
            C_diag_data[jj_count_diag] = P_diag_data[jj2];
            ++jj_count_diag;
         }
      }
   }

   C = hypre_ParCSRMatrixCreate(
      comm, n_rows_A_global, n_cols_P_global,
      row_starts_A, col_starts_P, num_cols_offd_C, C_diag_size, C_offd_size );

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C,0);
   hypre_ParCSRMatrixSetColStartsOwner(C,0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data; 
   hypre_CSRMatrixI(C_diag) = C_diag_i; 
   hypre_CSRMatrixJ(C_diag) = C_diag_j; 

   C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrixI(C_offd) = C_offd_i; 
   hypre_ParCSRMatrixOffd(C) = C_offd;

   if (num_cols_offd_C)
   {
      hypre_CSRMatrixData(C_offd) = C_offd_data; 
      hypre_CSRMatrixJ(C_offd) = C_offd_j; 
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

   }

   /*-----------------------------------------------------------------------
    *  Free various arrays
    *-----------------------------------------------------------------------*/

   hypre_TFree(P_marker);   
   hypre_TFree(P_ext_diag_i);
   if (P_ext_diag_size)
   {
      hypre_TFree(P_ext_diag_j);
      hypre_TFree(P_ext_diag_data);
   }
   hypre_TFree(P_ext_offd_i);
   if (P_ext_offd_size)
   {
      hypre_TFree(P_ext_offd_j);
      hypre_TFree(P_ext_offd_data);
   }
   if (num_cols_offd_P) hypre_TFree(map_P_to_C);

   return C;
   
}
Example #19
0
void
hypre_MatTCommPkgCreate_core (

/* input args: */
   MPI_Comm comm, HYPRE_Int * col_map_offd, HYPRE_Int first_col_diag, HYPRE_Int * col_starts,
   HYPRE_Int num_rows_diag, HYPRE_Int num_cols_diag, HYPRE_Int num_cols_offd, HYPRE_Int * row_starts,
   HYPRE_Int firstColDiag, HYPRE_Int * colMapOffd,
   HYPRE_Int * mat_i_diag, HYPRE_Int * mat_j_diag, HYPRE_Int * mat_i_offd, HYPRE_Int * mat_j_offd,

   HYPRE_Int data,  /* = 1 for a matrix with floating-point data, =0 for Boolean matrix */

/* pointers to output args: */
   HYPRE_Int * p_num_recvs, HYPRE_Int ** p_recv_procs, HYPRE_Int ** p_recv_vec_starts,
   HYPRE_Int * p_num_sends, HYPRE_Int ** p_send_procs, HYPRE_Int ** p_send_map_starts,
   HYPRE_Int ** p_send_map_elmts

   )
{
   HYPRE_Int			num_sends;
   HYPRE_Int			*send_procs;
   HYPRE_Int			*send_map_starts;
   HYPRE_Int			*send_map_elmts;
   HYPRE_Int			num_recvs;
   HYPRE_Int			*recv_procs;
   HYPRE_Int			*recv_vec_starts;
   HYPRE_Int	i, j, j2, k, ir, rowmin, rowmax;
   HYPRE_Int	*tmp, *recv_buf, *displs, *info, *send_buf, *all_num_sends3;
   HYPRE_Int	num_procs, my_id, num_elmts;
   HYPRE_Int	local_info, index, index2;
   HYPRE_Int pmatch, col, kc, p;
   HYPRE_Int * recv_sz_buf;
   HYPRE_Int * row_marker;
   
   hypre_MPI_Comm_size(comm, &num_procs);  
   hypre_MPI_Comm_rank(comm, &my_id);

   info = hypre_CTAlloc(HYPRE_Int, num_procs);

/* ----------------------------------------------------------------------
 * determine which processors to receive from (set proc_mark) and num_recvs,
 * at the end of the loop proc_mark[i] contains the number of elements to be
 * received from Proc. i
 *
 * - For A*b or A*B: for each off-diagonal column i of A, you want to identify
 * the processor which has the corresponding element i of b (row i of B)
 * (columns in the local diagonal block, just multiply local rows of B).
 * You do it by finding the processor which has that column of A in its
 * _diagonal_ block - assuming b or B is distributed the same, which I believe
 * is evenly among processors, by row.  There is a unique solution because
 * the diag/offd blocking is defined by which processor owns which rows of A.
 *
 * - For A*A^T: A^T is not distributed by rows as B or any 'normal' matrix is.
 * For each off-diagonal row,column k,i element of A, you want to identify
 * the processors which have the corresponding row,column i,j elements of A^T
 * i.e., row,column j,i elements of A (all i,j,k for which these entries are
 * nonzero, row k of A lives on this processor, and row j of A lives on
 * a different processor).  So, given a column i in the local A-offd or A-diag,
 * we want to find all the processors which have column i, in diag or offd
 * blocks.  Unlike the A*B case, I don't think you can eliminate looking at
 * any class of blocks.
 * ---------------------------------------------------------------------*/

/* The algorithm for A*B was:
   For each of my columns i (in offd block), use known information on data
   distribution of columns in _diagonal_ blocks to find the processor p which
   owns row i. (Note that for i in diag block, I own the row, nothing to do.)
   Count up such i's for each processor in proc_mark.  Construct a data
   structure, recv_buf, made by appending a structure tmp from each processor.
   The data structure tmp looks like (p, no. of i's, i1, i2,...) (p=0,...) .
   There are two communication steps: gather size information (local_info) from
   all processors (into info), then gather the data (tmp) from all processors
   (into recv_buf).  Then you go through recv_buf.  For each (sink) processor p
   you search for for the appearance of my (source) processor number
   (most of recv_buf pertains to other processors and is ignored).
   When you find the appropriate section, pull out the i's, count them and
   save them, in send_map_elmts, and save p in send_procs and index information
   in send_map_starts.
*/
/* The algorithm for A*A^T:
   [ Originally I had planned to figure out approximately which processors
   had the information (for A*B it could be done exactly) to save on
   communication.  But even for A*B where the data owner is known, all data is
   sent to all processors, so that's not worth worrying about on the first cut.
   One consequence is that proc_mark is not needed.]
   Construct a data structure, recv_buf, made by appending a structure tmp for
   each processor.  It simply consists of (no. of i's, i1, i2,...) where i is
   the global number of a column in the offd block.  There are still two
   communication steps: gather size information (local_info) from all processors
   (into info), then gather the data (tmp) from all processors (into recv_buf).
   Then you go through recv_buf.  For each (sink) processor p you go through
   all its column numbers in recv_buf.  Check each one for whether you have
   data in that column.  If so, put in in send_map_elmts, p in send_procs,
   and update the index information in send_map_starts.  Note that these
   arrays don't mean quite the same thing as for A*B.
*/

   num_recvs=num_procs-1;
   local_info = num_procs + num_cols_offd + num_cols_diag;
			
   hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm); 

/* ----------------------------------------------------------------------
 * generate information to be send: tmp contains for each recv_proc:
 * {deleted: id of recv_procs}, number of elements to be received for this processor,
 * indices of elements (in this order)
 * ---------------------------------------------------------------------*/

   displs = hypre_CTAlloc(HYPRE_Int, num_procs+1);
   displs[0] = 0;
   for (i=1; i < num_procs+1; i++)
	displs[i] = displs[i-1]+info[i-1]; 
   recv_buf = hypre_CTAlloc(HYPRE_Int, displs[num_procs]); 
   tmp = hypre_CTAlloc(HYPRE_Int, local_info);

   j = 0;
   for (i=0; i < num_procs; i++) {
      j2 = j++;
      tmp[j2] = 0;
      for (k=0; k < num_cols_offd; k++)
         if (col_map_offd[k] >= col_starts[i] && 
             col_map_offd[k] < col_starts[i+1]) {
            tmp[j++] = col_map_offd[k];
            ++(tmp[j2]);
         };
      for (k=0; k < num_cols_diag; k++)
         if ( k+first_col_diag >= col_starts[i] && 
              k+first_col_diag < col_starts[i+1] ) {
            tmp[j++] = k + first_col_diag;
            ++(tmp[j2]);
         }
   }

   hypre_MPI_Allgatherv(tmp,local_info,HYPRE_MPI_INT,recv_buf,info,displs,HYPRE_MPI_INT,comm);
	

/* ----------------------------------------------------------------------
 * determine send_procs and actual elements to be send (in send_map_elmts)
 * and send_map_starts whose i-th entry points to the beginning of the 
 * elements to be send to proc. i
 * ---------------------------------------------------------------------*/
/* Meanings of arrays being set here, more verbosely stated:
   send_procs: processors p to send to
   send_map_starts: for each p, gives range of indices in send_map_elmts; 
   send_map_elmts:  Each element is a send_map_elmts[i], with i in a range given
     by send_map_starts[p..p+1], for some p. This element is is the global
     column number for a column in the offd block of p which is to be multiplied
     by data from this processor.
     For A*B, send_map_elmts[i] is therefore a row of B belonging to this
     processor, to be sent to p.  For A*A^T, send_map_elmts[i] is a row of A
     belonging to this processor, to be sent to p; this row was selected
     because it has a nonzero on a _column_ needed by p.
*/
   num_sends = num_procs;   /* may turn out to be less, but we can't know yet */
   num_elmts = (num_procs-1)*num_rows_diag;
   /* ... a crude upper bound; should try to do better even if more comm required */
   send_procs = hypre_CTAlloc(HYPRE_Int, num_sends);
   send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1);
   send_map_elmts = hypre_CTAlloc(HYPRE_Int, num_elmts);
   row_marker = hypre_CTAlloc(HYPRE_Int,num_rows_diag);
 
   index = 0;
   index2 = 0; 
   send_map_starts[0] = 0;
   for (i=0; i < num_procs; i++) {
      send_map_starts[index+1] = send_map_starts[index];
      j = displs[i];
      pmatch = 0;
      for ( ir=0; ir<num_rows_diag; ++ir ) row_marker[ir] = 0;
      while ( j < displs[i+1])
      {
         num_elmts = recv_buf[j++];  /* no. of columns proc. i wants */
         for ( k=0; k<num_elmts; k++ ) {
            col = recv_buf[j++]; /* a global column no. at proc. i */
            for ( kc=0; kc<num_cols_offd; kc++ ) {
               if ( col_map_offd[kc]==col && i!=my_id ) {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
                  /* this would be right if we could send columns, but we can't ...
                     offset = first_col_diag;
                     ++send_map_starts[index+1];
                     send_map_elmts[index2++] = col - offset; */
                  /* Plan to send all of my rows which use this column... */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag, 
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd
                     );
                  for ( ir=rowmin; ir<=rowmax; ++ir ) {
                     if ( row_marker[ir]==0 ) {
                        row_marker[ir] = 1;
                        ++send_map_starts[index+1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }
/* alternative way of doing the following for-loop:
            for ( kc=0; kc<num_cols_diag; kc++ ) {
               if ( kc+first_col_diag==col && i!=my_id ) {
               / * this processor has the same column as proc. i (but is different) * /
                  pmatch = 1;
/ * this would be right if we could send columns, but we can't ... >>> * /
                  send_procs[index] = i;
                  ++send_map_starts[index+1];
                  send_map_elmts[index2++] = col - offset;
                  / * Plan to send all of my rows which use this column... * /
                  / * NOT DONE * /
               }
            }
*/
            for ( kc=row_starts[my_id]; kc<row_starts[my_id+1]; kc++ ) {
               if ( kc==col && i!=my_id ) {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
/* this would be right if we could send columns, but we can't ... >>> 
                  ++send_map_starts[index+1];
                  send_map_elmts[index2++] = col - offset;*/
                  /* Plan to send all of my rows which use this column... */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag, 
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd
                     );
                  for ( ir=rowmin; ir<=rowmax; ++ir ) {
                     if ( row_marker[ir]==0 ) {
                        row_marker[ir] = 1;
                        ++send_map_starts[index+1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }
         }
      }
      if ( pmatch ) index++;
   }
   num_sends = index;  /* no. of proc. rows will be sent to */

/* Compute receive arrays recv_procs, recv_vec_starts ... */
   recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs);
   recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1);
   j2 = 0;
   for (i=0; i < num_procs; i++) {
      if ( i!=my_id ) { recv_procs[j2] = i; j2++; };
   };

/* Compute recv_vec_starts.
   The real job is, for each processor p, to figure out how many rows
   p will send to me (me=this processor).  I now know how many (and which)
   rows I will send to each p.  Indeed, if send_procs[index]=p, then the
   number is send_map_starts[index+1]-send_map_starts[index].
   More communication is needed.
   options:
   hypre_MPI_Allgather of communication sizes. <--- my choice, for now
     good: simple   bad: send num_procs*num_sends data, only need num_procs
                    but: not that much data compared to previous communication
   hypre_MPI_Allgatherv of communication sizes, only for pairs of procs. that communicate
     good: less data than above   bad: need extra commun. step to get recvcounts
   hypre_MPI_ISend,hypre_MPI_IRecv of each size, separately between each pair of procs.
     good: no excess data sent   bad: lots of little messages
                                 but: Allgather might be done the same under the hood
     may be much slower than Allgather or may be a bit faster depending on
     implementations
*/
   send_buf = hypre_CTAlloc( HYPRE_Int, 3*num_sends );
   all_num_sends3 = hypre_CTAlloc( HYPRE_Int, num_procs );

   /* scatter-gather num_sends, to set up the size for the main comm. step */
   i = 3*num_sends;
   hypre_MPI_Allgather( &i, 1, HYPRE_MPI_INT, all_num_sends3, 1, HYPRE_MPI_INT, comm );
   displs[0] = 0;
   for ( p=0; p<num_procs; ++p ) {
      displs[p+1] = displs[p] + all_num_sends3[p];
   };
   recv_sz_buf = hypre_CTAlloc( HYPRE_Int, displs[num_procs] );
   
   /* scatter-gather size of row info to send, and proc. to send to */
   index = 0;
   for ( i=0; i<num_sends; ++i ) {
      send_buf[index++] = send_procs[i];   /* processor to send to */
      send_buf[index++] = my_id;
      send_buf[index++] = send_map_starts[i+1] - send_map_starts[i];
      /* ... sizes of info to send */
   };

   hypre_MPI_Allgatherv( send_buf, 3*num_sends, HYPRE_MPI_INT,
                   recv_sz_buf, all_num_sends3, displs, HYPRE_MPI_INT, comm);

   recv_vec_starts[0] = 0;
   j2 = 0;  j = 0;
   for ( i=0; i<displs[num_procs]; i=i+3 ) {
      j = i;
      if ( recv_sz_buf[j++]==my_id ) {
         recv_procs[j2] = recv_sz_buf[j++];
         recv_vec_starts[j2+1] = recv_vec_starts[j2] + recv_sz_buf[j++];
         j2++;
      }
   }
   num_recvs = j2;

#if 0
   hypre_printf("num_procs=%i send_map_starts (%i):",num_procs,num_sends+1);
   for( i=0; i<=num_sends; ++i ) hypre_printf(" %i", send_map_starts[i] );
   hypre_printf("  send_procs (%i):",num_sends);
   for( i=0; i<num_sends; ++i ) hypre_printf(" %i", send_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_sends=%i send_buf[0,1,2]=%i %i %i",
          my_id, num_sends, send_buf[0], send_buf[1], send_buf[2] );
   hypre_printf(" all_num_sends3[0,1]=%i %i\n", all_num_sends3[0], all_num_sends3[1] );
   hypre_printf("my_id=%i rcv_sz_buf (%i):", my_id, displs[num_procs] );
   for( i=0; i<displs[num_procs]; ++i ) hypre_printf(" %i", recv_sz_buf[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i recv_vec_starts (%i):",my_id,num_recvs+1);
   for( i=0; i<=num_recvs; ++i ) hypre_printf(" %i", recv_vec_starts[i] );
   hypre_printf("  recv_procs (%i):",num_recvs);
   for( i=0; i<num_recvs; ++i ) hypre_printf(" %i", recv_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_recvs=%i recv_sz_buf[0,1,2]=%i %i %i\n",
          my_id, num_recvs, recv_sz_buf[0], recv_sz_buf[1], recv_sz_buf[2] );
#endif

   hypre_TFree(send_buf);
   hypre_TFree(all_num_sends3);
   hypre_TFree(tmp);
   hypre_TFree(recv_buf);
   hypre_TFree(displs);
   hypre_TFree(info);
   hypre_TFree(recv_sz_buf);
   hypre_TFree(row_marker);


   /* finish up with the hand-coded call-by-reference... */
   *p_num_recvs = num_recvs;
   *p_recv_procs = recv_procs;
   *p_recv_vec_starts = recv_vec_starts;
   *p_num_sends = num_sends;
   *p_send_procs = send_procs;
   *p_send_map_starts = send_map_starts;
   *p_send_map_elmts = send_map_elmts;

}
Example #20
0
HYPRE_ParCSRMatrix 
GenerateLaplacian9pt( MPI_Comm comm,
                      HYPRE_Int      nx,
                      HYPRE_Int      ny,
                      HYPRE_Int      P,
                      HYPRE_Int      Q,
                      HYPRE_Int      p,
                      HYPRE_Int      q,
                      double  *value )
{
   hypre_ParCSRMatrix *A;
   hypre_CSRMatrix *diag;
   hypre_CSRMatrix *offd;

   HYPRE_Int    *diag_i;
   HYPRE_Int    *diag_j;
   double *diag_data;

   HYPRE_Int    *offd_i;
   HYPRE_Int    *offd_j;
   double *offd_data;

   HYPRE_Int *global_part;
   HYPRE_Int ix, iy;
   HYPRE_Int cnt, o_cnt;
   HYPRE_Int local_num_rows; 
   HYPRE_Int *col_map_offd;
   HYPRE_Int *work;
   HYPRE_Int row_index;
   HYPRE_Int i,j;

   HYPRE_Int nx_local, ny_local;
   HYPRE_Int nx_size, ny_size;
   HYPRE_Int num_cols_offd;
   HYPRE_Int grid_size;

   HYPRE_Int *nx_part;
   HYPRE_Int *ny_part;

   HYPRE_Int num_procs, my_id;
   HYPRE_Int P_busy, Q_busy;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   grid_size = nx*ny;

   hypre_GeneratePartitioning(nx,P,&nx_part);
   hypre_GeneratePartitioning(ny,Q,&ny_part);

   global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1);

   global_part[0] = 0;
   cnt = 1;
   for (iy = 0; iy < Q; iy++)
   {
      ny_size = ny_part[iy+1]-ny_part[iy];
      for (ix = 0; ix < P; ix++)
      {
         nx_size = nx_part[ix+1] - nx_part[ix];
         global_part[cnt] = global_part[cnt-1];
         global_part[cnt++] += nx_size*ny_size;
      }
   }

   nx_local = nx_part[p+1] - nx_part[p];
   ny_local = ny_part[q+1] - ny_part[q];

   my_id = q*P + p;
   num_procs = P*Q;

   local_num_rows = nx_local*ny_local;
   diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
   offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

   P_busy = hypre_min(nx,P);
   Q_busy = hypre_min(ny,Q);

   num_cols_offd = 0;
   if (p) num_cols_offd += ny_local;
   if (p < P_busy-1) num_cols_offd += ny_local;
   if (q) num_cols_offd += nx_local;
   if (q < Q_busy-1) num_cols_offd += nx_local;
   if (p && q) num_cols_offd++;
   if (p && q < Q_busy-1 ) num_cols_offd++;
   if (p < P_busy-1 && q ) num_cols_offd++;
   if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++;

   if (!local_num_rows) num_cols_offd = 0;

   col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

   cnt = 0;
   o_cnt = 0;
   diag_i[0] = 0;
   offd_i[0] = 0;
   for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
   {
      for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
      {
         cnt++;
         o_cnt++;
         diag_i[cnt] = diag_i[cnt-1];
         offd_i[o_cnt] = offd_i[o_cnt-1];
         diag_i[cnt]++;
         if (iy > ny_part[q]) 
         {
            diag_i[cnt]++;
	    if (ix > nx_part[p])
	    {
	       diag_i[cnt]++;
	    }
	    else
	    {
	       if (ix) 
		  offd_i[o_cnt]++;
	    }
	    if (ix < nx_part[p+1]-1)
	    {
	       diag_i[cnt]++;
	    }
	    else
	    {
	       if (ix+1 < nx) 
		  offd_i[o_cnt]++;
	    }
         }
         else
         {
            if (iy) 
            {
               offd_i[o_cnt]++;
	       if (ix > nx_part[p])
	       {
	          offd_i[o_cnt]++;
	       }
	       else if (ix)
	       {
	          offd_i[o_cnt]++;
	       }
	       if (ix < nx_part[p+1]-1)
	       {
	          offd_i[o_cnt]++;
	       }
	       else if (ix < nx-1)
	       {
	          offd_i[o_cnt]++;
	       }
            }
         }
         if (ix > nx_part[p]) 
            diag_i[cnt]++;
         else
         {
            if (ix) 
            {
               offd_i[o_cnt]++; 
            }
         }
         if (ix+1 < nx_part[p+1]) 
            diag_i[cnt]++;
         else
         {
            if (ix+1 < nx) 
            {
               offd_i[o_cnt]++; 
            }
         }
         if (iy+1 < ny_part[q+1]) 
         {
            diag_i[cnt]++;
	    if (ix > nx_part[p])
	    {
	       diag_i[cnt]++;
	    }
	    else
	    {
	       if (ix) 
		  offd_i[o_cnt]++;
	    }
	    if (ix < nx_part[p+1]-1)
	    {
	       diag_i[cnt]++;
	    }
	    else
	    {
	       if (ix+1 < nx) 
		  offd_i[o_cnt]++;
	    }
         }
         else
         {
            if (iy+1 < ny) 
            {
               offd_i[o_cnt]++;
	       if (ix > nx_part[p])
	       {
	          offd_i[o_cnt]++;
	       }
	       else if (ix)
	       {
	          offd_i[o_cnt]++;
	       }
	       if (ix < nx_part[p+1]-1)
	       {
	          offd_i[o_cnt]++;
	       }
	       else if (ix < nx-1)
	       {
	          offd_i[o_cnt]++;
	       }
            }
         }
      }
   }

   diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
   diag_data = hypre_CTAlloc(double, diag_i[local_num_rows]);

   if (num_procs > 1)
   {
      offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
      offd_data = hypre_CTAlloc(double, offd_i[local_num_rows]);
   }
Example #21
0
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvec_FF
 *--------------------------------------------------------------------------*/
                                                                                    HYPRE_Int
hypre_ParCSRMatrixMatvec_FF( HYPRE_Complex       alpha,
                             hypre_ParCSRMatrix *A,
                             hypre_ParVector    *x,
                             HYPRE_Complex       beta,
                             hypre_ParVector    *y,
                             HYPRE_Int          *CF_marker,
                             HYPRE_Int           fpt )
{
   MPI_Comm                comm = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommHandle *comm_handle;
   hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix        *diag   = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix        *offd   = hypre_ParCSRMatrixOffd(A);
   hypre_Vector           *x_local  = hypre_ParVectorLocalVector(x);
   hypre_Vector           *y_local  = hypre_ParVectorLocalVector(y);
   HYPRE_Int               num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int               num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   hypre_Vector      *x_tmp;
   HYPRE_Int          x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int          y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int          num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int          ierr = 0;
   HYPRE_Int          num_sends, i, j, index, start, num_procs;
   HYPRE_Int         *int_buf_data = NULL;
   HYPRE_Int         *CF_marker_offd = NULL;

   HYPRE_Complex     *x_tmp_data = NULL;
   HYPRE_Complex     *x_buf_data = NULL;
   HYPRE_Complex     *x_local_data = hypre_VectorData(x_local);
   /*---------------------------------------------------------------------
    *  Check for size compatibility.  ParMatvec returns ierr = 11 if
    *  length of X doesn't equal the number of columns of A,
    *  ierr = 12 if the length of Y doesn't equal the number of rows
    *  of A, and ierr = 13 if both are true.
    *
    *  Because temporary vectors are often used in ParMatvec, none of
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm,&num_procs);

   if (num_cols != x_size)
      ierr = 11;

   if (num_rows != y_size)
      ierr = 12;

   if (num_cols != x_size && num_rows != y_size)
      ierr = 13;

   if (num_procs > 1)
   {
      if (num_cols_offd)
      {
         x_tmp = hypre_SeqVectorCreate( num_cols_offd );
         hypre_SeqVectorInitialize(x_tmp);
         x_tmp_data = hypre_VectorData(x_tmp);
      }

      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings
       *--------------------------------------------------------------------*/
      if (!comm_pkg)
      {
         hypre_MatvecCommPkgCreate(A);
         comm_pkg = hypre_ParCSRMatrixCommPkg(A);
      }

      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      if (num_sends)
         x_buf_data = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart
                                    (comm_pkg, num_sends));

      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            x_buf_data[index++]
               = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data, x_tmp_data );
   }
   hypre_CSRMatrixMatvec_FF( alpha, diag, x_local, beta, y_local, CF_marker,
                             CF_marker, fpt);

   if (num_procs > 1)
   {
      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      if (num_sends)
         int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart
                                      (comm_pkg, num_sends));
      if (num_cols_offd) CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            int_buf_data[index++]
               = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate(11,comm_pkg,int_buf_data,CF_marker_offd );

      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      if (num_cols_offd) hypre_CSRMatrixMatvec_FF( alpha, offd, x_tmp, 1.0, y_local,
                                                   CF_marker, CF_marker_offd, fpt);

      hypre_SeqVectorDestroy(x_tmp);
      x_tmp = NULL;
      hypre_TFree(x_buf_data);
      hypre_TFree(int_buf_data);
      hypre_TFree(CF_marker_offd);
   }

   return ierr;
}
Example #22
0
HYPRE_Int
myBuildParLaplacian( HYPRE_Int                  argc,
                   char                *argv[],
                   HYPRE_Int                  arg_index,
                     HYPRE_ParCSRMatrix  *A_ptr , HYPRE_Int parmprint    )
{
   HYPRE_Int                 nx, ny, nz;
   HYPRE_Int                 P, Q, R;
   HYPRE_Real          cx, cy, cz;

   HYPRE_ParCSRMatrix  A;

   HYPRE_Int                 num_procs, myid;
   HYPRE_Int                 p, q, r;
   HYPRE_Real         *values;

   /*-----------------------------------------------------------
    * Initialize some stuff
    *-----------------------------------------------------------*/

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );

   /*-----------------------------------------------------------
    * Set defaults
    *-----------------------------------------------------------*/
 
   nx = 10;
   ny = 10;
   nz = 10;

   P  = 1;
   Q  = num_procs;
   R  = 1;

   cx = 1.;
   cy = 1.;
   cz = 1.;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/
   arg_index = 0;
   while (arg_index < argc)
   {
      if ( strcmp(argv[arg_index], "-n") == 0 )
      {
         arg_index++;
         nx = atoi(argv[arg_index++]);
         ny = atoi(argv[arg_index++]);
         nz = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-P") == 0 )
      {
         arg_index++;
         P  = atoi(argv[arg_index++]);
         Q  = atoi(argv[arg_index++]);
         R  = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-c") == 0 )
      {
         arg_index++;
         cx = atof(argv[arg_index++]);
         cy = atof(argv[arg_index++]);
         cz = atof(argv[arg_index++]);
      }
      else
      {
         arg_index++;
      }
   }

   /*-----------------------------------------------------------
    * Check a few things
    *-----------------------------------------------------------*/

   if ((P*Q*R) != num_procs)
   {
      hypre_printf("Error: Invalid number of processors or processor topology \n");
      exit(1);
   }

   /*-----------------------------------------------------------
    * Print driver parameters
    *-----------------------------------------------------------*/
 
   if (myid == 0 && parmprint)
   {
      hypre_printf("  Laplacian:\n");
      hypre_printf("    (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
      hypre_printf("    (Px, Py, Pz) = (%d, %d, %d)\n", P,  Q,  R);
      hypre_printf("    (cx, cy, cz) = (%f, %f, %f)\n\n", cx, cy, cz);
   }

   /*-----------------------------------------------------------
    * Set up the grid structure
    *-----------------------------------------------------------*/

   /* compute p,q,r from P,Q,R and myid */
   p = myid % P;
   q = (( myid - p)/P) % Q;
   r = ( myid - p - P*q)/( P*Q );

   /*-----------------------------------------------------------
    * Generate the matrix 
    *-----------------------------------------------------------*/
 
   values = hypre_CTAlloc(HYPRE_Real, 4);

   values[1] = -cx;
   values[2] = -cy;
   values[3] = -cz;

   values[0] = 0.;
   if (nx > 1)
   {
      values[0] += 2.0*cx;
   }
   if (ny > 1)
   {
      values[0] += 2.0*cy;
   }
   if (nz > 1)
   {
      values[0] += 2.0*cz;
   }

   A = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD, nx, ny, nz, 
					      P, Q, R, p, q, r, values);

   hypre_TFree(values);


   *A_ptr = A;

   return (0);
}
Example #23
0
HYPRE_Int
hypre_BoomerAMGCreateScalarCFS(hypre_ParCSRMatrix    *SN,
                       HYPRE_Int                   *CFN_marker,
                       HYPRE_Int                   *col_offd_SN_to_AN,
                       HYPRE_Int                    num_functions,
                       HYPRE_Int                    nodal,
                       HYPRE_Int                    data,
                       HYPRE_Int                  **dof_func_ptr,
                       HYPRE_Int                  **CF_marker_ptr,
                       HYPRE_Int                  **col_offd_S_to_A_ptr,
                       hypre_ParCSRMatrix   **S_ptr)
{
   MPI_Comm	       comm = hypre_ParCSRMatrixComm(SN);
   hypre_ParCSRMatrix *S;
   hypre_CSRMatrix    *S_diag;
   HYPRE_Int		      *S_diag_i;
   HYPRE_Int		      *S_diag_j;
   double	      *S_diag_data;
   hypre_CSRMatrix    *S_offd;
   HYPRE_Int		      *S_offd_i;
   HYPRE_Int		      *S_offd_j;
   double	      *S_offd_data;
   HYPRE_Int		      *row_starts_S;
   HYPRE_Int		      *col_starts_S;
   HYPRE_Int		      *row_starts_SN = hypre_ParCSRMatrixRowStarts(SN);
   HYPRE_Int		      *col_starts_SN = hypre_ParCSRMatrixColStarts(SN);
   hypre_CSRMatrix    *SN_diag = hypre_ParCSRMatrixDiag(SN);
   HYPRE_Int		      *SN_diag_i = hypre_CSRMatrixI(SN_diag);
   HYPRE_Int		      *SN_diag_j = hypre_CSRMatrixJ(SN_diag);
   double	      *SN_diag_data;
   hypre_CSRMatrix    *SN_offd = hypre_ParCSRMatrixOffd(SN);
   HYPRE_Int		      *SN_offd_i = hypre_CSRMatrixI(SN_offd);
   HYPRE_Int		      *SN_offd_j = hypre_CSRMatrixJ(SN_offd);
   double	      *SN_offd_data;
   HYPRE_Int		      *CF_marker;
   HYPRE_Int		      *col_map_offd_SN = hypre_ParCSRMatrixColMapOffd(SN);
   HYPRE_Int		      *col_map_offd_S;
   HYPRE_Int		      *dof_func;
   HYPRE_Int		       num_nodes = hypre_CSRMatrixNumRows(SN_diag);
   HYPRE_Int		       num_variables;
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(SN);
   HYPRE_Int		       num_sends;
   HYPRE_Int		       num_recvs;
   HYPRE_Int		      *send_procs;
   HYPRE_Int		      *send_map_starts;
   HYPRE_Int		      *send_map_elmts;
   HYPRE_Int		      *recv_procs;
   HYPRE_Int		      *recv_vec_starts;
   hypre_ParCSRCommPkg *comm_pkg_S;
   HYPRE_Int		      *send_procs_S;
   HYPRE_Int		      *send_map_starts_S;
   HYPRE_Int		      *send_map_elmts_S;
   HYPRE_Int		      *recv_procs_S;
   HYPRE_Int		      *recv_vec_starts_S;
   HYPRE_Int		      *col_offd_S_to_A = NULL;
   
   HYPRE_Int		       num_coarse_nodes;
   HYPRE_Int		       i,j,k,k1,jj,cnt;
   HYPRE_Int		       row, start, end;
   HYPRE_Int		       num_procs;
   HYPRE_Int		       num_cols_offd_SN = hypre_CSRMatrixNumCols(SN_offd);
   HYPRE_Int		       num_cols_offd_S;
   HYPRE_Int		       SN_num_nonzeros_diag;
   HYPRE_Int		       SN_num_nonzeros_offd;
   HYPRE_Int		       S_num_nonzeros_diag;
   HYPRE_Int		       S_num_nonzeros_offd;
   HYPRE_Int		       global_num_vars;
   HYPRE_Int		       global_num_cols;
   HYPRE_Int		       global_num_nodes;
   HYPRE_Int		       ierr = 0;
 
   hypre_MPI_Comm_size(comm, &num_procs);
 
   num_variables = num_functions*num_nodes;
   CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables);

   if (nodal < 0)
   {
      cnt = 0;
      num_coarse_nodes = 0;
      for (i=0; i < num_nodes; i++)
      {
	 if (CFN_marker[i] == 1) num_coarse_nodes++;
         for (j=0; j < num_functions; j++)
	    CF_marker[cnt++] = CFN_marker[i];
      }

      dof_func = hypre_CTAlloc(HYPRE_Int,num_coarse_nodes*num_functions);
      cnt = 0;
      for (i=0; i < num_nodes; i++)
      {
	 if (CFN_marker[i] == 1)
	 {
	    for (k=0; k < num_functions; k++)
	       dof_func[cnt++] = k;
	 }
      }
      *dof_func_ptr = dof_func;
   }
   else
   {
      cnt = 0;
      for (i=0; i < num_nodes; i++)
         for (j=0; j < num_functions; j++)
	    CF_marker[cnt++] = CFN_marker[i];
   }

   *CF_marker_ptr = CF_marker;


#ifdef HYPRE_NO_GLOBAL_PARTITION
   row_starts_S = hypre_CTAlloc(HYPRE_Int,2);
   for (i=0; i < 2; i++)
      row_starts_S[i] = num_functions*row_starts_SN[i];

   if (row_starts_SN != col_starts_SN)
   {
      col_starts_S = hypre_CTAlloc(HYPRE_Int,2);
      for (i=0; i < 2; i++)
         col_starts_S[i] = num_functions*col_starts_SN[i];
   }
   else
   {
      col_starts_S = row_starts_S;
   }
#else
   row_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1);
   for (i=0; i < num_procs+1; i++)
      row_starts_S[i] = num_functions*row_starts_SN[i];

   if (row_starts_SN != col_starts_SN)
   {
      col_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1);
      for (i=0; i < num_procs+1; i++)
         col_starts_S[i] = num_functions*col_starts_SN[i];
   }
   else
   {
      col_starts_S = row_starts_S;
   }
#endif


   SN_num_nonzeros_diag = SN_diag_i[num_nodes];
   SN_num_nonzeros_offd = SN_offd_i[num_nodes];
 
   global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(SN);
   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(SN)*num_functions;
 
   global_num_vars = global_num_nodes*num_functions;
   S_num_nonzeros_diag = num_functions*SN_num_nonzeros_diag;
   S_num_nonzeros_offd = num_functions*SN_num_nonzeros_offd;
   num_cols_offd_S = num_functions*num_cols_offd_SN;
   S = hypre_ParCSRMatrixCreate(comm, global_num_vars, global_num_cols,
		row_starts_S, col_starts_S, num_cols_offd_S,
		S_num_nonzeros_diag, S_num_nonzeros_offd);

   S_diag = hypre_ParCSRMatrixDiag(S);
   S_offd = hypre_ParCSRMatrixOffd(S);
   S_diag_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_offd_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_diag_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_diag);
   hypre_CSRMatrixI(S_diag) = S_diag_i;
   hypre_CSRMatrixJ(S_diag) = S_diag_j;
   if (data) 
   {
      SN_diag_data = hypre_CSRMatrixData(SN_diag);
      S_diag_data = hypre_CTAlloc(double, S_num_nonzeros_diag);
      hypre_CSRMatrixData(S_diag) = S_diag_data;
      if (num_cols_offd_S)
      {
         SN_offd_data = hypre_CSRMatrixData(SN_offd);
         S_offd_data = hypre_CTAlloc(double, S_num_nonzeros_offd);
         hypre_CSRMatrixData(S_offd) = S_offd_data;
      }

   }
   hypre_CSRMatrixI(S_offd) = S_offd_i;

   if (comm_pkg)
   {
      comm_pkg_S = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
      hypre_ParCSRCommPkgComm(comm_pkg_S) = comm;
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      hypre_ParCSRCommPkgNumSends(comm_pkg_S) = num_sends;
      num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_S) = num_recvs;
      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      send_procs_S = NULL;
      send_map_elmts_S = NULL;
      if (num_sends) 
      {
         send_procs_S = hypre_CTAlloc(HYPRE_Int,num_sends);
         send_map_elmts_S = hypre_CTAlloc(HYPRE_Int,
		num_functions*send_map_starts[num_sends]);
      }
      send_map_starts_S = hypre_CTAlloc(HYPRE_Int,num_sends+1);
      recv_vec_starts_S = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
      recv_procs_S = NULL;
      if (num_recvs) recv_procs_S = hypre_CTAlloc(HYPRE_Int,num_recvs);
      send_map_starts_S[0] = 0;
      for (i=0; i < num_sends; i++)
      {
         send_procs_S[i] = send_procs[i];
         send_map_starts_S[i+1] = num_functions*send_map_starts[i+1];
      }
      recv_vec_starts_S[0] = 0;
      for (i=0; i < num_recvs; i++)
      {
         recv_procs_S[i] = recv_procs[i];
         recv_vec_starts_S[i+1] = num_functions*recv_vec_starts[i+1];
      }

      cnt = 0;
      for (i=0; i < send_map_starts[num_sends]; i++)
      {
	 k1 = num_functions*send_map_elmts[i];
         for (j=0; j < num_functions; j++)
         {
	    send_map_elmts_S[cnt++] = k1+j;
         }
      }
      hypre_ParCSRCommPkgSendProcs(comm_pkg_S) = send_procs_S;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_S) = send_map_starts_S;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_S) = send_map_elmts_S;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_S) = recv_procs_S;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_S) = recv_vec_starts_S;
      hypre_ParCSRMatrixCommPkg(S) = comm_pkg_S;
   }

   if (num_cols_offd_S)
   {
      S_offd_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_offd);
      hypre_CSRMatrixJ(S_offd) = S_offd_j;

      col_map_offd_S = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);

      cnt = 0;
      for (i=0; i < num_cols_offd_SN; i++)
      {
         k1 = col_map_offd_SN[i]*num_functions;
         for (j=0; j < num_functions; j++)
            col_map_offd_S[cnt++] = k1+j;
      }
      hypre_ParCSRMatrixColMapOffd(S) = col_map_offd_S;
   }
   
   if (col_offd_SN_to_AN)
   {
      col_offd_S_to_A = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);

      cnt = 0;
      for (i=0; i < num_cols_offd_SN; i++)
      {
         k1 = col_offd_SN_to_AN[i]*num_functions;
         for (j=0; j < num_functions; j++)
            col_offd_S_to_A[cnt++] = k1+j;
      }
      *col_offd_S_to_A_ptr = col_offd_S_to_A;
   }
   


   cnt = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      row++;
      start = cnt;
      for (j=SN_diag_i[i]; j < SN_diag_i[i+1]; j++)
      {
         jj = SN_diag_j[j];
	 if (data) S_diag_data[cnt] = SN_diag_data[j];
	 S_diag_j[cnt++] = jj*num_functions;
      }
      end = cnt;
      S_diag_i[row] = cnt;
      for (k1=1; k1 < num_functions; k1++)
      {
         row++;
	 for (k=start; k < end; k++)
	 {
	    if (data) S_diag_data[cnt] = S_diag_data[k];
	    S_diag_j[cnt++] = S_diag_j[k]+k1;
	 }
         S_diag_i[row] = cnt;
      }
   } 

   cnt = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      row++;
      start = cnt;
      for (j=SN_offd_i[i]; j < SN_offd_i[i+1]; j++)
      {
         jj = SN_offd_j[j];
	 if (data) S_offd_data[cnt] = SN_offd_data[j];
	 S_offd_j[cnt++] = jj*num_functions;
      }
      end = cnt;
      S_offd_i[row] = cnt;
      for (k1=1; k1 < num_functions; k1++)
      {
         row++;
	 for (k=start; k < end; k++)
	 {
	    if (data) S_offd_data[cnt] = S_offd_data[k];
	    S_offd_j[cnt++] = S_offd_j[k]+k1;
	 }
         S_offd_i[row] = cnt;
      }
   } 

   *S_ptr = S; 

   return (ierr);
}
Example #24
0
HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, 
                                 HYPRE_Int *contact_proc_list,
                                 void *contact_send_buf, 
                                 HYPRE_Int *contact_send_buf_starts,
                                 HYPRE_Int contact_obj_size, 
                                 HYPRE_Int response_obj_size,
                                 hypre_DataExchangeResponse *response_obj,
                                 HYPRE_Int max_response_size, 
                                 HYPRE_Int rnum, MPI_Comm comm,
                                 void **p_response_recv_buf, 
                                 HYPRE_Int **p_response_recv_buf_starts)
{
   /*-------------------------------------------
    *  parameters: 
    *   
    *    num_contacts              = how many procs to contact
    *    contact_proc_list         = list of processors to contact 
    *    contact_send_buf          = array of data to send
    *    contact_send_buf_starts   = index for contact_send_buf corresponding to 
    *                                contact_proc_list
    *    contact_obj_size          = sizeof() one obj in contact list  
   
    *    response_obj_size          = sizeof() one obj in response_recv_buf 
    *    response_obj              = this will give us the function we need to 
    *                                fill the reponse as well as 
    *                                any data we might need to accomplish that 
    *    max_response_size         = max size of a single response expected (do NOT
    *                                need to be an absolute upper bound)  
    *    rnum                      = two consequentive exchanges should have different 
    *                                rnums. Alternate rnum = 1 
    *                                and rnum=2  - these flags will be even (so odd
    *                                numbered tags could be used in calling code) 
    *    p_response_recv_buf       = where to receive the reponses - will be allocated 
    *                                in this function  
    *    p_response_recv_buf_starts  = index of p_response_buf corresponding to 
    *                                contact_buf_list - will be allocated here  

    *-------------------------------------------*/

   HYPRE_Int  num_procs, myid;
   HYPRE_Int  i;
   HYPRE_Int  terminate, responses_complete;
   HYPRE_Int  children_complete;
   HYPRE_Int  contact_flag;
   HYPRE_Int  proc;
   HYPRE_Int  contact_size;

   HYPRE_Int  size, post_size, copy_size;
   HYPRE_Int  total_size, count;      

   void *start_ptr = NULL, *index_ptr=NULL;
   HYPRE_Int  *int_ptr=NULL;  

   void *response_recv_buf = NULL;
   void *send_response_buf = NULL;

   HYPRE_Int  *response_recv_buf_starts = NULL;
   void *initial_recv_buf = NULL;  

   void *recv_contact_buf = NULL;
   HYPRE_Int  recv_contact_buf_size = 0;
   
   HYPRE_Int  response_message_size = 0;

   HYPRE_Int  overhead;
 
   HYPRE_Int  max_response_size_bytes;
   
   HYPRE_Int  max_response_total_bytes;
   
   void **post_array = NULL;  /*this must be set to null or realloc will crash */
   HYPRE_Int  post_array_storage = 0;
   HYPRE_Int  post_array_size = 0;
   HYPRE_Int   num_post_recvs =0;
   
   void **contact_ptrs = NULL, **response_ptrs=NULL, **post_ptrs=NULL;
   
   hypre_BinaryTree tree;

   hypre_MPI_Request *response_requests, *contact_requests;
   hypre_MPI_Status  *response_statuses, *contact_statuses;
   
   hypre_MPI_Request  *post_send_requests = NULL, *post_recv_requests = NULL;
   hypre_MPI_Status   *post_send_statuses = NULL, *post_recv_statuses = NULL;

   hypre_MPI_Request *term_requests, term_request1, request_parent;
   hypre_MPI_Status  *term_statuses, term_status1, status_parent;
   hypre_MPI_Status  status, fill_status;

   const HYPRE_Int contact_tag = 1000*rnum;
   const HYPRE_Int response_tag = 1002*rnum;
   const HYPRE_Int term_tag =  1004*rnum;  
   const HYPRE_Int post_tag = 1006*rnum;
   
   hypre_MPI_Comm_size(comm, &num_procs );
   hypre_MPI_Comm_rank(comm, &myid );

   /* ---------initializations ----------------*/ 

   /* if the response_obj_size or contact_obj_size is 0, set to sizeof(HYPRE_Int) */
   if (!response_obj_size) response_obj_size = sizeof(HYPRE_Int);
   if (!contact_obj_size) contact_obj_size = sizeof(HYPRE_Int);

   max_response_size_bytes = max_response_size*response_obj_size;
 
                   
   /* pre-allocate the max space for responding to contacts */
   overhead = ceil((HYPRE_Real) sizeof(HYPRE_Int)/response_obj_size); /*for appending an integer*/
   
   max_response_total_bytes = (max_response_size+overhead)*response_obj_size;

   response_obj->send_response_overhead = overhead;
   response_obj->send_response_storage = max_response_size;

   /*send_response_buf = hypre_MAlloc(max_response_total_bytes);*/
   send_response_buf = hypre_CAlloc(max_response_size+overhead, response_obj_size);
      
   /*allocate space for inital recv array for the responses - give each processor
     size max_response_size */

   initial_recv_buf = hypre_MAlloc(max_response_total_bytes*num_contacts);
   response_recv_buf_starts =   hypre_CTAlloc(HYPRE_Int, num_contacts+1);  

   contact_ptrs = hypre_TAlloc( void *, num_contacts);
   response_ptrs = hypre_TAlloc(void *, num_contacts);
   
   /*-------------SEND CONTACTS AND POST RECVS FOR RESPONSES---*/

   for (i=0; i<= num_contacts; i++)
   {
      response_recv_buf_starts[i] = i*(max_response_size+overhead);
   }
   
   /* Send "contact" messages to the list of processors and 
      pre-post receives to wait for their response*/

   responses_complete = 1;
   if (num_contacts > 0 )
   {
      responses_complete = 0;
      response_requests = hypre_CTAlloc(hypre_MPI_Request, num_contacts);
      response_statuses = hypre_CTAlloc(hypre_MPI_Status, num_contacts);
      contact_requests = hypre_CTAlloc(hypre_MPI_Request, num_contacts);
      contact_statuses = hypre_CTAlloc(hypre_MPI_Status, num_contacts);

      /* post receives - could be confirmation or data*/
      /* the size to post is max_response_total_bytes*/     
      
      for (i=0; i< num_contacts; i++) 
      {
         /* response_ptrs[i] =  initial_recv_buf + i*max_response_total_bytes ; */
         response_ptrs[i] = (void *)((char *) initial_recv_buf +
                                     i*max_response_total_bytes) ; 

         hypre_MPI_Irecv(response_ptrs[i], max_response_total_bytes,
                         hypre_MPI_BYTE, contact_proc_list[i], 
                         response_tag, comm, &response_requests[i]);
      }

      /* send out contact messages */
      start_ptr = contact_send_buf;
      for (i=0; i< num_contacts; i++) 
      {
         contact_ptrs[i] = start_ptr; 
         size =  contact_send_buf_starts[i+1] - contact_send_buf_starts[i]  ; 
         hypre_MPI_Isend(contact_ptrs[i], size*contact_obj_size,
                         hypre_MPI_BYTE, contact_proc_list[i], 
                         contact_tag, comm, &contact_requests[i]); 
         /*  start_ptr += (size*contact_obj_size); */        
         start_ptr = (void *) ((char *) start_ptr  + (size*contact_obj_size)); 
      }
   }

   /*------------BINARY TREE-----------------------*/

   /*Now let's find out our binary tree information and
     initialize for the termination check sweep */
   terminate = 1; /*indicates whether we can stop probing for contact */
   children_complete = 1;/*indicates whether we have recv. term messages 
                           from our children*/
 
   if (num_procs > 1)
   { 
      hypre_CreateBinaryTree(myid, num_procs, &tree);

      /* we will get a message from all of our children when they 
         have received responses for all of their contacts.  
         So post receives now */
   
      term_requests = hypre_CTAlloc(hypre_MPI_Request, tree.num_child); 
      term_statuses = hypre_CTAlloc(hypre_MPI_Status, tree.num_child);

      for (i=0; i< tree.num_child; i++) 
      {
	 hypre_MPI_Irecv(NULL, 0, HYPRE_MPI_INT, tree.child_id[i], term_tag, comm, 
                         &term_requests[i]);
      }

      terminate = 0;
 
      children_complete = 0;
   }
   else if (num_procs ==1 && num_contacts > 0 ) /* added 11/08 */
   {
      terminate = 0;
   }

   /*---------PROBE LOOP-----------------------------------------*/

   /*Look for incoming contact messages - don't know how many I will get!*/
  
   while (!terminate)
   {
      /* did I receive any contact messages? */
      hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, comm,
                       &contact_flag, &status);

      while (contact_flag)
      {
         /* received contacts - from who and what do we do ?*/
         proc = status.hypre_MPI_SOURCE;
         hypre_MPI_Get_count(&status, hypre_MPI_BYTE, &contact_size);

         contact_size = contact_size/contact_obj_size;
         
         /*---------------FILL RESPONSE ------------------------*/

         /*first receive the contact buffer - then call a function
           to determine how to populate the send buffer for the reponse*/

         /* do we have enough space to recv it? */
         if(contact_size > recv_contact_buf_size) 
         {
            recv_contact_buf = hypre_ReAlloc(recv_contact_buf, 
                                             contact_obj_size*contact_size);
            recv_contact_buf_size = contact_size;
         } 

         /* this must be blocking - can't fill recv without the buffer*/         
         hypre_MPI_Recv(recv_contact_buf, contact_size*contact_obj_size,
                        hypre_MPI_BYTE, proc, contact_tag, comm, &fill_status);

         response_obj->fill_response(recv_contact_buf, contact_size, proc,
                                     response_obj, comm, &send_response_buf,
                                     &response_message_size );

         /* we need to append the size of the send obj */
         /* first we copy out any part that may be needed to send later so we don't overwrite */
         post_size = response_message_size - max_response_size; 
         if (post_size > 0) /*we will need to send the extra information later */   
         {
            /*hypre_printf("myid = %d, post_size = %d\n", myid, post_size);*/

            if (post_array_size == post_array_storage)
             
            {
               /* allocate room for more posts  - add 20*/
               post_array_storage += 20;       
               post_array = hypre_TReAlloc(post_array, void *, post_array_storage); 
               post_send_requests =
                  hypre_TReAlloc(post_send_requests, hypre_MPI_Request,
                                 post_array_storage);
            }
            /* allocate space for the data this post only*/
            /* this should not happen often (unless a poor max_size has been chosen)
               - so we will allocate space for the data as needed */
            size = post_size*response_obj_size;
            post_array[post_array_size] =  hypre_MAlloc(size); 
            /* index_ptr =  send_response_buf + max_response_size_bytes */;
            index_ptr = (void *) ((char *) send_response_buf +
                                  max_response_size_bytes);
               
            memcpy(post_array[post_array_size], index_ptr, size);
    
            /*now post any part of the message that is too long with a non-blocking
              send and a different tag */

            hypre_MPI_Isend(post_array[post_array_size], size,
                            hypre_MPI_BYTE, proc, post_tag, 
                            /*hypre_MPI_COMM_WORLD, */
                            comm,
                            &post_send_requests[post_array_size]); 

            post_array_size++;
         }

         /*now append the size information into the overhead storage */
         /* index_ptr =  send_response_buf + max_response_size_bytes; */
         index_ptr = (void *) ((char *) send_response_buf +
                               max_response_size_bytes);

         memcpy(index_ptr, &response_message_size, sizeof(HYPRE_Int));
         
         /*send the block of data that includes the overhead */        
         /* this is a blocking send - the recv has already been posted */
         hypre_MPI_Send(send_response_buf, max_response_total_bytes, 
                        hypre_MPI_BYTE, proc, response_tag, comm); 

         /*--------------------------------------------------------------*/
      
         /* look for any more contact messages*/
         hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, comm,
                          &contact_flag, &status);
      }
    
      /* no more contact messages waiting - either 
         (1) check to see if we have received all of our response messages
         (2) participate in termination (check for messages from children)
         (3) participate in termination sweep (check for message from parent) */
    
      if (!responses_complete) 
      {
         hypre_MPI_Testall(num_contacts, response_requests, &responses_complete, 
                           response_statuses);
         if (responses_complete && num_procs == 1) terminate = 1; /*added 11/08 */

      }
      else if(!children_complete) /* have all of our children received all of their 
                                     response messages?*/
      {
         hypre_MPI_Testall(tree.num_child, term_requests, &children_complete, 
                           term_statuses);

         /* if we have gotten term messages from all of our children, send a term 
            message to our parent.  Then post a receive to hear back from parent */ 
	 if (children_complete & (myid > 0)) /*root does not have a parent*/ 
         {
            hypre_MPI_Isend(NULL, 0, HYPRE_MPI_INT, tree.parent_id, term_tag,
                            comm, &request_parent);
            
            hypre_MPI_Irecv(NULL, 0, HYPRE_MPI_INT, tree.parent_id, term_tag,
                            comm, &term_request1);
         }
      }
      else /*have we gotten a term message from our parent? */
      {
         if (myid == 0) /* root doesn't have a parent */
         {
            terminate = 1;
         } 
         else  
         {
            hypre_MPI_Test(&term_request1, &terminate, &term_status1);
         }  
         if (terminate) /*tell children to terminate */
	 {
            if (myid > 0 ) hypre_MPI_Wait(&request_parent, &status_parent);
            
	    for (i=0; i< tree.num_child; i++)
	    {  /*a blocking send  - recv has been posted already*/
	       hypre_MPI_Send(NULL, 0, HYPRE_MPI_INT, tree.child_id[i],
                              term_tag, comm);
	    }
	 }
      } 
   }
Example #25
0
HYPRE_Int
BuildParLaplacian27pt( HYPRE_Int                  argc,
                       char                *argv[],
                       HYPRE_Int                  arg_index,
                       HYPRE_ParCSRMatrix  *A_ptr     )
{
   HYPRE_Int                 nx, ny, nz;
   HYPRE_Int                 P, Q, R;

   HYPRE_ParCSRMatrix  A;

   HYPRE_Int                 num_procs, myid;
   HYPRE_Int                 p, q, r;
   HYPRE_Real         *values;

   /*-----------------------------------------------------------
    * Initialize some stuff
    *-----------------------------------------------------------*/

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );

   /*-----------------------------------------------------------
    * Set defaults
    *-----------------------------------------------------------*/
 
   nx = 20;
   ny = 20;
   nz = 20;

   P  = 1;
   Q  = num_procs;
   R  = 1;

   /*-----------------------------------------------------------
    * Check a few things
    *-----------------------------------------------------------*/

   if ((P*Q*R) != num_procs)
   {
      hypre_printf("Error: Invalid number of processors or processor topology \n");
      exit(1);
   }

   /*-----------------------------------------------------------
    * Print driver parameters
    *-----------------------------------------------------------*/
 
   if (myid == 0)
   {
      hypre_printf("  Laplacian_27pt:\n");
      hypre_printf("    (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
      hypre_printf("    (Px, Py, Pz) = (%d, %d, %d)\n", P,  Q,  R);
   }

   /*-----------------------------------------------------------
    * Set up the grid structure
    *-----------------------------------------------------------*/

   /* compute p,q,r from P,Q,R and myid */
   p = myid % P;
   q = (( myid - p)/P) % Q;
   r = ( myid - p - P*q)/( P*Q );

   /*-----------------------------------------------------------
    * Generate the matrix 
    *-----------------------------------------------------------*/
 
   values = hypre_CTAlloc(HYPRE_Real, 2);

   values[0] = 26.0;
   if (nx == 1 || ny == 1 || nz == 1)
	values[0] = 8.0;
   if (nx*ny == 1 || nx*nz == 1 || ny*nz == 1)
	values[0] = 2.0;
   values[1] = -1.0;

   A = (HYPRE_ParCSRMatrix) GenerateLaplacian27pt(hypre_MPI_COMM_WORLD,
                               nx, ny, nz, P, Q, R, p, q, r, values);

   hypre_TFree(values);

   *A_ptr = A;

   return (0);
}
Example #26
0
HYPRE_Int main( HYPRE_Int   argc, char *argv[] )
{
   HYPRE_Int                 arg_index;
   HYPRE_Int                 print_usage;
   HYPRE_Int                 build_matrix_arg_index;
   HYPRE_Int                 solver_id;
   HYPRE_Int                 ierr,i,j; 
   HYPRE_Int                 num_iterations; 

   HYPRE_ParCSRMatrix  parcsr_A;
   HYPRE_Int                 num_procs, myid;
   HYPRE_Int                 local_row;
   HYPRE_Int		       time_index;
   MPI_Comm            comm;
   HYPRE_Int                 M, N;
   HYPRE_Int                 first_local_row, last_local_row;
   HYPRE_Int                 first_local_col, last_local_col;
   HYPRE_Int                 size, *col_ind;
   HYPRE_Real          *values;

   /* parameters for BoomerAMG */
   HYPRE_Real          strong_threshold;
   HYPRE_Int                 num_grid_sweeps;  
   HYPRE_Real          relax_weight; 

   /* parameters for GMRES */
   HYPRE_Int	               k_dim;

   char *paramString = new char[100];

   /*-----------------------------------------------------------
    * Initialize some stuff
    *-----------------------------------------------------------*/

   hypre_MPI_Init(&argc, &argv);
   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );

   /*-----------------------------------------------------------
    * Set defaults
    *-----------------------------------------------------------*/
 
   build_matrix_arg_index = argc;
   solver_id              = 0;
   strong_threshold       = 0.25;
   num_grid_sweeps        = 2;
   relax_weight           = 0.5;
   k_dim                  = 20;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/
 
   print_usage = 0;
   arg_index = 1;

   while ( (arg_index < argc) && (!print_usage) )
   {
      if ( strcmp(argv[arg_index], "-solver") == 0 )
      {
         arg_index++;
         solver_id = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-dbg") == 0 )
      {
         arg_index++;
         atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-help") == 0 )
      {
         print_usage = 1;
      }
      else
      {
         arg_index++;
      }
   }

   /*-----------------------------------------------------------
    * Print usage info
    *-----------------------------------------------------------*/
 
   if ( (print_usage) && (myid == 0) )
   {
      hypre_printf("\n");
      hypre_printf("Usage: %s [<options>]\n", argv[0]);
      hypre_printf("\n");
      hypre_printf("  -solver <ID>           : solver ID\n");
      hypre_printf("       0=DS-PCG      1=ParaSails-PCG \n");
      hypre_printf("       2=AMG-PCG     3=DS-GMRES     \n");
      hypre_printf("       4=PILUT-GMRES 5=AMG-GMRES    \n");     
      hypre_printf("\n");
      hypre_printf("  -rlx <val>             : relaxation type\n");
      hypre_printf("       0=Weighted Jacobi  \n");
      hypre_printf("       1=Gauss-Seidel (very slow!)  \n");
      hypre_printf("       3=Hybrid Jacobi/Gauss-Seidel  \n");
      hypre_printf("\n");  
      exit(1);
   }

   /*-----------------------------------------------------------
    * Print driver parameters
    *-----------------------------------------------------------*/
 
   if (myid == 0)
   {
      hypre_printf("Running with these driver parameters:\n");
      hypre_printf("  solver ID    = %d\n", solver_id);
   }

   /*-----------------------------------------------------------
    * Set up matrix
    *-----------------------------------------------------------*/

   strcpy(paramString, "LS Interface");
   time_index = hypre_InitializeTiming(paramString);
   hypre_BeginTiming(time_index);

   BuildParLaplacian27pt(argc, argv, build_matrix_arg_index, &parcsr_A);
    
   /*-----------------------------------------------------------
    * Copy the parcsr matrix into the LSI through interface calls
    *-----------------------------------------------------------*/

   ierr = HYPRE_ParCSRMatrixGetComm( parcsr_A, &comm );
   ierr += HYPRE_ParCSRMatrixGetDims( parcsr_A, &M, &N );
   ierr = HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
             &first_local_row, &last_local_row ,
             &first_local_col, &last_local_col );

   HYPRE_LinSysCore H(hypre_MPI_COMM_WORLD);
   HYPRE_Int numLocalEqns = last_local_row - first_local_row + 1;
   H.createMatricesAndVectors(M,first_local_row+1,numLocalEqns);

   HYPRE_Int index;
   HYPRE_Int *rowLengths = new HYPRE_Int[numLocalEqns];
   HYPRE_Int **colIndices = new HYPRE_Int*[numLocalEqns];

   local_row = 0;
   for (i=first_local_row; i<= last_local_row; i++)
   {
      ierr += HYPRE_ParCSRMatrixGetRow(parcsr_A,i,&size,&col_ind,&values );
      rowLengths[local_row] = size;
      colIndices[local_row] = new HYPRE_Int[size];
      for (j=0; j<size; j++) colIndices[local_row][j] = col_ind[j] + 1;
      local_row++;
      HYPRE_ParCSRMatrixRestoreRow(parcsr_A,i,&size,&col_ind,&values);
   }
   H.allocateMatrix(colIndices, rowLengths);
   delete [] rowLengths;
   for (i=0; i< numLocalEqns; i++) delete [] colIndices[i];
   delete [] colIndices;

   HYPRE_Int *newColInd;

   for (i=first_local_row; i<= last_local_row; i++)
   {
      ierr += HYPRE_ParCSRMatrixGetRow(parcsr_A,i,&size,&col_ind,&values );
      newColInd = new HYPRE_Int[size];
      for (j=0; j<size; j++) newColInd[j] = col_ind[j] + 1;
      H.sumIntoSystemMatrix(i+1,size,(const HYPRE_Real*)values,
                                     (const HYPRE_Int*)newColInd);
      delete [] newColInd;
      ierr += HYPRE_ParCSRMatrixRestoreRow(parcsr_A,i,&size,&col_ind,&values);
   }
   H.matrixLoadComplete();
   HYPRE_ParCSRMatrixDestroy(parcsr_A);

   /*-----------------------------------------------------------
    * Set up the RHS and initial guess
    *-----------------------------------------------------------*/

   HYPRE_Real ddata=1.0;
   HYPRE_Int  status;

   for (i=first_local_row; i<= last_local_row; i++)
   {
      index = i + 1;
      H.sumIntoRHSVector(1,(const HYPRE_Real*) &ddata, (const HYPRE_Int*) &index);
   }

   hypre_EndTiming(time_index);
   strcpy(paramString, "LS Interface");
   hypre_PrintTiming(paramString, hypre_MPI_COMM_WORLD);
   hypre_FinalizeTiming(time_index);
   hypre_ClearTiming();
 
   /*-----------------------------------------------------------
    * Solve the system using PCG 
    *-----------------------------------------------------------*/

   if ( solver_id == 0 ) 
   {
      strcpy(paramString, "solver cg");
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: DS-PCG\n");

      strcpy(paramString, "preconditioner diagonal");
      H.parameters(1, &paramString);
   } 
   else if ( solver_id == 1 )
   {
      strcpy(paramString, "solver cg");
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: ParaSails-PCG\n");

      strcpy(paramString, "preconditioner parasails");
      H.parameters(1, &paramString);
      strcpy(paramString, "parasailsNlevels 1");
      H.parameters(1, &paramString);
      strcpy(paramString, "parasailsThreshold 0.1");
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 2 )
   {
      strcpy(paramString, "solver cg");
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: AMG-PCG\n");

      strcpy(paramString, "preconditioner boomeramg");
      H.parameters(1, &paramString);
      strcpy(paramString, "amgCoarsenType falgout");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgStrongThreshold %e", strong_threshold);
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgNumSweeps %d", num_grid_sweeps);
      H.parameters(1, &paramString);
      strcpy(paramString, "amgRelaxType jacobi");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgRelaxWeight %e", relax_weight);
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 3 )
   {
      strcpy(paramString, "solver cg");
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: Poly-PCG\n");

      strcpy(paramString, "preconditioner poly");
      H.parameters(1, &paramString);
      strcpy(paramString, "polyOrder 9");
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 4 )
   {
      strcpy(paramString, "solver gmres");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "gmresDim %d", k_dim);
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: DS-GMRES\n");

      strcpy(paramString, "preconditioner diagonal");
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 5 ) 
   {
      strcpy(paramString, "solver gmres");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "gmresDim %d", k_dim);
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: PILUT-GMRES\n");

      strcpy(paramString, "preconditioner pilut");
      H.parameters(1, &paramString);
      strcpy(paramString, "pilutRowSize 0");
      H.parameters(1, &paramString);
      strcpy(paramString, "pilutDropTol 0.0");
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 6 )
   {
      strcpy(paramString, "solver gmres");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "gmresDim %d", k_dim);
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: AMG-GMRES\n");

      strcpy(paramString, "preconditioner boomeramg");
      H.parameters(1, &paramString);
      strcpy(paramString, "amgCoarsenType falgout");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgStrongThreshold %e", strong_threshold);
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgNumSweeps %d", num_grid_sweeps);
      H.parameters(1, &paramString);
      strcpy(paramString, "amgRelaxType jacobi");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgRelaxWeight %e", relax_weight);
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 7 )
   {
      strcpy(paramString, "solver gmres");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "gmresDim %d", k_dim);
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: DDILUT-GMRES\n");

      strcpy(paramString, "preconditioner ddilut");
      H.parameters(1, &paramString);
      strcpy(paramString, "ddilutFillin 5.0");
      H.parameters(1, &paramString);
      strcpy(paramString, "ddilutDropTol 0.0");
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 8 )
   {
      strcpy(paramString, "solver gmres");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "gmresDim %d", k_dim);
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: POLY-GMRES\n");

      strcpy(paramString, "preconditioner poly");
      H.parameters(1, &paramString);
      strcpy(paramString, "polyOrder 5");
      H.parameters(1, &paramString);
   }
 
   strcpy(paramString, "Krylov Solve");
   time_index = hypre_InitializeTiming(paramString);
   hypre_BeginTiming(time_index);
 
   H.launchSolver(status, num_iterations);
 
   hypre_EndTiming(time_index);
   strcpy(paramString, "Solve phase times");
   hypre_PrintTiming(paramString, hypre_MPI_COMM_WORLD);
   hypre_FinalizeTiming(time_index);
   hypre_ClearTiming();
 
   if (myid == 0)
   {
      hypre_printf("\n Iterations = %d\n", num_iterations);
      hypre_printf("\n");
   }
 
   /*-----------------------------------------------------------
    * Finalize things
    *-----------------------------------------------------------*/

   delete [] paramString;
   hypre_MPI_Finalize();

   return (0);
}
Example #27
0
HYPRE_Int
main( HYPRE_Int   argc,
      char *argv[] )
{


   HYPRE_Int        num_procs, myid;
   HYPRE_Int        verbose = 0, build_matrix_type = 1;
   HYPRE_Int        index, matrix_arg_index, commpkg_flag=3;
   HYPRE_Int        i, k, ierr=0;
   HYPRE_Int        row_start, row_end; 
   HYPRE_Int        col_start, col_end, global_num_rows;
   HYPRE_Int       *row_part, *col_part; 
   char      *csrfilename;
   HYPRE_Int        preload = 0, loop = 0, loop2 = LOOP2;   
   HYPRE_Int        bcast_rows[2], *info;
   


   hypre_ParCSRMatrix    *parcsr_A, *small_A;
   HYPRE_ParCSRMatrix    A_temp, A_temp_small; 
   hypre_CSRMatrix       *A_CSR;
   hypre_ParCSRCommPkg	 *comm_pkg;   

  
   HYPRE_Int                 nx, ny, nz;
   HYPRE_Int                 P, Q, R;
   HYPRE_Int                 p, q, r;
   HYPRE_Real          values[4];

   hypre_ParVector     *x_new;
   hypre_ParVector     *y_new, *y;
   HYPRE_Int                 *row_starts;
   HYPRE_Real          ans;
   HYPRE_Real          start_time, end_time, total_time, *loop_times;
   HYPRE_Real          T_avg, T_std;
   
   HYPRE_Int                   noparmprint = 0;
 
#if mydebug   
   HYPRE_Int  j, tmp_int;
#endif

   /*-----------------------------------------------------------
    * Initialize MPI
    *-----------------------------------------------------------*/


   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );



   /*-----------------------------------------------------------
    * default - is 27pt laplace
    *-----------------------------------------------------------*/

    
   build_matrix_type = 2;
   matrix_arg_index = argc;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/
 
   index = 1;
   while ( index < argc) 
   {
      if  ( strcmp(argv[index], "-verbose") == 0 )
      {
         index++;  
         verbose = 1;
      }
      else if ( strcmp(argv[index], "-fromonecsrfile") == 0 )
      {
         index++;
         build_matrix_type      = 1;      
         matrix_arg_index = index; /*this tells where the name is*/
      }
      else if  ( strcmp(argv[index], "-commpkg") == 0 )
      {
         index++;  
         commpkg_flag = atoi(argv[index++]);
      }
      else if ( strcmp(argv[index], "-laplacian") == 0 )
      {
         index++;
         build_matrix_type      = 2;
         matrix_arg_index = index;
      }
      else if ( strcmp(argv[index], "-27pt") == 0 )
      {
         index++;
         build_matrix_type      = 4;
         matrix_arg_index = index;
      }
/*
      else if  ( strcmp(argv[index], "-nopreload") == 0 )
      {
         index++;  
         preload = 0;
      }
*/
      else if  ( strcmp(argv[index], "-loop") == 0 )
      {
         index++;  
         loop = atoi(argv[index++]);
      }
      else if  ( strcmp(argv[index], "-noparmprint") == 0 )
      {
         index++;  
         noparmprint = 1;
         
      }
      else  
      {
	 index++;
         /*hypre_printf("Warning: Unrecogized option '%s'\n",argv[index++] );*/
      }
   }
   
   
  
   /*-----------------------------------------------------------
    * Setup the Matrix problem   
    *-----------------------------------------------------------*/

  /*-----------------------------------------------------------
    *  Get actual partitioning- 
    *  read in an actual csr matrix.
    *-----------------------------------------------------------*/


   if (build_matrix_type ==1) /*read in a csr matrix from one file */
   {
      if (matrix_arg_index < argc)
      {
	 csrfilename = argv[matrix_arg_index];
      }
      else
      {
         hypre_printf("Error: No filename specified \n");
         exit(1);
      }
      if (myid == 0)
      {
	/*hypre_printf("  FromFile: %s\n", csrfilename);*/
         A_CSR = hypre_CSRMatrixRead(csrfilename);
      }
      row_part = NULL;
      col_part = NULL;

      parcsr_A = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, A_CSR, 
					       row_part, col_part);

      if (myid == 0) hypre_CSRMatrixDestroy(A_CSR);
   }
   else if (build_matrix_type ==2)
   {
      
      myBuildParLaplacian(argc, argv, matrix_arg_index,  &A_temp, !noparmprint);
     parcsr_A = (hypre_ParCSRMatrix *) A_temp;      
 
   }
   else if (build_matrix_type ==4)
   {
      myBuildParLaplacian27pt(argc, argv, matrix_arg_index, &A_temp, !noparmprint);
     parcsr_A = (hypre_ParCSRMatrix *) A_temp;
   }

 
  /*-----------------------------------------------------------
   * create a small problem so that timings are more accurate - 
   * code gets run twice (small laplace)
   *-----------------------------------------------------------*/

   /*this is no longer being used - preload = 0 is set at the beginning */

   if (preload == 1) 
   {
 
      /*hypre_printf("preload!\n");*/
      
        
       values[1] = -1;
       values[2] = -1;
       values[3] = -1;
       values[0] = - 6.0    ;

       nx = 2;
       ny = num_procs;
       nz = 2;

       P  = 1;
       Q  = num_procs;
       R  = 1;

       p = myid % P;
       q = (( myid - p)/P) % Q;
       r = ( myid - p - P*q)/( P*Q );
       
      A_temp_small = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD, nx, ny, nz, 
				      P, Q, R, p, q, r, values);
      small_A = (hypre_ParCSRMatrix *) A_temp_small;     

      /*do comm packages*/
      hypre_NewCommPkgCreate(small_A);
      hypre_NewCommPkgDestroy(small_A); 

      hypre_MatvecCommPkgCreate(small_A);
      hypre_ParCSRMatrixDestroy(small_A); 
  
   }





   /*-----------------------------------------------------------
    *  Prepare for timing
    *-----------------------------------------------------------*/

   /* instead of preloading, let's not time the first one if more than one*/

    
   if (!loop)
   {
      loop = 1;
      /* and don't do any timings */
      
   }
   else
   {
      
      loop +=1;
      if (loop < 2) loop = 2;
   }
      
   
   loop_times = hypre_CTAlloc(HYPRE_Real, loop);
   


/******************************************************************************************/   

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

   if (commpkg_flag == 1 || commpkg_flag ==3 )
   {
  
      /*-----------------------------------------------------------
       *  Create new comm package
       *-----------------------------------------------------------*/


    
      if (!myid) hypre_printf("********************************************************\n" );  
 
      /*do loop times*/
      for (i=0; i< loop; i++) 
      {
         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++) 
         {
         
            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
            
            start_time = hypre_MPI_Wtime();

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(1); 
#endif
     
            hypre_NewCommPkgCreate(parcsr_A);

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(0); 
#endif  
  
            end_time = hypre_MPI_Wtime();
            
            end_time = end_time - start_time;
        
            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                       HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);
         
            loop_times[i] += total_time;

            if (  !((i+1)== loop  &&  (k+1) == loop2)) hypre_NewCommPkgDestroy(parcsr_A); 
            
         }/*end of loop2 */
      
        
      } /*end of loop*/
      


      /* calculate the avg and std. */
      if (loop > 1)
      {
         
         /* calculate the avg and std. */
         stats_mo(loop_times, loop, &T_avg, &T_std);
      
         if (!myid) hypre_printf(" NewCommPkgCreate:  AVG. wall clock time =  %f seconds\n", T_avg);  
         if (!myid) hypre_printf("                    STD. for %d  runs     =  %f\n", loop-1, T_std);  
         if (!myid) hypre_printf("                    (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );  
         for (i=0; i< loop; i++) 
         {
            if (!myid) hypre_printf("      run %d  =  %f sec.\n", i, loop_times[i]);  
         }
         if (!myid) hypre_printf("********************************************************\n" );  
   
       }
       else 
       {
         if (!myid) hypre_printf("********************************************************\n" );  
         if (!myid) hypre_printf(" NewCommPkgCreate:\n");  
         if (!myid) hypre_printf("      run time =  %f sec.\n", loop_times[0]);  
         if (!myid) hypre_printf("********************************************************\n" );  
       }


     /*-----------------------------------------------------------
       *  Verbose printing
       *-----------------------------------------------------------*/

      /*some verification*/

       global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); 

       if (verbose) 
       {

	  ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
                                      &row_start, &row_end ,
                                       &col_start, &col_end );


	  comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);
     
          hypre_printf("myid = %i, my ACTUAL local range: [%i, %i]\n", myid, 
		 row_start, row_end);
	  
	
	  ierr = hypre_GetAssumedPartitionRowRange( myid, global_num_rows, &row_start, 
					      &row_end);


	  hypre_printf("myid = %i, my assumed local range: [%i, %i]\n", myid, 
		 row_start, row_end);

          hypre_printf("myid = %d, num_recvs = %d\n", myid, 
		 hypre_ParCSRCommPkgNumRecvs(comm_pkg)  );  

#if mydebug   
	  for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) 
	  {
              hypre_printf("myid = %d, recv proc = %d, vec_starts = [%d : %d]\n", 
		     myid,  hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], 
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
	   }
#endif 
	  hypre_printf("myid = %d, num_sends = %d\n", myid, 
		 hypre_ParCSRCommPkgNumSends(comm_pkg)  );  

#if mydebug
	  for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) 
          {
	    tmp_int =  hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -  
                     hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	    index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	    for (j=0; j< tmp_int; j++) 
	    {
	       hypre_printf("myid = %d, send proc = %d, send element = %d\n",myid,  
		      hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
		      hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); 
	     }   
	  }
#endif
       }
       /*-----------------------------------------------------------
        *  To verify correctness (if commpkg_flag = 3)
        *-----------------------------------------------------------*/

       if (commpkg_flag == 3 ) 
       {
          /*do a matvec - we are assuming a square matrix */
          row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);
   
          x_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
          hypre_ParVectorSetPartitioningOwner(x_new, 0);
          hypre_ParVectorInitialize(x_new);
          hypre_ParVectorSetRandomValues(x_new, 1);    
          
          y_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
          hypre_ParVectorSetPartitioningOwner(y_new, 0);
          hypre_ParVectorInitialize(y_new);
          hypre_ParVectorSetConstantValues(y_new, 0.0);
          
          /*y = 1.0*A*x+1.0*y */
          hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y_new);
       }
   
   /*-----------------------------------------------------------
    *  Clean up after MyComm
    *-----------------------------------------------------------*/


       hypre_NewCommPkgDestroy(parcsr_A); 

   }

  




/******************************************************************************************/
/******************************************************************************************/

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);


   if (commpkg_flag > 1 )
   {

      /*-----------------------------------------------------------
       *  Set up standard comm package
       *-----------------------------------------------------------*/

      bcast_rows[0] = 23;
      bcast_rows[1] = 1789;
      
      if (!myid) hypre_printf("********************************************************\n" );  
      /*do loop times*/
      for (i=0; i< loop; i++) 
      {

         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++) 
         {
            

            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

         
            start_time = hypre_MPI_Wtime();

#if time_gather
                  
            info = hypre_CTAlloc(HYPRE_Int, num_procs);
            
            hypre_MPI_Allgather(bcast_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hypre_MPI_COMM_WORLD); 

#endif

            hypre_MatvecCommPkgCreate(parcsr_A);

            end_time = hypre_MPI_Wtime();


            end_time = end_time - start_time;
        
            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                          HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);

            loop_times[i] += total_time;
         
       
         if (  !((i+1)== loop  &&  (k+1) == loop2))   hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(parcsr_A));
               
         }/* end of loop 2*/
         
        
      } /*end of loop*/
      
      /* calculate the avg and std. */
      if (loop > 1)
      {
         
         stats_mo(loop_times, loop, &T_avg, &T_std);      
         if (!myid) hypre_printf("Current CommPkgCreate:  AVG. wall clock time =  %f seconds\n", T_avg);  
         if (!myid) hypre_printf("                        STD. for %d  runs     =  %f\n", loop-1, T_std);  
         if (!myid) hypre_printf("                        (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );  
         for (i=0; i< loop; i++) 
         {
            if (!myid) hypre_printf("      run %d  =  %f sec.\n", i, loop_times[i]);  
         }
         if (!myid) hypre_printf("********************************************************\n" );  
         
      }
      else 
      {
         if (!myid) hypre_printf("********************************************************\n" );  
         if (!myid) hypre_printf(" Current CommPkgCreate:\n");  
         if (!myid) hypre_printf("      run time =  %f sec.\n", loop_times[0]);  
         if (!myid) hypre_printf("********************************************************\n" );  
      }





      /*-----------------------------------------------------------
       * Verbose printing
       *-----------------------------------------------------------*/

      /*some verification*/

    
       if (verbose) 
       {

          ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
						  &row_start, &row_end ,
						  &col_start, &col_end );


          comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);
     
          hypre_printf("myid = %i, std - my local range: [%i, %i]\n", myid, 
		 row_start, row_end);

          ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
						  &row_start, &row_end ,
						  &col_start, &col_end );

          hypre_printf("myid = %d, std - num_recvs = %d\n", myid, 
		 hypre_ParCSRCommPkgNumRecvs(comm_pkg)  );  

#if mydebug   
	  for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) 
          {
              hypre_printf("myid = %d, std - recv proc = %d, vec_starts = [%d : %d]\n", 
		     myid,  hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], 
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
	  }
#endif
          hypre_printf("myid = %d, std - num_sends = %d\n", myid, 
		 hypre_ParCSRCommPkgNumSends(comm_pkg));  


#if mydebug
          for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) 
          {
	     tmp_int =  hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -  
	                hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	     index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	     for (j=0; j< tmp_int; j++) 
	     {
	        hypre_printf("myid = %d, std - send proc = %d, send element = %d\n",myid,  
		       hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
		       hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); 
	     }   
	  } 
#endif
       }

       /*-----------------------------------------------------------
        * Verify correctness
        *-----------------------------------------------------------*/

 

       if (commpkg_flag == 3 ) 
       { 
          global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); 
          row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);
 
       
          y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows,row_starts);
          hypre_ParVectorSetPartitioningOwner(y, 0);
          hypre_ParVectorInitialize(y);
          hypre_ParVectorSetConstantValues(y, 0.0);

          hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y);
      
       }

   }






   /*-----------------------------------------------------------
    *  Compare matvecs for both comm packages (3)
    *-----------------------------------------------------------*/

   if (commpkg_flag == 3 ) 
   { 
     /*make sure that y and y_new are the same  - now y_new should=0*/   
     hypre_ParVectorAxpy( -1.0, y, y_new );


     hypre_ParVectorSetRandomValues(y, 1);

     ans = hypre_ParVectorInnerProd( y, y_new );
     if (!myid)
     {
        
        if ( fabs(ans) > 1e-8 ) 
        {  
           hypre_printf("!!!!! WARNING !!!!! should be zero if correct = %6.10f\n", 
                  ans); 
        } 
        else
        {
           hypre_printf("Matvecs match ( should be zero = %6.10f )\n", 
                  ans); 
        }
     }
     

   }
 

   /*-----------------------------------------------------------
    *  Clean up
    *-----------------------------------------------------------*/

    
   hypre_ParCSRMatrixDestroy(parcsr_A); /*this calls the standard comm 
                                          package destroy - but we'll destroy 
                                          ours separately until it is
                                          incorporated */

  if (commpkg_flag == 3 ) 
  { 

      hypre_ParVectorDestroy(x_new);
      hypre_ParVectorDestroy(y);
      hypre_ParVectorDestroy(y_new);
  }




   hypre_MPI_Finalize();

   return(ierr);


}
Example #28
0
hypre_ParCSRCommMultiHandle *
hypre_ParCSRCommMultiHandleCreate (HYPRE_Int 	               job,
			           hypre_ParCSRCommPkg *comm_pkg,
                                   void                *send_data, 
                                   void                *recv_data, 
				   HYPRE_Int                 num_vecs )
{
   HYPRE_Int                  num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   HYPRE_Int                  num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
   MPI_Comm             comm      = hypre_ParCSRCommPkgComm(comm_pkg);

   hypre_ParCSRCommMultiHandle *comm_handle;
   HYPRE_Int                         num_requests;
   hypre_MPI_Request                 *requests;

   HYPRE_Int                  i, j;
   HYPRE_Int			my_id, num_procs;
   HYPRE_Int			ip, vec_start, vec_len;
                  
   /*--------------------------------------------------------------------
    * hypre_Initialize sets up a communication handle,
    * posts receives and initiates sends. It always requires num_sends, 
    * num_recvs, recv_procs and send_procs to be set in comm_pkg.
    * There are different options for job:
    * job = 1 : is used to initialize communication exchange for the parts
    *		of vector needed to perform a Matvec,  it requires send_data 
    *		and recv_data to be doubles, recv_vec_starts and 
    *		send_map_starts need to be set in comm_pkg.
    * job = 2 : is used to initialize communication exchange for the parts
    *		of vector needed to perform a MatvecT,  it requires send_data 
    *		and recv_data to be doubles, recv_vec_starts and 
    *		send_map_starts need to be set in comm_pkg.
    *--------------------------------------------------------------------*/

   num_requests = num_sends + num_recvs;
   requests = hypre_CTAlloc(hypre_MPI_Request, num_requests);
 
   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   j = 0;
   switch (job)
   {
      case  1:
      {
	 double *d_send_data = (double *) send_data;
	 double *d_recv_data = (double *) recv_data;
   	 for (i = 0; i < num_recvs; i++)
   	 {
      	    ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); 
      	    vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
      	    vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
      	    hypre_MPI_Irecv(&d_recv_data[vec_start*num_vecs], vec_len*num_vecs, 
                      hypre_MPI_DOUBLE, ip, 0, comm, &requests[j++]);
   	 }
   	 for (i = 0; i < num_sends; i++)
   	 {
	    vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
	    vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start;
      	    ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); 
   	    hypre_MPI_Isend(&d_send_data[vec_start*num_vecs], vec_len*num_vecs,
                      hypre_MPI_DOUBLE, ip, 0, comm, &requests[j++]);
   	 }
	 break;
      }
      case  2:
      {
 	 double *d_send_data = (double *) send_data;
	 double *d_recv_data = (double *) recv_data;
   	 for (i = 0; i < num_sends; i++)
   	 {
	    vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
	    vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start;
      	    ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); 
   	    hypre_MPI_Irecv(&d_recv_data[vec_start*num_vecs], vec_len*num_vecs,
                      hypre_MPI_DOUBLE, ip, 0, comm, &requests[j++]);
   	 }
   	 for (i = 0; i < num_recvs; i++)
   	 {
      	    ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); 
      	    vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
      	    vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
      	    hypre_MPI_Isend(&d_send_data[vec_start*num_vecs], vec_len*num_vecs,
                      hypre_MPI_DOUBLE, ip, 0, comm, &requests[j++]);
   	 }
	 break;
      }
   }

   /*--------------------------------------------------------------------
    * set up comm_handle and return
    *--------------------------------------------------------------------*/

   comm_handle = hypre_CTAlloc(hypre_ParCSRCommMultiHandle, 1);

   hypre_ParCSRCommMultiHandleCommPkg(comm_handle)     = comm_pkg;
   hypre_ParCSRCommMultiHandleSendData(comm_handle)    = send_data;
   hypre_ParCSRCommMultiHandleRecvData(comm_handle)    = recv_data;
   hypre_ParCSRCommMultiHandleNumRequests(comm_handle) = num_requests;
   hypre_ParCSRCommMultiHandleRequests(comm_handle)    = requests;

   return (comm_handle);
}
Example #29
0
HYPRE_Int
main( HYPRE_Int   argc,
      char *argv[] )
{
   hypre_ParVector   *vector1;
   hypre_ParVector   *vector2;
   hypre_ParVector   *tmp_vector;

   HYPRE_Int          num_procs, my_id;
   HYPRE_Int	 	global_size = 20;
   HYPRE_Int		local_size;
   HYPRE_Int		first_index;
   HYPRE_Int          num_vectors, vecstride, idxstride;
   HYPRE_Int 		i, j;
   HYPRE_Int 		*partitioning;
   double	prod;
   double 	*data, *data2;
   hypre_Vector *vector; 
   hypre_Vector *local_vector; 
   hypre_Vector *local_vector2;
 
   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &my_id );

   hypre_printf(" my_id: %d num_procs: %d\n", my_id, num_procs);
 
   partitioning = NULL;
   num_vectors = 3;
   vector1 = hypre_ParMultiVectorCreate
      ( hypre_MPI_COMM_WORLD, global_size, partitioning, num_vectors );
   partitioning = hypre_ParVectorPartitioning(vector1);

   hypre_ParVectorInitialize(vector1);
   local_vector = hypre_ParVectorLocalVector(vector1);
   data = hypre_VectorData(local_vector);
   local_size = hypre_VectorSize(local_vector);
   vecstride = hypre_VectorVectorStride(local_vector);
   idxstride = hypre_VectorIndexStride(local_vector);
   first_index = partitioning[my_id];

   hypre_printf("vecstride=%i idxstride=%i local_size=%i num_vectors=%i",
          vecstride, idxstride, local_size, num_vectors );
   for (j=0; j<num_vectors; ++j )
      for (i=0; i < local_size; i++)
         data[ j*vecstride + i*idxstride ] = first_index+i + 100*j;

   hypre_ParVectorPrint(vector1, "Vector");

   local_vector2 = hypre_SeqMultiVectorCreate( global_size, num_vectors );
   hypre_SeqVectorInitialize(local_vector2);
   data2 = hypre_VectorData(local_vector2);
   vecstride = hypre_VectorVectorStride(local_vector2);
   idxstride = hypre_VectorIndexStride(local_vector2);
   for (j=0; j<num_vectors; ++j )
      for (i=0; i < global_size; i++)
         data2[ j*vecstride + i*idxstride ] = i + 100*j;

/*   partitioning = hypre_CTAlloc(HYPRE_Int,4);
   partitioning[0] = 0;
   partitioning[1] = 10;
   partitioning[2] = 10;
   partitioning[3] = 20;
*/
   partitioning = hypre_CTAlloc(HYPRE_Int,1+num_procs);
   hypre_GeneratePartitioning( global_size, num_procs, &partitioning );

   vector2 = hypre_VectorToParVector(hypre_MPI_COMM_WORLD,local_vector2,partitioning);
   hypre_ParVectorSetPartitioningOwner(vector2,0);

   hypre_ParVectorPrint(vector2, "Convert");

   vector = hypre_ParVectorToVectorAll(vector2);

   /*-----------------------------------------------------------
    * Copy the vector into tmp_vector
    *-----------------------------------------------------------*/

/* Read doesn't work for multivectors yet...
   tmp_vector = hypre_ParVectorRead(hypre_MPI_COMM_WORLD, "Convert");*/
   tmp_vector = hypre_ParMultiVectorCreate
      ( hypre_MPI_COMM_WORLD, global_size, partitioning, num_vectors );
   hypre_ParVectorInitialize( tmp_vector );
   hypre_ParVectorCopy( vector2, tmp_vector );
/*
   tmp_vector = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_size,partitioning);
   hypre_ParVectorSetPartitioningOwner(tmp_vector,0);
   hypre_ParVectorInitialize(tmp_vector);
   hypre_ParVectorCopy(vector1, tmp_vector);

   hypre_ParVectorPrint(tmp_vector,"Copy");
*/
   /*-----------------------------------------------------------
    * Scale tmp_vector
    *-----------------------------------------------------------*/

   hypre_ParVectorScale(2.0, tmp_vector);
   hypre_ParVectorPrint(tmp_vector,"Scale");

   /*-----------------------------------------------------------
    * Do an Axpy (2*vector - vector) = vector
    *-----------------------------------------------------------*/

   hypre_ParVectorAxpy(-1.0, vector1, tmp_vector);
   hypre_ParVectorPrint(tmp_vector,"Axpy");

   /*-----------------------------------------------------------
    * Do an inner product vector* tmp_vector
    *-----------------------------------------------------------*/

   prod = hypre_ParVectorInnerProd(vector1, tmp_vector);

   hypre_printf (" prod: %8.2f \n", prod);

   /*-----------------------------------------------------------
    * Finalize things
    *-----------------------------------------------------------*/

   hypre_ParVectorDestroy(vector1);
   hypre_ParVectorDestroy(vector2); 
   hypre_ParVectorDestroy(tmp_vector);
   hypre_SeqVectorDestroy(local_vector2); 
   if (vector) hypre_SeqVectorDestroy(vector); 

   /* Finalize MPI */
   hypre_MPI_Finalize();

   return 0;
}
Example #30
0
HYPRE_Int
hypre_BoomerAMGSolveT( void               *amg_vdata,
                   hypre_ParCSRMatrix *A,
                   hypre_ParVector    *f,
                   hypre_ParVector    *u         )
{

   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(A);   

   hypre_ParAMGData   *amg_data = amg_vdata;

   /* Data Structure variables */

   HYPRE_Int      amg_print_level;
   HYPRE_Int      amg_logging;
   HYPRE_Real  *num_coeffs;
   HYPRE_Int     *num_variables;
   HYPRE_Real   cycle_op_count;
   HYPRE_Int      num_levels;
   /* HYPRE_Int      num_unknowns; */
   HYPRE_Real   tol;
   char    *file_name;
   hypre_ParCSRMatrix **A_array;
   hypre_ParVector    **F_array;
   hypre_ParVector    **U_array;

   /*  Local variables  */

   /*FILE    *fp;*/

   HYPRE_Int      j;
   HYPRE_Int      Solve_err_flag;
   HYPRE_Int      min_iter;
   HYPRE_Int      max_iter;
   HYPRE_Int      cycle_count;
   HYPRE_Real   total_coeffs;
   HYPRE_Int      total_variables;
   HYPRE_Int      num_procs, my_id;

   HYPRE_Real   alpha = 1.0;
   HYPRE_Real   beta = -1.0;
   HYPRE_Real   cycle_cmplxty = 0.0;
   HYPRE_Real   operat_cmplxty;
   HYPRE_Real   grid_cmplxty;
   HYPRE_Real   conv_factor;
   HYPRE_Real   resid_nrm;
   HYPRE_Real   resid_nrm_init;
   HYPRE_Real   relative_resid;
   HYPRE_Real   rhs_norm;
   HYPRE_Real   old_resid;

   hypre_ParVector  *Vtemp;
   hypre_ParVector  *Residual;

   hypre_MPI_Comm_size(comm, &num_procs);   
   hypre_MPI_Comm_rank(comm,&my_id);

   amg_print_level = hypre_ParAMGDataPrintLevel(amg_data);
   amg_logging   = hypre_ParAMGDataLogging(amg_data);
   if ( amg_logging>1 )
      Residual = hypre_ParAMGDataResidual(amg_data);
   file_name     = hypre_ParAMGDataLogFileName(amg_data);
   /* num_unknowns  = hypre_ParAMGDataNumUnknowns(amg_data); */
   num_levels    = hypre_ParAMGDataNumLevels(amg_data);
   A_array       = hypre_ParAMGDataAArray(amg_data);
   F_array       = hypre_ParAMGDataFArray(amg_data);
   U_array       = hypre_ParAMGDataUArray(amg_data);

   tol           = hypre_ParAMGDataTol(amg_data);
   min_iter      = hypre_ParAMGDataMinIter(amg_data);
   max_iter      = hypre_ParAMGDataMaxIter(amg_data);

   num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels);
   num_variables = hypre_CTAlloc(HYPRE_Int, num_levels);
   num_coeffs[0]    = hypre_ParCSRMatrixDNumNonzeros(A_array[0]);
   num_variables[0] = hypre_ParCSRMatrixGlobalNumRows(A_array[0]);
 
   A_array[0] = A;
   F_array[0] = f;
   U_array[0] = u;

/*   Vtemp = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A_array[0]),
                                 hypre_ParCSRMatrixGlobalNumRows(A_array[0]),
                                 hypre_ParCSRMatrixRowStarts(A_array[0]));
   hypre_ParVectorInitialize(Vtemp);
   hypre_ParVectorSetPartitioningOwner(Vtemp,0);
   hypre_ParAMGDataVtemp(amg_data) = Vtemp;
*/
   Vtemp = hypre_ParAMGDataVtemp(amg_data);
   for (j = 1; j < num_levels; j++)
   {
      num_coeffs[j]    = hypre_ParCSRMatrixDNumNonzeros(A_array[j]);
      num_variables[j] = hypre_ParCSRMatrixGlobalNumRows(A_array[j]);
   }

   /*-----------------------------------------------------------------------
    *    Write the solver parameters
    *-----------------------------------------------------------------------*/


   if (my_id == 0 && amg_print_level > 1)
      hypre_BoomerAMGWriteSolverParams(amg_data); 



   /*-----------------------------------------------------------------------
    *    Initialize the solver error flag and assorted bookkeeping variables
    *-----------------------------------------------------------------------*/

   Solve_err_flag = 0;

   total_coeffs = 0;
   total_variables = 0;
   cycle_count = 0;
   operat_cmplxty = 0;
   grid_cmplxty = 0;

   /*-----------------------------------------------------------------------
    *     open the log file and write some initial info
    *-----------------------------------------------------------------------*/

   if (my_id == 0 && amg_print_level > 1)
   { 
      /*fp = fopen(file_name, "a");*/

      hypre_printf("\n\nAMG SOLUTION INFO:\n");

   }

   /*-----------------------------------------------------------------------
    *    Compute initial fine-grid residual and print to logfile
    *-----------------------------------------------------------------------*/

   if ( amg_logging > 1 ) {
      hypre_ParVectorCopy(F_array[0], Residual );
      hypre_ParCSRMatrixMatvecT(alpha, A_array[0], U_array[0], beta, Residual );
      resid_nrm = sqrt(hypre_ParVectorInnerProd( Residual, Residual ));
   }
   else {
      hypre_ParVectorCopy(F_array[0], Vtemp);
      hypre_ParCSRMatrixMatvecT(alpha, A_array[0], U_array[0], beta, Vtemp);
      resid_nrm = sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
   }


   resid_nrm_init = resid_nrm;
   rhs_norm = sqrt(hypre_ParVectorInnerProd(f, f));
   relative_resid = 9999;
   if (rhs_norm)
   {
      relative_resid = resid_nrm_init / rhs_norm;
   }

   if (my_id ==0 && (amg_print_level > 1))
   {     
      hypre_printf("                                            relative\n");
      hypre_printf("               residual        factor       residual\n");
      hypre_printf("               --------        ------       --------\n");
      hypre_printf("    Initial    %e                 %e\n",resid_nrm_init,
              relative_resid);
   }

   /*-----------------------------------------------------------------------
    *    Main V-cycle loop
    *-----------------------------------------------------------------------*/
   
   while ((relative_resid >= tol || cycle_count < min_iter)
          && cycle_count < max_iter 
          && Solve_err_flag == 0)
   {
      hypre_ParAMGDataCycleOpCount(amg_data) = 0;   
      /* Op count only needed for one cycle */

      Solve_err_flag = hypre_BoomerAMGCycleT(amg_data, F_array, U_array); 

      old_resid = resid_nrm;

      /*---------------------------------------------------------------
       *    Compute  fine-grid residual and residual norm
       *----------------------------------------------------------------*/

      if ( amg_logging > 1 ) {
         hypre_ParVectorCopy(F_array[0], Residual );
         hypre_ParCSRMatrixMatvecT(alpha, A_array[0], U_array[0], beta, Residual );
         resid_nrm = sqrt(hypre_ParVectorInnerProd( Residual, Residual ));
      }
      else {
         hypre_ParVectorCopy(F_array[0], Vtemp);
         hypre_ParCSRMatrixMatvecT(alpha, A_array[0], U_array[0], beta, Vtemp);
         resid_nrm = sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
      }

      conv_factor = resid_nrm / old_resid;
      relative_resid = 9999;
      if (rhs_norm)
      {
         relative_resid = resid_nrm / rhs_norm;
      }

      ++cycle_count;



      hypre_ParAMGDataRelativeResidualNorm(amg_data) = relative_resid;
      hypre_ParAMGDataNumIterations(amg_data) = cycle_count;

      if (my_id == 0 && (amg_print_level > 1))
      { 
         hypre_printf("    Cycle %2d   %e    %f     %e \n", cycle_count,
                 resid_nrm, conv_factor, relative_resid);
      }
   }

   if (cycle_count == max_iter) Solve_err_flag = 1;

   /*-----------------------------------------------------------------------
    *    Compute closing statistics
    *-----------------------------------------------------------------------*/

   conv_factor = pow((resid_nrm/resid_nrm_init),(1.0/((HYPRE_Real) cycle_count)));


   for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++)
   {
      total_coeffs += num_coeffs[j];
      total_variables += num_variables[j];
   }

   cycle_op_count = hypre_ParAMGDataCycleOpCount(amg_data);

   if (num_variables[0])
      grid_cmplxty = ((HYPRE_Real) total_variables) / ((HYPRE_Real) num_variables[0]);
   if (num_coeffs[0])
   {
      operat_cmplxty = total_coeffs / num_coeffs[0];
      cycle_cmplxty = cycle_op_count / num_coeffs[0];
   }

   if (my_id == 0 && amg_print_level > 1)
   {
      if (Solve_err_flag == 1)
      {
         hypre_printf("\n\n==============================================");
         hypre_printf("\n NOTE: Convergence tolerance was not achieved\n");
         hypre_printf("      within the allowed %d V-cycles\n",max_iter);
         hypre_printf("==============================================");
      }
      hypre_printf("\n\n Average Convergence Factor = %f",conv_factor);
      hypre_printf("\n\n     Complexity:    grid = %f\n",grid_cmplxty);
      hypre_printf("                operator = %f\n",operat_cmplxty);
      hypre_printf("                   cycle = %f\n\n",cycle_cmplxty);
   }

   /*----------------------------------------------------------
    * Close the output file (if open)
    *----------------------------------------------------------*/

   /*if (my_id == 0 && amg_print_level >= 1)
   { 
      fclose(fp); 
   }*/

   hypre_TFree(num_coeffs);
   hypre_TFree(num_variables);

   return(Solve_err_flag);
}