Ejemplo n.º 1
0
void
hypre_MatTCommPkgCreate_core (

/* input args: */
   MPI_Comm comm, HYPRE_Int * col_map_offd, HYPRE_Int first_col_diag, HYPRE_Int * col_starts,
   HYPRE_Int num_rows_diag, HYPRE_Int num_cols_diag, HYPRE_Int num_cols_offd, HYPRE_Int * row_starts,
   HYPRE_Int firstColDiag, HYPRE_Int * colMapOffd,
   HYPRE_Int * mat_i_diag, HYPRE_Int * mat_j_diag, HYPRE_Int * mat_i_offd, HYPRE_Int * mat_j_offd,

   HYPRE_Int data,  /* = 1 for a matrix with floating-point data, =0 for Boolean matrix */

/* pointers to output args: */
   HYPRE_Int * p_num_recvs, HYPRE_Int ** p_recv_procs, HYPRE_Int ** p_recv_vec_starts,
   HYPRE_Int * p_num_sends, HYPRE_Int ** p_send_procs, HYPRE_Int ** p_send_map_starts,
   HYPRE_Int ** p_send_map_elmts

   )
{
   HYPRE_Int			num_sends;
   HYPRE_Int			*send_procs;
   HYPRE_Int			*send_map_starts;
   HYPRE_Int			*send_map_elmts;
   HYPRE_Int			num_recvs;
   HYPRE_Int			*recv_procs;
   HYPRE_Int			*recv_vec_starts;
   HYPRE_Int	i, j, j2, k, ir, rowmin, rowmax;
   HYPRE_Int	*tmp, *recv_buf, *displs, *info, *send_buf, *all_num_sends3;
   HYPRE_Int	num_procs, my_id, num_elmts;
   HYPRE_Int	local_info, index, index2;
   HYPRE_Int pmatch, col, kc, p;
   HYPRE_Int * recv_sz_buf;
   HYPRE_Int * row_marker;
   
   hypre_MPI_Comm_size(comm, &num_procs);  
   hypre_MPI_Comm_rank(comm, &my_id);

   info = hypre_CTAlloc(HYPRE_Int, num_procs);

/* ----------------------------------------------------------------------
 * determine which processors to receive from (set proc_mark) and num_recvs,
 * at the end of the loop proc_mark[i] contains the number of elements to be
 * received from Proc. i
 *
 * - For A*b or A*B: for each off-diagonal column i of A, you want to identify
 * the processor which has the corresponding element i of b (row i of B)
 * (columns in the local diagonal block, just multiply local rows of B).
 * You do it by finding the processor which has that column of A in its
 * _diagonal_ block - assuming b or B is distributed the same, which I believe
 * is evenly among processors, by row.  There is a unique solution because
 * the diag/offd blocking is defined by which processor owns which rows of A.
 *
 * - For A*A^T: A^T is not distributed by rows as B or any 'normal' matrix is.
 * For each off-diagonal row,column k,i element of A, you want to identify
 * the processors which have the corresponding row,column i,j elements of A^T
 * i.e., row,column j,i elements of A (all i,j,k for which these entries are
 * nonzero, row k of A lives on this processor, and row j of A lives on
 * a different processor).  So, given a column i in the local A-offd or A-diag,
 * we want to find all the processors which have column i, in diag or offd
 * blocks.  Unlike the A*B case, I don't think you can eliminate looking at
 * any class of blocks.
 * ---------------------------------------------------------------------*/

/* The algorithm for A*B was:
   For each of my columns i (in offd block), use known information on data
   distribution of columns in _diagonal_ blocks to find the processor p which
   owns row i. (Note that for i in diag block, I own the row, nothing to do.)
   Count up such i's for each processor in proc_mark.  Construct a data
   structure, recv_buf, made by appending a structure tmp from each processor.
   The data structure tmp looks like (p, no. of i's, i1, i2,...) (p=0,...) .
   There are two communication steps: gather size information (local_info) from
   all processors (into info), then gather the data (tmp) from all processors
   (into recv_buf).  Then you go through recv_buf.  For each (sink) processor p
   you search for for the appearance of my (source) processor number
   (most of recv_buf pertains to other processors and is ignored).
   When you find the appropriate section, pull out the i's, count them and
   save them, in send_map_elmts, and save p in send_procs and index information
   in send_map_starts.
*/
/* The algorithm for A*A^T:
   [ Originally I had planned to figure out approximately which processors
   had the information (for A*B it could be done exactly) to save on
   communication.  But even for A*B where the data owner is known, all data is
   sent to all processors, so that's not worth worrying about on the first cut.
   One consequence is that proc_mark is not needed.]
   Construct a data structure, recv_buf, made by appending a structure tmp for
   each processor.  It simply consists of (no. of i's, i1, i2,...) where i is
   the global number of a column in the offd block.  There are still two
   communication steps: gather size information (local_info) from all processors
   (into info), then gather the data (tmp) from all processors (into recv_buf).
   Then you go through recv_buf.  For each (sink) processor p you go through
   all its column numbers in recv_buf.  Check each one for whether you have
   data in that column.  If so, put in in send_map_elmts, p in send_procs,
   and update the index information in send_map_starts.  Note that these
   arrays don't mean quite the same thing as for A*B.
*/

   num_recvs=num_procs-1;
   local_info = num_procs + num_cols_offd + num_cols_diag;
			
   hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm); 

/* ----------------------------------------------------------------------
 * generate information to be send: tmp contains for each recv_proc:
 * {deleted: id of recv_procs}, number of elements to be received for this processor,
 * indices of elements (in this order)
 * ---------------------------------------------------------------------*/

   displs = hypre_CTAlloc(HYPRE_Int, num_procs+1);
   displs[0] = 0;
   for (i=1; i < num_procs+1; i++)
	displs[i] = displs[i-1]+info[i-1]; 
   recv_buf = hypre_CTAlloc(HYPRE_Int, displs[num_procs]); 
   tmp = hypre_CTAlloc(HYPRE_Int, local_info);

   j = 0;
   for (i=0; i < num_procs; i++) {
      j2 = j++;
      tmp[j2] = 0;
      for (k=0; k < num_cols_offd; k++)
         if (col_map_offd[k] >= col_starts[i] && 
             col_map_offd[k] < col_starts[i+1]) {
            tmp[j++] = col_map_offd[k];
            ++(tmp[j2]);
         };
      for (k=0; k < num_cols_diag; k++)
         if ( k+first_col_diag >= col_starts[i] && 
              k+first_col_diag < col_starts[i+1] ) {
            tmp[j++] = k + first_col_diag;
            ++(tmp[j2]);
         }
   }

   hypre_MPI_Allgatherv(tmp,local_info,HYPRE_MPI_INT,recv_buf,info,displs,HYPRE_MPI_INT,comm);
	

/* ----------------------------------------------------------------------
 * determine send_procs and actual elements to be send (in send_map_elmts)
 * and send_map_starts whose i-th entry points to the beginning of the 
 * elements to be send to proc. i
 * ---------------------------------------------------------------------*/
/* Meanings of arrays being set here, more verbosely stated:
   send_procs: processors p to send to
   send_map_starts: for each p, gives range of indices in send_map_elmts; 
   send_map_elmts:  Each element is a send_map_elmts[i], with i in a range given
     by send_map_starts[p..p+1], for some p. This element is is the global
     column number for a column in the offd block of p which is to be multiplied
     by data from this processor.
     For A*B, send_map_elmts[i] is therefore a row of B belonging to this
     processor, to be sent to p.  For A*A^T, send_map_elmts[i] is a row of A
     belonging to this processor, to be sent to p; this row was selected
     because it has a nonzero on a _column_ needed by p.
*/
   num_sends = num_procs;   /* may turn out to be less, but we can't know yet */
   num_elmts = (num_procs-1)*num_rows_diag;
   /* ... a crude upper bound; should try to do better even if more comm required */
   send_procs = hypre_CTAlloc(HYPRE_Int, num_sends);
   send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1);
   send_map_elmts = hypre_CTAlloc(HYPRE_Int, num_elmts);
   row_marker = hypre_CTAlloc(HYPRE_Int,num_rows_diag);
 
   index = 0;
   index2 = 0; 
   send_map_starts[0] = 0;
   for (i=0; i < num_procs; i++) {
      send_map_starts[index+1] = send_map_starts[index];
      j = displs[i];
      pmatch = 0;
      for ( ir=0; ir<num_rows_diag; ++ir ) row_marker[ir] = 0;
      while ( j < displs[i+1])
      {
         num_elmts = recv_buf[j++];  /* no. of columns proc. i wants */
         for ( k=0; k<num_elmts; k++ ) {
            col = recv_buf[j++]; /* a global column no. at proc. i */
            for ( kc=0; kc<num_cols_offd; kc++ ) {
               if ( col_map_offd[kc]==col && i!=my_id ) {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
                  /* this would be right if we could send columns, but we can't ...
                     offset = first_col_diag;
                     ++send_map_starts[index+1];
                     send_map_elmts[index2++] = col - offset; */
                  /* Plan to send all of my rows which use this column... */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag, 
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd
                     );
                  for ( ir=rowmin; ir<=rowmax; ++ir ) {
                     if ( row_marker[ir]==0 ) {
                        row_marker[ir] = 1;
                        ++send_map_starts[index+1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }
/* alternative way of doing the following for-loop:
            for ( kc=0; kc<num_cols_diag; kc++ ) {
               if ( kc+first_col_diag==col && i!=my_id ) {
               / * this processor has the same column as proc. i (but is different) * /
                  pmatch = 1;
/ * this would be right if we could send columns, but we can't ... >>> * /
                  send_procs[index] = i;
                  ++send_map_starts[index+1];
                  send_map_elmts[index2++] = col - offset;
                  / * Plan to send all of my rows which use this column... * /
                  / * NOT DONE * /
               }
            }
*/
            for ( kc=row_starts[my_id]; kc<row_starts[my_id+1]; kc++ ) {
               if ( kc==col && i!=my_id ) {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
/* this would be right if we could send columns, but we can't ... >>> 
                  ++send_map_starts[index+1];
                  send_map_elmts[index2++] = col - offset;*/
                  /* Plan to send all of my rows which use this column... */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag, 
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd
                     );
                  for ( ir=rowmin; ir<=rowmax; ++ir ) {
                     if ( row_marker[ir]==0 ) {
                        row_marker[ir] = 1;
                        ++send_map_starts[index+1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }
         }
      }
      if ( pmatch ) index++;
   }
   num_sends = index;  /* no. of proc. rows will be sent to */

/* Compute receive arrays recv_procs, recv_vec_starts ... */
   recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs);
   recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1);
   j2 = 0;
   for (i=0; i < num_procs; i++) {
      if ( i!=my_id ) { recv_procs[j2] = i; j2++; };
   };

/* Compute recv_vec_starts.
   The real job is, for each processor p, to figure out how many rows
   p will send to me (me=this processor).  I now know how many (and which)
   rows I will send to each p.  Indeed, if send_procs[index]=p, then the
   number is send_map_starts[index+1]-send_map_starts[index].
   More communication is needed.
   options:
   hypre_MPI_Allgather of communication sizes. <--- my choice, for now
     good: simple   bad: send num_procs*num_sends data, only need num_procs
                    but: not that much data compared to previous communication
   hypre_MPI_Allgatherv of communication sizes, only for pairs of procs. that communicate
     good: less data than above   bad: need extra commun. step to get recvcounts
   hypre_MPI_ISend,hypre_MPI_IRecv of each size, separately between each pair of procs.
     good: no excess data sent   bad: lots of little messages
                                 but: Allgather might be done the same under the hood
     may be much slower than Allgather or may be a bit faster depending on
     implementations
*/
   send_buf = hypre_CTAlloc( HYPRE_Int, 3*num_sends );
   all_num_sends3 = hypre_CTAlloc( HYPRE_Int, num_procs );

   /* scatter-gather num_sends, to set up the size for the main comm. step */
   i = 3*num_sends;
   hypre_MPI_Allgather( &i, 1, HYPRE_MPI_INT, all_num_sends3, 1, HYPRE_MPI_INT, comm );
   displs[0] = 0;
   for ( p=0; p<num_procs; ++p ) {
      displs[p+1] = displs[p] + all_num_sends3[p];
   };
   recv_sz_buf = hypre_CTAlloc( HYPRE_Int, displs[num_procs] );
   
   /* scatter-gather size of row info to send, and proc. to send to */
   index = 0;
   for ( i=0; i<num_sends; ++i ) {
      send_buf[index++] = send_procs[i];   /* processor to send to */
      send_buf[index++] = my_id;
      send_buf[index++] = send_map_starts[i+1] - send_map_starts[i];
      /* ... sizes of info to send */
   };

   hypre_MPI_Allgatherv( send_buf, 3*num_sends, HYPRE_MPI_INT,
                   recv_sz_buf, all_num_sends3, displs, HYPRE_MPI_INT, comm);

   recv_vec_starts[0] = 0;
   j2 = 0;  j = 0;
   for ( i=0; i<displs[num_procs]; i=i+3 ) {
      j = i;
      if ( recv_sz_buf[j++]==my_id ) {
         recv_procs[j2] = recv_sz_buf[j++];
         recv_vec_starts[j2+1] = recv_vec_starts[j2] + recv_sz_buf[j++];
         j2++;
      }
   }
   num_recvs = j2;

#if 0
   hypre_printf("num_procs=%i send_map_starts (%i):",num_procs,num_sends+1);
   for( i=0; i<=num_sends; ++i ) hypre_printf(" %i", send_map_starts[i] );
   hypre_printf("  send_procs (%i):",num_sends);
   for( i=0; i<num_sends; ++i ) hypre_printf(" %i", send_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_sends=%i send_buf[0,1,2]=%i %i %i",
          my_id, num_sends, send_buf[0], send_buf[1], send_buf[2] );
   hypre_printf(" all_num_sends3[0,1]=%i %i\n", all_num_sends3[0], all_num_sends3[1] );
   hypre_printf("my_id=%i rcv_sz_buf (%i):", my_id, displs[num_procs] );
   for( i=0; i<displs[num_procs]; ++i ) hypre_printf(" %i", recv_sz_buf[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i recv_vec_starts (%i):",my_id,num_recvs+1);
   for( i=0; i<=num_recvs; ++i ) hypre_printf(" %i", recv_vec_starts[i] );
   hypre_printf("  recv_procs (%i):",num_recvs);
   for( i=0; i<num_recvs; ++i ) hypre_printf(" %i", recv_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_recvs=%i recv_sz_buf[0,1,2]=%i %i %i\n",
          my_id, num_recvs, recv_sz_buf[0], recv_sz_buf[1], recv_sz_buf[2] );
#endif

   hypre_TFree(send_buf);
   hypre_TFree(all_num_sends3);
   hypre_TFree(tmp);
   hypre_TFree(recv_buf);
   hypre_TFree(displs);
   hypre_TFree(info);
   hypre_TFree(recv_sz_buf);
   hypre_TFree(row_marker);


   /* finish up with the hand-coded call-by-reference... */
   *p_num_recvs = num_recvs;
   *p_recv_procs = recv_procs;
   *p_recv_vec_starts = recv_vec_starts;
   *p_num_sends = num_sends;
   *p_send_procs = send_procs;
   *p_send_map_starts = send_map_starts;
   *p_send_map_elmts = send_map_elmts;

}
Ejemplo n.º 2
0
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData *amg_data,
                   HYPRE_Int p_level,
                   hypre_ParVector  **Par_F_array,
                   hypre_ParVector  **Par_U_array   )
{
   
   hypre_ParVector    *Aux_U;
   hypre_ParVector    *Aux_F;

   /* Local variables  */

   HYPRE_Int       Solve_err_flag = 0;

   HYPRE_Int n;
   HYPRE_Int i;
   
   hypre_Vector   *u_local;
   double         *u_data;
   
   HYPRE_Int	   first_index;
   
   /* Acquire seq data */
   MPI_Comm new_comm = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix *A_coarse = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector *F_coarse = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector *U_coarse = hypre_ParAMGDataUCoarse(amg_data);

   Aux_U = Par_U_array[p_level];
   Aux_F = Par_F_array[p_level];

   first_index = hypre_ParVectorFirstIndex(Aux_U);
   u_local = hypre_ParVectorLocalVector(Aux_U);
   u_data  = hypre_VectorData(u_local);
   n =  hypre_VectorSize(u_local);


   if (A_coarse)
   {
      double         *f_data;
      hypre_Vector   *f_local;
      hypre_Vector   *tmp_vec;
      
      HYPRE_Int nf;
      HYPRE_Int local_info;
      double *recv_buf;
      HYPRE_Int *displs, *info;
      HYPRE_Int size;
      HYPRE_Int new_num_procs;
      
      hypre_MPI_Comm_size(new_comm, &new_num_procs);

      f_local = hypre_ParVectorLocalVector(Aux_F);
      f_data = hypre_VectorData(f_local);
      nf =  hypre_VectorSize(f_local);

      /* first f */
      info = hypre_CTAlloc(HYPRE_Int, new_num_procs);
      local_info = nf;
      hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm);

      displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
      displs[0] = 0;
      for (i=1; i < new_num_procs+1; i++)
         displs[i] = displs[i-1]+info[i-1]; 
      size = displs[new_num_procs];
      
      tmp_vec =  hypre_ParVectorLocalVector(F_coarse);
      recv_buf = hypre_VectorData(tmp_vec);

      hypre_MPI_Allgatherv ( f_data, nf, hypre_MPI_DOUBLE, 
                          recv_buf, info, displs, 
                          hypre_MPI_DOUBLE, new_comm );

      tmp_vec =  hypre_ParVectorLocalVector(U_coarse);
      recv_buf = hypre_VectorData(tmp_vec);
      
      /*then u */
      hypre_MPI_Allgatherv ( u_data, n, hypre_MPI_DOUBLE, 
                       recv_buf, info, displs, 
                       hypre_MPI_DOUBLE, new_comm );
         
      /* clean up */
      hypre_TFree(displs);
      hypre_TFree(info);

      hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);      

      /*copy my part of U to parallel vector */
      {
         double *local_data;

         local_data =  hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));

         for (i = 0; i < n; i++)
         {
            u_data[i] = local_data[first_index+i];
         }
      }
   }

   return(Solve_err_flag);
}
Ejemplo n.º 3
0
HYPRE_Int hypre_seqAMGSetup( hypre_ParAMGData *amg_data,
                      HYPRE_Int p_level,
                      HYPRE_Int coarse_threshold)


{

   /* Par Data Structure variables */
   hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data);

   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(Par_A_array[0]); 
   MPI_Comm 	      new_comm, seq_comm;

   hypre_ParCSRMatrix   *A_seq = NULL;
   hypre_CSRMatrix  *A_seq_diag;
   hypre_CSRMatrix  *A_seq_offd;
   hypre_ParVector   *F_seq = NULL;
   hypre_ParVector   *U_seq = NULL;

   hypre_ParCSRMatrix *A;

   HYPRE_Int               **dof_func_array;   
   HYPRE_Int                num_procs, my_id;

   HYPRE_Int                not_finished_coarsening;
   HYPRE_Int                level;

   HYPRE_Solver  coarse_solver;

   /* misc */
   dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data);

   /*MPI Stuff */
   hypre_MPI_Comm_size(comm, &num_procs);   
   hypre_MPI_Comm_rank(comm,&my_id);
  
   /*initial */
   level = p_level;
   
   not_finished_coarsening = 1;
  
   /* convert A at this level to sequential */
   A = Par_A_array[level];

   {
      double *A_seq_data = NULL;
      HYPRE_Int *A_seq_i = NULL;
      HYPRE_Int *A_seq_offd_i = NULL;
      HYPRE_Int *A_seq_j = NULL;

      double *A_tmp_data = NULL;
      HYPRE_Int *A_tmp_i = NULL;
      HYPRE_Int *A_tmp_j = NULL;

      HYPRE_Int *info, *displs, *displs2;
      HYPRE_Int i, j, size, num_nonzeros, total_nnz, cnt;
  
      hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
      hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
      HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
      HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
      HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
      HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
      HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
      double *A_diag_data = hypre_CSRMatrixData(A_diag);
      double *A_offd_data = hypre_CSRMatrixData(A_offd);
      HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
      HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);

      hypre_MPI_Group orig_group, new_group; 
      HYPRE_Int *ranks, new_num_procs, *row_starts;

      info = hypre_CTAlloc(HYPRE_Int, num_procs);

      hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

      ranks = hypre_CTAlloc(HYPRE_Int, num_procs);

      new_num_procs = 0;
      for (i=0; i < num_procs; i++)
         if (info[i]) 
         {
            ranks[new_num_procs] = i;
            info[new_num_procs++] = info[i];
         }

      MPI_Comm_group(comm, &orig_group);
      hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group);
      MPI_Comm_create(comm, new_group, &new_comm);
      hypre_MPI_Group_free(&new_group);
      hypre_MPI_Group_free(&orig_group);

      if (num_rows)
      {
         /* alloc space in seq data structure only for participating procs*/
         HYPRE_BoomerAMGCreate(&coarse_solver);
         HYPRE_BoomerAMGSetMaxRowSum(coarse_solver,
		hypre_ParAMGDataMaxRowSum(amg_data)); 
         HYPRE_BoomerAMGSetStrongThreshold(coarse_solver,
		hypre_ParAMGDataStrongThreshold(amg_data)); 
         HYPRE_BoomerAMGSetCoarsenType(coarse_solver,
		hypre_ParAMGDataCoarsenType(amg_data)); 
         HYPRE_BoomerAMGSetInterpType(coarse_solver,
		hypre_ParAMGDataInterpType(amg_data)); 
         HYPRE_BoomerAMGSetTruncFactor(coarse_solver, 
		hypre_ParAMGDataTruncFactor(amg_data)); 
         HYPRE_BoomerAMGSetPMaxElmts(coarse_solver, 
		hypre_ParAMGDataPMaxElmts(amg_data)); 
	 if (hypre_ParAMGDataUserRelaxType(amg_data) > -1) 
            HYPRE_BoomerAMGSetRelaxType(coarse_solver, 
		hypre_ParAMGDataUserRelaxType(amg_data)); 
         HYPRE_BoomerAMGSetRelaxOrder(coarse_solver, 
		hypre_ParAMGDataRelaxOrder(amg_data)); 
         HYPRE_BoomerAMGSetRelaxWt(coarse_solver, 
		hypre_ParAMGDataUserRelaxWeight(amg_data)); 
	 if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1) 
            HYPRE_BoomerAMGSetNumSweeps(coarse_solver, 
		hypre_ParAMGDataUserNumSweeps(amg_data)); 
         HYPRE_BoomerAMGSetNumFunctions(coarse_solver, 
		hypre_ParAMGDataNumFunctions(amg_data)); 
         HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1); 
         HYPRE_BoomerAMGSetTol(coarse_solver, 0); 

         /* Create CSR Matrix, will be Diag part of new matrix */
         A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows+1);

         A_tmp_i[0] = 0;
         for (i=1; i < num_rows+1; i++)
            A_tmp_i[i] = A_diag_i[i]-A_diag_i[i-1]+A_offd_i[i]-A_offd_i[i-1];

         num_nonzeros = A_offd_i[num_rows]+A_diag_i[num_rows];

         A_tmp_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
         A_tmp_data = hypre_CTAlloc(double, num_nonzeros);

         cnt = 0;
         for (i=0; i < num_rows; i++)
         {
            for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++)
	    {
	       A_tmp_j[cnt] = A_diag_j[j]+first_row_index;
	       A_tmp_data[cnt++] = A_diag_data[j];
	    }
            for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++)
	    {
	       A_tmp_j[cnt] = col_map_offd[A_offd_j[j]];
	       A_tmp_data[cnt++] = A_offd_data[j];
	    }
         }

         displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
         displs[0] = 0;
         for (i=1; i < new_num_procs+1; i++)
            displs[i] = displs[i-1]+info[i-1];
         size = displs[new_num_procs];
  
         A_seq_i = hypre_CTAlloc(HYPRE_Int, size+1);
         A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1);

         hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, 
			displs, HYPRE_MPI_INT, new_comm );

         displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);

         A_seq_i[0] = 0;
         displs2[0] = 0;
         for (j=1; j < displs[1]; j++)
            A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
         for (i=1; i < new_num_procs; i++)
         {
            for (j=displs[i]; j < displs[i+1]; j++)
            {
               A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
            }
         }
         A_seq_i[size] = A_seq_i[size]+A_seq_i[size-1];
         displs2[new_num_procs] = A_seq_i[size];
         for (i=1; i < new_num_procs+1; i++)
         {
            displs2[i] = A_seq_i[displs[i]];
            info[i-1] = displs2[i] - displs2[i-1];
         }

         total_nnz = displs2[new_num_procs];
         A_seq_j = hypre_CTAlloc(HYPRE_Int, total_nnz);
         A_seq_data = hypre_CTAlloc(double, total_nnz);

         hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT,
                       A_seq_j, info, displs2,
                       HYPRE_MPI_INT, new_comm );

         hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, hypre_MPI_DOUBLE,
                       A_seq_data, info, displs2,
                       hypre_MPI_DOUBLE, new_comm );

         hypre_TFree(displs);
         hypre_TFree(displs2);
         hypre_TFree(A_tmp_i);
         hypre_TFree(A_tmp_j);
         hypre_TFree(A_tmp_data);
   
         row_starts = hypre_CTAlloc(HYPRE_Int,2);
         row_starts[0] = 0; 
         row_starts[1] = size;
 
         /* Create 1 proc communicator */
         seq_comm = hypre_MPI_COMM_SELF;

         A_seq = hypre_ParCSRMatrixCreate(seq_comm,size,size,
					  row_starts, row_starts,
						0,total_nnz,0); 

         A_seq_diag = hypre_ParCSRMatrixDiag(A_seq);
         A_seq_offd = hypre_ParCSRMatrixOffd(A_seq);

         hypre_CSRMatrixData(A_seq_diag) = A_seq_data;
         hypre_CSRMatrixI(A_seq_diag) = A_seq_i;
         hypre_CSRMatrixJ(A_seq_diag) = A_seq_j;
         hypre_CSRMatrixI(A_seq_offd) = A_seq_offd_i;

         F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         hypre_ParVectorOwnsPartitioning(F_seq) = 0;
         hypre_ParVectorOwnsPartitioning(U_seq) = 0;
         hypre_ParVectorInitialize(F_seq);
         hypre_ParVectorInitialize(U_seq);

         hypre_BoomerAMGSetup(coarse_solver,A_seq,F_seq,U_seq);

         hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver;
         hypre_ParAMGDataACoarse(amg_data) = A_seq;
         hypre_ParAMGDataFCoarse(amg_data) = F_seq;
         hypre_ParAMGDataUCoarse(amg_data) = U_seq;
         hypre_ParAMGDataNewComm(amg_data) = new_comm;
      }
      hypre_TFree(info);
      hypre_TFree(ranks);
   }
 
   return 0;
   
   
}
Ejemplo n.º 4
0
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData *amg_data,
                   HYPRE_Int p_level,
                   hypre_ParVector  **Par_F_array,
                   hypre_ParVector  **Par_U_array   )
{
   
   hypre_ParVector    *Aux_U;
   hypre_ParVector    *Aux_F;

   /* Local variables  */

   HYPRE_Int       Solve_err_flag = 0;

   HYPRE_Int n;
   HYPRE_Int i;
   
   hypre_Vector   *u_local;
   HYPRE_Real     *u_data;
   
   HYPRE_Int	   first_index;
   
   /* Acquire seq data */
   MPI_Comm new_comm = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix *A_coarse = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector *F_coarse = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector *U_coarse = hypre_ParAMGDataUCoarse(amg_data);
   HYPRE_Int redundant = hypre_ParAMGDataRedundant(amg_data);

   Aux_U = Par_U_array[p_level];
   Aux_F = Par_F_array[p_level];

   first_index = hypre_ParVectorFirstIndex(Aux_U);
   u_local = hypre_ParVectorLocalVector(Aux_U);
   u_data  = hypre_VectorData(u_local);
   n =  hypre_VectorSize(u_local);


   /*if (A_coarse)*/
   if (hypre_ParAMGDataParticipate(amg_data))
   {
      HYPRE_Real     *f_data;
      hypre_Vector   *f_local;
      hypre_Vector   *tmp_vec;
      
      HYPRE_Int nf;
      HYPRE_Int local_info;
      HYPRE_Real *recv_buf = NULL;
      HYPRE_Int *displs = NULL;
      HYPRE_Int *info = NULL;
      HYPRE_Int new_num_procs, my_id;
      
      hypre_MPI_Comm_size(new_comm, &new_num_procs);
      hypre_MPI_Comm_rank(new_comm, &my_id);

      f_local = hypre_ParVectorLocalVector(Aux_F);
      f_data = hypre_VectorData(f_local);
      nf =  hypre_VectorSize(f_local);

      /* first f */
      info = hypre_CTAlloc(HYPRE_Int, new_num_procs);
      local_info = nf;
      if (redundant)
         hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm);
      else
         hypre_MPI_Gather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, new_comm);

      if (redundant || my_id ==0)
      {
         displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
         displs[0] = 0;
         for (i=1; i < new_num_procs+1; i++)
            displs[i] = displs[i-1]+info[i-1]; 
      
         if (F_coarse) 
         {
            tmp_vec =  hypre_ParVectorLocalVector(F_coarse);
            recv_buf = hypre_VectorData(tmp_vec);
         }
      }

      if (redundant)
         hypre_MPI_Allgatherv ( f_data, nf, HYPRE_MPI_REAL,
                          recv_buf, info, displs,
                          HYPRE_MPI_REAL, new_comm );
      else
         hypre_MPI_Gatherv ( f_data, nf, HYPRE_MPI_REAL,
                          recv_buf, info, displs,
                          HYPRE_MPI_REAL, 0, new_comm );

      if (redundant || my_id ==0)
      {
         tmp_vec =  hypre_ParVectorLocalVector(U_coarse);
         recv_buf = hypre_VectorData(tmp_vec);
      }
      
      /*then u */
      if (redundant)
      {
         hypre_MPI_Allgatherv ( u_data, n, HYPRE_MPI_REAL,
                       recv_buf, info, displs,
                       HYPRE_MPI_REAL, new_comm );
         hypre_TFree(displs);
         hypre_TFree(info);
      }
      else
         hypre_MPI_Gatherv ( u_data, n, HYPRE_MPI_REAL,
                       recv_buf, info, displs,
                       HYPRE_MPI_REAL, 0, new_comm );
         
      /* clean up */
      if (redundant || my_id ==0)
      {
         hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);
      }

      /*copy my part of U to parallel vector */
      if (redundant)
      {
         HYPRE_Real *local_data;

         local_data =  hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));

         for (i = 0; i < n; i++)
         {
            u_data[i] = local_data[first_index+i];
         }
      }
      else
      {
         HYPRE_Real *local_data=NULL;

         if (my_id == 0)
            local_data =  hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));

         hypre_MPI_Scatterv ( local_data, info, displs, HYPRE_MPI_REAL,
                       u_data, n, HYPRE_MPI_REAL, 0, new_comm );
         /*if (my_id == 0)
            local_data =  hypre_VectorData(hypre_ParVectorLocalVector(F_coarse));
            hypre_MPI_Scatterv ( local_data, info, displs, HYPRE_MPI_REAL,
                       f_data, n, HYPRE_MPI_REAL, 0, new_comm );*/
         if (my_id == 0) hypre_TFree(displs);
         hypre_TFree(info);
      }
   }

   return(Solve_err_flag);
}