Example #1
0
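/* Presumably the single-process (no-MPI) stub variant: Allgatherv degenerates
   to an Allgather of recvcounts[0] elements, and displs is ignored. */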
HYPRE_Int
hypre_MPI_Allgatherv( void               *sendbuf,
                      HYPRE_Int           sendcount,
                      hypre_MPI_Datatype  sendtype,
                      void               *recvbuf,
                      HYPRE_Int          *recvcounts,
                      HYPRE_Int          *displs, 
                      hypre_MPI_Datatype  recvtype,
                      hypre_MPI_Comm      comm ) 
{ 
   return ( hypre_MPI_Allgather(sendbuf, sendcount, sendtype,
                                recvbuf, *recvcounts, recvtype, comm) );
}
Example #2
0
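/* Presumably also a single-process stub: a Scatter from the lone rank to
   itself is just a copy, so delegating to the Allgather stub (which copies
   sendbuf into recvbuf) gives the expected result; root is ignored. */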
HYPRE_Int
hypre_MPI_Scatter( void               *sendbuf,
                   HYPRE_Int           sendcount,
                   hypre_MPI_Datatype  sendtype,
                   void               *recvbuf,
                   HYPRE_Int           recvcount,
                   hypre_MPI_Datatype  recvtype,
                   HYPRE_Int           root,
                   hypre_MPI_Comm      comm )
{
   return ( hypre_MPI_Allgather(sendbuf, sendcount, sendtype,
                                recvbuf, recvcount, recvtype, comm) );
}
Example #3
0
HYPRE_Int
main( HYPRE_Int   argc,
      char *argv[] )
{


   HYPRE_Int        num_procs, myid;
   HYPRE_Int        verbose = 0, build_matrix_type = 1;
   HYPRE_Int        index, matrix_arg_index, commpkg_flag=3;
   HYPRE_Int        i, k, ierr=0;
   HYPRE_Int        row_start, row_end; 
   HYPRE_Int        col_start, col_end, global_num_rows;
   HYPRE_Int       *row_part, *col_part; 
   char      *csrfilename;
   HYPRE_Int        preload = 0, loop = 0, loop2 = LOOP2;   
   HYPRE_Int        bcast_rows[2], *info;
   


   hypre_ParCSRMatrix    *parcsr_A, *small_A;
   HYPRE_ParCSRMatrix    A_temp, A_temp_small; 
   hypre_CSRMatrix       *A_CSR;
   hypre_ParCSRCommPkg	 *comm_pkg;   

  
   HYPRE_Int                 nx, ny, nz;
   HYPRE_Int                 P, Q, R;
   HYPRE_Int                 p, q, r;
   HYPRE_Real          values[4];

   hypre_ParVector     *x_new;
   hypre_ParVector     *y_new, *y;
   HYPRE_Int                 *row_starts;
   HYPRE_Real          ans;
   HYPRE_Real          start_time, end_time, total_time, *loop_times;
   HYPRE_Real          T_avg, T_std;
   
   HYPRE_Int                   noparmprint = 0;
 
#if mydebug   
   HYPRE_Int  j, tmp_int;
#endif

   /*-----------------------------------------------------------
    * Initialize MPI
    *-----------------------------------------------------------*/


   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );



   /*-----------------------------------------------------------
    * default - is the laplacian (build_matrix_type = 2)
    *-----------------------------------------------------------*/

    
   build_matrix_type = 2;
   matrix_arg_index = argc;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/
 
   index = 1;
   while ( index < argc) 
   {
      if  ( strcmp(argv[index], "-verbose") == 0 )
      {
         index++;  
         verbose = 1;
      }
      else if ( strcmp(argv[index], "-fromonecsrfile") == 0 )
      {
         index++;
         build_matrix_type      = 1;      
         matrix_arg_index = index; /*this tells where the name is*/
      }
      else if  ( strcmp(argv[index], "-commpkg") == 0 )
      {
         index++;  
         commpkg_flag = atoi(argv[index++]);
      }
      else if ( strcmp(argv[index], "-laplacian") == 0 )
      {
         index++;
         build_matrix_type      = 2;
         matrix_arg_index = index;
      }
      else if ( strcmp(argv[index], "-27pt") == 0 )
      {
         index++;
         build_matrix_type      = 4;
         matrix_arg_index = index;
      }
/*
      else if  ( strcmp(argv[index], "-nopreload") == 0 )
      {
         index++;  
         preload = 0;
      }
*/
      else if  ( strcmp(argv[index], "-loop") == 0 )
      {
         index++;  
         loop = atoi(argv[index++]);
      }
      else if  ( strcmp(argv[index], "-noparmprint") == 0 )
      {
         index++;  
         noparmprint = 1;
         
      }
      else  
      {
	 index++;
         /*hypre_printf("Warning: Unrecogized option '%s'\n",argv[index++] );*/
      }
   }
   
   
  
   /*-----------------------------------------------------------
    * Setup the Matrix problem   
    *-----------------------------------------------------------*/

  /*-----------------------------------------------------------
    *  Get actual partitioning- 
    *  read in an actual csr matrix.
    *-----------------------------------------------------------*/


   if (build_matrix_type ==1) /*read in a csr matrix from one file */
   {
      if (matrix_arg_index < argc)
      {
	 csrfilename = argv[matrix_arg_index];
      }
      else
      {
         hypre_printf("Error: No filename specified \n");
         exit(1);
      }
      if (myid == 0)
      {
	/*hypre_printf("  FromFile: %s\n", csrfilename);*/
         A_CSR = hypre_CSRMatrixRead(csrfilename);
      }
      row_part = NULL;
      col_part = NULL;

      parcsr_A = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, A_CSR, 
					       row_part, col_part);

      if (myid == 0) hypre_CSRMatrixDestroy(A_CSR);
   }
   else if (build_matrix_type ==2)
   {
      
      myBuildParLaplacian(argc, argv, matrix_arg_index,  &A_temp, !noparmprint);
     parcsr_A = (hypre_ParCSRMatrix *) A_temp;      
 
   }
   else if (build_matrix_type ==4)
   {
      myBuildParLaplacian27pt(argc, argv, matrix_arg_index, &A_temp, !noparmprint);
     parcsr_A = (hypre_ParCSRMatrix *) A_temp;
   }

 
  /*-----------------------------------------------------------
   * create a small problem so that timings are more accurate - 
   * code gets run twice (small laplace)
   *-----------------------------------------------------------*/

   /*this is no longer being used - preload = 0 is set at the beginning */

   if (preload == 1) 
   {
 
      /*hypre_printf("preload!\n");*/
      
        
       values[1] = -1;
       values[2] = -1;
       values[3] = -1;
       values[0] = -6.0;

       nx = 2;
       ny = num_procs;
       nz = 2;

       P  = 1;
       Q  = num_procs;
       R  = 1;

       p = myid % P;
       q = (( myid - p)/P) % Q;
       r = ( myid - p - P*q)/( P*Q );
       
      A_temp_small = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD, nx, ny, nz, 
				      P, Q, R, p, q, r, values);
      small_A = (hypre_ParCSRMatrix *) A_temp_small;     

      /*do comm packages*/
      hypre_NewCommPkgCreate(small_A);
      hypre_NewCommPkgDestroy(small_A); 

      hypre_MatvecCommPkgCreate(small_A);
      hypre_ParCSRMatrixDestroy(small_A); 
  
   }





   /*-----------------------------------------------------------
    *  Prepare for timing
    *-----------------------------------------------------------*/

   /* instead of preloading, let's not time the first one if more than one*/

    
   if (!loop)
   {
      loop = 1;
      /* and don't do any timings */
      
   }
   else
   {
      
      loop +=1;
      if (loop < 2) loop = 2;
   }
      
   
   loop_times = hypre_CTAlloc(HYPRE_Real, loop);
   


/******************************************************************************************/   

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

   if (commpkg_flag == 1 || commpkg_flag ==3 )
   {
  
      /*-----------------------------------------------------------
       *  Create new comm package
       *-----------------------------------------------------------*/


    
      if (!myid) hypre_printf("********************************************************\n" );  
 
      /*do loop times*/
      for (i=0; i< loop; i++) 
      {
         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++) 
         {
         
            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
            
            start_time = hypre_MPI_Wtime();

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(1); 
#endif
     
            hypre_NewCommPkgCreate(parcsr_A);

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(0); 
#endif  
  
            end_time = hypre_MPI_Wtime();
            
            end_time = end_time - start_time;
        
            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                       HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);
         
            loop_times[i] += total_time;

            if (  !((i+1)== loop  &&  (k+1) == loop2)) hypre_NewCommPkgDestroy(parcsr_A); 
            
         }/*end of loop2 */
      
        
      } /*end of loop*/
      


      /* calculate the avg and std. */
      if (loop > 1)
      {
         
         /* calculate the avg and std. */
         stats_mo(loop_times, loop, &T_avg, &T_std);
      
         if (!myid) hypre_printf(" NewCommPkgCreate:  AVG. wall clock time =  %f seconds\n", T_avg);  
         if (!myid) hypre_printf("                    STD. for %d  runs     =  %f\n", loop-1, T_std);  
         if (!myid) hypre_printf("                    (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );  
         for (i=0; i< loop; i++) 
         {
            if (!myid) hypre_printf("      run %d  =  %f sec.\n", i, loop_times[i]);  
         }
         if (!myid) hypre_printf("********************************************************\n" );  
   
       }
       else 
       {
         if (!myid) hypre_printf("********************************************************\n" );  
         if (!myid) hypre_printf(" NewCommPkgCreate:\n");  
         if (!myid) hypre_printf("      run time =  %f sec.\n", loop_times[0]);  
         if (!myid) hypre_printf("********************************************************\n" );  
       }


     /*-----------------------------------------------------------
       *  Verbose printing
       *-----------------------------------------------------------*/

      /*some verification*/

       global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); 

       if (verbose) 
       {

	  ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
                                      &row_start, &row_end ,
                                       &col_start, &col_end );


	  comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);
     
          hypre_printf("myid = %i, my ACTUAL local range: [%i, %i]\n", myid, 
		 row_start, row_end);
	  
	
	  ierr = hypre_GetAssumedPartitionRowRange( myid, global_num_rows, &row_start, 
					      &row_end);


	  hypre_printf("myid = %i, my assumed local range: [%i, %i]\n", myid, 
		 row_start, row_end);

          hypre_printf("myid = %d, num_recvs = %d\n", myid, 
		 hypre_ParCSRCommPkgNumRecvs(comm_pkg)  );  

#if mydebug   
	  for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) 
	  {
              hypre_printf("myid = %d, recv proc = %d, vec_starts = [%d : %d]\n", 
		     myid,  hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], 
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
	   }
#endif 
	  hypre_printf("myid = %d, num_sends = %d\n", myid, 
		 hypre_ParCSRCommPkgNumSends(comm_pkg)  );  

#if mydebug
	  for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) 
          {
	    tmp_int =  hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -  
                     hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	    index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	    for (j=0; j< tmp_int; j++) 
	    {
	       hypre_printf("myid = %d, send proc = %d, send element = %d\n",myid,  
		      hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
		      hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); 
	     }   
	  }
#endif
       }
       /*-----------------------------------------------------------
        *  To verify correctness (if commpkg_flag = 3)
        *-----------------------------------------------------------*/

       if (commpkg_flag == 3 ) 
       {
          /*do a matvec - we are assuming a square matrix */
          row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);
   
          x_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
          hypre_ParVectorSetPartitioningOwner(x_new, 0);
          hypre_ParVectorInitialize(x_new);
          hypre_ParVectorSetRandomValues(x_new, 1);    
          
          y_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
          hypre_ParVectorSetPartitioningOwner(y_new, 0);
          hypre_ParVectorInitialize(y_new);
          hypre_ParVectorSetConstantValues(y_new, 0.0);
          
          /*y = 1.0*A*x+1.0*y */
          hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y_new);
       }
   
   /*-----------------------------------------------------------
    *  Clean up after MyComm
    *-----------------------------------------------------------*/


       hypre_NewCommPkgDestroy(parcsr_A); 

   }

  




/******************************************************************************************/
/******************************************************************************************/

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);


   if (commpkg_flag > 1 )
   {

      /*-----------------------------------------------------------
       *  Set up standard comm package
       *-----------------------------------------------------------*/

      bcast_rows[0] = 23;
      bcast_rows[1] = 1789;
      
      if (!myid) hypre_printf("********************************************************\n" );  
      /*do loop times*/
      for (i=0; i< loop; i++) 
      {

         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++) 
         {
            

            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

         
            start_time = hypre_MPI_Wtime();

#if time_gather
                  
            info = hypre_CTAlloc(HYPRE_Int, num_procs);
            
            hypre_MPI_Allgather(bcast_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hypre_MPI_COMM_WORLD); 

#endif

            hypre_MatvecCommPkgCreate(parcsr_A);

            end_time = hypre_MPI_Wtime();


            end_time = end_time - start_time;
        
            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                          HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);

            loop_times[i] += total_time;
         
       
         if (  !((i+1)== loop  &&  (k+1) == loop2))   hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(parcsr_A));
               
         }/* end of loop 2*/
         
        
      } /*end of loop*/
      
      /* calculate the avg and std. */
      if (loop > 1)
      {
         
         stats_mo(loop_times, loop, &T_avg, &T_std);      
         if (!myid) hypre_printf("Current CommPkgCreate:  AVG. wall clock time =  %f seconds\n", T_avg);  
         if (!myid) hypre_printf("                        STD. for %d  runs     =  %f\n", loop-1, T_std);  
         if (!myid) hypre_printf("                        (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );  
         for (i=0; i< loop; i++) 
         {
            if (!myid) hypre_printf("      run %d  =  %f sec.\n", i, loop_times[i]);  
         }
         if (!myid) hypre_printf("********************************************************\n" );  
         
      }
      else 
      {
         if (!myid) hypre_printf("********************************************************\n" );  
         if (!myid) hypre_printf(" Current CommPkgCreate:\n");  
         if (!myid) hypre_printf("      run time =  %f sec.\n", loop_times[0]);  
         if (!myid) hypre_printf("********************************************************\n" );  
      }





      /*-----------------------------------------------------------
       * Verbose printing
       *-----------------------------------------------------------*/

      /*some verification*/

    
       if (verbose) 
       {

          ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
						  &row_start, &row_end ,
						  &col_start, &col_end );


          comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);
     
          hypre_printf("myid = %i, std - my local range: [%i, %i]\n", myid, 
		 row_start, row_end);

          ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
						  &row_start, &row_end ,
						  &col_start, &col_end );

          hypre_printf("myid = %d, std - num_recvs = %d\n", myid, 
		 hypre_ParCSRCommPkgNumRecvs(comm_pkg)  );  

#if mydebug   
	  for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) 
          {
              hypre_printf("myid = %d, std - recv proc = %d, vec_starts = [%d : %d]\n", 
		     myid,  hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], 
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
	  }
#endif
          hypre_printf("myid = %d, std - num_sends = %d\n", myid, 
		 hypre_ParCSRCommPkgNumSends(comm_pkg));  


#if mydebug
          for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) 
          {
	     tmp_int =  hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -  
	                hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	     index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	     for (j=0; j< tmp_int; j++) 
	     {
	        hypre_printf("myid = %d, std - send proc = %d, send element = %d\n",myid,  
		       hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
		       hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); 
	     }   
	  } 
#endif
       }

       /*-----------------------------------------------------------
        * Verify correctness
        *-----------------------------------------------------------*/

 

       if (commpkg_flag == 3 ) 
       { 
          global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); 
          row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);
 
       
          y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows,row_starts);
          hypre_ParVectorSetPartitioningOwner(y, 0);
          hypre_ParVectorInitialize(y);
          hypre_ParVectorSetConstantValues(y, 0.0);

          hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y);
      
       }

   }






   /*-----------------------------------------------------------
    *  Compare matvecs for both comm packages (3)
    *-----------------------------------------------------------*/

   if (commpkg_flag == 3 ) 
   { 
     /*make sure that y and y_new are the same  - now y_new should=0*/   
     hypre_ParVectorAxpy( -1.0, y, y_new );


     hypre_ParVectorSetRandomValues(y, 1);

     ans = hypre_ParVectorInnerProd( y, y_new );
     if (!myid)
     {
        
        if ( fabs(ans) > 1e-8 ) 
        {  
           hypre_printf("!!!!! WARNING !!!!! should be zero if correct = %6.10f\n", 
                  ans); 
        } 
        else
        {
           hypre_printf("Matvecs match ( should be zero = %6.10f )\n", 
                  ans); 
        }
     }
     

   }
 

   /*-----------------------------------------------------------
    *  Clean up
    *-----------------------------------------------------------*/

    
   hypre_ParCSRMatrixDestroy(parcsr_A); /*this calls the standard comm 
                                          package destroy - but we'll destroy 
                                          ours separately until it is
                                          incorporated */

  if (commpkg_flag == 3 ) 
  { 

      hypre_ParVectorDestroy(x_new);
      hypre_ParVectorDestroy(y);
      hypre_ParVectorDestroy(y_new);
  }




   hypre_MPI_Finalize();

   return(ierr);


}
Example #4
0
HYPRE_Int hypre_seqAMGSetup( hypre_ParAMGData *amg_data,
                      HYPRE_Int p_level,
                      HYPRE_Int coarse_threshold)


{

   /* Par Data Structure variables */
   hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data);

   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(Par_A_array[0]); 
   MPI_Comm 	      new_comm, seq_comm;

   hypre_ParCSRMatrix   *A_seq = NULL;
   hypre_CSRMatrix  *A_seq_diag;
   hypre_CSRMatrix  *A_seq_offd;
   hypre_ParVector   *F_seq = NULL;
   hypre_ParVector   *U_seq = NULL;

   hypre_ParCSRMatrix *A;

   HYPRE_Int               **dof_func_array;   
   HYPRE_Int                num_procs, my_id;

   HYPRE_Int                not_finished_coarsening;
   HYPRE_Int                level;

   HYPRE_Solver  coarse_solver;

   /* misc */
   dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data);

   /*MPI Stuff */
   hypre_MPI_Comm_size(comm, &num_procs);   
   hypre_MPI_Comm_rank(comm,&my_id);
  
   /*initial */
   level = p_level;
   
   not_finished_coarsening = 1;
  
   /* convert A at this level to sequential */
   A = Par_A_array[level];

   {
      double *A_seq_data = NULL;
      HYPRE_Int *A_seq_i = NULL;
      HYPRE_Int *A_seq_offd_i = NULL;
      HYPRE_Int *A_seq_j = NULL;

      double *A_tmp_data = NULL;
      HYPRE_Int *A_tmp_i = NULL;
      HYPRE_Int *A_tmp_j = NULL;

      HYPRE_Int *info, *displs, *displs2;
      HYPRE_Int i, j, size, num_nonzeros, total_nnz, cnt;
  
      hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
      hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
      HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
      HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
      HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
      HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
      HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
      double *A_diag_data = hypre_CSRMatrixData(A_diag);
      double *A_offd_data = hypre_CSRMatrixData(A_offd);
      HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
      HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);

      hypre_MPI_Group orig_group, new_group; 
      HYPRE_Int *ranks, new_num_procs, *row_starts;

      info = hypre_CTAlloc(HYPRE_Int, num_procs);

      hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

      ranks = hypre_CTAlloc(HYPRE_Int, num_procs);

      new_num_procs = 0;
      for (i=0; i < num_procs; i++)
         if (info[i]) 
         {
            ranks[new_num_procs] = i;
            info[new_num_procs++] = info[i];
         }

      MPI_Comm_group(comm, &orig_group);
      hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group);
      MPI_Comm_create(comm, new_group, &new_comm);
      hypre_MPI_Group_free(&new_group);
      hypre_MPI_Group_free(&orig_group);

      if (num_rows)
      {
         /* alloc space in seq data structure only for participating procs*/
         HYPRE_BoomerAMGCreate(&coarse_solver);
         HYPRE_BoomerAMGSetMaxRowSum(coarse_solver,
		hypre_ParAMGDataMaxRowSum(amg_data)); 
         HYPRE_BoomerAMGSetStrongThreshold(coarse_solver,
		hypre_ParAMGDataStrongThreshold(amg_data)); 
         HYPRE_BoomerAMGSetCoarsenType(coarse_solver,
		hypre_ParAMGDataCoarsenType(amg_data)); 
         HYPRE_BoomerAMGSetInterpType(coarse_solver,
		hypre_ParAMGDataInterpType(amg_data)); 
         HYPRE_BoomerAMGSetTruncFactor(coarse_solver, 
		hypre_ParAMGDataTruncFactor(amg_data)); 
         HYPRE_BoomerAMGSetPMaxElmts(coarse_solver, 
		hypre_ParAMGDataPMaxElmts(amg_data)); 
	 if (hypre_ParAMGDataUserRelaxType(amg_data) > -1) 
            HYPRE_BoomerAMGSetRelaxType(coarse_solver, 
		hypre_ParAMGDataUserRelaxType(amg_data)); 
         HYPRE_BoomerAMGSetRelaxOrder(coarse_solver, 
		hypre_ParAMGDataRelaxOrder(amg_data)); 
         HYPRE_BoomerAMGSetRelaxWt(coarse_solver, 
		hypre_ParAMGDataUserRelaxWeight(amg_data)); 
	 if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1) 
            HYPRE_BoomerAMGSetNumSweeps(coarse_solver, 
		hypre_ParAMGDataUserNumSweeps(amg_data)); 
         HYPRE_BoomerAMGSetNumFunctions(coarse_solver, 
		hypre_ParAMGDataNumFunctions(amg_data)); 
         HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1); 
         HYPRE_BoomerAMGSetTol(coarse_solver, 0); 

         /* Create CSR Matrix, will be Diag part of new matrix */
         A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows+1);

         A_tmp_i[0] = 0;
         for (i=1; i < num_rows+1; i++)
            A_tmp_i[i] = A_diag_i[i]-A_diag_i[i-1]+A_offd_i[i]-A_offd_i[i-1];

         num_nonzeros = A_offd_i[num_rows]+A_diag_i[num_rows];

         A_tmp_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
         A_tmp_data = hypre_CTAlloc(double, num_nonzeros);

         cnt = 0;
         for (i=0; i < num_rows; i++)
         {
            for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++)
	    {
	       A_tmp_j[cnt] = A_diag_j[j]+first_row_index;
	       A_tmp_data[cnt++] = A_diag_data[j];
	    }
            for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++)
	    {
	       A_tmp_j[cnt] = col_map_offd[A_offd_j[j]];
	       A_tmp_data[cnt++] = A_offd_data[j];
	    }
         }

         displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
         displs[0] = 0;
         for (i=1; i < new_num_procs+1; i++)
            displs[i] = displs[i-1]+info[i-1];
         size = displs[new_num_procs];
  
         A_seq_i = hypre_CTAlloc(HYPRE_Int, size+1);
         A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1);

         hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, 
			displs, HYPRE_MPI_INT, new_comm );

         displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);

         A_seq_i[0] = 0;
         displs2[0] = 0;
         for (j=1; j < displs[1]; j++)
            A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
         for (i=1; i < new_num_procs; i++)
         {
            for (j=displs[i]; j < displs[i+1]; j++)
            {
               A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
            }
         }
         A_seq_i[size] = A_seq_i[size]+A_seq_i[size-1];
         displs2[new_num_procs] = A_seq_i[size];
         for (i=1; i < new_num_procs+1; i++)
         {
            displs2[i] = A_seq_i[displs[i]];
            info[i-1] = displs2[i] - displs2[i-1];
         }

         total_nnz = displs2[new_num_procs];
         A_seq_j = hypre_CTAlloc(HYPRE_Int, total_nnz);
         A_seq_data = hypre_CTAlloc(double, total_nnz);

         hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT,
                       A_seq_j, info, displs2,
                       HYPRE_MPI_INT, new_comm );

         hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, hypre_MPI_DOUBLE,
                       A_seq_data, info, displs2,
                       hypre_MPI_DOUBLE, new_comm );

         hypre_TFree(displs);
         hypre_TFree(displs2);
         hypre_TFree(A_tmp_i);
         hypre_TFree(A_tmp_j);
         hypre_TFree(A_tmp_data);
   
         row_starts = hypre_CTAlloc(HYPRE_Int,2);
         row_starts[0] = 0; 
         row_starts[1] = size;
 
         /* Create 1 proc communicator */
         seq_comm = hypre_MPI_COMM_SELF;

         A_seq = hypre_ParCSRMatrixCreate(seq_comm,size,size,
					  row_starts, row_starts,
						0,total_nnz,0); 

         A_seq_diag = hypre_ParCSRMatrixDiag(A_seq);
         A_seq_offd = hypre_ParCSRMatrixOffd(A_seq);

         hypre_CSRMatrixData(A_seq_diag) = A_seq_data;
         hypre_CSRMatrixI(A_seq_diag) = A_seq_i;
         hypre_CSRMatrixJ(A_seq_diag) = A_seq_j;
         hypre_CSRMatrixI(A_seq_offd) = A_seq_offd_i;

         F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         hypre_ParVectorOwnsPartitioning(F_seq) = 0;
         hypre_ParVectorOwnsPartitioning(U_seq) = 0;
         hypre_ParVectorInitialize(F_seq);
         hypre_ParVectorInitialize(U_seq);

         hypre_BoomerAMGSetup(coarse_solver,A_seq,F_seq,U_seq);

         hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver;
         hypre_ParAMGDataACoarse(amg_data) = A_seq;
         hypre_ParAMGDataFCoarse(amg_data) = F_seq;
         hypre_ParAMGDataUCoarse(amg_data) = U_seq;
         hypre_ParAMGDataNewComm(amg_data) = new_comm;
      }
      hypre_TFree(info);
      hypre_TFree(ranks);
   }
 
   return 0;
   
   
}
Example #5
0
void
hypre_MatTCommPkgCreate_core (

/* input args: */
   MPI_Comm comm, HYPRE_Int * col_map_offd, HYPRE_Int first_col_diag, HYPRE_Int * col_starts,
   HYPRE_Int num_rows_diag, HYPRE_Int num_cols_diag, HYPRE_Int num_cols_offd, HYPRE_Int * row_starts,
   HYPRE_Int firstColDiag, HYPRE_Int * colMapOffd,
   HYPRE_Int * mat_i_diag, HYPRE_Int * mat_j_diag, HYPRE_Int * mat_i_offd, HYPRE_Int * mat_j_offd,

   HYPRE_Int data,  /* = 1 for a matrix with floating-point data, =0 for Boolean matrix */

/* pointers to output args: */
   HYPRE_Int * p_num_recvs, HYPRE_Int ** p_recv_procs, HYPRE_Int ** p_recv_vec_starts,
   HYPRE_Int * p_num_sends, HYPRE_Int ** p_send_procs, HYPRE_Int ** p_send_map_starts,
   HYPRE_Int ** p_send_map_elmts

   )
{
   HYPRE_Int			num_sends;
   HYPRE_Int			*send_procs;
   HYPRE_Int			*send_map_starts;
   HYPRE_Int			*send_map_elmts;
   HYPRE_Int			num_recvs;
   HYPRE_Int			*recv_procs;
   HYPRE_Int			*recv_vec_starts;
   HYPRE_Int	i, j, j2, k, ir, rowmin, rowmax;
   HYPRE_Int	*tmp, *recv_buf, *displs, *info, *send_buf, *all_num_sends3;
   HYPRE_Int	num_procs, my_id, num_elmts;
   HYPRE_Int	local_info, index, index2;
   HYPRE_Int pmatch, col, kc, p;
   HYPRE_Int * recv_sz_buf;
   HYPRE_Int * row_marker;
   
   hypre_MPI_Comm_size(comm, &num_procs);  
   hypre_MPI_Comm_rank(comm, &my_id);

   info = hypre_CTAlloc(HYPRE_Int, num_procs);

/* ----------------------------------------------------------------------
 * determine which processors to receive from (set proc_mark) and num_recvs,
 * at the end of the loop proc_mark[i] contains the number of elements to be
 * received from Proc. i
 *
 * - For A*b or A*B: for each off-diagonal column i of A, you want to identify
 * the processor which has the corresponding element i of b (row i of B)
 * (columns in the local diagonal block, just multiply local rows of B).
 * You do it by finding the processor which has that column of A in its
 * _diagonal_ block - assuming b or B is distributed the same, which I believe
 * is evenly among processors, by row.  There is a unique solution because
 * the diag/offd blocking is defined by which processor owns which rows of A.
 *
 * - For A*A^T: A^T is not distributed by rows as B or any 'normal' matrix is.
 * For each off-diagonal row,column k,i element of A, you want to identify
 * the processors which have the corresponding row,column i,j elements of A^T
 * i.e., row,column j,i elements of A (all i,j,k for which these entries are
 * nonzero, row k of A lives on this processor, and row j of A lives on
 * a different processor).  So, given a column i in the local A-offd or A-diag,
 * we want to find all the processors which have column i, in diag or offd
 * blocks.  Unlike the A*B case, I don't think you can eliminate looking at
 * any class of blocks.
 * ---------------------------------------------------------------------*/

/* The algorithm for A*B was:
   For each of my columns i (in offd block), use known information on data
   distribution of columns in _diagonal_ blocks to find the processor p which
   owns row i. (Note that for i in diag block, I own the row, nothing to do.)
   Count up such i's for each processor in proc_mark.  Construct a data
   structure, recv_buf, made by appending a structure tmp from each processor.
   The data structure tmp looks like (p, no. of i's, i1, i2,...) (p=0,...) .
   There are two communication steps: gather size information (local_info) from
   all processors (into info), then gather the data (tmp) from all processors
   (into recv_buf).  Then you go through recv_buf.  For each (sink) processor p
   you search for the appearance of my (source) processor number
   (most of recv_buf pertains to other processors and is ignored).
   When you find the appropriate section, pull out the i's, count them and
   save them, in send_map_elmts, and save p in send_procs and index information
   in send_map_starts.
*/
/* The algorithm for A*A^T:
   [ Originally I had planned to figure out approximately which processors
   had the information (for A*B it could be done exactly) to save on
   communication.  But even for A*B where the data owner is known, all data is
   sent to all processors, so that's not worth worrying about on the first cut.
   One consequence is that proc_mark is not needed.]
   Construct a data structure, recv_buf, made by appending a structure tmp for
   each processor.  It simply consists of (no. of i's, i1, i2,...) where i is
   the global number of a column in the offd block.  There are still two
   communication steps: gather size information (local_info) from all processors
   (into info), then gather the data (tmp) from all processors (into recv_buf).
   Then you go through recv_buf.  For each (sink) processor p you go through
   all its column numbers in recv_buf.  Check each one for whether you have
   data in that column.  If so, put it in send_map_elmts, p in send_procs,
   and update the index information in send_map_starts.  Note that these
   arrays don't mean quite the same thing as for A*B.
*/

   num_recvs=num_procs-1;
   local_info = num_procs + num_cols_offd + num_cols_diag;
			
   hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm); 

/* ----------------------------------------------------------------------
 * generate information to be sent: tmp contains for each recv_proc:
 * {deleted: id of recv_procs}, number of elements to be received for this processor,
 * indices of elements (in this order)
 * ---------------------------------------------------------------------*/
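/* Illustrative sketch (hypothetical numbers): with num_procs = 3,
 * col_starts = {0, 4, 8, 12}, and this proc owning columns [8,12) with
 * offd columns {1, 5}, the loop below builds
 *    tmp = { 1, 1,   1, 5,   4, 8, 9, 10, 11 }
 * i.e. for each target proc a count followed by the columns falling in that
 * proc's col_starts range, so local_info = 3 + 2 + 4 = 9 entries. */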

   displs = hypre_CTAlloc(HYPRE_Int, num_procs+1);
   displs[0] = 0;
   for (i=1; i < num_procs+1; i++)
	displs[i] = displs[i-1]+info[i-1]; 
   recv_buf = hypre_CTAlloc(HYPRE_Int, displs[num_procs]); 
   tmp = hypre_CTAlloc(HYPRE_Int, local_info);

   j = 0;
   for (i=0; i < num_procs; i++) {
      j2 = j++;
      tmp[j2] = 0;
      for (k=0; k < num_cols_offd; k++)
         if (col_map_offd[k] >= col_starts[i] && 
             col_map_offd[k] < col_starts[i+1]) {
            tmp[j++] = col_map_offd[k];
            ++(tmp[j2]);
         };
      for (k=0; k < num_cols_diag; k++)
         if ( k+first_col_diag >= col_starts[i] && 
              k+first_col_diag < col_starts[i+1] ) {
            tmp[j++] = k + first_col_diag;
            ++(tmp[j2]);
         }
   }

   hypre_MPI_Allgatherv(tmp,local_info,HYPRE_MPI_INT,recv_buf,info,displs,HYPRE_MPI_INT,comm);
	

/* ----------------------------------------------------------------------
 * determine send_procs and actual elements to be sent (in send_map_elmts)
 * and send_map_starts whose i-th entry points to the beginning of the
 * elements to be sent to proc. i
 * ---------------------------------------------------------------------*/
/* Meanings of arrays being set here, more verbosely stated:
   send_procs: processors p to send to
   send_map_starts: for each p, gives range of indices in send_map_elmts; 
   send_map_elmts:  Each element is a send_map_elmts[i], with i in a range given
     by send_map_starts[p..p+1], for some p. This element is the global
     column number for a column in the offd block of p which is to be multiplied
     by data from this processor.
     For A*B, send_map_elmts[i] is therefore a row of B belonging to this
     processor, to be sent to p.  For A*A^T, send_map_elmts[i] is a row of A
     belonging to this processor, to be sent to p; this row was selected
     because it has a nonzero on a _column_ needed by p.
*/
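/* Illustrative sketch (hypothetical numbers): if this processor ends up
   sending rows {0, 2, 3} to proc 1 and rows {2, 4} to proc 4, then
      send_procs      = { 1, 4 }
      send_map_starts = { 0, 3, 5 }
      send_map_elmts  = { 0, 2, 3, 2, 4 }   (local row indices)
   and num_sends = 2. */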
   num_sends = num_procs;   /* may turn out to be less, but we can't know yet */
   num_elmts = (num_procs-1)*num_rows_diag;
   /* ... a crude upper bound; should try to do better even if more comm required */
   send_procs = hypre_CTAlloc(HYPRE_Int, num_sends);
   send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1);
   send_map_elmts = hypre_CTAlloc(HYPRE_Int, num_elmts);
   row_marker = hypre_CTAlloc(HYPRE_Int,num_rows_diag);
 
   index = 0;
   index2 = 0; 
   send_map_starts[0] = 0;
   for (i=0; i < num_procs; i++) {
      send_map_starts[index+1] = send_map_starts[index];
      j = displs[i];
      pmatch = 0;
      for ( ir=0; ir<num_rows_diag; ++ir ) row_marker[ir] = 0;
      while ( j < displs[i+1])
      {
         num_elmts = recv_buf[j++];  /* no. of columns proc. i wants */
         for ( k=0; k<num_elmts; k++ ) {
            col = recv_buf[j++]; /* a global column no. at proc. i */
            for ( kc=0; kc<num_cols_offd; kc++ ) {
               if ( col_map_offd[kc]==col && i!=my_id ) {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
                  /* this would be right if we could send columns, but we can't ...
                     offset = first_col_diag;
                     ++send_map_starts[index+1];
                     send_map_elmts[index2++] = col - offset; */
                  /* Plan to send all of my rows which use this column... */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag, 
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd
                     );
                  for ( ir=rowmin; ir<=rowmax; ++ir ) {
                     if ( row_marker[ir]==0 ) {
                        row_marker[ir] = 1;
                        ++send_map_starts[index+1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }
/* alternative way of doing the following for-loop:
            for ( kc=0; kc<num_cols_diag; kc++ ) {
               if ( kc+first_col_diag==col && i!=my_id ) {
               / * this processor has the same column as proc. i (but is different) * /
                  pmatch = 1;
/ * this would be right if we could send columns, but we can't ... >>> * /
                  send_procs[index] = i;
                  ++send_map_starts[index+1];
                  send_map_elmts[index2++] = col - offset;
                  / * Plan to send all of my rows which use this column... * /
                  / * NOT DONE * /
               }
            }
*/
            for ( kc=row_starts[my_id]; kc<row_starts[my_id+1]; kc++ ) {
               if ( kc==col && i!=my_id ) {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
/* this would be right if we could send columns, but we can't ... >>> 
                  ++send_map_starts[index+1];
                  send_map_elmts[index2++] = col - offset;*/
                  /* Plan to send all of my rows which use this column... */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag, 
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd
                     );
                  for ( ir=rowmin; ir<=rowmax; ++ir ) {
                     if ( row_marker[ir]==0 ) {
                        row_marker[ir] = 1;
                        ++send_map_starts[index+1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }
         }
      }
      if ( pmatch ) index++;
   }
   num_sends = index;  /* no. of proc. rows will be sent to */

/* Compute receive arrays recv_procs, recv_vec_starts ... */
   recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs);
   recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1);
   j2 = 0;
   for (i=0; i < num_procs; i++) {
      if ( i!=my_id ) { recv_procs[j2] = i; j2++; };
   };

/* Compute recv_vec_starts.
   The real job is, for each processor p, to figure out how many rows
   p will send to me (me=this processor).  I now know how many (and which)
   rows I will send to each p.  Indeed, if send_procs[index]=p, then the
   number is send_map_starts[index+1]-send_map_starts[index].
   More communication is needed.
   options:
   hypre_MPI_Allgather of communication sizes. <--- my choice, for now
     good: simple   bad: send num_procs*num_sends data, only need num_procs
                    but: not that much data compared to previous communication
   hypre_MPI_Allgatherv of communication sizes, only for pairs of procs. that communicate
     good: less data than above   bad: need extra commun. step to get recvcounts
   hypre_MPI_ISend,hypre_MPI_IRecv of each size, separately between each pair of procs.
     good: no excess data sent   bad: lots of little messages
                                 but: Allgather might be done the same under the hood
     may be much slower than Allgather or may be a bit faster depending on
     implementations
*/
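/* Sketch of the layout used below (hypothetical numbers): a proc with
   my_id = 3 and num_sends = 2, sending 5 rows to proc 1 and 2 rows to proc 4,
   contributes send_buf = { 1, 3, 5,   4, 3, 2 }; after the Allgatherv every
   proc walks recv_sz_buf in triples (dest, sender, count) and keeps the
   triples whose dest equals its own id. */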
   send_buf = hypre_CTAlloc( HYPRE_Int, 3*num_sends );
   all_num_sends3 = hypre_CTAlloc( HYPRE_Int, num_procs );

   /* scatter-gather num_sends, to set up the size for the main comm. step */
   i = 3*num_sends;
   hypre_MPI_Allgather( &i, 1, HYPRE_MPI_INT, all_num_sends3, 1, HYPRE_MPI_INT, comm );
   displs[0] = 0;
   for ( p=0; p<num_procs; ++p ) {
      displs[p+1] = displs[p] + all_num_sends3[p];
   };
   recv_sz_buf = hypre_CTAlloc( HYPRE_Int, displs[num_procs] );
   
   /* scatter-gather size of row info to send, and proc. to send to */
   index = 0;
   for ( i=0; i<num_sends; ++i ) {
      send_buf[index++] = send_procs[i];   /* processor to send to */
      send_buf[index++] = my_id;
      send_buf[index++] = send_map_starts[i+1] - send_map_starts[i];
      /* ... sizes of info to send */
   };

   hypre_MPI_Allgatherv( send_buf, 3*num_sends, HYPRE_MPI_INT,
                   recv_sz_buf, all_num_sends3, displs, HYPRE_MPI_INT, comm);

   recv_vec_starts[0] = 0;
   j2 = 0;  j = 0;
   for ( i=0; i<displs[num_procs]; i=i+3 ) {
      j = i;
      if ( recv_sz_buf[j++]==my_id ) {
         recv_procs[j2] = recv_sz_buf[j++];
         recv_vec_starts[j2+1] = recv_vec_starts[j2] + recv_sz_buf[j++];
         j2++;
      }
   }
   num_recvs = j2;

#if 0
   hypre_printf("num_procs=%i send_map_starts (%i):",num_procs,num_sends+1);
   for( i=0; i<=num_sends; ++i ) hypre_printf(" %i", send_map_starts[i] );
   hypre_printf("  send_procs (%i):",num_sends);
   for( i=0; i<num_sends; ++i ) hypre_printf(" %i", send_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_sends=%i send_buf[0,1,2]=%i %i %i",
          my_id, num_sends, send_buf[0], send_buf[1], send_buf[2] );
   hypre_printf(" all_num_sends3[0,1]=%i %i\n", all_num_sends3[0], all_num_sends3[1] );
   hypre_printf("my_id=%i rcv_sz_buf (%i):", my_id, displs[num_procs] );
   for( i=0; i<displs[num_procs]; ++i ) hypre_printf(" %i", recv_sz_buf[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i recv_vec_starts (%i):",my_id,num_recvs+1);
   for( i=0; i<=num_recvs; ++i ) hypre_printf(" %i", recv_vec_starts[i] );
   hypre_printf("  recv_procs (%i):",num_recvs);
   for( i=0; i<num_recvs; ++i ) hypre_printf(" %i", recv_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_recvs=%i recv_sz_buf[0,1,2]=%i %i %i\n",
          my_id, num_recvs, recv_sz_buf[0], recv_sz_buf[1], recv_sz_buf[2] );
#endif

   hypre_TFree(send_buf);
   hypre_TFree(all_num_sends3);
   hypre_TFree(tmp);
   hypre_TFree(recv_buf);
   hypre_TFree(displs);
   hypre_TFree(info);
   hypre_TFree(recv_sz_buf);
   hypre_TFree(row_marker);


   /* finish up with the hand-coded call-by-reference... */
   *p_num_recvs = num_recvs;
   *p_recv_procs = recv_procs;
   *p_recv_vec_starts = recv_vec_starts;
   *p_num_sends = num_sends;
   *p_send_procs = send_procs;
   *p_send_map_starts = send_map_starts;
   *p_send_map_elmts = send_map_elmts;

}
Example #6
0
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData *amg_data,
                   HYPRE_Int p_level,
                   hypre_ParVector  **Par_F_array,
                   hypre_ParVector  **Par_U_array   )
{
   
   hypre_ParVector    *Aux_U;
   hypre_ParVector    *Aux_F;

   /* Local variables  */

   HYPRE_Int       Solve_err_flag = 0;

   HYPRE_Int n;
   HYPRE_Int i;
   
   hypre_Vector   *u_local;
   HYPRE_Real     *u_data;
   
   HYPRE_Int	   first_index;
   
   /* Acquire seq data */
   MPI_Comm new_comm = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix *A_coarse = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector *F_coarse = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector *U_coarse = hypre_ParAMGDataUCoarse(amg_data);
   HYPRE_Int redundant = hypre_ParAMGDataRedundant(amg_data);

   Aux_U = Par_U_array[p_level];
   Aux_F = Par_F_array[p_level];

   first_index = hypre_ParVectorFirstIndex(Aux_U);
   u_local = hypre_ParVectorLocalVector(Aux_U);
   u_data  = hypre_VectorData(u_local);
   n =  hypre_VectorSize(u_local);


   /*if (A_coarse)*/
   if (hypre_ParAMGDataParticipate(amg_data))
   {
      HYPRE_Real     *f_data;
      hypre_Vector   *f_local;
      hypre_Vector   *tmp_vec;
      
      HYPRE_Int nf;
      HYPRE_Int local_info;
      HYPRE_Real *recv_buf = NULL;
      HYPRE_Int *displs = NULL;
      HYPRE_Int *info = NULL;
      HYPRE_Int new_num_procs, my_id;
      
      hypre_MPI_Comm_size(new_comm, &new_num_procs);
      hypre_MPI_Comm_rank(new_comm, &my_id);

      f_local = hypre_ParVectorLocalVector(Aux_F);
      f_data = hypre_VectorData(f_local);
      nf =  hypre_VectorSize(f_local);

      /* first f */
      info = hypre_CTAlloc(HYPRE_Int, new_num_procs);
      local_info = nf;
      if (redundant)
         hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm);
      else
         hypre_MPI_Gather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, new_comm);

      if (redundant || my_id ==0)
      {
         displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
         displs[0] = 0;
         for (i=1; i < new_num_procs+1; i++)
            displs[i] = displs[i-1]+info[i-1]; 
      
         if (F_coarse) 
         {
            tmp_vec =  hypre_ParVectorLocalVector(F_coarse);
            recv_buf = hypre_VectorData(tmp_vec);
         }
      }

      if (redundant)
         hypre_MPI_Allgatherv ( f_data, nf, HYPRE_MPI_REAL,
                          recv_buf, info, displs,
                          HYPRE_MPI_REAL, new_comm );
      else
         hypre_MPI_Gatherv ( f_data, nf, HYPRE_MPI_REAL,
                          recv_buf, info, displs,
                          HYPRE_MPI_REAL, 0, new_comm );

      if (redundant || my_id ==0)
      {
         tmp_vec =  hypre_ParVectorLocalVector(U_coarse);
         recv_buf = hypre_VectorData(tmp_vec);
      }
      
      /*then u */
      if (redundant)
      {
         hypre_MPI_Allgatherv ( u_data, n, HYPRE_MPI_REAL,
                       recv_buf, info, displs,
                       HYPRE_MPI_REAL, new_comm );
         hypre_TFree(displs);
         hypre_TFree(info);
      }
      else
         hypre_MPI_Gatherv ( u_data, n, HYPRE_MPI_REAL,
                       recv_buf, info, displs,
                       HYPRE_MPI_REAL, 0, new_comm );
         
      /* clean up */
      if (redundant || my_id ==0)
      {
         hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);
      }

      /*copy my part of U to parallel vector */
      if (redundant)
      {
         HYPRE_Real *local_data;

         local_data =  hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));

         for (i = 0; i < n; i++)
         {
            u_data[i] = local_data[first_index+i];
         }
      }
      else
      {
         HYPRE_Real *local_data=NULL;

         if (my_id == 0)
            local_data =  hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));

         hypre_MPI_Scatterv ( local_data, info, displs, HYPRE_MPI_REAL,
                       u_data, n, HYPRE_MPI_REAL, 0, new_comm );
         /*if (my_id == 0)
            local_data =  hypre_VectorData(hypre_ParVectorLocalVector(F_coarse));
            hypre_MPI_Scatterv ( local_data, info, displs, HYPRE_MPI_REAL,
                       f_data, n, HYPRE_MPI_REAL, 0, new_comm );*/
         if (my_id == 0) hypre_TFree(displs);
         hypre_TFree(info);
      }
   }

   return(Solve_err_flag);
}
Example #7
0
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData *amg_data,
                   HYPRE_Int p_level,
                   hypre_ParVector  **Par_F_array,
                   hypre_ParVector  **Par_U_array   )
{
   
   hypre_ParVector    *Aux_U;
   hypre_ParVector    *Aux_F;

   /* Local variables  */

   HYPRE_Int       Solve_err_flag = 0;

   HYPRE_Int n;
   HYPRE_Int i;
   
   hypre_Vector   *u_local;
   double         *u_data;
   
   HYPRE_Int	   first_index;
   
   /* Acquire seq data */
   MPI_Comm new_comm = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix *A_coarse = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector *F_coarse = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector *U_coarse = hypre_ParAMGDataUCoarse(amg_data);

   Aux_U = Par_U_array[p_level];
   Aux_F = Par_F_array[p_level];

   first_index = hypre_ParVectorFirstIndex(Aux_U);
   u_local = hypre_ParVectorLocalVector(Aux_U);
   u_data  = hypre_VectorData(u_local);
   n =  hypre_VectorSize(u_local);


   if (A_coarse)
   {
      double         *f_data;
      hypre_Vector   *f_local;
      hypre_Vector   *tmp_vec;
      
      HYPRE_Int nf;
      HYPRE_Int local_info;
      double *recv_buf;
      HYPRE_Int *displs, *info;
      HYPRE_Int size;
      HYPRE_Int new_num_procs;
      
      hypre_MPI_Comm_size(new_comm, &new_num_procs);

      f_local = hypre_ParVectorLocalVector(Aux_F);
      f_data = hypre_VectorData(f_local);
      nf =  hypre_VectorSize(f_local);

      /* first f */
      info = hypre_CTAlloc(HYPRE_Int, new_num_procs);
      local_info = nf;
      hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm);

      displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
      displs[0] = 0;
      for (i=1; i < new_num_procs+1; i++)
         displs[i] = displs[i-1]+info[i-1]; 
      size = displs[new_num_procs];
      
      tmp_vec =  hypre_ParVectorLocalVector(F_coarse);
      recv_buf = hypre_VectorData(tmp_vec);

      hypre_MPI_Allgatherv ( f_data, nf, hypre_MPI_DOUBLE, 
                          recv_buf, info, displs, 
                          hypre_MPI_DOUBLE, new_comm );

      tmp_vec =  hypre_ParVectorLocalVector(U_coarse);
      recv_buf = hypre_VectorData(tmp_vec);
      
      /*then u */
      hypre_MPI_Allgatherv ( u_data, n, hypre_MPI_DOUBLE, 
                       recv_buf, info, displs, 
                       hypre_MPI_DOUBLE, new_comm );
         
      /* clean up */
      hypre_TFree(displs);
      hypre_TFree(info);

      hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);      

      /*copy my part of U to parallel vector */
      {
         double *local_data;

         local_data =  hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));

         for (i = 0; i < n; i++)
         {
            u_data[i] = local_data[first_index+i];
         }
      }
   }

   return(Solve_err_flag);
}
Example #8
0
HYPRE_Int AmgCGCPrepare (hypre_ParCSRMatrix *S,HYPRE_Int nlocal,HYPRE_Int *CF_marker,HYPRE_Int **CF_marker_offd,HYPRE_Int coarsen_type,HYPRE_Int **vrange)
/* assemble a graph representing the connections between the grids
 * ================================================================================================
 * S : the strength matrix
 * nlocal : the number of locally created coarse grids
 * CF_marker, CF_marker_offd : the coarse/fine markers
 * coarsen_type : the coarsening type
 * vrange : the ranges of the vertices representing coarse grids
 * ================================================================================================*/
{
  HYPRE_Int ierr=0;
  HYPRE_Int mpisize,mpirank;
  HYPRE_Int num_sends;
  HYPRE_Int *vertexrange=NULL;
  HYPRE_Int vstart,vend;
  HYPRE_Int *int_buf_data;
  HYPRE_Int start;
  HYPRE_Int i,ii,j;
  HYPRE_Int num_variables = hypre_CSRMatrixNumRows (hypre_ParCSRMatrixDiag(S));
  HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (hypre_ParCSRMatrixOffd (S));

  MPI_Comm comm = hypre_ParCSRMatrixComm(S);
/*   hypre_MPI_Status status; */

  hypre_ParCSRCommPkg    *comm_pkg    = hypre_ParCSRMatrixCommPkg (S);
  hypre_ParCSRCommHandle *comm_handle;


  hypre_MPI_Comm_size (comm,&mpisize);
  hypre_MPI_Comm_rank (comm,&mpirank);

  if (!comm_pkg) {
    hypre_MatvecCommPkgCreate (S);
    comm_pkg = hypre_ParCSRMatrixCommPkg (S);
  }
  num_sends = hypre_ParCSRCommPkgNumSends (comm_pkg);

  if (coarsen_type % 2 == 0) nlocal++; /* even coarsen_type means allow_emptygrids */
#ifdef HYPRE_NO_GLOBAL_PARTITION
   {
      HYPRE_Int scan_recv;
      
      vertexrange = hypre_CTAlloc(HYPRE_Int,2);
      hypre_MPI_Scan(&nlocal, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
      /* first point in my range */ 
      vertexrange[0] = scan_recv - nlocal;
      /* first point in next proc's range */
      vertexrange[1] = scan_recv;
      vstart = vertexrange[0];
      vend   = vertexrange[1];
   }
#else
  vertexrange = hypre_CTAlloc (HYPRE_Int,mpisize+1);
 
  hypre_MPI_Allgather (&nlocal,1,HYPRE_MPI_INT,vertexrange+1,1,HYPRE_MPI_INT,comm);
  vertexrange[0]=0;
  for (i=2;i<=mpisize;i++) vertexrange[i]+=vertexrange[i-1];
  vstart = vertexrange[mpirank];
  vend   = vertexrange[mpirank+1];
#endif
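  /* Worked example (hypothetical numbers): with nlocal = {2, 3, 1} on three
     procs, the scan branch gives vertexrange = {0,2}, {2,5}, {5,6} on procs
     0, 1 and 2, while the Allgather branch gives every proc the full array
     {0, 2, 5, 6}; either way [vstart, vend) is this proc's share of the
     coarse-grid vertices. */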

  /* Note: vstart uses 0-based indexing, while CF_marker uses 1-based indexing */
  if (coarsen_type % 2 == 1) { /* see above */
    for (i=0;i<num_variables;i++)
      if (CF_marker[i]>0)
	CF_marker[i]+=vstart;
  }
  else {
/*      hypre_printf ("processor %d: empty grid allowed\n",mpirank);  */
    for (i=0;i<num_variables;i++) {
      if (CF_marker[i]>0)
	CF_marker[i]+=vstart+1; /* add one because vertexrange[mpirank]+1 denotes the empty grid.
				   Hence, vertexrange[mpirank]+2 is the first coarse grid denoted in
				   global indices, ... */
    }
  }

  /* exchange data */
  *CF_marker_offd = hypre_CTAlloc (HYPRE_Int,num_cols_offd);
  int_buf_data = hypre_CTAlloc (HYPRE_Int,hypre_ParCSRCommPkgSendMapStart (comm_pkg,num_sends));

  for (i=0,ii=0;i<num_sends;i++) {
    start = hypre_ParCSRCommPkgSendMapStart (comm_pkg,i);
    for (j=start;j<hypre_ParCSRCommPkgSendMapStart (comm_pkg,i+1);j++)
      int_buf_data [ii++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
  }

  if (mpisize>1) {
    comm_handle = hypre_ParCSRCommHandleCreate (11,comm_pkg,int_buf_data,*CF_marker_offd);
    hypre_ParCSRCommHandleDestroy (comm_handle);
  }
  hypre_TFree (int_buf_data);
  *vrange=vertexrange;
  return (ierr);
}
Example #9
0
HYPRE_Int hypre_BoomerAMGCoarsenCGC (hypre_ParCSRMatrix    *S,HYPRE_Int numberofgrids,HYPRE_Int coarsen_type,HYPRE_Int *CF_marker)
 /* CGC algorithm
  * ====================================================================================================
  * S : the strength matrix (the strong couplings)
  * numberofgrids : the number of grids
  * coarsen_type : the coarsening type
  * CF_marker : the coarse/fine markers
  * =====================================================================================================*/
{
  HYPRE_Int j,/*p,*/mpisize,mpirank,/*rstart,rend,*/choice,*coarse,ierr=0;
  HYPRE_Int *vertexrange = NULL;
  HYPRE_Int *vertexrange_all = NULL;
  HYPRE_Int *CF_marker_offd = NULL;
  HYPRE_Int num_variables = hypre_CSRMatrixNumRows (hypre_ParCSRMatrixDiag(S));
/*   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (hypre_ParCSRMatrixOffd (S)); */
/*   HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd (S); */

/*   HYPRE_Real wall_time; */

  HYPRE_IJMatrix ijG;
  hypre_ParCSRMatrix *G;
  hypre_CSRMatrix *Gseq;
  MPI_Comm comm = hypre_ParCSRMatrixComm(S);

  hypre_MPI_Comm_size (comm,&mpisize);
  hypre_MPI_Comm_rank (comm,&mpirank);

#if 0
    if (!mpirank) {
      wall_time = time_getWallclockSeconds();
      hypre_printf ("Starting CGC preparation\n");
    }
#endif
  AmgCGCPrepare (S,numberofgrids,CF_marker,&CF_marker_offd,coarsen_type,&vertexrange);
#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC preparation, wall_time = %f s\n",wall_time);
    wall_time = time_getWallclockSeconds();
    hypre_printf ("Starting CGC matrix assembly\n");
  }
#endif
  AmgCGCGraphAssemble (S,vertexrange,CF_marker,CF_marker_offd,coarsen_type,&ijG);
#if 0
  HYPRE_IJMatrixPrint (ijG,"graph.txt");
#endif
  HYPRE_IJMatrixGetObject (ijG,(void**)&G);
#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC matrix assembly, wall_time = %f s\n",wall_time);
    wall_time = time_getWallclockSeconds();
    hypre_printf ("Starting CGC matrix communication\n");
  }
#endif
#ifdef HYPRE_NO_GLOBAL_PARTITION
 {
  /* classical CGC does not really make sense in combination with HYPRE_NO_GLOBAL_PARTITION,
     but anyway, here it is:
  */
   HYPRE_Int nlocal = vertexrange[1]-vertexrange[0];
   vertexrange_all = hypre_CTAlloc (HYPRE_Int,mpisize+1);
   hypre_MPI_Allgather (&nlocal,1,HYPRE_MPI_INT,vertexrange_all+1,1,HYPRE_MPI_INT,comm);
   vertexrange_all[0]=0;
   for (j=2;j<=mpisize;j++) vertexrange_all[j]+=vertexrange_all[j-1];
 }
#else
  vertexrange_all = vertexrange;
#endif
  Gseq = hypre_ParCSRMatrixToCSRMatrixAll (G);
#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC matrix communication, wall_time = %f s\n",wall_time);
  }
#endif

  if (Gseq) { /* BM Aug 31, 2006: Gseq==NULL if G has no local rows */
#if 0 /* debugging */
    if (!mpirank) {
      wall_time = time_getWallclockSeconds();
      hypre_printf ("Starting CGC election\n");
    }
#endif
    AmgCGCChoose (Gseq,vertexrange_all,mpisize,&coarse);
#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC election, wall_time = %f s\n",wall_time);
  }
#endif

#if 0 /* debugging */
    if (!mpirank) {
      for (j=0;j<mpisize;j++) 
	hypre_printf ("Processor %d, choice = %d of range %d - %d\n",j,coarse[j],vertexrange_all[j]+1,vertexrange_all[j+1]);
    }
    fflush(stdout);
#endif
#if 0 /* debugging */
    if (!mpirank) {
      wall_time = time_getWallclockSeconds();
      hypre_printf ("Starting CGC CF assignment\n");
    }
#endif
    choice = coarse[mpirank];
    for (j=0;j<num_variables;j++) {
      if (CF_marker[j]==choice)
	CF_marker[j] = C_PT;
      else
	CF_marker[j] = F_PT;
    }

    hypre_CSRMatrixDestroy (Gseq);
    hypre_TFree (coarse);
  }
  else
    for (j=0;j<num_variables;j++) CF_marker[j] = F_PT;
#if 0
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC CF assignment, wall_time = %f s\n",wall_time);
  }
#endif

#if 0 /* debugging */
    if (!mpirank) {
      wall_time = time_getWallclockSeconds();
      hypre_printf ("Starting CGC cleanup\n");
    }
#endif
  HYPRE_IJMatrixDestroy (ijG);
  hypre_TFree (vertexrange);
#ifdef HYPRE_NO_GLOBAL_PARTITION
  hypre_TFree (vertexrange_all);
#endif
  hypre_TFree (CF_marker_offd);
#if 0
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC cleanup, wall_time = %f s\n",wall_time);
  }
#endif
  return(ierr);
}
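
The closing loop of this routine relabels CF_marker: every variable whose (globally numbered) tentative grid equals the elected choice becomes a coarse point, everything else a fine point, and a process with no local rows marks everything fine. Below is a minimal sketch of that relabeling step, with C_PT/F_PT values assumed to match hypre's BoomerAMG conventions (C_PT = 1, F_PT = -1):

#include <stdio.h>

/* Assumed to match hypre's BoomerAMG conventions. */
#define C_PT  1
#define F_PT -1

/* Every variable whose tentative-grid number equals this process's
   elected choice becomes a coarse point, everything else a fine point. */
static void assign_cf(int *CF_marker, int num_variables, int choice)
{
   int j;
   for (j = 0; j < num_variables; j++)
      CF_marker[j] = (CF_marker[j] == choice) ? C_PT : F_PT;
}

int main(void)
{
   /* tentative grid numbers (already shifted into global numbering) */
   int CF_marker[6] = { 5, 6, 5, 7, 6, 5 };
   int j;

   assign_cf(CF_marker, 6, 5);   /* pretend grid 5 won the election */
   for (j = 0; j < 6; j++) printf("%d ", CF_marker[j]);
   printf("\n");                 /* prints: 1 -1 1 -1 -1 1 */
   return 0;
}
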
Example #10
0
/******************************************************************************
 *
 * hypre_IJMatrixCreatePETSc
 *
 * creates AuxParCSRMatrix and ParCSRMatrix if necessary,
 * generates arrays row_starts and col_starts using either previously
 * set data local_m and local_n (user defined) or generates them evenly
 * distributed if not previously defined by user.
 *
 *****************************************************************************/
HYPRE_Int
hypre_IJMatrixCreatePETSc(hypre_IJMatrix *matrix)
{
   MPI_Comm comm = hypre_IJMatrixContext(matrix);
   HYPRE_Int global_m = hypre_IJMatrixM(matrix); 
   HYPRE_Int global_n = hypre_IJMatrixN(matrix); 
   hypre_AuxParCSRMatrix *aux_matrix = hypre_IJMatrixTranslator(matrix);
   HYPRE_Int local_m;   
   HYPRE_Int local_n;   
   HYPRE_Int ierr = 0;


   HYPRE_Int *row_starts;
   HYPRE_Int *col_starts;
   HYPRE_Int num_cols_offd = 0;
   HYPRE_Int num_nonzeros_diag = 0;
   HYPRE_Int num_nonzeros_offd = 0;
   HYPRE_Int num_procs, my_id;
   HYPRE_Int equal;
   HYPRE_Int i;
   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   if (aux_matrix)
   {
      local_m = hypre_AuxParCSRMatrixLocalNumRows(aux_matrix);   
      local_n = hypre_AuxParCSRMatrixLocalNumCols(aux_matrix);
   }
   else
   {
      aux_matrix = hypre_AuxParCSRMatrixCreate(-1,-1,NULL);
      local_m = -1;
      local_n = -1;
      hypre_IJMatrixTranslator(matrix) = aux_matrix;
   }

   if (local_m < 0)
   {
      row_starts = NULL;
   }
   else
   {
      row_starts = hypre_CTAlloc(HYPRE_Int,num_procs+1);

      if (my_id == 0 && local_m == global_m)
      {
         row_starts[1] = local_m;
      }
      else
      {
         hypre_MPI_Allgather(&local_m,1,HYPRE_MPI_INT,&row_starts[1],1,HYPRE_MPI_INT,comm);
      }

   }
   if (local_n < 0)
   {
      col_starts = NULL;
   }
   else
   {
      col_starts = hypre_CTAlloc(HYPRE_Int,num_procs+1);

      if (my_id == 0 && local_n == global_n)
      {
         col_starts[1] = local_n;
      }
      else
      {
         hypre_MPI_Allgather(&local_n,1,HYPRE_MPI_INT,&col_starts[1],1,HYPRE_MPI_INT,comm);
      }
   }

   if (row_starts && col_starts)
   {
      equal = 1;
      for (i=0; i < num_procs; i++)
      {
         row_starts[i+1] += row_starts[i];
         col_starts[i+1] += col_starts[i];
         if (row_starts[i+1] != col_starts[i+1])
            equal = 0;
      }
      if (equal)
      {
         hypre_TFree(col_starts);
         col_starts = row_starts;
      }
   }

   hypre_IJMatrixLocalStorage(matrix) = hypre_ParCSRMatrixCreate(comm,global_m,
		global_n,row_starts, col_starts, num_cols_offd, 
		num_nonzeros_diag, num_nonzeros_offd);
   return ierr;
}
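
The partitioning logic above gathers the user-supplied local sizes, turns them into offset arrays, and shares a single array when the row and column partitionings coincide. Below is a standalone sketch of that logic with plain MPI and calloc standing in for the hypre wrappers (build_partitionings is an illustrative name, not a hypre function):

#include <mpi.h>
#include <stdlib.h>

/* Gather local row/column counts, turn both into offset arrays, and
   alias the column array to the row array when the two partitionings
   turn out to be identical.  Sets *equal to 1 in that case. */
static void build_partitionings(int local_m, int local_n, MPI_Comm comm,
                                int **row_starts_out, int **col_starts_out,
                                int *equal)
{
   int  num_procs, i;
   int *row_starts, *col_starts;

   MPI_Comm_size(comm, &num_procs);
   row_starts = calloc(num_procs + 1, sizeof(int));
   col_starts = calloc(num_procs + 1, sizeof(int));

   MPI_Allgather(&local_m, 1, MPI_INT, &row_starts[1], 1, MPI_INT, comm);
   MPI_Allgather(&local_n, 1, MPI_INT, &col_starts[1], 1, MPI_INT, comm);

   *equal = 1;
   for (i = 0; i < num_procs; i++)
   {
      row_starts[i + 1] += row_starts[i];
      col_starts[i + 1] += col_starts[i];
      if (row_starts[i + 1] != col_starts[i + 1]) *equal = 0;
   }

   if (*equal)            /* identically partitioned: keep one array */
   {
      free(col_starts);
      col_starts = row_starts;
   }
   *row_starts_out = row_starts;
   *col_starts_out = col_starts;
}

Called collectively with local_m == local_n on every rank, it hands back the same array for rows and columns, which is what allows the routine above to pass row_starts and an aliased col_starts to hypre_ParCSRMatrixCreate.
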
Example #11
0
HYPRE_Int
hypre_ParCSRMatrixToParChordMatrix(
   hypre_ParCSRMatrix *Ap,
   MPI_Comm comm,
   hypre_ParChordMatrix **pAc )
{
   HYPRE_Int * row_starts = hypre_ParCSRMatrixRowStarts(Ap);
   HYPRE_Int * col_starts = hypre_ParCSRMatrixColStarts(Ap);
   hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(Ap);
   hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(Ap);
   HYPRE_Int * offd_j = hypre_CSRMatrixJ(offd);
   HYPRE_Int * diag_j = hypre_CSRMatrixJ(diag);
   HYPRE_Int * col_map_offd = hypre_ParCSRMatrixColMapOffd(Ap);
   HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(Ap);

   hypre_ParChordMatrix * Ac;
   hypre_NumbersNode * rdofs, * offd_cols_me;
   hypre_NumbersNode ** offd_cols;
   HYPRE_Int ** offd_col_array;
   HYPRE_Int * len_offd_col_array, * offd_col_array_me;
   HYPRE_Int len_offd_col_array_me;
   HYPRE_Int num_idofs, num_rdofs, j_local, j_global, row_global;
   HYPRE_Int i, j, jj, p, pto, q, qto, my_id, my_q, row, ireq;
   HYPRE_Int num_inprocessors, num_toprocessors, num_procs, len_num_rdofs_toprocessor;
   HYPRE_Int *inprocessor, *toprocessor, *pcr, *qcr, *num_inchords, *chord, *chordto;
   HYPRE_Int *inproc, *toproc, *num_rdofs_toprocessor;
   HYPRE_Int **inchord_idof, **inchord_rdof, **rdof_toprocessor;
   double **inchord_data;
   double data;
   HYPRE_Int *first_index_idof, *first_index_rdof;
   hypre_MPI_Request * request;
   hypre_MPI_Status * status;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);
   num_idofs = row_starts[my_id+1] - row_starts[my_id];
   num_rdofs = col_starts[my_id+1] - col_starts[my_id];

   hypre_ParChordMatrixCreate( pAc, comm, num_idofs, num_rdofs );
   Ac = *pAc;

/* The following block sets Inprocessor:
   On each proc. my_id, we find the columns in the offd and diag blocks
   (global no.s).  The columns are rdofs (contrary to what I wrote in
   ChordMatrix.txt).
   For each such col/rdof r, find the proc. p which owns row/idof r.
   We set the temporary array pcr[p]=1 for such p.
   An MPI all-to-all will exchange such arrays so my_id's array qcr has
   qcr[q]=1 iff, on proc. q, pcr[my_id]=1.  In other words, qcr[q]=1 if
   my_id owns a row/idof i which is the same as a col/rdof owned by q.
   Collect all such q's into the array Inprocessor.  (A standalone sketch of
   this pcr/qcr exchange follows this example.)
   While constructing pcr, we also construct pj such that for any index jj
   into offd_j,offd_data, pj[jj] is the processor which owns jj as a row (idof)
   (the number jj is local to this processor).
   */
   pcr = hypre_CTAlloc( HYPRE_Int, num_procs );
   qcr = hypre_CTAlloc( HYPRE_Int, num_procs );
   for ( p=0; p<num_procs; ++p ) pcr[p]=0;
   for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(offd); ++jj ) {
      j_local = offd_j[jj];
      j_global =  col_map_offd[j_local];
      for ( p=0; p<num_procs; ++p ) {
         if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) {
            pcr[p]=1;
/* not used yet...            pj[jj] = p;*/
            break;
         }
      }
   }
   /*   jjd = jj; ...not used yet */

   /* pcr[my_id] = 1; ...for square matrices (with nonzero diag block)
      this one line  would do the job of the following nested loop.
      For non-square matrices, the data distribution is too arbitrary. */
   for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(diag); ++jj ) {
      j_local = diag_j[jj];
      j_global = j_local + first_col_diag;
      for ( p=0; p<num_procs; ++p ) {
         if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) {
            pcr[p]=1;
/* not used yet...            pj[jj+jjd] = p;*/
            break;
         }
      }
   }


   /* Now pcr[p]=1 iff my_id owns a col/rdof r which proc. p owns as a row/idof */
   hypre_MPI_Alltoall( pcr, 1, HYPRE_MPI_INT, qcr, 1, HYPRE_MPI_INT, comm );
   /* Now qcr[q]==1 if my_id owns a row/idof i which is a col/rdof of proc. q
    The array of such q's is the array Inprocessor. */

   num_inprocessors = 0;
   for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) ++num_inprocessors;
   inprocessor = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   p = 0;
   for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) inprocessor[ p++ ] = q;
   num_toprocessors = 0;
   for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) ++num_toprocessors;
   toprocessor = hypre_CTAlloc( HYPRE_Int, num_toprocessors );
   p = 0;
   for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) toprocessor[ p++ ] = q;

   hypre_ParChordMatrixNumInprocessors(Ac) = num_inprocessors;
   hypre_ParChordMatrixInprocessor(Ac) = inprocessor;
   hypre_ParChordMatrixNumToprocessors(Ac) = num_toprocessors;
   hypre_ParChordMatrixToprocessor(Ac) = toprocessor;
   hypre_TFree( qcr );

   /* FirstIndexIdof[p] is the global index of proc. p's row 0 */
   /* FirstIndexRdof[p] is the global index of proc. p's col 0 */
   /* For FirstIndexIdof, we copy the array row_starts rather than just its
      pointer, because the chord matrix will think it's free to delete
      FirstIndexIdof */
   /* col_starts[p] contains the global index of the first column
      in the diag block of p.  But for first_index_rdof we want the global
      index of the first column in p (whether that's in the diag or offd block).
      So it's more involved than row/idof: we also check the offd block, and
      have to do a gather to get first_index_rdof for every proc. on every proc. */
   first_index_idof = hypre_CTAlloc( HYPRE_Int, num_procs+1 );
   first_index_rdof = hypre_CTAlloc( HYPRE_Int, num_procs+1 );
   for ( p=0; p<=num_procs; ++p ) {
      first_index_idof[p] = row_starts[p];
      first_index_rdof[p] = col_starts[p];
   };
   if ( hypre_CSRMatrixNumRows(offd) > 0  && hypre_CSRMatrixNumCols(offd) > 0 )
      first_index_rdof[my_id] =
         col_starts[my_id]<col_map_offd[0] ? col_starts[my_id] : col_map_offd[0];
   hypre_MPI_Allgather( &first_index_rdof[my_id], 1, HYPRE_MPI_INT,
                  first_index_rdof, 1, HYPRE_MPI_INT, comm );

   /* Set num_inchords: num_inchords[p] is no. chords on my_id connected to p.
      Set each chord (idof,jdof,data).
      We go through each matrix element in the diag block, find what processor
      owns its column no. as a row, then update num_inchords[p], inchord_idof[p],
      inchord_rdof[p], inchord_data[p].
   */

   inchord_idof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   inchord_rdof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   inchord_data = hypre_CTAlloc( double*, num_inprocessors );
   num_inchords = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   chord = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   chordto = hypre_CTAlloc( HYPRE_Int, num_toprocessors );
   num_rdofs = 0;
   for ( q=0; q<num_inprocessors; ++q ) num_inchords[q] = 0;
   my_q = -1;
   for ( q=0; q<num_inprocessors; ++q ) if ( inprocessor[q]==my_id ) my_q = q;
   hypre_assert( my_q>=0 );

   /* diag block: first count chords (from my_id to my_id),
      then set them from diag block's CSR data structure */
   num_idofs = hypre_CSRMatrixNumRows(diag);
   rdofs = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) {
      for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(diag)[i];
         hypre_NumbersEnter( rdofs, j_local );
         ++num_inchords[my_q];
      }
   };
   num_rdofs = hypre_NumbersNEntered( rdofs );
   inchord_idof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] );
   inchord_rdof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] );
   inchord_data[my_q] = hypre_CTAlloc( double, num_inchords[my_q] );
   chord[0] = 0;
   for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) {
      for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(diag)[i];
         data = hypre_CSRMatrixData(diag)[i];
         inchord_idof[my_q][chord[0]] = row;
         /* Here we need to convert from j_local - a column index local to
            the diag block of this proc. - to a j which is local to this
            processor as a whole: a column (rdof) numbering scheme shared by
            the diag and offd blocks...  */
         j_global = j_local + hypre_ParCSRMatrixColStarts(Ap)[my_q];
         j = j_global - first_index_rdof[my_q];
         inchord_rdof[my_q][chord[0]] = j;
         inchord_data[my_q][chord[0]] = data;
         hypre_assert( chord[0] < num_inchords[my_q] );
         ++chord[0];
      }
   };
   hypre_NumbersDeleteNode(rdofs);


   /* offd block: */

   /* >>> offd_cols_me duplicates rdofs */
   offd_cols_me = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) {
      for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(offd)[i];
         j_global =  col_map_offd[j_local];
         hypre_NumbersEnter( offd_cols_me, j_global );
      }
   }
   offd_col_array = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   len_offd_col_array = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   offd_col_array_me = hypre_NumbersArray( offd_cols_me );
   len_offd_col_array_me = hypre_NumbersNEntered( offd_cols_me );
   request = hypre_CTAlloc(hypre_MPI_Request, 2*num_procs );
   ireq = 0;
   for ( q=0; q<num_inprocessors; ++q )
      hypre_MPI_Irecv( &len_offd_col_array[q], 1, HYPRE_MPI_INT,
                 inprocessor[q], 0, comm, &request[ireq++] );
   for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) {
      hypre_MPI_Isend( &len_offd_col_array_me, 1, HYPRE_MPI_INT, p, 0, comm, &request[ireq++] );
   }
   status = hypre_CTAlloc(hypre_MPI_Status, ireq );
   hypre_MPI_Waitall( ireq, request, status );
   hypre_TFree(status);
   ireq = 0;
   for ( q=0; q<num_inprocessors; ++q )
      offd_col_array[q] = hypre_CTAlloc( HYPRE_Int, len_offd_col_array[q] );
   for ( q=0; q<num_inprocessors; ++q )
      hypre_MPI_Irecv( offd_col_array[q], len_offd_col_array[q], HYPRE_MPI_INT,
                 inprocessor[q], 0, comm, &request[ireq++] );
   for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) {
      hypre_MPI_Isend( offd_col_array_me, len_offd_col_array_me,
                 HYPRE_MPI_INT, p, 0, comm, &request[ireq++] );
   }
   status = hypre_CTAlloc(hypre_MPI_Status, ireq );
   hypre_MPI_Waitall( ireq, request, status );
   hypre_TFree(request);
   hypre_TFree(status);
   offd_cols = hypre_CTAlloc( hypre_NumbersNode *, num_inprocessors );
   for ( q=0; q<num_inprocessors; ++q ) {
      offd_cols[q] = hypre_NumbersNewNode();
      for ( i=0; i<len_offd_col_array[q]; ++i )
         hypre_NumbersEnter( offd_cols[q], offd_col_array[q][i] );
   }

   len_num_rdofs_toprocessor = 1 + hypre_CSRMatrixI(offd)
      [hypre_CSRMatrixNumRows(offd)];
   inproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   toproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   num_rdofs_toprocessor = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   for ( qto=0; qto<len_num_rdofs_toprocessor; ++qto ) {
      inproc[qto] = -1;
      toproc[qto] = -1;
      num_rdofs_toprocessor[qto] = 0;
   };
   rdofs = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) {
      for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(offd)[i];
         j_global =  col_map_offd[j_local];
         hypre_NumbersEnter( rdofs, j_local );
         
         /* TO DO: find faster ways to do the two processor lookups below.*/
         /* Find a processor p (local index q) from the inprocessor list
            which owns the column (rdof) that is the same as this processor's
            row (idof) row.  Update num_inchords for p.
            Save q as inproc[i] for quick recall later.  It represents
            an inprocessor (not unique) connected to a chord i.
         */
         inproc[i] = -1;
         for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) {
            p = inprocessor[q];
            if ( hypre_NumbersQuery( offd_cols[q],
                                     row+hypre_ParCSRMatrixFirstRowIndex(Ap) )
                 == 1 ) {
               /* row is one of the offd columns of p */
               ++num_inchords[q];
               inproc[i] = q;
               break;
            }
         }
         if ( inproc[i]<0 ) {
            /* For square matrices, we would have found the column in some
               other processor's offd.  But for non-square matrices it could
               exist only in some other processor's diag...*/
            /* Note that all data in a diag block is stored.  We don't check
               whether the value of a data entry is zero. */
            for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) {
               p = inprocessor[q];
               row_global = row+hypre_ParCSRMatrixFirstRowIndex(Ap);
               if ( row_global>=col_starts[p] &&
                    row_global< col_starts[p+1] ) {
                  /* row is one of the diag columns of p */
                  ++num_inchords[q];
                  inproc[i] = q;
                  break;
               }
            }  
         }
         hypre_assert( inproc[i]>=0 );

         /* Find the processor pto (local index qto) from the toprocessor list
            which owns the row (idof) that is the same as this processor's
            column (rdof) j_global.  Update num_rdofs_toprocessor for pto.
            Save pto as toproc[i] for quick recall later.  It represents
            the toprocessor connected to a chord i. */
         for ( qto=0; qto<num_toprocessors; ++qto ) {
            pto = toprocessor[qto];
            if ( j_global >= row_starts[pto] && j_global<row_starts[pto+1] ) {
               hypre_assert( qto < len_num_rdofs_toprocessor );
               ++num_rdofs_toprocessor[qto];
               /* ... an overestimate, as if two chords share an rdof, that
                  rdof will be counted twice in num_rdofs_toprocessor.
                  It can be fixed up later.*/
               toproc[i] = qto;
               break;
            }
         }
      }
   };
   num_rdofs += hypre_NumbersNEntered(rdofs);
   hypre_NumbersDeleteNode(rdofs);

   for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) {
      inchord_idof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] );
      inchord_rdof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] );
      inchord_data[q] = hypre_CTAlloc( double, num_inchords[q] );
      chord[q] = 0;
   };
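
The long comment block in the example above describes the pcr/qcr handshake: each rank flags the owners of the columns it references, an all-to-all transposes those flags, and the two flag arrays are then compressed into the Toprocessor and Inprocessor lists. Below is a standalone sketch of that exchange in plain MPI (find_neighbors is an illustrative name, not part of hypre):

#include <mpi.h>
#include <stdlib.h>

/* pcr[p] == 1 means "I reference data owned by rank p" (a 'to' relation).
   After the all-to-all, qcr[q] == 1 means "rank q references data owned
   by me" (an 'in' relation).  The flag arrays are then compressed into
   the toprocessor/inprocessor lists, as in the example above. */
static void find_neighbors(int *pcr, MPI_Comm comm,
                           int **toproc_out, int *num_to,
                           int **inproc_out, int *num_in)
{
   int  num_procs, q, k;
   int *qcr, *toproc, *inproc;

   MPI_Comm_size(comm, &num_procs);
   qcr = calloc(num_procs, sizeof(int));

   /* every rank sends one flag to every other rank */
   MPI_Alltoall(pcr, 1, MPI_INT, qcr, 1, MPI_INT, comm);

   *num_to = 0;
   for (q = 0; q < num_procs; q++) if (pcr[q]) (*num_to)++;
   toproc = calloc(*num_to ? *num_to : 1, sizeof(int));
   for (q = 0, k = 0; q < num_procs; q++) if (pcr[q]) toproc[k++] = q;

   *num_in = 0;
   for (q = 0; q < num_procs; q++) if (qcr[q]) (*num_in)++;
   inproc = calloc(*num_in ? *num_in : 1, sizeof(int));
   for (q = 0, k = 0; q < num_procs; q++) if (qcr[q]) inproc[k++] = q;

   free(qcr);
   *toproc_out = toproc;
   *inproc_out = inproc;
}

It must be called collectively, with pcr filled from the column ownership scan shown at the start of Example #11.
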
Example #12
0
HYPRE_Int
HYPRE_IJMatrixCreate( MPI_Comm        comm,
                      HYPRE_Int       ilower,
                      HYPRE_Int       iupper,
                      HYPRE_Int       jlower,
                      HYPRE_Int       jupper,
                      HYPRE_IJMatrix *matrix )
{
   HYPRE_Int *row_partitioning;
   HYPRE_Int *col_partitioning;
   HYPRE_Int *info;
   HYPRE_Int num_procs;
   HYPRE_Int myid;

   hypre_IJMatrix *ijmatrix;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   HYPRE_Int  row0, col0, rowN, colN;
#else
   HYPRE_Int *recv_buf;
   HYPRE_Int i, i4;
   HYPRE_Int square;
#endif

   ijmatrix = hypre_CTAlloc(hypre_IJMatrix, 1);

   hypre_IJMatrixComm(ijmatrix)         = comm;
   hypre_IJMatrixObject(ijmatrix)       = NULL;
   hypre_IJMatrixTranslator(ijmatrix)   = NULL;
   hypre_IJMatrixObjectType(ijmatrix)   = HYPRE_UNITIALIZED;
   hypre_IJMatrixAssembleFlag(ijmatrix) = 0;
   hypre_IJMatrixPrintLevel(ijmatrix) = 0;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm, &myid);
   

   if (ilower > iupper+1 || ilower < 0)
   {
      hypre_error_in_arg(2);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (iupper < -1)
   {
      hypre_error_in_arg(3);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (jlower > jupper+1 || jlower < 0)
   {
      hypre_error_in_arg(4);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (jupper < -1)
   {
      hypre_error_in_arg(5);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

#ifdef HYPRE_NO_GLOBAL_PARTITION

   info = hypre_CTAlloc(HYPRE_Int,2);

   row_partitioning = hypre_CTAlloc(HYPRE_Int, 2);
   col_partitioning = hypre_CTAlloc(HYPRE_Int, 2);

   row_partitioning[0] = ilower;
   row_partitioning[1] = iupper+1;
   col_partitioning[0] = jlower;
   col_partitioning[1] = jupper+1;

   /* now we need the global number of rows and columns as well
      as the global first row and column index */

   /* proc 0 has the first row and col */
   if (myid==0) 
   {
      info[0] = ilower;
      info[1] = jlower;
   }
   hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, 0, comm);
   row0 = info[0];
   col0 = info[1];
   
   /* proc (num_procs-1) has the last row and col */   
   if (myid == (num_procs-1))
   {
      info[0] = iupper;
      info[1] = jupper;
   }
   hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, num_procs-1, comm);

   rowN = info[0];
   colN = info[1];

   hypre_IJMatrixGlobalFirstRow(ijmatrix) = row0;
   hypre_IJMatrixGlobalFirstCol(ijmatrix) = col0;
   hypre_IJMatrixGlobalNumRows(ijmatrix) = rowN - row0 + 1;
   hypre_IJMatrixGlobalNumCols(ijmatrix) = colN - col0 + 1;
   
   hypre_TFree(info);
   

#else

   info = hypre_CTAlloc(HYPRE_Int,4);
   recv_buf = hypre_CTAlloc(HYPRE_Int,4*num_procs);
   row_partitioning = hypre_CTAlloc(HYPRE_Int, num_procs+1);

   info[0] = ilower;
   info[1] = iupper;
   info[2] = jlower;
   info[3] = jupper;
  
   /* Generate the row- and column-partitionings through an information
      exchange across all processors, check whether the matrix is square,
      and check that the partitionings match, i.e. have no overlaps or gaps.
      If an overlap or gap is found in the row or column partitioning,
      a generic hypre error is returned. */

   hypre_MPI_Allgather(info,4,HYPRE_MPI_INT,recv_buf,4,HYPRE_MPI_INT,comm);

   row_partitioning[0] = recv_buf[0];
   square = 1;
   for (i=0; i < num_procs-1; i++)
   {
      i4 = 4*i;
      if ( recv_buf[i4+1] != (recv_buf[i4+4]-1) )
      {
         hypre_error(HYPRE_ERROR_GENERIC);
         hypre_TFree(ijmatrix);
         hypre_TFree(info);
         hypre_TFree(recv_buf);
         hypre_TFree(row_partitioning);
   	 return hypre_error_flag;
      }
      else
	 row_partitioning[i+1] = recv_buf[i4+4];
	 
      if ((square && (recv_buf[i4]   != recv_buf[i4+2])) ||
                    (recv_buf[i4+1] != recv_buf[i4+3])  )
      {
         square = 0;
      }
   }	
   i4 = (num_procs-1)*4;
   row_partitioning[num_procs] = recv_buf[i4+1]+1;

   if ((recv_buf[i4] != recv_buf[i4+2]) || (recv_buf[i4+1] != recv_buf[i4+3])) 
      square = 0;

   if (square)
      col_partitioning = row_partitioning;
   else
   {   
      col_partitioning = hypre_CTAlloc(HYPRE_Int,num_procs+1);
      col_partitioning[0] = recv_buf[2];
      for (i=0; i < num_procs-1; i++)
      {
         i4 = 4*i;
         if (recv_buf[i4+3] != recv_buf[i4+6]-1)
         {
           hypre_error(HYPRE_ERROR_GENERIC);
           hypre_TFree(ijmatrix);
           hypre_TFree(info);
           hypre_TFree(recv_buf);
           hypre_TFree(row_partitioning);
           hypre_TFree(col_partitioning);
   	   return hypre_error_flag;
         }
         else
   	   col_partitioning[i+1] = recv_buf[i4+6];
      }
      col_partitioning[num_procs] = recv_buf[num_procs*4-1]+1;
   }

   hypre_IJMatrixGlobalFirstRow(ijmatrix) = row_partitioning[0];
   hypre_IJMatrixGlobalFirstCol(ijmatrix) = col_partitioning[0];
   hypre_IJMatrixGlobalNumRows(ijmatrix) = row_partitioning[num_procs] - 
      row_partitioning[0];
   hypre_IJMatrixGlobalNumCols(ijmatrix) = col_partitioning[num_procs] - 
      col_partitioning[0];
   
   hypre_TFree(info);
   hypre_TFree(recv_buf);
   
#endif

   hypre_IJMatrixRowPartitioning(ijmatrix) = row_partitioning;
   hypre_IJMatrixColPartitioning(ijmatrix) = col_partitioning;

   *matrix = (HYPRE_IJMatrix) ijmatrix;
  
   return hypre_error_flag;
}
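
For reference, this is how HYPRE_IJMatrixCreate is typically driven from user code: create the matrix with the local row and column extents, select ParCSR storage, fill it, and assemble. The matrix contents and sizes below are purely illustrative, and the exact HYPRE_IJMatrixSetValues value type (double vs. HYPRE_Complex) and header set can differ between hypre versions.

#include <mpi.h>
#include "HYPRE.h"
#include "HYPRE_IJ_mv.h"
#include "HYPRE_parcsr_mv.h"

/* Sketch of typical client-side use of the IJ interface shown above:
   each rank owns two rows and sets only the diagonal, for brevity. */
int main(int argc, char *argv[])
{
   HYPRE_IJMatrix     A;
   HYPRE_ParCSRMatrix parcsr_A;
   HYPRE_Int          myid, ilower, iupper, i;

   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);

   ilower = 2 * myid;            /* two rows (and columns) per rank */
   iupper = 2 * myid + 1;

   HYPRE_IJMatrixCreate(MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A);
   HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR);
   HYPRE_IJMatrixInitialize(A);

   for (i = ilower; i <= iupper; i++)
   {
      HYPRE_Int ncols = 1;
      HYPRE_Int col   = i;
      double    value = 4.0;     /* diagonal entry only */
      HYPRE_IJMatrixSetValues(A, 1, &ncols, &i, &col, &value);
   }

   HYPRE_IJMatrixAssemble(A);
   HYPRE_IJMatrixGetObject(A, (void **) &parcsr_A);

   /* ... hand parcsr_A to a solver here ... */

   HYPRE_IJMatrixDestroy(A);
   MPI_Finalize();
   return 0;
}
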