示例#1
0
hypre_ParCSRMatrix *
hypre_ParCSRBlockMatrixCompress( hypre_ParCSRBlockMatrix *matrix )
{
   MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix);
   hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix);
   hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix);
   HYPRE_Int global_num_rows = hypre_ParCSRBlockMatrixGlobalNumRows(matrix);
   HYPRE_Int global_num_cols = hypre_ParCSRBlockMatrixGlobalNumCols(matrix);
   HYPRE_Int *row_starts = hypre_ParCSRBlockMatrixRowStarts(matrix);
   HYPRE_Int *col_starts = hypre_ParCSRBlockMatrixColStarts(matrix);
   HYPRE_Int num_cols_offd = hypre_CSRBlockMatrixNumCols(offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRBlockMatrixNumNonzeros(diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRBlockMatrixNumNonzeros(offd);

   hypre_ParCSRMatrix *matrix_C;

   HYPRE_Int i;

   matrix_C = hypre_ParCSRMatrixCreate(comm, global_num_rows, global_num_cols,
                                       row_starts,col_starts,num_cols_offd,num_nonzeros_diag,num_nonzeros_offd);
   hypre_ParCSRMatrixInitialize(matrix_C);

   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
   hypre_ParCSRMatrixDiag(matrix_C) = hypre_CSRBlockMatrixCompress(diag);
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
   hypre_ParCSRMatrixOffd(matrix_C) = hypre_CSRBlockMatrixCompress(offd);

   for(i = 0; i < num_cols_offd; i++)
      hypre_ParCSRMatrixColMapOffd(matrix_C)[i] = 
         hypre_ParCSRBlockMatrixColMapOffd(matrix)[i];
   return matrix_C;
}
示例#2
0
HYPRE_Int
hypre_SchwarzDestroy( void *data )
{
      hypre_SchwarzData  *schwarz_data = data;

   if (hypre_SchwarzDataScale(schwarz_data))
      hypre_TFree (hypre_SchwarzDataScale(schwarz_data));
   if (hypre_SchwarzDataDofFunc(schwarz_data))
      hypre_TFree (hypre_SchwarzDataDofFunc(schwarz_data));
   hypre_CSRMatrixDestroy(hypre_SchwarzDataDomainStructure(schwarz_data));
   if (hypre_SchwarzDataVariant(schwarz_data) == 3);
      hypre_CSRMatrixDestroy(hypre_SchwarzDataABoundary(schwarz_data));
   hypre_ParVectorDestroy(hypre_SchwarzDataVtemp(schwarz_data));

   if (hypre_SchwarzDataPivots(schwarz_data))
      hypre_TFree (hypre_SchwarzDataPivots(schwarz_data));


   hypre_TFree(schwarz_data);
   return hypre_error_flag;

}
示例#3
0
HYPRE_Int
main( HYPRE_Int   argc,
      char *argv[] )
{


   HYPRE_Int        num_procs, myid;
   HYPRE_Int        verbose = 0, build_matrix_type = 1;
   HYPRE_Int        index, matrix_arg_index, commpkg_flag=3;
   HYPRE_Int        i, k, ierr=0;
   HYPRE_Int        row_start, row_end; 
   HYPRE_Int        col_start, col_end, global_num_rows;
   HYPRE_Int       *row_part, *col_part; 
   char      *csrfilename;
   HYPRE_Int        preload = 0, loop = 0, loop2 = LOOP2;   
   HYPRE_Int        bcast_rows[2], *info;
   


   hypre_ParCSRMatrix    *parcsr_A, *small_A;
   HYPRE_ParCSRMatrix    A_temp, A_temp_small; 
   hypre_CSRMatrix       *A_CSR;
   hypre_ParCSRCommPkg	 *comm_pkg;   

  
   HYPRE_Int                 nx, ny, nz;
   HYPRE_Int                 P, Q, R;
   HYPRE_Int                 p, q, r;
   HYPRE_Real          values[4];

   hypre_ParVector     *x_new;
   hypre_ParVector     *y_new, *y;
   HYPRE_Int                 *row_starts;
   HYPRE_Real          ans;
   HYPRE_Real          start_time, end_time, total_time, *loop_times;
   HYPRE_Real          T_avg, T_std;
   
   HYPRE_Int                   noparmprint = 0;
 
#if mydebug   
   HYPRE_Int  j, tmp_int;
#endif

   /*-----------------------------------------------------------
    * Initialize MPI
    *-----------------------------------------------------------*/


   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );



   /*-----------------------------------------------------------
    * default - is 27pt laplace
    *-----------------------------------------------------------*/

    
   build_matrix_type = 2;
   matrix_arg_index = argc;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/
 
   index = 1;
   while ( index < argc) 
   {
      if  ( strcmp(argv[index], "-verbose") == 0 )
      {
         index++;  
         verbose = 1;
      }
      else if ( strcmp(argv[index], "-fromonecsrfile") == 0 )
      {
         index++;
         build_matrix_type      = 1;      
         matrix_arg_index = index; /*this tells where the name is*/
      }
      else if  ( strcmp(argv[index], "-commpkg") == 0 )
      {
         index++;  
         commpkg_flag = atoi(argv[index++]);
      }
      else if ( strcmp(argv[index], "-laplacian") == 0 )
      {
         index++;
         build_matrix_type      = 2;
         matrix_arg_index = index;
      }
      else if ( strcmp(argv[index], "-27pt") == 0 )
      {
         index++;
         build_matrix_type      = 4;
         matrix_arg_index = index;
      }
/*
      else if  ( strcmp(argv[index], "-nopreload") == 0 )
      {
         index++;  
         preload = 0;
      }
*/
      else if  ( strcmp(argv[index], "-loop") == 0 )
      {
         index++;  
         loop = atoi(argv[index++]);
      }
      else if  ( strcmp(argv[index], "-noparmprint") == 0 )
      {
         index++;  
         noparmprint = 1;
         
      }
      else  
      {
	 index++;
         /*hypre_printf("Warning: Unrecogized option '%s'\n",argv[index++] );*/
      }
   }
   
   
  
   /*-----------------------------------------------------------
    * Setup the Matrix problem   
    *-----------------------------------------------------------*/

  /*-----------------------------------------------------------
    *  Get actual partitioning- 
    *  read in an actual csr matrix.
    *-----------------------------------------------------------*/


   if (build_matrix_type ==1) /*read in a csr matrix from one file */
   {
      if (matrix_arg_index < argc)
      {
	 csrfilename = argv[matrix_arg_index];
      }
      else
      {
         hypre_printf("Error: No filename specified \n");
         exit(1);
      }
      if (myid == 0)
      {
	/*hypre_printf("  FromFile: %s\n", csrfilename);*/
         A_CSR = hypre_CSRMatrixRead(csrfilename);
      }
      row_part = NULL;
      col_part = NULL;

      parcsr_A = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, A_CSR, 
					       row_part, col_part);

      if (myid == 0) hypre_CSRMatrixDestroy(A_CSR);
   }
   else if (build_matrix_type ==2)
   {
      
      myBuildParLaplacian(argc, argv, matrix_arg_index,  &A_temp, !noparmprint);
     parcsr_A = (hypre_ParCSRMatrix *) A_temp;      
 
   }
   else if (build_matrix_type ==4)
   {
      myBuildParLaplacian27pt(argc, argv, matrix_arg_index, &A_temp, !noparmprint);
     parcsr_A = (hypre_ParCSRMatrix *) A_temp;
   }

 
  /*-----------------------------------------------------------
   * create a small problem so that timings are more accurate - 
   * code gets run twice (small laplace)
   *-----------------------------------------------------------*/

   /*this is no longer being used - preload = 0 is set at the beginning */

   if (preload == 1) 
   {
 
      /*hypre_printf("preload!\n");*/
      
        
       values[1] = -1;
       values[2] = -1;
       values[3] = -1;
       values[0] = - 6.0    ;

       nx = 2;
       ny = num_procs;
       nz = 2;

       P  = 1;
       Q  = num_procs;
       R  = 1;

       p = myid % P;
       q = (( myid - p)/P) % Q;
       r = ( myid - p - P*q)/( P*Q );
       
      A_temp_small = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD, nx, ny, nz, 
				      P, Q, R, p, q, r, values);
      small_A = (hypre_ParCSRMatrix *) A_temp_small;     

      /*do comm packages*/
      hypre_NewCommPkgCreate(small_A);
      hypre_NewCommPkgDestroy(small_A); 

      hypre_MatvecCommPkgCreate(small_A);
      hypre_ParCSRMatrixDestroy(small_A); 
  
   }





   /*-----------------------------------------------------------
    *  Prepare for timing
    *-----------------------------------------------------------*/

   /* instead of preloading, let's not time the first one if more than one*/

    
   if (!loop)
   {
      loop = 1;
      /* and don't do any timings */
      
   }
   else
   {
      
      loop +=1;
      if (loop < 2) loop = 2;
   }
      
   
   loop_times = hypre_CTAlloc(HYPRE_Real, loop);
   


/******************************************************************************************/   

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

   if (commpkg_flag == 1 || commpkg_flag ==3 )
   {
  
      /*-----------------------------------------------------------
       *  Create new comm package
       *-----------------------------------------------------------*/


    
      if (!myid) hypre_printf("********************************************************\n" );  
 
      /*do loop times*/
      for (i=0; i< loop; i++) 
      {
         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++) 
         {
         
            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
            
            start_time = hypre_MPI_Wtime();

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(1); 
#endif
     
            hypre_NewCommPkgCreate(parcsr_A);

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(0); 
#endif  
  
            end_time = hypre_MPI_Wtime();
            
            end_time = end_time - start_time;
        
            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                       HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);
         
            loop_times[i] += total_time;

            if (  !((i+1)== loop  &&  (k+1) == loop2)) hypre_NewCommPkgDestroy(parcsr_A); 
            
         }/*end of loop2 */
      
        
      } /*end of loop*/
      


      /* calculate the avg and std. */
      if (loop > 1)
      {
         
         /* calculate the avg and std. */
         stats_mo(loop_times, loop, &T_avg, &T_std);
      
         if (!myid) hypre_printf(" NewCommPkgCreate:  AVG. wall clock time =  %f seconds\n", T_avg);  
         if (!myid) hypre_printf("                    STD. for %d  runs     =  %f\n", loop-1, T_std);  
         if (!myid) hypre_printf("                    (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );  
         for (i=0; i< loop; i++) 
         {
            if (!myid) hypre_printf("      run %d  =  %f sec.\n", i, loop_times[i]);  
         }
         if (!myid) hypre_printf("********************************************************\n" );  
   
       }
       else 
       {
         if (!myid) hypre_printf("********************************************************\n" );  
         if (!myid) hypre_printf(" NewCommPkgCreate:\n");  
         if (!myid) hypre_printf("      run time =  %f sec.\n", loop_times[0]);  
         if (!myid) hypre_printf("********************************************************\n" );  
       }


     /*-----------------------------------------------------------
       *  Verbose printing
       *-----------------------------------------------------------*/

      /*some verification*/

       global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); 

       if (verbose) 
       {

	  ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
                                      &row_start, &row_end ,
                                       &col_start, &col_end );


	  comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);
     
          hypre_printf("myid = %i, my ACTUAL local range: [%i, %i]\n", myid, 
		 row_start, row_end);
	  
	
	  ierr = hypre_GetAssumedPartitionRowRange( myid, global_num_rows, &row_start, 
					      &row_end);


	  hypre_printf("myid = %i, my assumed local range: [%i, %i]\n", myid, 
		 row_start, row_end);

          hypre_printf("myid = %d, num_recvs = %d\n", myid, 
		 hypre_ParCSRCommPkgNumRecvs(comm_pkg)  );  

#if mydebug   
	  for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) 
	  {
              hypre_printf("myid = %d, recv proc = %d, vec_starts = [%d : %d]\n", 
		     myid,  hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], 
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
	   }
#endif 
	  hypre_printf("myid = %d, num_sends = %d\n", myid, 
		 hypre_ParCSRCommPkgNumSends(comm_pkg)  );  

#if mydebug
	  for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) 
          {
	    tmp_int =  hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -  
                     hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	    index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	    for (j=0; j< tmp_int; j++) 
	    {
	       hypre_printf("myid = %d, send proc = %d, send element = %d\n",myid,  
		      hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
		      hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); 
	     }   
	  }
#endif
       }
       /*-----------------------------------------------------------
        *  To verify correctness (if commpkg_flag = 3)
        *-----------------------------------------------------------*/

       if (commpkg_flag == 3 ) 
       {
          /*do a matvec - we are assuming a square matrix */
          row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);
   
          x_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
          hypre_ParVectorSetPartitioningOwner(x_new, 0);
          hypre_ParVectorInitialize(x_new);
          hypre_ParVectorSetRandomValues(x_new, 1);    
          
          y_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
          hypre_ParVectorSetPartitioningOwner(y_new, 0);
          hypre_ParVectorInitialize(y_new);
          hypre_ParVectorSetConstantValues(y_new, 0.0);
          
          /*y = 1.0*A*x+1.0*y */
          hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y_new);
       }
   
   /*-----------------------------------------------------------
    *  Clean up after MyComm
    *-----------------------------------------------------------*/


       hypre_NewCommPkgDestroy(parcsr_A); 

   }

  




/******************************************************************************************/
/******************************************************************************************/

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);


   if (commpkg_flag > 1 )
   {

      /*-----------------------------------------------------------
       *  Set up standard comm package
       *-----------------------------------------------------------*/

      bcast_rows[0] = 23;
      bcast_rows[1] = 1789;
      
      if (!myid) hypre_printf("********************************************************\n" );  
      /*do loop times*/
      for (i=0; i< loop; i++) 
      {

         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++) 
         {
            

            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

         
            start_time = hypre_MPI_Wtime();

#if time_gather
                  
            info = hypre_CTAlloc(HYPRE_Int, num_procs);
            
            hypre_MPI_Allgather(bcast_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hypre_MPI_COMM_WORLD); 

#endif

            hypre_MatvecCommPkgCreate(parcsr_A);

            end_time = hypre_MPI_Wtime();


            end_time = end_time - start_time;
        
            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                          HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);

            loop_times[i] += total_time;
         
       
         if (  !((i+1)== loop  &&  (k+1) == loop2))   hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(parcsr_A));
               
         }/* end of loop 2*/
         
        
      } /*end of loop*/
      
      /* calculate the avg and std. */
      if (loop > 1)
      {
         
         stats_mo(loop_times, loop, &T_avg, &T_std);      
         if (!myid) hypre_printf("Current CommPkgCreate:  AVG. wall clock time =  %f seconds\n", T_avg);  
         if (!myid) hypre_printf("                        STD. for %d  runs     =  %f\n", loop-1, T_std);  
         if (!myid) hypre_printf("                        (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );  
         for (i=0; i< loop; i++) 
         {
            if (!myid) hypre_printf("      run %d  =  %f sec.\n", i, loop_times[i]);  
         }
         if (!myid) hypre_printf("********************************************************\n" );  
         
      }
      else 
      {
         if (!myid) hypre_printf("********************************************************\n" );  
         if (!myid) hypre_printf(" Current CommPkgCreate:\n");  
         if (!myid) hypre_printf("      run time =  %f sec.\n", loop_times[0]);  
         if (!myid) hypre_printf("********************************************************\n" );  
      }





      /*-----------------------------------------------------------
       * Verbose printing
       *-----------------------------------------------------------*/

      /*some verification*/

    
       if (verbose) 
       {

          ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
						  &row_start, &row_end ,
						  &col_start, &col_end );


          comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);
     
          hypre_printf("myid = %i, std - my local range: [%i, %i]\n", myid, 
		 row_start, row_end);

          ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
						  &row_start, &row_end ,
						  &col_start, &col_end );

          hypre_printf("myid = %d, std - num_recvs = %d\n", myid, 
		 hypre_ParCSRCommPkgNumRecvs(comm_pkg)  );  

#if mydebug   
	  for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) 
          {
              hypre_printf("myid = %d, std - recv proc = %d, vec_starts = [%d : %d]\n", 
		     myid,  hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], 
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
	  }
#endif
          hypre_printf("myid = %d, std - num_sends = %d\n", myid, 
		 hypre_ParCSRCommPkgNumSends(comm_pkg));  


#if mydebug
          for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) 
          {
	     tmp_int =  hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -  
	                hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	     index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	     for (j=0; j< tmp_int; j++) 
	     {
	        hypre_printf("myid = %d, std - send proc = %d, send element = %d\n",myid,  
		       hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
		       hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); 
	     }   
	  } 
#endif
       }

       /*-----------------------------------------------------------
        * Verify correctness
        *-----------------------------------------------------------*/

 

       if (commpkg_flag == 3 ) 
       { 
          global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); 
          row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);
 
       
          y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows,row_starts);
          hypre_ParVectorSetPartitioningOwner(y, 0);
          hypre_ParVectorInitialize(y);
          hypre_ParVectorSetConstantValues(y, 0.0);

          hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y);
      
       }

   }






   /*-----------------------------------------------------------
    *  Compare matvecs for both comm packages (3)
    *-----------------------------------------------------------*/

   if (commpkg_flag == 3 ) 
   { 
     /*make sure that y and y_new are the same  - now y_new should=0*/   
     hypre_ParVectorAxpy( -1.0, y, y_new );


     hypre_ParVectorSetRandomValues(y, 1);

     ans = hypre_ParVectorInnerProd( y, y_new );
     if (!myid)
     {
        
        if ( fabs(ans) > 1e-8 ) 
        {  
           hypre_printf("!!!!! WARNING !!!!! should be zero if correct = %6.10f\n", 
                  ans); 
        } 
        else
        {
           hypre_printf("Matvecs match ( should be zero = %6.10f )\n", 
                  ans); 
        }
     }
     

   }
 

   /*-----------------------------------------------------------
    *  Clean up
    *-----------------------------------------------------------*/

    
   hypre_ParCSRMatrixDestroy(parcsr_A); /*this calls the standard comm 
                                          package destroy - but we'll destroy 
                                          ours separately until it is
                                          incorporated */

  if (commpkg_flag == 3 ) 
  { 

      hypre_ParVectorDestroy(x_new);
      hypre_ParVectorDestroy(y);
      hypre_ParVectorDestroy(y_new);
  }




   hypre_MPI_Finalize();

   return(ierr);


}
示例#4
0
HYPRE_Int
hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix    *A,
                            HYPRE_Int                    num_functions,
                            HYPRE_Int                   *dof_func,
                            HYPRE_Int                    option,
                            HYPRE_Int                    diag_option,     
                            hypre_ParCSRMatrix   **AN_ptr)
{
   MPI_Comm 	       comm            = hypre_ParCSRMatrixComm(A);
   hypre_CSRMatrix    *A_diag          = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int                *A_diag_i        = hypre_CSRMatrixI(A_diag);
   double             *A_diag_data     = hypre_CSRMatrixData(A_diag);


   hypre_CSRMatrix    *A_offd          = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int                *A_offd_i        = hypre_CSRMatrixI(A_offd);
   double             *A_offd_data     = hypre_CSRMatrixData(A_offd);
   HYPRE_Int                *A_diag_j        = hypre_CSRMatrixJ(A_diag);
   HYPRE_Int                *A_offd_j        = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int 		      *row_starts      = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int 		      *col_map_offd    = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int                 num_variables   = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int 		       num_nonzeros_offd = 0;
   HYPRE_Int 		       num_cols_offd = 0;
                  
   hypre_ParCSRMatrix *AN;
   hypre_CSRMatrix    *AN_diag;
   HYPRE_Int                *AN_diag_i;
   HYPRE_Int                *AN_diag_j;
   double             *AN_diag_data; 
   hypre_CSRMatrix    *AN_offd;
   HYPRE_Int                *AN_offd_i;
   HYPRE_Int                *AN_offd_j;
   double             *AN_offd_data; 
   HYPRE_Int		      *col_map_offd_AN;
   HYPRE_Int		      *new_col_map_offd;
   HYPRE_Int		      *row_starts_AN;
   HYPRE_Int		       AN_num_nonzeros_diag = 0;
   HYPRE_Int		       AN_num_nonzeros_offd = 0;
   HYPRE_Int		       num_cols_offd_AN;
   HYPRE_Int		       new_num_cols_offd;
                 
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   HYPRE_Int		       num_sends;
   HYPRE_Int		       num_recvs;
   HYPRE_Int		      *send_procs;
   HYPRE_Int		      *send_map_starts;
   HYPRE_Int		      *send_map_elmts;
   HYPRE_Int		      *new_send_map_elmts;
   HYPRE_Int		      *recv_procs;
   HYPRE_Int		      *recv_vec_starts;

   hypre_ParCSRCommPkg *comm_pkg_AN;
   HYPRE_Int		      *send_procs_AN;
   HYPRE_Int		      *send_map_starts_AN;
   HYPRE_Int		      *send_map_elmts_AN;
   HYPRE_Int		      *recv_procs_AN;
   HYPRE_Int		      *recv_vec_starts_AN;

   HYPRE_Int                 i, j, k, k_map;
                      
   HYPRE_Int                 ierr = 0;

   HYPRE_Int		       index, row;
   HYPRE_Int		       start_index;
   HYPRE_Int		       num_procs;
   HYPRE_Int		       node, cnt;
   HYPRE_Int		       mode;
   HYPRE_Int		       new_send_elmts_size;

   HYPRE_Int		       global_num_nodes;
   HYPRE_Int		       num_nodes;
   HYPRE_Int		       num_fun2;
   HYPRE_Int		      *map_to_node;
   HYPRE_Int		      *map_to_map;
   HYPRE_Int		      *counter;

   double sum;
   double *data;
   

   hypre_MPI_Comm_size(comm,&num_procs);

   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   mode = fabs(option);

   comm_pkg_AN = NULL;
   col_map_offd_AN = NULL;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2);

   for (i=0; i < 2; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
	  hypre_printf("nodes not properly aligned or incomplete info!\n");
	  return (87);
      }
   }
   
   global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions;


#else
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1);

  for (i=0; i < num_procs+1; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
	  hypre_printf("nodes not properly aligned or incomplete info!\n");
	  return (87);
      }
   }
   
   global_num_nodes = row_starts_AN[num_procs];

#endif

 
   num_nodes =  num_variables/num_functions;
   num_fun2 = num_functions*num_functions;

   map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables);
   AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);
   counter = hypre_CTAlloc(HYPRE_Int, num_nodes);
   for (i=0; i < num_variables; i++)
      map_to_node[i] = i/num_functions;
   for (i=0; i < num_nodes; i++)
      counter[i] = -1;

   AN_num_nonzeros_diag = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      AN_diag_i[i] = AN_num_nonzeros_diag;
      for (j=0; j < num_functions; j++)
      {
	 for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	 {
	    k_map = map_to_node[A_diag_j[k]];
	    if (counter[k_map] < i)
	    {
	       counter[k_map] = i;
	       AN_num_nonzeros_diag++;
	    }
	 }
	 row++;
      }
   }
   AN_diag_i[num_nodes] = AN_num_nonzeros_diag;

   AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag);	
   AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag);	

   AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag);
   hypre_CSRMatrixI(AN_diag) = AN_diag_i;
   hypre_CSRMatrixJ(AN_diag) = AN_diag_j;
   hypre_CSRMatrixData(AN_diag) = AN_diag_data;
       
   for (i=0; i < num_nodes; i++)
      counter[i] = -1;
   index = 0;
   start_index = 0;
   row = 0;

   switch (mode)
   {
      case 1:  /* frobenius norm */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = A_diag_data[k]*A_diag_data[k];
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += 
				A_diag_data[k]*A_diag_data[k];
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
            AN_diag_data[i] = sqrt(AN_diag_data[i]);

      }
      break;
      
      case 2:  /* sum of abs. value of all elements in each block */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = fabs(A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += fabs(A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
            AN_diag_data[i] /= num_fun2;
      }
      break;

      case 3:  /* largest element of each block (sets true value - not abs. value) */
      {

         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
      	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
      	       {
      	          k_map = map_to_node[A_diag_j[k]];
      	          if (counter[k_map] < start_index)
      	          {
      	             counter[k_map] = index;
      	             AN_diag_j[index] = k_map;
      	             AN_diag_data[index] = A_diag_data[k];
      	             index++;
      	          }
      	          else
      	          {
      	             if (fabs(A_diag_data[k]) > 
				fabs(AN_diag_data[counter[k_map]]))
      	                AN_diag_data[counter[k_map]] = A_diag_data[k];
      	          }
      	       }
      	       row++;
            }
            start_index = index;
         }
      }
      break;

      case 4:  /* inf. norm (row-sum)  */
      {

         data = hypre_CTAlloc(double, AN_num_nonzeros_diag*num_functions);

         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             data[index*num_functions + j] = fabs(A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             data[(counter[k_map])*num_functions + j] += fabs(A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
         {
            AN_diag_data[i]  = data[i*num_functions];
            
            for (j=1; j< num_functions; j++)
            {
               AN_diag_data[i]  = hypre_max( AN_diag_data[i],data[i*num_functions+j]);
            }
         }
         hypre_TFree(data);
      
      }
      break;

      case 6:  /* sum of all elements in each block */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = (A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += (A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
      }
      break;

   }

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (DO MORE BELOW) */
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i]; 
         sum = 0.0;
         for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++)
         {
            sum += AN_diag_data[k];
            
         }
         AN_diag_data[index] = -sum;
      }
      
   }
   else if (diag_option == 2)
   {
      
      /*  make all diagonal entries negative */
      /* the diagonal is the first element listed in each row - */
      
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i];
         AN_diag_data[index] = - AN_diag_data[index];
      }
   }






   num_nonzeros_offd = A_offd_i[num_variables];
   AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);

   num_cols_offd_AN = 0;

   if (comm_pkg)
   {
      comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
      hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm;
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends;
      num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs;
      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      send_procs_AN = NULL;
      send_map_elmts_AN = NULL;
      if (num_sends) 
      {
         send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends);
         send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]);
      }
      send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1);
      recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
      recv_procs_AN = NULL;
      if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs);
      for (i=0; i < num_sends; i++)
         send_procs_AN[i] = send_procs[i];
      for (i=0; i < num_recvs; i++)
         recv_procs_AN[i] = recv_procs[i];

      send_map_starts_AN[0] = 0;
      cnt = 0;
      for (i=0; i < num_sends; i++)
      {
	 k_map = send_map_starts[i];
	 if (send_map_starts[i+1]-k_map)
            send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions;
         for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++)
         {
            node = send_map_elmts[j]/num_functions;
            if (node > send_map_elmts_AN[cnt-1])
	       send_map_elmts_AN[cnt++] = node; 
         }
         send_map_starts_AN[i+1] = cnt;
      }
      hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN;
   }

   num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
   if (num_cols_offd)
   {
      if (num_cols_offd > num_variables)
      {
         hypre_TFree(map_to_node);
         map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd);
      }

      num_cols_offd_AN = 1;
      map_to_node[0] = col_map_offd[0]/num_functions;
      for (i=1; i < num_cols_offd; i++)
      {
         map_to_node[i] = col_map_offd[i]/num_functions;
         if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++;
      }
      
      if (num_cols_offd_AN > num_nodes)
      {
         hypre_TFree(counter);
         counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      }

      map_to_map = NULL;
      col_map_offd_AN = NULL;
      map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      col_map_offd_AN[0] = map_to_node[0];
      recv_vec_starts_AN[0] = 0;
      cnt = 1;
      for (i=0; i < num_recvs; i++)
      {
         for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         {
            node = map_to_node[j];
	    if (node > col_map_offd_AN[cnt-1])
	    {
	       col_map_offd_AN[cnt++] = node; 
	    }
	    map_to_map[j] = cnt-1;
         }
         recv_vec_starts_AN[i+1] = cnt;
      }

      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;

      AN_num_nonzeros_offd = 0;
      row = 0;
      for (i=0; i < num_nodes; i++)
      {
         AN_offd_i[i] = AN_num_nonzeros_offd;
         for (j=0; j < num_functions; j++)
         {
	    for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	    {
	       k_map = map_to_map[A_offd_j[k]];
	       if (counter[k_map] < i)
	       {
	          counter[k_map] = i;
	          AN_num_nonzeros_offd++;
	       }
	    }
	    row++;
         }
      }
      AN_offd_i[num_nodes] = AN_num_nonzeros_offd;
   }

       
   AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN,	
		AN_num_nonzeros_offd);
   hypre_CSRMatrixI(AN_offd) = AN_offd_i;
   if (AN_num_nonzeros_offd)
   {
      AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd);	
      AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd);	
      hypre_CSRMatrixJ(AN_offd) = AN_offd_j;
      hypre_CSRMatrixData(AN_offd) = AN_offd_data;
   
      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;
      index = 0;
      row = 0;
      AN_offd_i[0] = 0;
      start_index = 0;
      switch (mode)
      {
         case 1: /* frobenius norm */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	          {
	             k_map = map_to_map[A_offd_j[k]];
	             if (counter[k_map] < start_index)
	             {
	                counter[k_map] = index;
	                AN_offd_j[index] = k_map;
	                AN_offd_data[index] = A_offd_data[k]*A_offd_data[k];
	                index++;
	             }
	             else
	             {
	                AN_offd_data[counter[k_map]] += 
				A_offd_data[k]*A_offd_data[k];
	             }
	          }
	          row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
	       AN_offd_data[i] = sqrt(AN_offd_data[i]);
         }
         break;
      
         case 2:  /* sum of abs. value of all elements in block */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	          {
	             k_map = map_to_map[A_offd_j[k]];
	             if (counter[k_map] < start_index)
	             {
	                counter[k_map] = index;
	                AN_offd_j[index] = k_map;
	                AN_offd_data[index] = fabs(A_offd_data[k]);
	                index++;
	             }
	             else
	             {
	                AN_offd_data[counter[k_map]] += fabs(A_offd_data[k]);
	             }
	          }
	          row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
               AN_offd_data[i] /= num_fun2;
         }
         break;

         case 3: /* largest element in each block (not abs. value ) */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
      	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
      	          {
      	             k_map = map_to_map[A_offd_j[k]];
      	             if (counter[k_map] < start_index)
      	             {
      	                counter[k_map] = index;
      	                AN_offd_j[index] = k_map;
      	                AN_offd_data[index] = A_offd_data[k];
      	                index++;
      	             }
      	             else
      	             {
      	                if (fabs(A_offd_data[k]) > 
				fabs(AN_offd_data[counter[k_map]]))
      	                   AN_offd_data[counter[k_map]] = A_offd_data[k];
      	             }
      	          }
      	          row++;
               }
               start_index = index;
            }
         }
         break;
         
         case 4:  /* inf. norm (row-sum)  */
         {
            
            data = hypre_CTAlloc(double, AN_num_nonzeros_offd*num_functions);
            
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
                  for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
                  {
                     k_map = map_to_map[A_offd_j[k]];
                     if (counter[k_map] < start_index)
                     {
                        counter[k_map] = index;
                        AN_offd_j[index] = k_map;
                        data[index*num_functions + j] = fabs(A_offd_data[k]);
                        index++;
                     }
                     else
                     {
                        data[(counter[k_map])*num_functions + j] += fabs(A_offd_data[k]);
                     }
                  }
                  row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
            {
               AN_offd_data[i]  = data[i*num_functions];
               
               for (j=1; j< num_functions; j++)
               {
                  AN_offd_data[i]  = hypre_max( AN_offd_data[i],data[i*num_functions+j]);
               }
            }
            hypre_TFree(data);
            
         }
         break;
         
         case 6:  /* sum of value of all elements in block */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
                  for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
                  {
                     k_map = map_to_map[A_offd_j[k]];
                     if (counter[k_map] < start_index)
                     {
                        counter[k_map] = index;
                        AN_offd_j[index] = k_map;
                        AN_offd_data[index] = (A_offd_data[k]);
                        index++;
                     }
                     else
                     {
                        AN_offd_data[counter[k_map]] += (A_offd_data[k]);
                     }
                  }
                  row++;
               }
               start_index = index;
            }
            
         }
         break;
      }
   
      hypre_TFree(map_to_map);
   }

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (here we are adding the 
         off_diag contribution)*/
      /* the diagonal is the first element listed in each row of AN_diag_data - */
      for (i=0; i < num_nodes; i++)
      {
         sum = 0.0;
         for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++)
         {
            sum += AN_offd_data[k];
            
         }
         index = AN_diag_i[i];/* location of diag entry in data */ 
         AN_diag_data[index] -= sum; /* subtract from current value */
      }
      
   }

    
   AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes,
		row_starts_AN, row_starts_AN, num_cols_offd_AN,
		AN_num_nonzeros_diag, AN_num_nonzeros_offd);

   /* we already created the diag and offd matrices - so we don't need the ones
      created above */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN));
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN));
   hypre_ParCSRMatrixDiag(AN) = AN_diag;
   hypre_ParCSRMatrixOffd(AN) = AN_offd;


   hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN;
   hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN;

   new_num_cols_offd = num_functions*num_cols_offd_AN;

   if (new_num_cols_offd > num_cols_offd)
   {
      new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd);
      cnt = 0;
      for (i=0; i < num_cols_offd_AN; i++)
      {
	 for (j=0; j < num_functions; j++)
         {
 	    new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j;
         }
      }
      cnt = 0;
      for (i=0; i < num_cols_offd; i++)
      {
         while (col_map_offd[i] >  new_col_map_offd[cnt])
            cnt++;
         col_map_offd[i] = cnt++;
      }
      for (i=0; i < num_recvs+1; i++)
      {
         recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i];
      }

      for (i=0; i < num_nonzeros_offd; i++)
      {
         j = A_offd_j[i];
	 A_offd_j[i] = col_map_offd[j];
      }
      hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd;
      hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd;
      hypre_TFree(col_map_offd);
   }
 
   hypre_TFree(map_to_node);
   new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions;

   if (new_send_elmts_size > send_map_starts[num_sends])
   {
      new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size);
      cnt = 0;
      send_map_starts[0] = 0;
      for (i=0; i < num_sends; i++)
      {
         send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions;
         for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++)
	 {
            for (k=0; k < num_functions; k++)
	       new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k;
	 }
      }
      hypre_TFree(send_map_elmts);
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts;
   }
 
   *AN_ptr        = AN;

   hypre_TFree(counter);

   return (ierr);
}
示例#5
0
hypre_ParCSRMatrix * hypre_ParMatmul_FC(
   hypre_ParCSRMatrix * A, hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker,
   HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd )
/* hypre_parMatmul_FC creates and returns the "Fine"-designated rows of the
   matrix product A*P.  A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC
   where nC is the number of coarse rows/columns, nF the number of fine
   rows/columns.  The size of C=A*P is (nC+nF)*nC, even though not all rows
   of C are actually computed.  If we were to construct a matrix consisting
   only of the computed rows of C, its size would be nF*nC.
   "Fine" is defined solely by the marker array, and for example could be
   a proper subset of the fine points of a multigrid hierarchy.
*/
{
   /* To compute a submatrix of C containing only the computed data, i.e.
      only "Fine" rows, we would have to do a lot of computational work,
      with a lot of communication.  The communication is because such a
      matrix would need global information that depends on which rows are
      "Fine".
   */

   MPI_Comm 	   comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   
   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int             *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int             *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   
   double          *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int             *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int             *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int	num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int	num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int	num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
   
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   
   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);

   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int		   *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
   
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);

   HYPRE_Int	first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_Int	last_col_diag_P;
   HYPRE_Int *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int	num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int	num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *C;
   HYPRE_Int		      *col_map_offd_C;
   HYPRE_Int		      *map_P_to_C;

   hypre_CSRMatrix *C_diag;

   double          *C_diag_data;
   HYPRE_Int             *C_diag_i;
   HYPRE_Int             *C_diag_j;

   hypre_CSRMatrix *C_offd;

   double          *C_offd_data=NULL;
   HYPRE_Int             *C_offd_i=NULL;
   HYPRE_Int             *C_offd_j=NULL;

   HYPRE_Int              C_diag_size;
   HYPRE_Int              C_offd_size;
   HYPRE_Int		    num_cols_offd_C = 0;
   
   hypre_CSRMatrix *Ps_ext;
   
   double          *Ps_ext_data;
   HYPRE_Int             *Ps_ext_i;
   HYPRE_Int             *Ps_ext_j;

   double          *P_ext_diag_data;
   HYPRE_Int             *P_ext_diag_i;
   HYPRE_Int             *P_ext_diag_j;
   HYPRE_Int              P_ext_diag_size;

   double          *P_ext_offd_data;
   HYPRE_Int             *P_ext_offd_i;
   HYPRE_Int             *P_ext_offd_j;
   HYPRE_Int              P_ext_offd_size;

   HYPRE_Int		   *P_marker;
   HYPRE_Int		   *temp;

   HYPRE_Int              i, j;
   HYPRE_Int              i1, i2, i3;
   HYPRE_Int              jj2, jj3;
   
   HYPRE_Int              jj_count_diag, jj_count_offd;
   HYPRE_Int              jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int              start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int		    n_rows_A_global, n_cols_A_global;
   HYPRE_Int		    n_rows_P_global, n_cols_P_global;
   HYPRE_Int              allsquare = 0;
   HYPRE_Int              cnt, cnt_offd, cnt_diag;
   HYPRE_Int 		    num_procs;
   HYPRE_Int 		    value;

   double           a_entry;
   double           a_b_product;
   
   n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P);

   if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P)
   {
	hypre_printf(" Error! Incompatible matrix dimensions!\n");
	return NULL;
   }
   /* if (num_rows_A==num_cols_P) allsquare = 1; */

   /*-----------------------------------------------------------------------
    *  Extract P_ext, i.e. portion of P that is stored on neighbor procs
    *  and needed locally for matrix matrix product 
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);

   if (num_procs > 1)
   {
       /*---------------------------------------------------------------------
    	* If there exists no CommPkg for A, a CommPkg is generated using
    	* equally load balanced partitionings within 
	* hypre_ParCSRMatrixExtractBExt
    	*--------------------------------------------------------------------*/
   	Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1);
   	Ps_ext_data = hypre_CSRMatrixData(Ps_ext);
   	Ps_ext_i    = hypre_CSRMatrixI(Ps_ext);
   	Ps_ext_j    = hypre_CSRMatrixJ(Ps_ext);
   }
   P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_diag_size = 0;
   P_ext_offd_size = 0;
   last_col_diag_P = first_col_diag_P + num_cols_diag_P -1;

   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
            P_ext_offd_size++;
         else
            P_ext_diag_size++;
      P_ext_diag_i[i+1] = P_ext_diag_size;
      P_ext_offd_i[i+1] = P_ext_offd_size;
   }

   if (P_ext_diag_size)
   {
      P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size);
      P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size);
   }
   if (P_ext_offd_size)
   {
      P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size);
      P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size);
   }

   cnt_offd = 0;
   cnt_diag = 0;
   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_j[cnt_offd] = Ps_ext_j[j];
            P_ext_offd_data[cnt_offd++] = Ps_ext_data[j];
         }
         else
         {
            P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P;
            P_ext_diag_data[cnt_diag++] = Ps_ext_data[j];
         }
   }

   if (num_procs > 1)
   {
      hypre_CSRMatrixDestroy(Ps_ext);
      Ps_ext = NULL;
   }

   cnt = 0;
   if (P_ext_offd_size || num_cols_offd_P)
   {
      temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P);
      for (i=0; i < P_ext_offd_size; i++)
         temp[i] = P_ext_offd_j[i];
      cnt = P_ext_offd_size;
      for (i=0; i < num_cols_offd_P; i++)
         temp[cnt++] = col_map_offd_P[i];
   }
   if (cnt)
   {
      qsort0(temp, 0, cnt-1);

      num_cols_offd_C = 1;
      value = temp[0];
      for (i=1; i < cnt; i++)
      {
         if (temp[i] > value)
         {
            value = temp[i];
            temp[num_cols_offd_C++] = value;
         }
      }
   }

   if (num_cols_offd_C)
        col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);

   for (i=0; i < num_cols_offd_C; i++)
      col_map_offd_C[i] = temp[i];

   if (P_ext_offd_size || num_cols_offd_P)
      hypre_TFree(temp);

   for (i=0 ; i < P_ext_offd_size; i++)
      P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C,
                                           P_ext_offd_j[i],
                                           num_cols_offd_C);
   if (num_cols_offd_P)
   {
      map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P);

      cnt = 0;
      for (i=0; i < num_cols_offd_C; i++)
         if (col_map_offd_C[i] == col_map_offd_P[cnt])
         {
            map_P_to_C[cnt++] = i;
            if (cnt == num_cols_offd_P) break;
         }
   }

   /*-----------------------------------------------------------------------
   *  Allocate marker array.
    *-----------------------------------------------------------------------*/

   P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C);

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }


/* no changes for the marked version above this point */
   /* This function call is the first pass: */
   hypre_ParMatmul_RowSizes_Marked(
      &C_diag_i, &C_offd_i, &P_marker,
      A_diag_i, A_diag_j, A_offd_i, A_offd_j,
      P_diag_i, P_diag_j, P_offd_i, P_offd_j,
      P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j,
      map_P_to_C,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A, allsquare,
      num_cols_diag_P, num_cols_offd_P,
      num_cols_offd_C, CF_marker, dof_func, dof_func_offd
      );

   /* The above call of hypre_ParMatmul_RowSizes_Marked computed
      two scalars: C_diag_size, C_offd_size,
      and two arrays: C_diag_i, C_offd_i
      ( P_marker is also computed, but only used internally )
   */

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/
 
   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;
   C_diag_data = hypre_CTAlloc(double, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   { 
   	C_offd_data = hypre_CTAlloc(double, C_offd_size);
   	C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   } 


   /*-----------------------------------------------------------------------
    *  Second Pass: Fill in C_diag_data and C_diag_j.
    *  Second Pass: Fill in C_offd_data and C_offd_j.
    *-----------------------------------------------------------------------*/

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }
   
   /*-----------------------------------------------------------------------
    *  Loop over interior c-points.
    *-----------------------------------------------------------------------*/
    
   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {

      if ( CF_marker[i1] < 0 )  /* i1 is a fine row */
         /* ... This and the coarse row code are the only parts between first pass
            and near the end where
            hypre_ParMatmul_FC is different from the regular hypre_ParMatmul */
      {

         /*--------------------------------------------------------------------
          *  Create diagonal entry, C_{i1,i1} 
          *--------------------------------------------------------------------*/

         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_offd.
          *-----------------------------------------------------------------*/
         
	 if (num_cols_offd_A)
	 {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];
               if( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] )
               {  /* interpolate only like "functions" */
                  a_entry = A_offd_data[jj2];
            
                  /*-----------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext.
                   *-----------------------------------------------------------*/

                  for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+P_ext_offd_j[jj3];
                     a_b_product = a_entry * P_ext_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                        C_offd_data[P_marker[i3]] += a_b_product;
                  }
                  for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
                  {
                     i3 = P_ext_diag_j[jj3];
                     a_b_product = a_entry * P_ext_diag_data[jj3];

                     if (P_marker[i3] < jj_row_begin_diag)
                     {
                        P_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                        C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               else
               {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                     different "functions".  As we haven't created an entry for
                     C(i1,i2), nothing needs to be done. */
               }

            }
         }

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];
            if( dof_func==NULL || dof_func[i1] == dof_func[i2] )
            {  /* interpolate only like "functions" */
               a_entry = A_diag_data[jj2];
            
               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_diag.
                *-----------------------------------------------------------*/

               for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
               {
                  i3 = P_diag_j[jj3];
                  a_b_product = a_entry * P_diag_data[jj3];
                  
                  /*--------------------------------------------------------
                   *  Check P_marker to see that C_{i1,i3} has not already
                   *  been accounted for. If it has not, create a new entry.
                   *  If it has, add new contribution.
                   *--------------------------------------------------------*/

                  if (P_marker[i3] < jj_row_begin_diag)
                  {
                     P_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               if (num_cols_offd_P)
	       {
                  for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]];
                     a_b_product = a_entry * P_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/

                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
            else
            {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                  different "functions".  As we haven't created an entry for
                  C(i1,i2), nothing needs to be done. */
            }
         }
      }
      else  /* i1 is a coarse row.*/
         /* Copy P coarse-row values to C.  This is useful if C is meant to
            become a replacement for P */
      {
	 if (num_cols_offd_P)
	 {
            for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++)
            {
               C_offd_j[jj_count_offd] = P_offd_j[jj_count_offd];
               C_offd_data[jj_count_offd] = P_offd_data[jj_count_offd];
               ++jj_count_offd;
            }
         }
         for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++)
         {
            C_diag_j[jj_count_diag] = P_diag_j[jj2];
            C_diag_data[jj_count_diag] = P_diag_data[jj2];
            ++jj_count_diag;
         }
      }
   }

   C = hypre_ParCSRMatrixCreate(
      comm, n_rows_A_global, n_cols_P_global,
      row_starts_A, col_starts_P, num_cols_offd_C, C_diag_size, C_offd_size );

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C,0);
   hypre_ParCSRMatrixSetColStartsOwner(C,0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data; 
   hypre_CSRMatrixI(C_diag) = C_diag_i; 
   hypre_CSRMatrixJ(C_diag) = C_diag_j; 

   C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrixI(C_offd) = C_offd_i; 
   hypre_ParCSRMatrixOffd(C) = C_offd;

   if (num_cols_offd_C)
   {
      hypre_CSRMatrixData(C_offd) = C_offd_data; 
      hypre_CSRMatrixJ(C_offd) = C_offd_j; 
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

   }

   /*-----------------------------------------------------------------------
    *  Free various arrays
    *-----------------------------------------------------------------------*/

   hypre_TFree(P_marker);   
   hypre_TFree(P_ext_diag_i);
   if (P_ext_diag_size)
   {
      hypre_TFree(P_ext_diag_j);
      hypre_TFree(P_ext_diag_data);
   }
   hypre_TFree(P_ext_offd_i);
   if (P_ext_offd_size)
   {
      hypre_TFree(P_ext_offd_j);
      hypre_TFree(P_ext_offd_data);
   }
   if (num_cols_offd_P) hypre_TFree(map_P_to_C);

   return C;
   
}
示例#6
0
HYPRE_Int  hypre_BoomerAMGRelaxT( hypre_ParCSRMatrix *A,
                        hypre_ParVector    *f,
                        HYPRE_Int                *cf_marker,
                        HYPRE_Int                 relax_type,
                        HYPRE_Int                 relax_points,
                        HYPRE_Real          relax_weight,
                        hypre_ParVector    *u,
                        hypre_ParVector    *Vtemp )
{
   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   HYPRE_Real     *A_diag_data  = hypre_CSRMatrixData(A_diag);
   HYPRE_Int            *A_diag_i     = hypre_CSRMatrixI(A_diag);

   HYPRE_Int             n_global= hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int             n       = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int	      	   first_index = hypre_ParVectorFirstIndex(u);
   
   hypre_Vector   *u_local = hypre_ParVectorLocalVector(u);
   HYPRE_Real     *u_data  = hypre_VectorData(u_local);

   hypre_Vector   *Vtemp_local = hypre_ParVectorLocalVector(Vtemp);
   HYPRE_Real     *Vtemp_data = hypre_VectorData(Vtemp_local);

   hypre_CSRMatrix *A_CSR;
   HYPRE_Int		   *A_CSR_i;   
   HYPRE_Int		   *A_CSR_j;
   HYPRE_Real	   *A_CSR_data;
   
   hypre_Vector    *f_vector;
   HYPRE_Real	   *f_vector_data;

   HYPRE_Int             i;
   HYPRE_Int             jj;
   HYPRE_Int             column;
   HYPRE_Int             relax_error = 0;

   HYPRE_Real     *A_mat;
   HYPRE_Real     *b_vec;

   HYPRE_Real      zero = 0.0;
  
   /*-----------------------------------------------------------------------
    * Switch statement to direct control based on relax_type:
    *     relax_type = 7 -> Jacobi (uses ParMatvec)
    *     relax_type = 9 -> Direct Solve
    *-----------------------------------------------------------------------*/
   
   switch (relax_type)
   {            

      case 7: /* Jacobi (uses ParMatvec) */
      {
 
         /*-----------------------------------------------------------------
          * Copy f into temporary vector.
          *-----------------------------------------------------------------*/
        
         hypre_ParVectorCopy(f,Vtemp); 
 
         /*-----------------------------------------------------------------
          * Perform MatvecT Vtemp=f-A^Tu
          *-----------------------------------------------------------------*/
 
            hypre_ParCSRMatrixMatvecT(-1.0,A, u, 1.0, Vtemp);
            for (i = 0; i < n; i++)
            {
 
               /*-----------------------------------------------------------
                * If diagonal is nonzero, relax point i; otherwise, skip it.
                *-----------------------------------------------------------*/
           
               if (A_diag_data[A_diag_i[i]] != zero)
               {
                  u_data[i] += relax_weight * Vtemp_data[i] 
				/ A_diag_data[A_diag_i[i]];
               }
            }
      }
      break;
      
      
      case 9: /* Direct solve: use gaussian elimination */
      {

         /*-----------------------------------------------------------------
          *  Generate CSR matrix from ParCSRMatrix A
          *-----------------------------------------------------------------*/

	 if (n)
	 {
	    A_CSR = hypre_ParCSRMatrixToCSRMatrixAll(A);
	    f_vector = hypre_ParVectorToVectorAll(f);
 	    A_CSR_i = hypre_CSRMatrixI(A_CSR);
 	    A_CSR_j = hypre_CSRMatrixJ(A_CSR);
 	    A_CSR_data = hypre_CSRMatrixData(A_CSR);
   	    f_vector_data = hypre_VectorData(f_vector);

            A_mat = hypre_CTAlloc(HYPRE_Real, n_global*n_global);
            b_vec = hypre_CTAlloc(HYPRE_Real, n_global);    

            /*---------------------------------------------------------------
             *  Load transpose of CSR matrix into A_mat.
             *---------------------------------------------------------------*/

            for (i = 0; i < n_global; i++)
            {
               for (jj = A_CSR_i[i]; jj < A_CSR_i[i+1]; jj++)
               {
                  column = A_CSR_j[jj];
                  A_mat[column*n_global+i] = A_CSR_data[jj];
               }
               b_vec[i] = f_vector_data[i];
            }

            relax_error = gselim(A_mat,b_vec,n_global);

            for (i = 0; i < n; i++)
            {
               u_data[i] = b_vec[first_index+i];
            }

	    hypre_TFree(A_mat); 
            hypre_TFree(b_vec);
            hypre_CSRMatrixDestroy(A_CSR);
            A_CSR = NULL;
            hypre_SeqVectorDestroy(f_vector);
            f_vector = NULL;
         
         }
      }
      break;   
   }

   return(relax_error); 
}
/*
  Assume that we are given a fine and coarse topology and the
  coarse degrees of freedom (DOFs) have been chosen. Assume also,
  that the global interpolation matrix dof_DOF has a prescribed
  nonzero pattern. Then, the fine degrees of freedom can be split
  into 4 groups (here "i" stands for "interior"):

  NODEidof - dofs which are interpolated only from the DOF
             in one coarse vertex
  EDGEidof - dofs which are interpolated only from the DOFs
             in one coarse edge
  FACEidof - dofs which are interpolated only from the DOFs
             in one coarse face
  ELEMidof - dofs which are interpolated only from the DOFs
             in one coarse element

  The interpolation operator dof_DOF can be build in 4 steps, by
  consequently filling-in the rows corresponding to the above groups.
  The code below uses harmonic extension to extend the interpolation
  from one group to the next.
*/
HYPRE_Int hypre_ND1AMGeInterpolation (hypre_ParCSRMatrix       * Aee,
                                hypre_ParCSRMatrix       * ELEM_idof,
                                hypre_ParCSRMatrix       * FACE_idof,
                                hypre_ParCSRMatrix       * EDGE_idof,
                                hypre_ParCSRMatrix       * ELEM_FACE,
                                hypre_ParCSRMatrix       * ELEM_EDGE,
                                HYPRE_Int                  num_OffProcRows,
                                hypre_MaxwellOffProcRow ** OffProcRows,
                                hypre_IJMatrix           * IJ_dof_DOF)
{
   HYPRE_Int ierr = 0;

   HYPRE_Int  i, j, k;
   HYPRE_Int *offproc_rnums, *swap;

   hypre_ParCSRMatrix * dof_DOF = hypre_IJMatrixObject(IJ_dof_DOF);
   hypre_ParCSRMatrix * ELEM_DOF = ELEM_EDGE;
   hypre_ParCSRMatrix * ELEM_FACEidof;
   hypre_ParCSRMatrix * ELEM_EDGEidof;
   hypre_CSRMatrix *A, *P;
   HYPRE_Int numELEM = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(ELEM_EDGE));

   HYPRE_Int getrow_ierr;
   HYPRE_Int three_dimensional_problem;

   MPI_Comm comm= hypre_ParCSRMatrixComm(Aee);
   HYPRE_Int      myproc;

   hypre_MPI_Comm_rank(comm, &myproc);

#if 0
   hypre_IJMatrix * ij_dof_DOF = hypre_CTAlloc(hypre_IJMatrix, 1);
   /* Convert dof_DOF to IJ matrix, so we can use AddToValues */
   hypre_IJMatrixComm(ij_dof_DOF) = hypre_ParCSRMatrixComm(dof_DOF);
   hypre_IJMatrixRowPartitioning(ij_dof_DOF) =
      hypre_ParCSRMatrixRowStarts(dof_DOF);
   hypre_IJMatrixColPartitioning(ij_dof_DOF) =
      hypre_ParCSRMatrixColStarts(dof_DOF);
   hypre_IJMatrixObject(ij_dof_DOF) = dof_DOF;
   hypre_IJMatrixAssembleFlag(ij_dof_DOF) = 1;
#endif

  /* sort the offproc rows to get quicker comparison for later */
   if (num_OffProcRows)
   {
      offproc_rnums= hypre_TAlloc(HYPRE_Int, num_OffProcRows);
      swap         = hypre_TAlloc(HYPRE_Int, num_OffProcRows);
      for (i= 0; i< num_OffProcRows; i++)
      {
         offproc_rnums[i]=(OffProcRows[i] -> row);
         swap[i]         = i;
      }
   }

   if (num_OffProcRows > 1)
   {
      hypre_qsort2i(offproc_rnums, swap, 0, num_OffProcRows-1);
   }

   if (FACE_idof == EDGE_idof)
      three_dimensional_problem = 0;
   else
      three_dimensional_problem = 1;

   /* ELEM_FACEidof = ELEM_FACE x FACE_idof */
   if (three_dimensional_problem)
      ELEM_FACEidof = hypre_ParMatmul(ELEM_FACE, FACE_idof);

   /* ELEM_EDGEidof = ELEM_EDGE x EDGE_idof */
   ELEM_EDGEidof = hypre_ParMatmul(ELEM_EDGE, EDGE_idof);

   /* Loop over local coarse elements */
   k = hypre_ParCSRMatrixFirstRowIndex(ELEM_EDGE);
   for (i = 0; i < numELEM; i++, k++)
   {
      HYPRE_Int size1, size2;
      HYPRE_Int *col_ind0, *col_ind1, *col_ind2;

      HYPRE_Int num_DOF, *DOF0, *DOF;
      HYPRE_Int num_idof, *idof0, *idof;
      HYPRE_Int num_bdof, *bdof;

      double *boolean_data;

      /* Determine the coarse DOFs */
      hypre_ParCSRMatrixGetRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data);
      DOF= hypre_TAlloc(HYPRE_Int, num_DOF);
      for (j= 0; j< num_DOF; j++)
      {
         DOF[j]= DOF0[j];
      }
      hypre_ParCSRMatrixRestoreRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data);

      qsort0(DOF,0,num_DOF-1);

      /* Find the fine dofs interior for the current coarse element */
      hypre_ParCSRMatrixGetRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data);
      idof= hypre_TAlloc(HYPRE_Int, num_idof);
      for (j= 0; j< num_idof; j++)
      {
         idof[j]= idof0[j];
      }
      hypre_ParCSRMatrixRestoreRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data);

      /* Sort the interior dofs according to their global number */
      qsort0(idof,0,num_idof-1);

      /* Find the fine dofs on the boundary of the current coarse element */
      if (three_dimensional_problem)
      {
         hypre_ParCSRMatrixGetRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data);
         col_ind1= hypre_TAlloc(HYPRE_Int, size1);
         for (j= 0; j< size1; j++)
         {
            col_ind1[j]= col_ind0[j];
         }
         hypre_ParCSRMatrixRestoreRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data);
      }
      else
         size1 = 0;

      hypre_ParCSRMatrixGetRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data);
      col_ind2= hypre_TAlloc(HYPRE_Int, size2);
      for (j= 0; j< size2; j++)
      {
         col_ind2[j]= col_ind0[j];
      }
      hypre_ParCSRMatrixRestoreRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data);

      /* Merge and sort the boundary dofs according to their global number */
      num_bdof = size1 + size2;
      bdof = hypre_CTAlloc(HYPRE_Int, num_bdof);
      if (three_dimensional_problem)
         memcpy(bdof, col_ind1, size1*sizeof(HYPRE_Int));
      memcpy(bdof+size1, col_ind2, size2*sizeof(HYPRE_Int));

      qsort0(bdof,0,num_bdof-1);

      /* A = extract_rows(Aee, idof) */
      A = hypre_CSRMatrixCreate (num_idof, num_idof + num_bdof,
                                 num_idof * (num_idof + num_bdof));
      hypre_CSRMatrixInitialize(A);
      {
         HYPRE_Int *I = hypre_CSRMatrixI(A);
         HYPRE_Int *J = hypre_CSRMatrixJ(A);
         double *data = hypre_CSRMatrixData(A);

         HYPRE_Int *tmp_J;
         double *tmp_data;

         I[0] = 0;
         for (j = 0; j < num_idof; j++)
         {
            getrow_ierr= hypre_ParCSRMatrixGetRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data);
            if (getrow_ierr <0)
               hypre_printf("getrow Aee off proc[%d] = \n",myproc);
            memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
            memcpy(data, tmp_data, I[j+1]*sizeof(double));
            J+= I[j+1];
            data+= I[j+1];
            hypre_ParCSRMatrixRestoreRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data);
            I[j+1] += I[j];
         }
      }

      /* P = extract_rows(dof_DOF, idof+bdof) */
      P = hypre_CSRMatrixCreate (num_idof + num_bdof, num_DOF,
                                 (num_idof + num_bdof) * num_DOF);
      hypre_CSRMatrixInitialize(P);
      {
         HYPRE_Int *I = hypre_CSRMatrixI(P);
         HYPRE_Int *J = hypre_CSRMatrixJ(P);
         double *data = hypre_CSRMatrixData(P);
         HYPRE_Int     m;

         HYPRE_Int *tmp_J;
         double *tmp_data;
     
         I[0] = 0;
         for (j = 0; j < num_idof; j++)
         {
            getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data);
            if (getrow_ierr >= 0)
            {
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data);
               I[j+1] += I[j];
            }
            else    /* row offproc */
            {
               hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data);
              /* search for OffProcRows */
               m= 0;
               while (m < num_OffProcRows)
               {
                  if (offproc_rnums[m] == idof[j])
                  { 
                     break;
                  }
                  else
                  {
                     m++;
                  }
               }
               I[j+1]= (OffProcRows[swap[m]] -> ncols);
               tmp_J = (OffProcRows[swap[m]] -> cols);
               tmp_data= (OffProcRows[swap[m]] -> data);
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               I[j+1] += I[j];
            }

         }
         for ( ; j < num_idof + num_bdof; j++)
         {
            getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data);
            if (getrow_ierr >= 0)
            {
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data);
               I[j+1] += I[j];
            }
            else    /* row offproc */
            {
               hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data);
              /* search for OffProcRows */
               m= 0;
               while (m < num_OffProcRows)
               {
                  if (offproc_rnums[m] == bdof[j-num_idof])
                  {
                     break;
                  }
                  else
                  {
                     m++;
                  }
               }
               if (m>= num_OffProcRows)hypre_printf("here the mistake\n");
               I[j+1]= (OffProcRows[swap[m]] -> ncols);
               tmp_J = (OffProcRows[swap[m]] -> cols);
               tmp_data= (OffProcRows[swap[m]] -> data);
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               I[j+1] += I[j];
            }
         }
      }

      /* Pi = Aii^{-1} Aib Pb */
      hypre_HarmonicExtension (A, P, num_DOF, DOF,
                               num_idof, idof, num_bdof, bdof);

      /* Insert Pi in dof_DOF */
      {
         HYPRE_Int * ncols = hypre_CTAlloc(HYPRE_Int, num_idof);

         for (j = 0; j < num_idof; j++)
            ncols[j] = num_DOF;

         hypre_IJMatrixAddToValuesParCSR (IJ_dof_DOF,
                                          num_idof, ncols, idof,
                                          hypre_CSRMatrixJ(P),
                                          hypre_CSRMatrixData(P));

         hypre_TFree(ncols);
      }

      hypre_TFree(DOF);
      hypre_TFree(idof);
      if (three_dimensional_problem)
      {
         hypre_TFree(col_ind1);
      }
      hypre_TFree(col_ind2);
      hypre_TFree(bdof);

      hypre_CSRMatrixDestroy(A);
      hypre_CSRMatrixDestroy(P);
   }

#if 0
   hypre_TFree(ij_dof_DOF);
#endif

   if (three_dimensional_problem)
      hypre_ParCSRMatrixDestroy(ELEM_FACEidof);
   hypre_ParCSRMatrixDestroy(ELEM_EDGEidof);

   if (num_OffProcRows)
   {
      hypre_TFree(offproc_rnums);
      hypre_TFree(swap);
   }

   return ierr;
}
示例#8
0
HYPRE_Int
hypre_InexactPartitionOfUnityInterpolation
(hypre_CSRMatrix **P_pointer,

 HYPRE_Int    *i_dof_dof,
 HYPRE_Int    *j_dof_dof,
 HYPRE_Real *a_dof_dof,


 HYPRE_Real *unit_vector,


 HYPRE_Int *i_domain_dof,
 HYPRE_Int *j_domain_dof,

 HYPRE_Int num_domains, /* == num-coarsedofs */

 HYPRE_Int num_dofs)

{
  HYPRE_Int ierr = 0;
  HYPRE_Int i,j,k;

  HYPRE_Int ind = 1;
  HYPRE_Int nu, nu_max = 1;

  HYPRE_Real  eps = 1.e-24;
  HYPRE_Int max_iter = 1000;
  HYPRE_Int iter;
  HYPRE_Real delta0, delta_old, delta, alpha, tau, beta;
  HYPRE_Real aux, diag;

  HYPRE_Real *P_t_coeff;
  hypre_CSRMatrix *P_t, *P;

  HYPRE_Real *x,*r,*d,*g,*h;
  HYPRE_Real *row_sum;


  HYPRE_Int *i_global_to_local;
  HYPRE_Int local_dof_counter;


  HYPRE_Real *diag_dof_dof;
  /* ------------------------------------------------------------------

     domain_dof relation should satisfy the following property:

     num_domains == num_coarsedofs;

     each domain contains only one coarse dof;

     ------------------------------------------------------------------ */

  
  i_global_to_local = hypre_CTAlloc(HYPRE_Int, num_dofs);

  for (i=0; i < num_dofs; i++)
    i_global_to_local[i] = -1;

  local_dof_counter = 0;
  for (i=0; i < num_domains; i++)
    if (local_dof_counter < i_domain_dof[i+1]-i_domain_dof[i])
      local_dof_counter = i_domain_dof[i+1]-i_domain_dof[i];
  /* solve T x = unit_vector; --------------------------------------- */

  /* cg loop: ------------------------------------------------------- */
  hypre_printf("\n---------------------- num_domains: %d, nnz: %d;\n", 
	 num_domains, i_domain_dof[num_domains]);

  x = hypre_CTAlloc(HYPRE_Real, num_dofs);
  d = hypre_CTAlloc(HYPRE_Real, num_dofs);
  g = hypre_CTAlloc(HYPRE_Real, num_dofs);
  r = hypre_CTAlloc(HYPRE_Real, num_dofs);

  h = hypre_CTAlloc(HYPRE_Real, local_dof_counter);
  diag_dof_dof = hypre_CTAlloc(HYPRE_Real, i_dof_dof[num_dofs]);
  for (i=0; i<num_dofs; i++)
    for (j=i_dof_dof[i]; j<i_dof_dof[i+1]; j++)
      if (i!=j_dof_dof[j])
	diag_dof_dof[j] = 0.e0;
      else
	diag_dof_dof[j] = a_dof_dof[j];	

  delta0 = 0.e0;
  for (i=0; i < num_dofs; i++)
    {
      x[i] = 0.e0;
      r[i] = unit_vector[i];
      delta0+=r[i]*r[i];
    }
  /* compute initial iterate:  

  ierr =
    compute_sum_A_i_action(x,
			   r, 
		       
			   i_domain_dof,
			   j_domain_dof,


			   i_dof_dof,
			   j_dof_dof,
			   a_dof_dof,

			   i_global_to_local,

			   num_domains,
			   num_dofs);
			   ------------------------------------- */	  


  /* matrix vector product: g < -- T x; ------------------------------ */

  ierr= 
    compute_sym_GS_T_action(g,
			    x,
			    h,

			    i_domain_dof,
			    j_domain_dof,
			    nu_max,
		     
			    i_dof_dof,
			    j_dof_dof,
			    a_dof_dof,

			    i_global_to_local,

			    num_domains,
			    num_dofs);

  delta = 0;
  for (i=0; i < num_dofs; i++)
    {
      r[i] -= g[i];
      delta+=r[i]*r[i];
    }

  if (delta < eps * delta0)
    goto end_cg;

  ierr= 
    compute_sym_GS_T_action(g,
			    unit_vector,
			    h,

			    i_domain_dof,
			    j_domain_dof,
			    1,
		     
			    i_dof_dof,
			    j_dof_dof,
			    diag_dof_dof,

			    i_global_to_local,

			    num_domains,
			    num_dofs);

  /* 
  ierr =
    compute_sum_A_i_action(d,
			   r, 
		       
			   i_domain_dof,
			   j_domain_dof,


			   i_dof_dof,
			   j_dof_dof,
			   a_dof_dof,

			   i_global_to_local,

			   num_domains,
			   num_dofs);
			   */

  for (i=0; i < num_dofs; i++)
    d[i]=r[i]/g[i];

  /* d contains precondtitioned residual: ------------------------ */
  delta = 0.e0;
  for (i=0; i < num_dofs; i++)
    delta+=d[i]*r[i];

  delta0 = delta;

  eps = 1.e-12;
  iter = 0;
loop:
  /* matrix vector product: -------------------------------------- */

  ierr= 
    compute_sym_GS_T_action(g,
			    d,
			    h,

			    i_domain_dof,
			    j_domain_dof,
			    nu_max,

			    i_dof_dof,
			    j_dof_dof,
			    a_dof_dof,

			    i_global_to_local,

			    num_domains,
			    num_dofs);

  tau = 0.e0;
  for (i=0; i < num_dofs; i++)
    tau += d[i]*g[i];

  alpha = delta/tau;

  for (i=0; i < num_dofs; i++)
    {
      x[i] += alpha * d[i];
      r[i] -= alpha * g[i];
    }

  iter++;
  delta_old = delta;
  /*
  ierr =
    compute_sum_A_i_action(g,
			   r, 
		       
			   i_domain_dof,
			   j_domain_dof,


			   i_dof_dof,
			   j_dof_dof,
			   a_dof_dof,

			   i_global_to_local,

			   num_domains,
			   num_dofs);

			   */
  ierr= 
    compute_sym_GS_T_action(g,
			    unit_vector,
			    h,

			    i_domain_dof,
			    j_domain_dof,
			    1,
		     
			    i_dof_dof,
			    j_dof_dof,
			    diag_dof_dof,

			    i_global_to_local,

			    num_domains,
			    num_dofs);

  for (i=0; i < num_dofs; i++)
    g[i] = r[i]/g[i];

  delta = 0.e0;
  for (i=0; i < num_dofs; i++)
    delta  += g[i] * r[i];

  hypre_printf("\n---------------------- iter: %d, delta: %le\n",
	 iter, delta);
  if (delta < eps * delta0 || iter > max_iter)
    goto end_cg;
  
  beta = delta/delta_old;

  for (i=0; i < num_dofs; i++)
    d[i] = g[i] + beta * d[i];

  goto loop;


 
end_cg:
  hypre_printf("\n END CG in partition of unity interpolation; num_iters: %d\n",
	 iter);

  hypre_TFree(r);
  hypre_TFree(g);
  hypre_TFree(d);

  /* ith column of P is T_i x; ----------------------------------- */

  P_t_coeff = hypre_CTAlloc(HYPRE_Real, i_domain_dof[num_domains]);

  for (i=0; i < num_domains; i++)
    {
      for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
	{
	  i_global_to_local[j_domain_dof[j]] = j-i_domain_dof[i];
	  h[j-i_domain_dof[i]] = 0.e0;
	}
      nu = 0;
    loop_nu:
      for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
	{

	  aux = x[j_domain_dof[j]];
	  for (k=i_dof_dof[j_domain_dof[j]];
	       k<i_dof_dof[j_domain_dof[j]+1]; k++)
             if (i_global_to_local[j_dof_dof[k]] > -1)
             {
	      /* this is a_{i_loc, j_loc} --------------------------------- */
               if (j_dof_dof[k] != j_domain_dof[j])
               {
                  aux -= a_dof_dof[k] * h[i_global_to_local[j_dof_dof[k]]];
               }
               else
               {
                  diag = a_dof_dof[k];
               }
             }
          

	  h[i_global_to_local[j_domain_dof[j]]] = aux/diag;
	}

      for (j=i_domain_dof[i+1]-1; j >= i_domain_dof[i]; j--)
	{
	  aux = x[j_domain_dof[j]];
	  for (k =i_dof_dof[j_domain_dof[j]+1]-1;
	       k>=i_dof_dof[j_domain_dof[j]]; k--)
             if (i_global_to_local[j_dof_dof[k]] > -1)
             {
	      /* this is a_{i_loc, j_loc} --------------------------------- */
               if (j_dof_dof[k] != j_domain_dof[j])
               {
                  aux -= a_dof_dof[k] * h[i_global_to_local[j_dof_dof[k]]];
               }
               else
               {
                  diag = a_dof_dof[k];
               }
             }
          
	  h[i_global_to_local[j_domain_dof[j]]] = aux/diag;
	}
      nu++;
      if (nu < nu_max)
	goto loop_nu;

      for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
	{
	  P_t_coeff[j]= h[i_global_to_local[j_domain_dof[j]]];
	  i_global_to_local[j_domain_dof[j]] = -1;
	}

    }

  
  hypre_TFree(diag_dof_dof);


  hypre_TFree(x);
  hypre_TFree(h);

  hypre_TFree(i_global_to_local);

	  
  P_t = hypre_CSRMatrixCreate(num_domains, num_dofs,
			      i_domain_dof[num_domains]);


  hypre_CSRMatrixData(P_t) = P_t_coeff;
  hypre_CSRMatrixI(P_t) = i_domain_dof;
  hypre_CSRMatrixJ(P_t) = j_domain_dof;

  row_sum = hypre_CTAlloc(HYPRE_Real, num_dofs);
  for (i=0; i < num_dofs; i++)
    row_sum[i] = 0.e0;
  for (i=0; i < num_domains; i++)
    for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++)
      row_sum[j_domain_dof[j]]+=P_t_coeff[j];

  delta = 0.e0;
  for (i=0; i < num_dofs; i++)
    delta+= (row_sum[i] - 1.e0)*(row_sum[i] - 1.e0);

  hypre_printf("\n unit row_sum deviation in seq_PU_interpolation: %le\n", 
	 sqrt(delta/num_dofs));

  hypre_TFree(row_sum);
    
  ind = 1;
  ierr =
    hypre_CSRMatrixTranspose(P_t, &P, ind);

  *P_pointer = P;

  hypre_CSRMatrixI(P_t) = NULL;
  hypre_CSRMatrixJ(P_t) = NULL;

  hypre_CSRMatrixDestroy(P_t);


  return ierr;

}
示例#9
0
HYPRE_Int hypre_BoomerAMGCoarsenCGC (hypre_ParCSRMatrix    *S,HYPRE_Int numberofgrids,HYPRE_Int coarsen_type,HYPRE_Int *CF_marker)
 /* CGC algorithm
  * ====================================================================================================
  * coupling : the strong couplings
  * numberofgrids : the number of grids
  * coarsen_type : the coarsening type
  * gridpartition : the grid partition
  * =====================================================================================================*/
{
  HYPRE_Int j,/*p,*/mpisize,mpirank,/*rstart,rend,*/choice,*coarse,ierr=0;
  HYPRE_Int *vertexrange = NULL;
  HYPRE_Int *vertexrange_all = NULL;
  HYPRE_Int *CF_marker_offd = NULL;
  HYPRE_Int num_variables = hypre_CSRMatrixNumRows (hypre_ParCSRMatrixDiag(S));
/*   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (hypre_ParCSRMatrixOffd (S)); */
/*   HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd (S); */

/*   HYPRE_Real wall_time; */

  HYPRE_IJMatrix ijG;
  hypre_ParCSRMatrix *G;
  hypre_CSRMatrix *Gseq;
  MPI_Comm comm = hypre_ParCSRMatrixComm(S);

  hypre_MPI_Comm_size (comm,&mpisize);
  hypre_MPI_Comm_rank (comm,&mpirank);

#if 0
    if (!mpirank) {
      wall_time = time_getWallclockSeconds();
      hypre_printf ("Starting CGC preparation\n");
    }
#endif
  AmgCGCPrepare (S,numberofgrids,CF_marker,&CF_marker_offd,coarsen_type,&vertexrange);
#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC preparation, wall_time = %f s\n",wall_time);
    wall_time = time_getWallclockSeconds();
    hypre_printf ("Starting CGC matrix assembly\n");
  }
#endif
  AmgCGCGraphAssemble (S,vertexrange,CF_marker,CF_marker_offd,coarsen_type,&ijG);
#if 0
  HYPRE_IJMatrixPrint (ijG,"graph.txt");
#endif
  HYPRE_IJMatrixGetObject (ijG,(void**)&G);
#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC matrix assembly, wall_time = %f s\n",wall_time);
    wall_time = time_getWallclockSeconds();
    hypre_printf ("Starting CGC matrix communication\n");
  }
#endif
#ifdef HYPRE_NO_GLOBAL_PARTITION
 {
  /* classical CGC does not really make sense in combination with HYPRE_NO_GLOBAL_PARTITION,
     but anyway, here it is:
  */
   HYPRE_Int nlocal = vertexrange[1]-vertexrange[0];
   vertexrange_all = hypre_CTAlloc (HYPRE_Int,mpisize+1);
   hypre_MPI_Allgather (&nlocal,1,HYPRE_MPI_INT,vertexrange_all+1,1,HYPRE_MPI_INT,comm);
   vertexrange_all[0]=0;
   for (j=2;j<=mpisize;j++) vertexrange_all[j]+=vertexrange_all[j-1];
 }
#else
  vertexrange_all = vertexrange;
#endif
  Gseq = hypre_ParCSRMatrixToCSRMatrixAll (G);
#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC matrix communication, wall_time = %f s\n",wall_time);
  }
#endif

  if (Gseq) { /* BM Aug 31, 2006: Gseq==NULL if G has no local rows */
#if 0 /* debugging */
    if (!mpirank) {
      wall_time = time_getWallclockSeconds();
      hypre_printf ("Starting CGC election\n");
    }
#endif
    AmgCGCChoose (Gseq,vertexrange_all,mpisize,&coarse);
#if 0 /* debugging */
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC election, wall_time = %f s\n",wall_time);
  }
#endif

#if 0 /* debugging */
    if (!mpirank) {
      for (j=0;j<mpisize;j++) 
	hypre_printf ("Processor %d, choice = %d of range %d - %d\n",j,coarse[j],vertexrange_all[j]+1,vertexrange_all[j+1]);
    }
    fflush(stdout);
#endif
#if 0 /* debugging */
    if (!mpirank) {
      wall_time = time_getWallclockSeconds();
      hypre_printf ("Starting CGC CF assignment\n");
    }
#endif
    choice = coarse[mpirank];
    for (j=0;j<num_variables;j++) {
      if (CF_marker[j]==choice)
	CF_marker[j] = C_PT;
      else
	CF_marker[j] = F_PT;
    }

    hypre_CSRMatrixDestroy (Gseq);
    hypre_TFree (coarse);
  }
  else
    for (j=0;j<num_variables;j++) CF_marker[j] = F_PT;
#if 0
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC CF assignment, wall_time = %f s\n",wall_time);
  }
#endif

#if 0 /* debugging */
    if (!mpirank) {
      wall_time = time_getWallclockSeconds();
      hypre_printf ("Starting CGC cleanup\n");
    }
#endif
  HYPRE_IJMatrixDestroy (ijG);
  hypre_TFree (vertexrange);
#ifdef HYPRE_NO_GLOBAL_PARTITION
  hypre_TFree (vertexrange_all);
#endif
  hypre_TFree (CF_marker_offd);
#if 0
  if (!mpirank) {
    wall_time = time_getWallclockSeconds() - wall_time;
    hypre_printf ("Finished CGC cleanup, wall_time = %f s\n",wall_time);
  }
#endif
  return(ierr);
}
示例#10
0
/**************************************************************
 *
 *      CGC Coarsening routine
 *
 **************************************************************/
HYPRE_Int
hypre_BoomerAMGCoarsenCGCb( hypre_ParCSRMatrix    *S,
                            hypre_ParCSRMatrix    *A,
                            HYPRE_Int                    measure_type,
                            HYPRE_Int                    coarsen_type,
			    HYPRE_Int                    cgc_its,
                            HYPRE_Int                    debug_flag,
                            HYPRE_Int                  **CF_marker_ptr)
{
   MPI_Comm         comm          = hypre_ParCSRMatrixComm(S);
   hypre_ParCSRCommPkg   *comm_pkg      = hypre_ParCSRMatrixCommPkg(S);
   hypre_ParCSRCommHandle *comm_handle;
   hypre_CSRMatrix *S_diag        = hypre_ParCSRMatrixDiag(S);
   hypre_CSRMatrix *S_offd        = hypre_ParCSRMatrixOffd(S);
   HYPRE_Int             *S_i           = hypre_CSRMatrixI(S_diag);
   HYPRE_Int             *S_j           = hypre_CSRMatrixJ(S_diag);
   HYPRE_Int             *S_offd_i      = hypre_CSRMatrixI(S_offd);
   HYPRE_Int             *S_offd_j;
   HYPRE_Int              num_variables = hypre_CSRMatrixNumRows(S_diag);
   HYPRE_Int              num_cols_offd = hypre_CSRMatrixNumCols(S_offd);
                  
   hypre_CSRMatrix *S_ext;
   HYPRE_Int             *S_ext_i;
   HYPRE_Int             *S_ext_j;
                 
   hypre_CSRMatrix *ST;
   HYPRE_Int             *ST_i;
   HYPRE_Int             *ST_j;
                 
   HYPRE_Int             *CF_marker;
   HYPRE_Int             *CF_marker_offd=NULL;
   HYPRE_Int              ci_tilde = -1;
   HYPRE_Int              ci_tilde_mark = -1;

   HYPRE_Int             *measure_array;
   HYPRE_Int             *measure_array_master;
   HYPRE_Int             *graph_array;
   HYPRE_Int 	           *int_buf_data=NULL;
   /*HYPRE_Int 	           *ci_array=NULL;*/

   HYPRE_Int              i, j, k, l, jS;
   HYPRE_Int		    ji, jj, index;
   HYPRE_Int		    set_empty = 1;
   HYPRE_Int		    C_i_nonempty = 0;
   HYPRE_Int		    num_nonzeros;
   HYPRE_Int		    num_procs, my_id;
   HYPRE_Int		    num_sends = 0;
   HYPRE_Int		    first_col, start;
   HYPRE_Int		    col_0, col_n;

   hypre_LinkList   LoL_head;
   hypre_LinkList   LoL_tail;

   HYPRE_Int             *lists, *where;
   HYPRE_Int              measure, new_meas;
   HYPRE_Int              num_left;
   HYPRE_Int              nabor, nabor_two;

   HYPRE_Int              ierr = 0;
   HYPRE_Int              use_commpkg_A = 0;
   HYPRE_Real	    wall_time;

   HYPRE_Int              measure_max; /* BM Aug 30, 2006: maximal measure, needed for CGC */

   if (coarsen_type < 0) coarsen_type = -coarsen_type;

   /*-------------------------------------------------------
    * Initialize the C/F marker, LoL_head, LoL_tail  arrays
    *-------------------------------------------------------*/

   LoL_head = NULL;
   LoL_tail = NULL;
   lists = hypre_CTAlloc(HYPRE_Int, num_variables);
   where = hypre_CTAlloc(HYPRE_Int, num_variables);

#if 0 /* debugging */
   char  filename[256];
   FILE *fp;
   HYPRE_Int   iter = 0;
#endif

   /*--------------------------------------------------------------
    * Compute a CSR strength matrix, S.
    *
    * For now, the "strength" of dependence/influence is defined in
    * the following way: i depends on j if
    *     aij > hypre_max (k != i) aik,    aii < 0
    * or
    *     aij < hypre_min (k != i) aik,    aii >= 0
    * Then S_ij = 1, else S_ij = 0.
    *
    * NOTE: the entries are negative initially, corresponding
    * to "unaccounted-for" dependence.
    *----------------------------------------------------------------*/

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   if (!comm_pkg)
   {
        use_commpkg_A = 1;
        comm_pkg = hypre_ParCSRMatrixCommPkg(A); 
   }

   if (!comm_pkg)
   {
        hypre_MatvecCommPkgCreate(A);
        comm_pkg = hypre_ParCSRMatrixCommPkg(A); 
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);

   if (num_cols_offd) S_offd_j = hypre_CSRMatrixJ(S_offd);

   jS = S_i[num_variables];

   ST = hypre_CSRMatrixCreate(num_variables, num_variables, jS);
   ST_i = hypre_CTAlloc(HYPRE_Int,num_variables+1);
   ST_j = hypre_CTAlloc(HYPRE_Int,jS);
   hypre_CSRMatrixI(ST) = ST_i;
   hypre_CSRMatrixJ(ST) = ST_j;

   /*----------------------------------------------------------
    * generate transpose of S, ST
    *----------------------------------------------------------*/

   for (i=0; i <= num_variables; i++)
      ST_i[i] = 0;
 
   for (i=0; i < jS; i++)
   {
	 ST_i[S_j[i]+1]++;
   }
   for (i=0; i < num_variables; i++)
   {
      ST_i[i+1] += ST_i[i];
   }
   for (i=0; i < num_variables; i++)
   {
      for (j=S_i[i]; j < S_i[i+1]; j++)
      {
	 index = S_j[j];
       	 ST_j[ST_i[index]] = i;
       	 ST_i[index]++;
      }
   }      
   for (i = num_variables; i > 0; i--)
   {
      ST_i[i] = ST_i[i-1];
   }
   ST_i[0] = 0;

   /*----------------------------------------------------------
    * Compute the measures
    *
    * The measures are given by the row sums of ST.
    * Hence, measure_array[i] is the number of influences
    * of variable i.
    * correct actual measures through adding influences from
    * neighbor processors
    *----------------------------------------------------------*/

   measure_array_master = hypre_CTAlloc(HYPRE_Int, num_variables);
   measure_array = hypre_CTAlloc(HYPRE_Int, num_variables);

   for (i = 0; i < num_variables; i++)
   {
      measure_array_master[i] = ST_i[i+1]-ST_i[i];
   }

   if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) 
		&& num_procs > 1)
   {
      if (use_commpkg_A)
         S_ext      = hypre_ParCSRMatrixExtractBExt(S,A,0);
      else
         S_ext      = hypre_ParCSRMatrixExtractBExt(S,S,0);
      S_ext_i    = hypre_CSRMatrixI(S_ext);
      S_ext_j    = hypre_CSRMatrixJ(S_ext);
      num_nonzeros = S_ext_i[num_cols_offd];
      first_col = hypre_ParCSRMatrixFirstColDiag(S);
      col_0 = first_col-1;
      col_n = col_0+num_variables;
      if (measure_type)
      {
	 for (i=0; i < num_nonzeros; i++)
         {
	    index = S_ext_j[i] - first_col;
	    if (index > -1 && index < num_variables)
		measure_array_master[index]++;
         } 
      } 
   }

   /*---------------------------------------------------
    * Loop until all points are either fine or coarse.
    *---------------------------------------------------*/

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

   /* first coarsening phase */

  /*************************************************************
   *
   *   Initialize the lists
   *
   *************************************************************/

   CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables);
   
   num_left = 0;
   for (j = 0; j < num_variables; j++)
   {
     if ((S_i[j+1]-S_i[j])== 0 &&
	 (S_offd_i[j+1]-S_offd_i[j]) == 0)
     {
       CF_marker[j] = SF_PT;
       measure_array_master[j] = 0;
     }
     else
     {
       CF_marker[j] = UNDECIDED; 
       /*        num_left++; */ /* BM May 19, 2006: see below*/
     }
   } 

   if (coarsen_type==22) {
     /* BM Sep 8, 2006: allow_emptygrids only if the following holds for all points j: 
        (a) the point has no strong connections at all, OR
        (b) the point has a strong connection across a boundary */
     for (j=0;j<num_variables;j++)
       if (S_i[j+1]>S_i[j] && S_offd_i[j+1] == S_offd_i[j]) {coarsen_type=21;break;}
   }

   for (l = 1; l <= cgc_its; l++)
   {
     LoL_head = NULL;
     LoL_tail = NULL;
     num_left = 0;  /* compute num_left before each RS coarsening loop */
     memcpy (measure_array,measure_array_master,num_variables*sizeof(HYPRE_Int));
     memset (lists,0,sizeof(HYPRE_Int)*num_variables);
     memset (where,0,sizeof(HYPRE_Int)*num_variables);

     for (j = 0; j < num_variables; j++) 
     {    
       measure = measure_array[j];
       if (CF_marker[j] != SF_PT)  
       {
	 if (measure > 0)
	 {
	   enter_on_lists(&LoL_head, &LoL_tail, measure, j, lists, where);
	   num_left++; /* compute num_left before each RS coarsening loop */
	 }
	 else if (CF_marker[j] == 0) /* increase weight of strongly coupled neighbors only 
					if j is not conained in a previously constructed coarse grid.
					Reason: these neighbors should start with the same initial weight
					in each CGC iteration.                    BM Aug 30, 2006 */
					
	 {
	   if (measure < 0) hypre_printf("negative measure!\n");
/* 	   CF_marker[j] = f_pnt; */
	   for (k = S_i[j]; k < S_i[j+1]; k++)
	   {
	     nabor = S_j[k];
/*  	     if (CF_marker[nabor] != SF_PT)  */
 	     if (CF_marker[nabor] == 0)  /* BM Aug 30, 2006: don't alter weights of points 
 					    contained in other candidate coarse grids */ 
	     {
	       if (nabor < j)
	       {
		 new_meas = measure_array[nabor];
		 if (new_meas > 0)
		   remove_point(&LoL_head, &LoL_tail, new_meas, 
				nabor, lists, where);
		 else num_left++; /* BM Aug 29, 2006 */
		 
		 new_meas = ++(measure_array[nabor]);
		 enter_on_lists(&LoL_head, &LoL_tail, new_meas,
				nabor, lists, where);
	       }
	       else
	       {
		 new_meas = ++(measure_array[nabor]);
	       }
	     }
	   }
	   /* 	   --num_left; */ /* BM May 19, 2006 */
         }
       }
     }

     /* BM Aug 30, 2006: first iteration: determine maximal weight */
     if (num_left && l==1) measure_max = measure_array[LoL_head->head]; 
     /* BM Aug 30, 2006: break CGC iteration if no suitable 
	starting point is available any more */
     if (!num_left || measure_array[LoL_head->head]<measure_max) {
       while (LoL_head) {
	 hypre_LinkList list_ptr = LoL_head;
	 LoL_head = LoL_head->next_elt;
	 dispose_elt (list_ptr);
       }
       break;
     }

   /****************************************************************
    *
    *  Main loop of Ruge-Stueben first coloring pass.
    *
    *  WHILE there are still points to classify DO:
    *        1) find first point, i,  on list with max_measure
    *           make i a C-point, remove it from the lists
    *        2) For each point, j,  in S_i^T,
    *           a) Set j to be an F-point
    *           b) For each point, k, in S_j
    *                  move k to the list in LoL with measure one
    *                  greater than it occupies (creating new LoL
    *                  entry if necessary)
    *        3) For each point, j,  in S_i,
    *                  move j to the list in LoL with measure one
    *                  smaller than it occupies (creating new LoL
    *                  entry if necessary)
    *
    ****************************************************************/

     while (num_left > 0)
     {
       index = LoL_head -> head;
/*         index = LoL_head -> tail;  */

/*        CF_marker[index] = C_PT; */
       CF_marker[index] = l;  /* BM Aug 18, 2006 */
       measure = measure_array[index];
       measure_array[index] = 0;
       measure_array_master[index] = 0; /* BM May 19: for CGC */
       --num_left;
      
       remove_point(&LoL_head, &LoL_tail, measure, index, lists, where);
  
       for (j = ST_i[index]; j < ST_i[index+1]; j++)
       {
         nabor = ST_j[j];
/*          if (CF_marker[nabor] == UNDECIDED) */
	 if (measure_array[nabor]>0) /* undecided point */
	 {
	   /* 	   CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */
	   measure = measure_array[nabor];
	   measure_array[nabor]=0;

	   remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where);
	   --num_left;
	   
	   for (k = S_i[nabor]; k < S_i[nabor+1]; k++)
           {
	     nabor_two = S_j[k];
/* 	     if (CF_marker[nabor_two] == UNDECIDED) */
	     if (measure_array[nabor_two]>0) /* undecided point */
             {
	       measure = measure_array[nabor_two];
	       remove_point(&LoL_head, &LoL_tail, measure, 
			    nabor_two, lists, where);
	       
	       new_meas = ++(measure_array[nabor_two]);
	       
	       enter_on_lists(&LoL_head, &LoL_tail, new_meas,
			      nabor_two, lists, where);
	     }
	   }
         }
       }
       for (j = S_i[index]; j < S_i[index+1]; j++)
       {
         nabor = S_j[j];
/*          if (CF_marker[nabor] == UNDECIDED) */
	 if (measure_array[nabor]>0) /* undecided point */
         {
	   measure = measure_array[nabor];
	   
	   remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where);
	   
	   measure_array[nabor] = --measure;
	   
	   if (measure > 0)
	     enter_on_lists(&LoL_head, &LoL_tail, measure, nabor, 
			    lists, where);
	   else
	   {
/* 	     CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */
	     --num_left;

	     for (k = S_i[nabor]; k < S_i[nabor+1]; k++)
             {
	       nabor_two = S_j[k];
/* 	       if (CF_marker[nabor_two] == UNDECIDED) */
	       if (measure_array[nabor_two]>0)
               {
		 new_meas = measure_array[nabor_two];
		 remove_point(&LoL_head, &LoL_tail, new_meas, 
			      nabor_two, lists, where);
		 
		 new_meas = ++(measure_array[nabor_two]);
                 
		 enter_on_lists(&LoL_head, &LoL_tail, new_meas,
				nabor_two, lists, where);
	       }
	     }
	   }
         }
       }
     }
     if (LoL_head) hypre_printf ("Linked list not empty! head: %d\n",LoL_head->head);
   }
   l--; /* BM Aug 15, 2006 */

   hypre_TFree(measure_array);
   hypre_TFree(measure_array_master);
   hypre_CSRMatrixDestroy(ST);

   if (debug_flag == 3)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d    Coarsen 1st pass = %f\n",
                     my_id, wall_time); 
   }

   hypre_TFree(lists);
   hypre_TFree(where);
   
     if (num_procs>1) {
       if (debug_flag == 3)  wall_time = time_getWallclockSeconds();
       hypre_BoomerAMGCoarsenCGC (S,l,coarsen_type,CF_marker);
       
       if (debug_flag == 3)  { 
	 wall_time = time_getWallclockSeconds() - wall_time; 
	 hypre_printf("Proc = %d    Coarsen CGC = %f\n", 
		my_id, wall_time);  
       } 
     }
     else {
       /* the first candiate coarse grid is the coarse grid */ 
       for (j=0;j<num_variables;j++) {
	 if (CF_marker[j]==1) CF_marker[j]=C_PT;
	 else CF_marker[j]=F_PT;
       }
     }

   /* BM May 19, 2006:
      Set all undecided points to be fine grid points. */
   for (j=0;j<num_variables;j++)
     if (!CF_marker[j]) CF_marker[j]=F_PT;

   /*---------------------------------------------------
    * Initialize the graph array
    *---------------------------------------------------*/

   graph_array = hypre_CTAlloc(HYPRE_Int, num_variables);

   for (i = 0; i < num_variables; i++)
   {
      graph_array[i] = -1;
   }

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

      for (i=0; i < num_variables; i++)
      {
	 if (ci_tilde_mark != i) ci_tilde = -1;
         if (CF_marker[i] == -1)
         {
   	    for (ji = S_i[i]; ji < S_i[i+1]; ji++)
   	    {
   	       j = S_j[ji];
   	       if (CF_marker[j] > 0)
   	          graph_array[j] = i;
    	    }
   	    for (ji = S_i[i]; ji < S_i[i+1]; ji++)
   	    {
   	       j = S_j[ji];
   	       if (CF_marker[j] == -1)
   	       {
   	          set_empty = 1;
   	          for (jj = S_i[j]; jj < S_i[j+1]; jj++)
   	          {
   		     index = S_j[jj];
   		     if (graph_array[index] == i)
   		     {
   		        set_empty = 0;
   		        break;
   		     }
   	          }
   	          if (set_empty)
   	          {
   		     if (C_i_nonempty)
   		     {
   		        CF_marker[i] = 1;
   		        if (ci_tilde > -1)
   		        {
   			   CF_marker[ci_tilde] = -1;
   		           ci_tilde = -1;
   		        }
   	    		C_i_nonempty = 0;
   		        break;
   		     }
   		     else
   		     {
   		        ci_tilde = j;
   		        ci_tilde_mark = i;
   		        CF_marker[j] = 1;
   		        C_i_nonempty = 1;
		        i--;
		        break;
		     }
	          }
	       }
	    }
	 }
      }

   if (debug_flag == 3 && coarsen_type != 2)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d    Coarsen 2nd pass = %f\n",
                       my_id, wall_time); 
   }

   /* third pass, check boundary fine points for coarse neighbors */

      /*------------------------------------------------
       * Exchange boundary data for CF_marker
       *------------------------------------------------*/

      if (debug_flag == 3) wall_time = time_getWallclockSeconds();
    
      CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
                                                   num_sends));
    
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
        start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
        for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
                int_buf_data[index++]
                 = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
    
      if (num_procs > 1)
      {
      comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
        CF_marker_offd);
    
      hypre_ParCSRCommHandleDestroy(comm_handle);
      }
      AmgCGCBoundaryFix (S,CF_marker,CF_marker_offd);
      if (debug_flag == 3)
      {
         wall_time = time_getWallclockSeconds() - wall_time;
         hypre_printf("Proc = %d    CGC boundary fix = %f\n",
                       my_id, wall_time); 
      }

   /*---------------------------------------------------
    * Clean up and return
    *---------------------------------------------------*/

   /*if (coarsen_type != 1)
   { */  
     if (CF_marker_offd) hypre_TFree(CF_marker_offd);  /* BM Aug 21, 2006 */
     if (int_buf_data) hypre_TFree(int_buf_data); /* BM Aug 21, 2006 */
     /*if (ci_array) hypre_TFree(ci_array);*/ /* BM Aug 21, 2006 */
   /*} */   
   hypre_TFree(graph_array);
   if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) 
		&& num_procs > 1)
   	hypre_CSRMatrixDestroy(S_ext); 
   
   *CF_marker_ptr   = CF_marker;
   
   return (ierr);
}
示例#11
0
HYPRE_Int AmgCGCChoose (hypre_CSRMatrix *G,HYPRE_Int *vertexrange,HYPRE_Int mpisize,HYPRE_Int **coarse)
  /* chooses one grid for every processor
   * ============================================================
   * G : the connectivity graph
   * map : the parallel layout
   * mpisize : number of procs
   * coarse : the chosen coarse grids
   * ===========================================================*/
{
  HYPRE_Int i,j,jj,p,choice,*processor,ierr=0;
  HYPRE_Int measure,new_measure;

/*   MPI_Comm comm = hypre_ParCSRMatrixComm(G); */

/*   hypre_ParCSRCommPkg    *comm_pkg    = hypre_ParCSRMatrixCommPkg (G); */
/*   hypre_ParCSRCommHandle *comm_handle; */

  HYPRE_Real *G_data = hypre_CSRMatrixData (G);
  HYPRE_Real max;
  HYPRE_Int *G_i = hypre_CSRMatrixI(G);
  HYPRE_Int *G_j = hypre_CSRMatrixJ(G);
  hypre_CSRMatrix *H,*HT;
  HYPRE_Int *H_i,*H_j,*HT_i,*HT_j;
  HYPRE_Int jG,jH;
  HYPRE_Int num_vertices = hypre_CSRMatrixNumRows (G);
  HYPRE_Int *measure_array;
  HYPRE_Int *lists,*where;

  hypre_LinkList LoL_head = NULL;
  hypre_LinkList LoL_tail = NULL;

  processor = hypre_CTAlloc (HYPRE_Int,num_vertices);
  *coarse = hypre_CTAlloc (HYPRE_Int,mpisize);
  memset (*coarse,0,sizeof(HYPRE_Int)*mpisize);

  measure_array = hypre_CTAlloc (HYPRE_Int,num_vertices);
  lists = hypre_CTAlloc (HYPRE_Int,num_vertices);
  where = hypre_CTAlloc (HYPRE_Int,num_vertices);

/*   for (p=0;p<mpisize;p++) hypre_printf ("%d: %d-%d\n",p,range[p]+1,range[p+1]); */

  /******************************************************************
   * determine heavy edges
   ******************************************************************/

  jG  = G_i[num_vertices];
  H   = hypre_CSRMatrixCreate (num_vertices,num_vertices,jG);
  H_i = hypre_CTAlloc (HYPRE_Int,num_vertices+1);
  H_j = hypre_CTAlloc (HYPRE_Int,jG);
  hypre_CSRMatrixI(H) = H_i;
  hypre_CSRMatrixJ(H) = H_j;

  for (i=0,p=0;i<num_vertices;i++) {
    while (vertexrange[p+1]<=i) p++;
    processor[i]=p;
  }

  H_i[0]=0;
  for (i=0,jj=0;i<num_vertices;i++) {
#if 0 
    hypre_printf ("neighbors of grid %d:",i); 
#endif
    H_i[i+1]=H_i[i];
    for (j=G_i[i],choice=-1,max=0;j<G_i[i+1];j++) {
#if 0
      if (G_data[j]>=0.0) 
	hypre_printf ("G[%d,%d]=0. G_j(j)=%d, G_data(j)=%f.\n",i,G_j[j],j,G_data[j]);
#endif
      /* G_data is always negative, so this test is sufficient */
      if (choice==-1 || G_data[j]>max) {
	choice = G_j[j];
	max = G_data[j];
      }
      if (j==G_i[i+1]-1 || processor[G_j[j+1]] > processor[choice]) {
	/* we are done for this processor boundary */
	H_j[jj++]=choice;
	H_i[i+1]++;
#if 0
 	hypre_printf (" %d",choice); 
#endif
	choice = -1; max=0;
      }
    }
#if 0
     hypre_printf("\n"); 
#endif
  }

  /******************************************************************
   * compute H^T, the transpose of H
   ******************************************************************/

  jH = H_i[num_vertices];
  HT = hypre_CSRMatrixCreate (num_vertices,num_vertices,jH);
  HT_i = hypre_CTAlloc (HYPRE_Int,num_vertices+1);
  HT_j = hypre_CTAlloc (HYPRE_Int,jH);
  hypre_CSRMatrixI(HT) = HT_i;
  hypre_CSRMatrixJ(HT) = HT_j;

   for (i=0; i <= num_vertices; i++)
      HT_i[i] = 0;
   for (i=0; i < jH; i++) {
     HT_i[H_j[i]+1]++;
   }
   for (i=0; i < num_vertices; i++) {
     HT_i[i+1] += HT_i[i];
   }
   for (i=0; i < num_vertices; i++) {
     for (j=H_i[i]; j < H_i[i+1]; j++) {
       HYPRE_Int myindex = H_j[j];
       HT_j[HT_i[myindex]] = i;
       HT_i[myindex]++;
     }
   }      
   for (i = num_vertices; i > 0; i--) {
     HT_i[i] = HT_i[i-1];
   }
   HT_i[0] = 0;

  /*****************************************************************
   * set initial vertex weights
   *****************************************************************/

  for (i=0;i<num_vertices;i++) {
    measure_array[i] = H_i[i+1] - H_i[i] + HT_i[i+1] - HT_i[i];
    enter_on_lists (&LoL_head,&LoL_tail,measure_array[i],i,lists,where);
  }

  /******************************************************************
   * apply CGC iteration
   ******************************************************************/

  while (LoL_head && measure_array[LoL_head->head]) {


    choice = LoL_head->head;
    measure = measure_array[choice];
#if 0
    hypre_printf ("Choice: %d, measure %d, processor %d\n",choice, measure,processor[choice]);
    fflush(stdout);
#endif

    (*coarse)[processor[choice]] = choice+1;  /* add one because coarsegrid indexing starts with 1, not 0 */
    /* new maximal weight */
    new_measure = measure+1;
    for (i=vertexrange[processor[choice]];i<vertexrange[processor[choice]+1];i++) {
      /* set weights for all remaining vertices on this processor to zero */
      measure = measure_array[i];
      remove_point (&LoL_head,&LoL_tail,measure,i,lists,where);
      measure_array[i]=0;
    }
    for (j=H_i[choice];j<H_i[choice+1];j++){
      jj = H_j[j];
      /* if no vertex is chosen on this proc, set weights of all heavily coupled vertices to max1 */
      if (!(*coarse)[processor[jj]]) {
	measure = measure_array[jj];
	remove_point (&LoL_head,&LoL_tail,measure,jj,lists,where);
	enter_on_lists (&LoL_head,&LoL_tail,new_measure,jj,lists,where);
	measure_array[jj]=new_measure;
      }
    }
    for (j=HT_i[choice];j<HT_i[choice+1];j++) {
      jj = HT_j[j];
      /* if no vertex is chosen on this proc, set weights of all heavily coupled vertices to max1 */
      if (!(*coarse)[processor[jj]]) {
	measure = measure_array[jj];
	remove_point (&LoL_head,&LoL_tail,measure,jj,lists,where);
	enter_on_lists (&LoL_head,&LoL_tail,new_measure,jj,lists,where);
	measure_array[jj]=new_measure;
      }
    }
  }

  /* remove remaining list elements, if they exist. They all should have measure 0 */
  while (LoL_head) {
    i = LoL_head->head;
    measure = measure_array[i];
#if 0
    hypre_assert (measure==0);
#endif
    remove_point (&LoL_head,&LoL_tail,measure,i,lists,where);
  }
    

  for (p=0;p<mpisize;p++)
    /* if the algorithm has not determined a coarse vertex for this proc, simply take the last one 
       Do not take the first one, it might by empty! */
    if (!(*coarse)[p]) {
      (*coarse)[p] = vertexrange[p+1];
/*       hypre_printf ("choice for processor %d: %d\n",p,range[p]+1); */
    }

  /********************************************
   * clean up 
   ********************************************/

  hypre_CSRMatrixDestroy (H);
  hypre_CSRMatrixDestroy (HT);


  hypre_TFree (processor);
  hypre_TFree (measure_array);
  hypre_TFree (lists);
  hypre_TFree (where);
  
  return(ierr);
}
示例#12
0
hypre_ParCSRMatrix *
hypre_ParCSRBlockMatrixConvertToParCSRMatrix(hypre_ParCSRBlockMatrix *matrix)
{
   MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix);
   hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix);
   hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix);
   HYPRE_Int block_size = hypre_ParCSRBlockMatrixBlockSize(matrix);
   HYPRE_Int global_num_rows = hypre_ParCSRBlockMatrixGlobalNumRows(matrix);
   HYPRE_Int global_num_cols = hypre_ParCSRBlockMatrixGlobalNumCols(matrix);
   HYPRE_Int *row_starts = hypre_ParCSRBlockMatrixRowStarts(matrix);
   HYPRE_Int *col_starts = hypre_ParCSRBlockMatrixColStarts(matrix);
   HYPRE_Int num_cols_offd = hypre_CSRBlockMatrixNumCols(offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRBlockMatrixNumNonzeros(diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRBlockMatrixNumNonzeros(offd);
 
   hypre_ParCSRMatrix *matrix_C;
   HYPRE_Int *matrix_C_row_starts;
   HYPRE_Int *matrix_C_col_starts;

   HYPRE_Int *counter, *new_j_map;
   HYPRE_Int size_j, size_map, index, new_num_cols, removed = 0;
   HYPRE_Int *offd_j, *col_map_offd, *new_col_map_offd;
   

   HYPRE_Int num_procs, i, j;

   hypre_CSRMatrix *diag_nozeros, *offd_nozeros;

   hypre_MPI_Comm_size(comm,&num_procs);

#ifdef HYPRE_NO_GLOBAL_PARTITION
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, 2);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, 2);
   for(i = 0; i < 2; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]*block_size;
      matrix_C_col_starts[i] = col_starts[i]*block_size;
   }
#else
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   for(i = 0; i < num_procs + 1; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]*block_size;
      matrix_C_col_starts[i] = col_starts[i]*block_size;
   }
#endif

   matrix_C = hypre_ParCSRMatrixCreate(comm, global_num_rows*block_size, 
                                       global_num_cols*block_size, matrix_C_row_starts, 
                                       matrix_C_col_starts, num_cols_offd*block_size, 
                                       num_nonzeros_diag*block_size*block_size, 
                                       num_nonzeros_offd*block_size*block_size);
   hypre_ParCSRMatrixInitialize(matrix_C);

   /* DIAG */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
   hypre_ParCSRMatrixDiag(matrix_C) = 
      hypre_CSRBlockMatrixConvertToCSRMatrix(diag);

   /* AB - added to delete zeros */
   diag_nozeros = hypre_CSRMatrixDeleteZeros( 
      hypre_ParCSRMatrixDiag(matrix_C), 1e-14);
   if(diag_nozeros) 
   {
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
      hypre_ParCSRMatrixDiag(matrix_C) = diag_nozeros;
   }

   /* OFF-DIAG */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
   hypre_ParCSRMatrixOffd(matrix_C) = 
      hypre_CSRBlockMatrixConvertToCSRMatrix(offd);

   /* AB - added to delete zeros - this just deletes from data and j arrays */
   offd_nozeros = hypre_CSRMatrixDeleteZeros( 
      hypre_ParCSRMatrixOffd(matrix_C), 1e-14);
   if(offd_nozeros) 
   {
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
      hypre_ParCSRMatrixOffd(matrix_C) = offd_nozeros;
      removed = 1;
       
   }

   /* now convert the col_map_offd */
   for (i = 0; i < num_cols_offd; i++)
      for (j = 0; j < block_size; j++)
         hypre_ParCSRMatrixColMapOffd(matrix_C)[i*block_size + j] = 
            hypre_ParCSRBlockMatrixColMapOffd(matrix)[i]*block_size + j;
 
   /* if we deleted zeros, then it is possible that col_map_offd can be
      compressed as well - this requires some amount of work that could be skipped... */

   if (removed)
   {
      size_map =   num_cols_offd*block_size;
      counter = hypre_CTAlloc(HYPRE_Int, size_map);
      new_j_map = hypre_CTAlloc(HYPRE_Int, size_map);

      offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(matrix_C));
      col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix_C);
      
      size_j = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(matrix_C));
      /* mark which off_d entries are found in j */
      for (i=0; i < size_j; i++)
      {
         counter[offd_j[i]] = 1;
      }
      /*now find new numbering for columns (we will delete the 
        cols where counter = 0*/
      index = 0;
      for (i=0; i < size_map; i++)
      {
         if (counter[i]) new_j_map[i] = index++;
      }
      new_num_cols = index;
      /* if there are some col entries to remove: */ 
      if (!(index == size_map))
      {
         /* go thru j and adjust entries */         
         for (i=0; i < size_j; i++)
         {
            offd_j[i] = new_j_map[offd_j[i]];
         }
         /*now go thru col map and get rid of non-needed entries */ 
         new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols);
         index = 0;
         for (i=0; i < size_map; i++)  
         {
            if (counter[i]) 
            {
               new_col_map_offd[index++] = col_map_offd[i];
            }
         }
         /* set the new col map */ 
         hypre_TFree(col_map_offd);
         hypre_ParCSRMatrixColMapOffd(matrix_C) = new_col_map_offd;
         /* modify the number of cols */
         hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(matrix_C)) = new_num_cols;
      }
      hypre_TFree(new_j_map);
      hypre_TFree(counter);
      
   }
   
   hypre_ParCSRMatrixSetNumNonzeros( matrix_C );
   hypre_ParCSRMatrixSetDNumNonzeros( matrix_C );
   
   /* we will not copy the comm package */  
   hypre_ParCSRMatrixCommPkg(matrix_C) = NULL;

   return matrix_C;
}
示例#13
0
HYPRE_Int
hypre_BoomerAMGBlockCreateNodalA(hypre_ParCSRBlockMatrix *A,
                                 HYPRE_Int                option,
                                 HYPRE_Int                diag_option,
                                 hypre_ParCSRMatrix     **AN_ptr)
{
   MPI_Comm                 comm         = hypre_ParCSRBlockMatrixComm(A);
   hypre_CSRBlockMatrix    *A_diag       = hypre_ParCSRBlockMatrixDiag(A);
   HYPRE_Int               *A_diag_i     = hypre_CSRBlockMatrixI(A_diag);
   HYPRE_Real              *A_diag_data  = hypre_CSRBlockMatrixData(A_diag);

   HYPRE_Int                block_size = hypre_CSRBlockMatrixBlockSize(A_diag);
   HYPRE_Int                bnnz = block_size*block_size;

   hypre_CSRBlockMatrix    *A_offd          = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int               *A_offd_i        = hypre_CSRBlockMatrixI(A_offd);
   HYPRE_Real              *A_offd_data     = hypre_CSRBlockMatrixData(A_offd);
   HYPRE_Int               *A_diag_j        = hypre_CSRBlockMatrixJ(A_diag);
   HYPRE_Int               *A_offd_j        = hypre_CSRBlockMatrixJ(A_offd);

   HYPRE_Int               *row_starts      = hypre_ParCSRBlockMatrixRowStarts(A);
   HYPRE_Int               *col_map_offd    = hypre_ParCSRBlockMatrixColMapOffd(A);
   HYPRE_Int                num_nonzeros_diag;
   HYPRE_Int                num_nonzeros_offd = 0;
   HYPRE_Int                num_cols_offd = 0;
                  
   hypre_ParCSRMatrix *AN;
   hypre_CSRMatrix    *AN_diag;
   HYPRE_Int          *AN_diag_i;
   HYPRE_Int          *AN_diag_j=NULL;
   HYPRE_Real         *AN_diag_data = NULL; 
   hypre_CSRMatrix    *AN_offd;
   HYPRE_Int          *AN_offd_i;
   HYPRE_Int          *AN_offd_j = NULL;
   HYPRE_Real         *AN_offd_data = NULL; 
   HYPRE_Int          *col_map_offd_AN = NULL;
   HYPRE_Int          *row_starts_AN;

                 
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
   HYPRE_Int            num_sends;
   HYPRE_Int            num_recvs;
   HYPRE_Int           *send_procs;
   HYPRE_Int           *send_map_starts;
   HYPRE_Int           *send_map_elmts;
   HYPRE_Int           *recv_procs;
   HYPRE_Int           *recv_vec_starts;

   hypre_ParCSRCommPkg *comm_pkg_AN = NULL;
   HYPRE_Int           *send_procs_AN = NULL;
   HYPRE_Int           *send_map_starts_AN = NULL;
   HYPRE_Int           *send_map_elmts_AN = NULL;
   HYPRE_Int           *recv_procs_AN = NULL;
   HYPRE_Int           *recv_vec_starts_AN = NULL;

   HYPRE_Int            i;
                      
   HYPRE_Int            ierr = 0;

   HYPRE_Int            num_procs;
   HYPRE_Int            cnt;
   HYPRE_Int            norm_type;

   HYPRE_Int            global_num_nodes;
   HYPRE_Int            num_nodes;

   HYPRE_Int            index, k;
   
   HYPRE_Real           tmp;
   HYPRE_Real           sum;

   hypre_MPI_Comm_size(comm,&num_procs);

   if (!comm_pkg)
   {
      hypre_BlockMatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
   }

   norm_type = fabs(option);


/* Set up the new matrix AN */


#ifdef HYPRE_NO_GLOBAL_PARTITION
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2);
   for (i=0; i < 2; i++)
   {
      row_starts_AN[i] = row_starts[i];
   }
#else
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1);
   for (i=0; i < num_procs+1; i++)
   {
      row_starts_AN[i] = row_starts[i];
   }
#endif

   global_num_nodes = hypre_ParCSRBlockMatrixGlobalNumRows(A);
   num_nodes = hypre_CSRBlockMatrixNumRows(A_diag);

   /* the diag part */

   num_nonzeros_diag = A_diag_i[num_nodes];
   AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);

   for (i=0; i <= num_nodes; i++)
   {
      AN_diag_i[i] = A_diag_i[i];
   }

   AN_diag_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag);     
   AN_diag_data = hypre_CTAlloc(HYPRE_Real, num_nonzeros_diag);      


   AN_diag = hypre_CSRMatrixCreate(num_nodes, num_nodes, num_nonzeros_diag);
   hypre_CSRMatrixI(AN_diag) = AN_diag_i;
   hypre_CSRMatrixJ(AN_diag) = AN_diag_j;
   hypre_CSRMatrixData(AN_diag) = AN_diag_data;

   for (i=0; i< num_nonzeros_diag; i++)
   {
      AN_diag_j[i]  = A_diag_j[i];
      hypre_CSRBlockMatrixBlockNorm(norm_type, &A_diag_data[i*bnnz], 
                                    &tmp, block_size);
      AN_diag_data[i] = tmp;
   }
   

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (NEED
       * to get more below!)*/
      /* the diagonal is the first element listed in each row - */
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i]; 
         sum = 0.0;
         for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++)
         {
            sum += AN_diag_data[k];
            
         }

         AN_diag_data[index] = -sum;
      }
      
   }
   else if (diag_option == 2)
   {
      
      /*  make all diagonal entries negative */
      /* the diagonal is the first element listed in each row - */
      
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i];
         AN_diag_data[index] = -AN_diag_data[index];
      }
   }

   /* copy the commpkg */
   if (comm_pkg)
   {
      comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
      hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm;

      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends;

      num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs;

      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      if (num_sends) 
      {
         send_procs_AN = hypre_CTAlloc(HYPRE_Int, num_sends);
         send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int, send_map_starts[num_sends]);
      }
      send_map_starts_AN = hypre_CTAlloc(HYPRE_Int, num_sends+1);
      send_map_starts_AN[0] = 0;
      for (i=0; i < num_sends; i++)
      {
         send_procs_AN[i] = send_procs[i];
         send_map_starts_AN[i+1] = send_map_starts[i+1];
      }
      cnt = send_map_starts_AN[num_sends];
      for (i=0; i< cnt; i++)
      {
         send_map_elmts_AN[i] = send_map_elmts[i];
      }
      hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN;

      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int, num_recvs+1);
      if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int, num_recvs);

      recv_vec_starts_AN[0] = recv_vec_starts[0];
      for (i=0; i < num_recvs; i++)
      {
         recv_procs_AN[i] = recv_procs[i];
         recv_vec_starts_AN[i+1] = recv_vec_starts[i+1];
         
      }
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN;

   }

   /* the off-diag part */

   num_cols_offd = hypre_CSRBlockMatrixNumCols(A_offd);
   col_map_offd_AN = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
   for (i=0; i < num_cols_offd; i++)
   {
      col_map_offd_AN[i] = col_map_offd[i];
   }

   num_nonzeros_offd = A_offd_i[num_nodes];
   AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);
   for (i=0; i <= num_nodes; i++)
   {
      AN_offd_i[i] = A_offd_i[i];
   }
      
   AN_offd_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd);     
   AN_offd_data = hypre_CTAlloc(HYPRE_Real, num_nonzeros_offd);

   for (i=0; i< num_nonzeros_offd; i++)
   {
      AN_offd_j[i]  = A_offd_j[i];
      hypre_CSRBlockMatrixBlockNorm(norm_type, &A_offd_data[i*bnnz], 
                                    &tmp, block_size);
      AN_offd_data[i] = tmp;
   }
   
   AN_offd = hypre_CSRMatrixCreate(num_nodes, num_cols_offd, num_nonzeros_offd);
  
   hypre_CSRMatrixI(AN_offd) = AN_offd_i;
   hypre_CSRMatrixJ(AN_offd) = AN_offd_j;
   hypre_CSRMatrixData(AN_offd) = AN_offd_data;
   
   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (here
         we are adding the off_diag contribution)*/
      /* the diagonal is the first element listed in each row of AN_diag_data - */
      for (i=0; i < num_nodes; i++)
      {
         sum = 0.0;
         for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++)
         {
            sum += AN_offd_data[k];
            
         }
         index = AN_diag_i[i];/* location of diag entry in data */ 
         AN_diag_data[index] -= sum; /* subtract from current value */
      }
      
   }

   /* now create AN */   
    
   AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes,
                                 row_starts_AN, row_starts_AN, num_cols_offd,
                                 num_nonzeros_diag, num_nonzeros_offd);

   /* we already created the diag and offd matrices - so we don't need the ones
      created above */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN));
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN));
   hypre_ParCSRMatrixDiag(AN) = AN_diag;
   hypre_ParCSRMatrixOffd(AN) = AN_offd;


   hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN;
   hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN;

   *AN_ptr        = AN;

   return (ierr);
}
示例#14
0
hypre_ParCSRMatrix *hypre_ParCSRAAt( hypre_ParCSRMatrix  *A )
{
    MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);

    HYPRE_Complex   *A_diag_data = hypre_CSRMatrixData(A_diag);
    HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
    HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);

    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);

    HYPRE_Complex   *A_offd_data = hypre_CSRMatrixData(A_offd);
    HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
    HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);
    HYPRE_Int       *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
    HYPRE_Int       *A_ext_row_map;

    HYPRE_Int       *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
    HYPRE_Int        num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
    HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

    hypre_ParCSRMatrix *C;
    HYPRE_Int          *col_map_offd_C;

    hypre_CSRMatrix *C_diag;

    HYPRE_Complex   *C_diag_data;
    HYPRE_Int       *C_diag_i;
    HYPRE_Int       *C_diag_j;

    hypre_CSRMatrix *C_offd;

    HYPRE_Complex   *C_offd_data=NULL;
    HYPRE_Int       *C_offd_i=NULL;
    HYPRE_Int       *C_offd_j=NULL;
    HYPRE_Int       *new_C_offd_j;

    HYPRE_Int        C_diag_size;
    HYPRE_Int        C_offd_size;
    HYPRE_Int        last_col_diag_C;
    HYPRE_Int        num_cols_offd_C;

    hypre_CSRMatrix *A_ext;

    HYPRE_Complex   *A_ext_data;
    HYPRE_Int       *A_ext_i;
    HYPRE_Int       *A_ext_j;
    HYPRE_Int        num_rows_A_ext=0;

    HYPRE_Int        first_row_index_A = hypre_ParCSRMatrixFirstRowIndex(A);
    HYPRE_Int        first_col_diag_A = hypre_ParCSRMatrixFirstColDiag(A);
    HYPRE_Int       *B_marker;

    HYPRE_Int        i;
    HYPRE_Int        i1, i2, i3;
    HYPRE_Int        jj2, jj3;

    HYPRE_Int        jj_count_diag, jj_count_offd;
    HYPRE_Int        jj_row_begin_diag, jj_row_begin_offd;
    HYPRE_Int        start_indexing = 0; /* start indexing for C_data at 0 */
    HYPRE_Int        count;
    HYPRE_Int        n_rows_A, n_cols_A;

    HYPRE_Complex    a_entry;
    HYPRE_Complex    a_b_product;

    HYPRE_Complex    zero = 0.0;

    n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
    n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);

    if (n_cols_A != n_rows_A)
    {
        hypre_printf(" Error! Incompatible matrix dimensions!\n");
        return NULL;
    }
    /*-----------------------------------------------------------------------
     *  Extract A_ext, i.e. portion of A that is stored on neighbor procs
     *  and needed locally for A^T in the matrix matrix product A*A^T
     *-----------------------------------------------------------------------*/

    if (num_rows_diag_A != n_rows_A)
    {
        /*---------------------------------------------------------------------
         * If there exists no CommPkg for A, a CommPkg is generated using
         * equally load balanced partitionings
         *--------------------------------------------------------------------*/
        if (!hypre_ParCSRMatrixCommPkg(A))
        {
            hypre_MatTCommPkgCreate(A);
        }

        A_ext = hypre_ParCSRMatrixExtractAExt( A, 1, &A_ext_row_map );
        A_ext_data = hypre_CSRMatrixData(A_ext);
        A_ext_i    = hypre_CSRMatrixI(A_ext);
        A_ext_j    = hypre_CSRMatrixJ(A_ext);
        num_rows_A_ext = hypre_CSRMatrixNumRows(A_ext);
    }
    /*-----------------------------------------------------------------------
     *  Allocate marker array.
     *-----------------------------------------------------------------------*/

    B_marker = hypre_CTAlloc(HYPRE_Int, num_rows_diag_A+num_rows_A_ext );

    /*-----------------------------------------------------------------------
     *  Initialize some stuff.
     *-----------------------------------------------------------------------*/

    for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 )
    {
        B_marker[i1] = -1;
    }


    hypre_ParAat_RowSizes(
        &C_diag_i, &C_offd_i, B_marker,
        A_diag_i, A_diag_j,
        A_offd_i, A_offd_j, A_col_map_offd,
        A_ext_i, A_ext_j, A_ext_row_map,
        &C_diag_size, &C_offd_size,
        num_rows_diag_A, num_cols_offd_A,
        num_rows_A_ext,
        first_col_diag_A, first_row_index_A
    );

#if 0
    /* debugging output: */
    hypre_printf("A_ext_row_map (%i):",num_rows_A_ext);
    for ( i1=0; i1<num_rows_A_ext; ++i1 ) hypre_printf(" %i",A_ext_row_map[i1] );
    hypre_printf("\nC_diag_i (%i):",C_diag_size);
    for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_diag_i[i1] );
    hypre_printf("\nC_offd_i (%i):",C_offd_size);
    for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_offd_i[i1] );
    hypre_printf("\n");
#endif

    /*-----------------------------------------------------------------------
     *  Allocate C_diag_data and C_diag_j arrays.
     *  Allocate C_offd_data and C_offd_j arrays.
     *-----------------------------------------------------------------------*/

    last_col_diag_C = first_row_index_A + num_rows_diag_A - 1;
    C_diag_data = hypre_CTAlloc(HYPRE_Complex, C_diag_size);
    C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
    if (C_offd_size)
    {
        C_offd_data = hypre_CTAlloc(HYPRE_Complex, C_offd_size);
        C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
    }

    /*-----------------------------------------------------------------------
     *  Second Pass: Fill in C_diag_data and C_diag_j.
     *  Second Pass: Fill in C_offd_data and C_offd_j.
     *-----------------------------------------------------------------------*/

    /*-----------------------------------------------------------------------
     *  Initialize some stuff.
     *-----------------------------------------------------------------------*/

    jj_count_diag = start_indexing;
    jj_count_offd = start_indexing;
    for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 )
    {
        B_marker[i1] = -1;
    }

    /*-----------------------------------------------------------------------
     *  Loop over interior c-points.
     *-----------------------------------------------------------------------*/

    for (i1 = 0; i1 < num_rows_diag_A; i1++)
    {

        /*--------------------------------------------------------------------
         *  Create diagonal entry, C_{i1,i1}
         *--------------------------------------------------------------------*/

        B_marker[i1] = jj_count_diag;
        jj_row_begin_diag = jj_count_diag;
        jj_row_begin_offd = jj_count_offd;
        C_diag_data[jj_count_diag] = zero;
        C_diag_j[jj_count_diag] = i1;
        jj_count_diag++;

        /*-----------------------------------------------------------------
         *  Loop over entries in row i1 of A_offd.
         *-----------------------------------------------------------------*/

        /* There are 3 CSRMatrix or CSRBooleanMatrix objects here:
           ext*ext, ext*diag, and ext*offd belong to another processor.
           diag*offd and offd*diag don't count - never share a column by definition.
           So we have to do 4 cases:
           diag*ext, offd*ext, diag*diag, and offd*offd.
        */

        for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
        {
            i2 = A_diag_j[jj2];
            a_entry = A_diag_data[jj2];

            /* diag*ext */
            /*-----------------------------------------------------------
             *  Loop over entries (columns) i3 in row i2 of (A_ext)^T
             *  That is, rows i3 having a column i2 of A_ext.
             *  For now, for each row i3 of A_ext we crudely check _all_
             *  columns to see whether one matches i2.
             *  For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2)
             *  to C(i1,i3) .  This contributes to both the diag and offd
             *  blocks of C.
             *-----------------------------------------------------------*/

            for ( i3=0; i3<num_rows_A_ext; i3++ ) {
                for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) {
                    if ( A_ext_j[jj3]==i2+first_col_diag_A ) {
                        /* row i3, column i2 of A_ext; or,
                           row i2, column i3 of (A_ext)^T */

                        a_b_product = a_entry * A_ext_data[jj3];

                        /*--------------------------------------------------------
                         *  Check B_marker to see that C_{i1,i3} has not already
                         *  been accounted for. If it has not, create a new entry.
                         *  If it has, add new contribution.
                         *--------------------------------------------------------*/

                        if ( A_ext_row_map[i3] < first_row_index_A ||
                                A_ext_row_map[i3] > last_col_diag_C ) { /* offd */
                            if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) {
                                B_marker[i3+num_rows_diag_A] = jj_count_offd;
                                C_offd_data[jj_count_offd] = a_b_product;
                                C_offd_j[jj_count_offd] = i3;
                                jj_count_offd++;
                            }
                            else
                                C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                        }
                        else {                                              /* diag */
                            if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) {
                                B_marker[i3+num_rows_diag_A] = jj_count_diag;
                                C_diag_data[jj_count_diag] = a_b_product;
                                C_diag_j[jj_count_diag] = i3-first_col_diag_A;
                                jj_count_diag++;
                            }
                            else
                                C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                        }
                    }
                }
            }
        }

        if (num_cols_offd_A)
        {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
                i2 = A_offd_j[jj2];
                a_entry = A_offd_data[jj2];

                /* offd * ext */
                /*-----------------------------------------------------------
                 *  Loop over entries (columns) i3 in row i2 of (A_ext)^T
                 *  That is, rows i3 having a column i2 of A_ext.
                 *  For now, for each row i3 of A_ext we crudely check _all_
                 *  columns to see whether one matches i2.
                 *  For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2)
                 *  to C(i1,i3) .  This contributes to both the diag and offd
                 *  blocks of C.
                 *-----------------------------------------------------------*/

                for ( i3=0; i3<num_rows_A_ext; i3++ ) {
                    for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) {
                        if ( A_ext_j[jj3]==A_col_map_offd[i2] ) {
                            /* row i3, column i2 of A_ext; or,
                               row i2, column i3 of (A_ext)^T */

                            a_b_product = a_entry * A_ext_data[jj3];

                            /*--------------------------------------------------------
                             *  Check B_marker to see that C_{i1,i3} has not already
                             *  been accounted for. If it has not, create a new entry.
                             *  If it has, add new contribution.
                             *--------------------------------------------------------*/

                            if ( A_ext_row_map[i3] < first_row_index_A ||
                                    A_ext_row_map[i3] > last_col_diag_C ) { /* offd */
                                if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) {
                                    B_marker[i3+num_rows_diag_A] = jj_count_offd;
                                    C_offd_data[jj_count_offd] = a_b_product;
                                    C_offd_j[jj_count_offd] = i3;
                                    jj_count_offd++;
                                }
                                else
                                    C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                            }
                            else {                                              /* diag */
                                if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) {
                                    B_marker[i3+num_rows_diag_A] = jj_count_diag;
                                    C_diag_data[jj_count_diag] = a_b_product;
                                    C_diag_j[jj_count_diag] = i3-first_row_index_A;
                                    jj_count_diag++;
                                }
                                else
                                    C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                            }
                        }
                    }
                }
            }
        }

        /* diag * diag */
        /*-----------------------------------------------------------------
         *  Loop over entries (columns) i2 in row i1 of A_diag.
         *  For each such column we will find the contributions of the
         *  corresponding rows i2 of A^T to C=A*A^T .  Now we only look
         *  at the local part of A^T - with columns (rows of A) living
         *  on this processor.
         *-----------------------------------------------------------------*/

        for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
        {
            i2 = A_diag_j[jj2];
            a_entry = A_diag_data[jj2];

            /*-----------------------------------------------------------
             *  Loop over entries (columns) i3 in row i2 of A^T
             *  That is, rows i3 having a column i2 of A (local part).
             *  For now, for each row i3 of A we crudely check _all_
             *  columns to see whether one matches i2.
             *  This i3-loop is for the diagonal block of A.
             *  It contributes to the diagonal block of C.
             *  For each entry (i2,i3) of A^T,  add A(i1,i2)*A(i3,i2)
             *  to C(i1,i3)
             *-----------------------------------------------------------*/
            for ( i3=0; i3<num_rows_diag_A; i3++ ) {
                for ( jj3=A_diag_i[i3]; jj3<A_diag_i[i3+1]; jj3++ ) {
                    if ( A_diag_j[jj3]==i2 ) {
                        /* row i3, column i2 of A; or,
                           row i2, column i3 of A^T */
                        a_b_product = a_entry * A_diag_data[jj3];

                        /*--------------------------------------------------------
                         *  Check B_marker to see that C_{i1,i3} has not already
                         *  been accounted for. If it has not, mark it and increment
                         *  counter.
                         *--------------------------------------------------------*/
                        if (B_marker[i3] < jj_row_begin_diag)
                        {
                            B_marker[i3] = jj_count_diag;
                            C_diag_data[jj_count_diag] = a_b_product;
                            C_diag_j[jj_count_diag] = i3;
                            jj_count_diag++;
                        }
                        else
                        {
                            C_diag_data[B_marker[i3]] += a_b_product;
                        }
                    }
                }
            } /* end of i3 loop */
        } /* end of third i2 loop */

        /* offd * offd */
        /*-----------------------------------------------------------
         *  Loop over offd columns i2 of A in A*A^T.  Then
         *  loop over offd entries (columns) i3 in row i2 of A^T
         *  That is, rows i3 having a column i2 of A (local part).
         *  For now, for each row i3 of A we crudely check _all_
         *  columns to see whether one matches i2.
         *  This i3-loop is for the off-diagonal block of A.
         *  It contributes to the diag block of C.
         *  For each entry (i2,i3) of A^T, add A*A^T to C
         *-----------------------------------------------------------*/
        if (num_cols_offd_A) {

            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
                i2 = A_offd_j[jj2];
                a_entry = A_offd_data[jj2];

                for ( i3=0; i3<num_rows_diag_A; i3++ ) {
                    /* ... note that num_rows_diag_A == num_rows_offd_A */
                    for ( jj3=A_offd_i[i3]; jj3<A_offd_i[i3+1]; jj3++ ) {
                        if ( A_offd_j[jj3]==i2 ) {
                            /* row i3, column i2 of A; or,
                               row i2, column i3 of A^T */
                            a_b_product = a_entry * A_offd_data[jj3];

                            /*--------------------------------------------------------
                             *  Check B_marker to see that C_{i1,i3} has not already
                             *  been accounted for. If it has not, create a new entry.
                             *  If it has, add new contribution
                             *--------------------------------------------------------*/

                            if (B_marker[i3] < jj_row_begin_diag)
                            {
                                B_marker[i3] = jj_count_diag;
                                C_diag_data[jj_count_diag] = a_b_product;
                                C_diag_j[jj_count_diag] = i3;
                                jj_count_diag++;
                            }
                            else
                            {
                                C_diag_data[B_marker[i3]] += a_b_product;
                            }
                        }
                    }
                }  /* end of last i3 loop */
            }     /* end of if (num_cols_offd_A) */

        }        /* end of fourth and last i2 loop */
#if 0          /* debugging printout */
        hypre_printf("end of i1 loop: i1=%i jj_count_diag=%i\n", i1, jj_count_diag );
        hypre_printf("  C_diag_j=");
        for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%i ",C_diag_j[jj3]);
        hypre_printf("  C_diag_data=");
        for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%f ",C_diag_data[jj3]);
        hypre_printf("\n");
        hypre_printf("  C_offd_j=");
        for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%i ",C_offd_j[jj3]);
        hypre_printf("  C_offd_data=");
        for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%f ",C_offd_data[jj3]);
        hypre_printf("\n");
        hypre_printf( "  B_marker =" );
        for ( it=0; it<num_rows_diag_A+num_rows_A_ext; ++it )
            hypre_printf(" %i", B_marker[it] );
        hypre_printf( "\n" );
#endif
    }           /* end of i1 loop */

    /*-----------------------------------------------------------------------
     *  Delete 0-columns in C_offd, i.e. generate col_map_offd and reset
     *  C_offd_j.  Note that (with the indexing we have coming into this
     *  block) col_map_offd_C[i3]==A_ext_row_map[i3].
     *-----------------------------------------------------------------------*/

    for ( i=0; i<num_rows_diag_A+num_rows_A_ext; ++i )
        B_marker[i] = -1;
    for ( i=0; i<C_offd_size; i++ )
        B_marker[ C_offd_j[i] ] = -2;

    count = 0;
    for (i=0; i < num_rows_diag_A + num_rows_A_ext; i++) {
        if (B_marker[i] == -2) {
            B_marker[i] = count;
            count++;
        }
    }
    num_cols_offd_C = count;

    if (num_cols_offd_C) {
        col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);
        new_C_offd_j = hypre_CTAlloc(HYPRE_Int,C_offd_size);
        /* ... a bit big, but num_cols_offd_C is too small.  It might be worth
           computing the correct size, which is sum( no. columns in row i, over all rows i )
        */

        for (i=0; i < C_offd_size; i++) {
            new_C_offd_j[i] = B_marker[C_offd_j[i]];
            col_map_offd_C[ new_C_offd_j[i] ] = A_ext_row_map[ C_offd_j[i] ];
        }

        hypre_TFree(C_offd_j);
        C_offd_j = new_C_offd_j;

    }

    /*----------------------------------------------------------------
     * Create C
     *----------------------------------------------------------------*/

    C = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_rows_A, row_starts_A,
                                 row_starts_A, num_cols_offd_C,
                                 C_diag_size, C_offd_size);

    /* Note that C does not own the partitionings */
    hypre_ParCSRMatrixSetRowStartsOwner(C,0);
    hypre_ParCSRMatrixSetColStartsOwner(C,0);

    C_diag = hypre_ParCSRMatrixDiag(C);
    hypre_CSRMatrixData(C_diag) = C_diag_data;
    hypre_CSRMatrixI(C_diag) = C_diag_i;
    hypre_CSRMatrixJ(C_diag) = C_diag_j;

    if (num_cols_offd_C)
    {
        C_offd = hypre_ParCSRMatrixOffd(C);
        hypre_CSRMatrixData(C_offd) = C_offd_data;
        hypre_CSRMatrixI(C_offd) = C_offd_i;
        hypre_CSRMatrixJ(C_offd) = C_offd_j;
        hypre_ParCSRMatrixOffd(C) = C_offd;
        hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

    }
    else
        hypre_TFree(C_offd_i);

    /*-----------------------------------------------------------------------
     *  Free B_ext and marker array.
     *-----------------------------------------------------------------------*/

    if (num_cols_offd_A)
    {
        hypre_CSRMatrixDestroy(A_ext);
        A_ext = NULL;
    }
    hypre_TFree(B_marker);
    if ( num_rows_diag_A != n_rows_A )
        hypre_TFree(A_ext_row_map);

    return C;

}
示例#15
0
HYPRE_Int
main( HYPRE_Int   argc,
      char *argv[] )
{
   hypre_CSRMatrix     *matrix;
   hypre_CSRMatrix     *matrix1;
   hypre_ParCSRMatrix  *par_matrix;
   hypre_Vector        *x_local;
   hypre_Vector        *y_local;
   hypre_Vector        *y2_local;
   hypre_ParVector     *x;
   hypre_ParVector     *x2;
   hypre_ParVector     *y;
   hypre_ParVector     *y2;

   HYPRE_Int          vecstride_x, idxstride_x, vecstride_y, idxstride_y;
   HYPRE_Int          num_procs, my_id;
   HYPRE_Int		local_size;
   HYPRE_Int          num_vectors;
   HYPRE_Int		global_num_rows, global_num_cols;
   HYPRE_Int		first_index;
   HYPRE_Int 		i, j, ierr=0;
   double 	*data, *data2;
   HYPRE_Int 		*row_starts, *col_starts;
   char		file_name[80];
   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &my_id);

   hypre_printf(" my_id: %d num_procs: %d\n", my_id, num_procs);
 
   if (my_id == 0) 
   {
	matrix = hypre_CSRMatrixRead("input");
   	hypre_printf(" read input\n");
   }
   row_starts = NULL;
   col_starts = NULL; 
   par_matrix = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, matrix, 
		row_starts, col_starts);
   hypre_printf(" converted\n");

   matrix1 = hypre_ParCSRMatrixToCSRMatrixAll(par_matrix);

   hypre_sprintf(file_name,"matrix1.%d",my_id);

   if (matrix1) hypre_CSRMatrixPrint(matrix1, file_name);

   hypre_ParCSRMatrixPrint(par_matrix,"matrix");
   hypre_ParCSRMatrixPrintIJ(par_matrix,0,0,"matrixIJ");

   par_matrix = hypre_ParCSRMatrixRead(hypre_MPI_COMM_WORLD,"matrix");

   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix);
   hypre_printf(" global_num_cols %d\n", global_num_cols);
   global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix);
 
   col_starts = hypre_ParCSRMatrixColStarts(par_matrix);
   first_index = col_starts[my_id];
   local_size = col_starts[my_id+1] - first_index;

   num_vectors = 3;

   x = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols,
                                         col_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(x,0);
   hypre_ParVectorInitialize(x);
   x_local = hypre_ParVectorLocalVector(x);
   data = hypre_VectorData(x_local);
   vecstride_x = hypre_VectorVectorStride(x_local);
   idxstride_x = hypre_VectorIndexStride(x_local);
   for ( j=0; j<num_vectors; ++j )
      for (i=0; i < local_size; i++)
         data[i*idxstride_x + j*vecstride_x] = first_index+i+1 + 100*j;

   x2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols,
                                    col_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(x2,0);
   hypre_ParVectorInitialize(x2);
   hypre_ParVectorSetConstantValues(x2,2.0);

   row_starts = hypre_ParCSRMatrixRowStarts(par_matrix);
   first_index = row_starts[my_id];
   local_size = row_starts[my_id+1] - first_index;
   y = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows,
                                   row_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(y,0);
   hypre_ParVectorInitialize(y);
   y_local = hypre_ParVectorLocalVector(y);

   y2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows,
                                    row_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(y2,0);
   hypre_ParVectorInitialize(y2);
   y2_local = hypre_ParVectorLocalVector(y2);
   data2 = hypre_VectorData(y2_local);
   vecstride_y = hypre_VectorVectorStride(y2_local);
   idxstride_y = hypre_VectorIndexStride(y2_local);
 
   for ( j=0; j<num_vectors; ++j )
      for (i=0; i < local_size; i++)
         data2[i*idxstride_y+j*vecstride_y] = first_index+i+1 + 100*j;

   hypre_ParVectorSetConstantValues(y,1.0);
   hypre_printf(" initialized vectors, first_index=%i\n", first_index);

   hypre_ParVectorPrint(x, "vectorx");
   hypre_ParVectorPrint(y, "vectory");

   hypre_MatvecCommPkgCreate(par_matrix);

   hypre_ParCSRMatrixMatvec ( 1.0, par_matrix, x, 1.0, y);
   hypre_printf(" did matvec\n");

   hypre_ParVectorPrint(y, "result");

   ierr = hypre_ParCSRMatrixMatvecT ( 1.0, par_matrix, y2, 1.0, x2);
   hypre_printf(" did matvecT %d\n", ierr);

   hypre_ParVectorPrint(x2, "transp"); 

   hypre_ParCSRMatrixDestroy(par_matrix);
   hypre_ParVectorDestroy(x);
   hypre_ParVectorDestroy(x2);
   hypre_ParVectorDestroy(y);
   hypre_ParVectorDestroy(y2);
   if (my_id == 0) hypre_CSRMatrixDestroy(matrix);
   if (matrix1) hypre_CSRMatrixDestroy(matrix1);

   /* Finalize MPI */
   hypre_MPI_Finalize();

   return 0;
}
示例#16
0
HYPRE_Int
hypre_ParChordMatrixToParCSRMatrix(
   hypre_ParChordMatrix *Ac,
   MPI_Comm comm,
   hypre_ParCSRMatrix **pAp )
{
   /* Some parts of this function are copied from hypre_CSRMatrixToParCSRMatrix. */

   hypre_ParCSRMatrix *Ap;
   HYPRE_Int *row_starts, *col_starts;
   HYPRE_Int global_num_rows, global_num_cols, my_id, num_procs;
   HYPRE_Int num_cols_offd, num_nonzeros_diag, num_nonzeros_offd;
   HYPRE_Int          *local_num_rows;
/* not computed   HYPRE_Int          *local_num_nonzeros; */
   HYPRE_Int          num_nonzeros, first_col_diag, last_col_diag;
   HYPRE_Int i,ic,ij,ir,ilocal,p,r,r_p,r_global,r_local, jlen;
   HYPRE_Int *a_i, *a_j, *ilen;
   HYPRE_Int **rdofs, **ps;
   double data;
   double *a_data;
   double **datas;
   hypre_CSRMatrix *local_A;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   hypre_ParChordMatrix_RowStarts
      ( Ac, comm, &row_starts, &global_num_cols );
   /* ... this function works correctly only under some assumptions;
      see the function definition for details */
   global_num_rows = row_starts[num_procs] - row_starts[0];

   col_starts = NULL;
   /* The offd and diag blocks aren't defined until we have both row
      and column partitions... */
   num_cols_offd = 0;
   num_nonzeros_diag = 0;
   num_nonzeros_offd = 0;

   Ap  = hypre_ParCSRMatrixCreate( comm, global_num_rows, global_num_cols,
                          row_starts, col_starts,
                          num_cols_offd, num_nonzeros_diag, num_nonzeros_offd);
   *pAp = Ap;

   row_starts = hypre_ParCSRMatrixRowStarts(Ap);
   col_starts = hypre_ParCSRMatrixColStarts(Ap);

   local_num_rows = hypre_CTAlloc(HYPRE_Int, num_procs);
   for (i=0; i < num_procs; i++)
         local_num_rows[i] = row_starts[i+1] - row_starts[i];

   num_nonzeros = 0;
   for ( p=0; p<hypre_ParChordMatrixNumInprocessors(Ac); ++p ) {
      num_nonzeros += hypre_ParChordMatrixNumInchords(Ac)[p];
   };

   local_A = hypre_CSRMatrixCreate( local_num_rows[my_id], global_num_cols,
                                    num_nonzeros );

   /* Compute local CSRMatrix-like i,j arrays for this processor. */

   ps = hypre_CTAlloc( HYPRE_Int*, hypre_ParChordMatrixNumIdofs(Ac) );
   rdofs = hypre_CTAlloc( HYPRE_Int*, hypre_ParChordMatrixNumIdofs(Ac) );
   datas = hypre_CTAlloc( double*, hypre_ParChordMatrixNumIdofs(Ac) );
   ilen  = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumIdofs(Ac) );
   jlen = 0;
   for ( i=0; i<hypre_ParChordMatrixNumIdofs(Ac); ++i ) {
      ilen[i] = 0;
      ps[i] = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumRdofs(Ac) );
      rdofs[i] = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumRdofs(Ac) );
      datas[i] = hypre_CTAlloc( double, hypre_ParChordMatrixNumRdofs(Ac) );
      /* ... rdofs[i], datas[i] will generally, not always, be much too big */
   }
   for ( p=0; p<hypre_ParChordMatrixNumInprocessors(Ac); ++p ) {
      for ( ic=0; ic<hypre_ParChordMatrixNumInchords(Ac)[p]; ++ic ) {
         ilocal = hypre_ParChordMatrixInchordIdof(Ac)[p][ic];
         r = hypre_ParChordMatrixInchordRdof(Ac)[p][ic];
         data = hypre_ParChordMatrixInchordData(Ac)[p][ic];
         ps[ilocal][ ilen[ilocal] ] = p;
         rdofs[ilocal][ ilen[ilocal] ] = r;
         datas[ilocal][ ilen[ilocal] ] = data;
         ++ilen[ilocal];
         ++jlen;
      }
   };

   a_i = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumIdofs(Ac)+1 );
   a_j = hypre_CTAlloc( HYPRE_Int, jlen );
   a_data = hypre_CTAlloc( double, jlen );
   a_i[0] = 0;
   for ( ilocal=0; ilocal<hypre_ParChordMatrixNumIdofs(Ac); ++ilocal ) {
      a_i[ilocal+1] = a_i[ilocal] + ilen[ilocal];
      ir = 0;
      for ( ij=a_i[ilocal]; ij<a_i[ilocal+1]; ++ij ) {
         p = ps[ilocal][ir];
         r_p = rdofs[ilocal][ir];  /* local in proc. p */
         r_global = r_p + hypre_ParChordMatrixFirstindexRdof(Ac)[p];
         r_local = r_global - hypre_ParChordMatrixFirstindexRdof(Ac)[my_id];
         a_j[ij] = r_local;
         a_data[ij] = datas[ilocal][ir];
         ir++;
      };
   };

   for ( i=0; i<hypre_ParChordMatrixNumIdofs(Ac); ++i ) {
      hypre_TFree( ps[i] );
      hypre_TFree( rdofs[i] );
      hypre_TFree( datas[i] );
   };
   hypre_TFree( ps );
   hypre_TFree( rdofs );
   hypre_TFree( datas );
   hypre_TFree( ilen );

   first_col_diag = col_starts[my_id];
   last_col_diag = col_starts[my_id+1]-1;

   hypre_CSRMatrixData(local_A) = a_data;
   hypre_CSRMatrixI(local_A) = a_i;
   hypre_CSRMatrixJ(local_A) = a_j;
   hypre_CSRMatrixOwnsData(local_A) = 0;

   GenerateDiagAndOffd(local_A, Ap, first_col_diag, last_col_diag);

   /* set pointers back to NULL before destroying */
   if (my_id == 0)
   {
      hypre_TFree(a_data);
      /* ... the data has been copied into different diag & offd arrays of Ap */
      hypre_TFree(a_j);
      hypre_TFree(a_i);
      hypre_CSRMatrixData(local_A) = NULL;
      hypre_CSRMatrixI(local_A) = NULL;
      hypre_CSRMatrixJ(local_A) = NULL; 
   }      
   hypre_CSRMatrixDestroy(local_A);
   hypre_TFree(local_num_rows);
/*   hypre_TFree(csr_matrix_datatypes);*/
   return 0;
}