Example #1
0
void hypre_BoomerAMGTruncateInterp( hypre_ParCSRMatrix *P,
                                    HYPRE_Real eps, HYPRE_Real dlt,
                                    HYPRE_Int * CF_marker )
/* Truncate the interpolation matrix P, but only in rows for which the
   marker is <0.  Truncation means that an element P(i,j) is set to 0 if
   P(i,j)>0 and P(i,j)<eps*max( P(i,j) )  or if
   P(i,j)>0 and P(i,j)<dlt*max( -P(i,j) )  or if
   P(i,j)<0 and P(i,j)>dlt*min( -P(i,j) )  or if
   P(i,j)<0 and P(i,j)>eps*min( P(i,j) )
      ( 0<eps,dlt<1, typically 0.1=dlt<eps=0.2, )
   The min and max are only computed locally, as I'm guessing that there isn't
   usually much to be gained (in the way of improved performance) by getting
   them perfectly right.
*/

/* The function hypre_BoomerAMGInterpTruncation in par_interp.c is
   very similar.  It looks at fabs(value) rather than separately
   dealing with value<0 and value>0 as recommended by Klaus Stuben,
   thus as this function does.  In this function, only "marked" rows
   are affected.  Lastly, in hypre_BoomerAMGInterpTruncation, if any
   element gets discarded, it reallocates arrays to the new size.
*/
{
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Real      *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Real      *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int             *new_P_diag_i;
   HYPRE_Int             *new_P_offd_i;
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int	num_rows_offd_P = hypre_CSRMatrixNumRows(P_offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(P_diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(P_offd);
#if 0
   MPI_Comm comm = hypre_ParCSRMatrixComm( P );
   HYPRE_Real vmax1, vmin1;
#endif
   HYPRE_Real vmax = 0.0;
   HYPRE_Real vmin = 0.0;
   HYPRE_Real v, old_sum, new_sum, scale, wmax, wmin;
   HYPRE_Int i1, m, m1d, m1o;

   /* compute vmax = eps*max(P(i,j)), vmin = eps*min(P(i,j)) */
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            v = P_diag_data[m];
            vmax = hypre_max( v, vmax );
            vmin = hypre_min( v, vmin );
         }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
         {
            v = P_offd_data[m];
            vmax = hypre_max( v, vmax );
            vmin = hypre_min( v, vmin );
         }
   }
#if 0
   /* This can make max,min global so results don't depend on no. processors
      We don't want this except for testing, or maybe this could be put
      someplace better.  I don't like adding communication here, for a minor reason.
   */
   vmax1 = vmax; vmin1 = vmin;
   hypre_MPI_Allreduce( &vmax1, &vmax, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm );
   hypre_MPI_Allreduce( &vmin1, &vmin, 1, HYPRE_MPI_REAL, hypre_MPI_MIN, comm );
#endif
   if ( vmax <= 0.0 ) vmax =  1.0;  /* make sure no v is v>vmax if no v is v>0 */
   if ( vmin >= 0.0 ) vmin = -1.0;  /* make sure no v is v<vmin if no v is v<0 */
   wmax = - dlt * vmin;
   wmin = - dlt * vmax;
   vmax *= eps;
   vmin *= eps;

   /* Repack the i,j,and data arrays so as to discard the small elements of P.
      Elements of Coarse rows (CF_marker>=0) are always kept.
      The arrays are not re-allocated, so there will generally be unused space
      at the ends of the arrays. */
   new_P_diag_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P+1 );
   new_P_offd_i = hypre_CTAlloc( HYPRE_Int, num_rows_offd_P+1 );
   m1d = P_diag_i[0];
   m1o = P_offd_i[0];
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      old_sum = 0;
      new_sum = 0;
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
      {
         v = P_diag_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_diag_j[m1d] = P_diag_j[m];
            P_diag_data[m1d] = P_diag_data[m];
            ++m1d;
         }
         else
         {  /* discard v */
            --num_nonzeros_diag;
         }
      }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
      {
         v = P_offd_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_offd_j[m1o] = P_offd_j[m];
            P_offd_data[m1o] = P_offd_data[m];
            ++m1o;
         }
         else
         {  /* discard v */
            --num_nonzeros_offd;
         }
      }

      new_P_diag_i[i1+1] = m1d;
      if ( i1<num_rows_offd_P ) new_P_offd_i[i1+1] = m1o;

      /* rescale to keep row sum the same */
      if (new_sum!=0) scale = old_sum/new_sum; else scale = 1.0;
      for ( m=new_P_diag_i[i1]; m<new_P_diag_i[i1+1]; ++m )
         P_diag_data[m] *= scale;
      if ( i1<num_rows_offd_P ) /* this test fails when there is no offd block */
         for ( m=new_P_offd_i[i1]; m<new_P_offd_i[i1+1]; ++m )
            P_offd_data[m] *= scale;

   }

   for ( i1 = 1; i1 <= num_rows_diag_P; i1++ )
   {
      P_diag_i[i1] = new_P_diag_i[i1];
      if ( i1<=num_rows_offd_P && num_nonzeros_offd>0 ) P_offd_i[i1] = new_P_offd_i[i1];
   }
   hypre_TFree( new_P_diag_i );
   if ( num_rows_offd_P>0 ) hypre_TFree( new_P_offd_i );

   hypre_CSRMatrixNumNonzeros(P_diag) = num_nonzeros_diag;
   hypre_CSRMatrixNumNonzeros(P_offd) = num_nonzeros_offd;
   hypre_ParCSRMatrixSetDNumNonzeros( P );
   hypre_ParCSRMatrixSetNumNonzeros( P );

}
Example #2
0
void hypre_ParCSRMatrixSplit(hypre_ParCSRMatrix *A,
                             HYPRE_Int nr, HYPRE_Int nc,
                             hypre_ParCSRMatrix **blocks,
                             int interleaved_rows, int interleaved_cols)
{
    HYPRE_Int i, j, k;

    MPI_Comm comm = hypre_ParCSRMatrixComm(A);

    hypre_CSRMatrix *Adiag = hypre_ParCSRMatrixDiag(A);
    hypre_CSRMatrix *Aoffd = hypre_ParCSRMatrixOffd(A);

    HYPRE_Int global_rows = hypre_ParCSRMatrixGlobalNumRows(A);
    HYPRE_Int global_cols = hypre_ParCSRMatrixGlobalNumCols(A);

    HYPRE_Int local_rows = hypre_CSRMatrixNumRows(Adiag);
    HYPRE_Int local_cols = hypre_CSRMatrixNumCols(Adiag);
    HYPRE_Int offd_cols = hypre_CSRMatrixNumCols(Aoffd);

    hypre_assert(local_rows % nr == 0 && local_cols % nc == 0);
    hypre_assert(global_rows % nr == 0 && global_cols % nc == 0);

    HYPRE_Int block_rows = local_rows / nr;
    HYPRE_Int block_cols = local_cols / nc;
    HYPRE_Int num_blocks = nr * nc;

    /* mark local rows and columns with block number */
    HYPRE_Int *row_block_num = hypre_TAlloc(HYPRE_Int, local_rows);
    HYPRE_Int *col_block_num = hypre_TAlloc(HYPRE_Int, local_cols);

    for (i = 0; i < local_rows; i++)
    {
        row_block_num[i] = interleaved_rows ? (i % nr) : (i / block_rows);
    }
    for (i = 0; i < local_cols; i++)
    {
        col_block_num[i] = interleaved_cols ? (i % nc) : (i / block_cols);
    }

    /* determine the block numbers for offd columns */
    HYPRE_Int* offd_col_block_num = hypre_TAlloc(HYPRE_Int, offd_cols);
    hypre_ParCSRCommHandle *comm_handle;
    HYPRE_Int *int_buf_data;
    {
        /* make sure A has a communication package */
        hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
        if (!comm_pkg)
        {
            hypre_MatvecCommPkgCreate(A);
            comm_pkg = hypre_ParCSRMatrixCommPkg(A);
        }

        /* calculate the final global column numbers for each block */
        HYPRE_Int *count = hypre_CTAlloc(HYPRE_Int, nc);
        HYPRE_Int *block_global_col = hypre_TAlloc(HYPRE_Int, local_cols);
        HYPRE_Int first_col = hypre_ParCSRMatrixFirstColDiag(A) / nc;
        for (i = 0; i < local_cols; i++)
        {
            block_global_col[i] = first_col + count[col_block_num[i]]++;
        }
        hypre_TFree(count);

        /* use a Matvec communication pattern to determine offd_col_block_num */
        HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
        int_buf_data = hypre_CTAlloc(HYPRE_Int,
                                     hypre_ParCSRCommPkgSendMapStart(comm_pkg,
                                             num_sends));
        HYPRE_Int start, index = 0;
        for (i = 0; i < num_sends; i++)
        {
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            {
                k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j);
                int_buf_data[index++] = col_block_num[k] + nc*block_global_col[k];
            }
        }
        hypre_TFree(block_global_col);

        comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
                      offd_col_block_num);
    }

    /* create the block matrices */
    HYPRE_Int num_procs = 1;
    if (!hypre_ParCSRMatrixAssumedPartition(A))
    {
        hypre_MPI_Comm_size(comm, &num_procs);
    }

    HYPRE_Int *row_starts = hypre_TAlloc(HYPRE_Int, num_procs+1);
    HYPRE_Int *col_starts = hypre_TAlloc(HYPRE_Int, num_procs+1);
    for (i = 0; i <= num_procs; i++)
    {
        row_starts[i] = hypre_ParCSRMatrixRowStarts(A)[i] / nr;
        col_starts[i] = hypre_ParCSRMatrixColStarts(A)[i] / nc;
    }

    for (i = 0; i < num_blocks; i++)
    {
        blocks[i] = hypre_ParCSRMatrixCreate(comm,
                                             global_rows/nr, global_cols/nc,
                                             row_starts, col_starts, 0, 0, 0);
    }

    /* split diag part */
    hypre_CSRMatrix **csr_blocks = hypre_TAlloc(hypre_CSRMatrix*, nr*nc);
    hypre_CSRMatrixSplit(Adiag, nr, nc, row_block_num, col_block_num,
                         csr_blocks);

    for (i = 0; i < num_blocks; i++)
    {
        hypre_TFree(hypre_ParCSRMatrixDiag(blocks[i]));
        hypre_ParCSRMatrixDiag(blocks[i]) = csr_blocks[i];
    }

    /* finish communication, receive offd_col_block_num */
    hypre_ParCSRCommHandleDestroy(comm_handle);
    hypre_TFree(int_buf_data);

    /* decode global offd column numbers */
    HYPRE_Int* offd_global_col = hypre_TAlloc(HYPRE_Int, offd_cols);
    for (i = 0; i < offd_cols; i++)
    {
        offd_global_col[i] = offd_col_block_num[i] / nc;
        offd_col_block_num[i] %= nc;
    }

    /* split offd part */
    hypre_CSRMatrixSplit(Aoffd, nr, nc, row_block_num, offd_col_block_num,
                         csr_blocks);

    for (i = 0; i < num_blocks; i++)
    {
        hypre_TFree(hypre_ParCSRMatrixOffd(blocks[i]));
        hypre_ParCSRMatrixOffd(blocks[i]) = csr_blocks[i];
    }

    hypre_TFree(csr_blocks);
    hypre_TFree(col_block_num);
    hypre_TFree(row_block_num);

    /* update block col-maps */
    for (int bi = 0; bi < nr; bi++)
    {
        for (int bj = 0; bj < nc; bj++)
        {
            hypre_ParCSRMatrix *block = blocks[bi*nc + bj];
            hypre_CSRMatrix *block_offd = hypre_ParCSRMatrixOffd(block);
            HYPRE_Int block_offd_cols = hypre_CSRMatrixNumCols(block_offd);

            HYPRE_Int *block_col_map = hypre_TAlloc(HYPRE_Int, block_offd_cols);
            for (i = j = 0; i < offd_cols; i++)
            {
                HYPRE_Int bn = offd_col_block_num[i];
                if (bn == bj) {
                    block_col_map[j++] = offd_global_col[i];
                }
            }
            hypre_assert(j == block_offd_cols);

            hypre_ParCSRMatrixColMapOffd(block) = block_col_map;
        }
    }

    hypre_TFree(offd_global_col);
    hypre_TFree(offd_col_block_num);

    /* finish the new matrices, make them own all the stuff */
    for (i = 0; i < num_blocks; i++)
    {
        hypre_ParCSRMatrixSetNumNonzeros(blocks[i]);
        hypre_MatvecCommPkgCreate(blocks[i]);

        hypre_ParCSRMatrixOwnsData(blocks[i]) = 1;

        /* only the first block will own the row/col_starts */
        hypre_ParCSRMatrixOwnsRowStarts(blocks[i]) = !i;
        hypre_ParCSRMatrixOwnsColStarts(blocks[i]) = !i;
    }
}
Example #3
0
hypre_ParCSRMatrix *
hypre_ParCSRBlockMatrixConvertToParCSRMatrix(hypre_ParCSRBlockMatrix *matrix)
{
   MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix);
   hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix);
   hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix);
   HYPRE_Int block_size = hypre_ParCSRBlockMatrixBlockSize(matrix);
   HYPRE_Int global_num_rows = hypre_ParCSRBlockMatrixGlobalNumRows(matrix);
   HYPRE_Int global_num_cols = hypre_ParCSRBlockMatrixGlobalNumCols(matrix);
   HYPRE_Int *row_starts = hypre_ParCSRBlockMatrixRowStarts(matrix);
   HYPRE_Int *col_starts = hypre_ParCSRBlockMatrixColStarts(matrix);
   HYPRE_Int num_cols_offd = hypre_CSRBlockMatrixNumCols(offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRBlockMatrixNumNonzeros(diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRBlockMatrixNumNonzeros(offd);
 
   hypre_ParCSRMatrix *matrix_C;
   HYPRE_Int *matrix_C_row_starts;
   HYPRE_Int *matrix_C_col_starts;

   HYPRE_Int *counter, *new_j_map;
   HYPRE_Int size_j, size_map, index, new_num_cols, removed = 0;
   HYPRE_Int *offd_j, *col_map_offd, *new_col_map_offd;
   

   HYPRE_Int num_procs, i, j;

   hypre_CSRMatrix *diag_nozeros, *offd_nozeros;

   hypre_MPI_Comm_size(comm,&num_procs);

#ifdef HYPRE_NO_GLOBAL_PARTITION
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, 2);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, 2);
   for(i = 0; i < 2; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]*block_size;
      matrix_C_col_starts[i] = col_starts[i]*block_size;
   }
#else
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   for(i = 0; i < num_procs + 1; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]*block_size;
      matrix_C_col_starts[i] = col_starts[i]*block_size;
   }
#endif

   matrix_C = hypre_ParCSRMatrixCreate(comm, global_num_rows*block_size, 
                                       global_num_cols*block_size, matrix_C_row_starts, 
                                       matrix_C_col_starts, num_cols_offd*block_size, 
                                       num_nonzeros_diag*block_size*block_size, 
                                       num_nonzeros_offd*block_size*block_size);
   hypre_ParCSRMatrixInitialize(matrix_C);

   /* DIAG */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
   hypre_ParCSRMatrixDiag(matrix_C) = 
      hypre_CSRBlockMatrixConvertToCSRMatrix(diag);

   /* AB - added to delete zeros */
   diag_nozeros = hypre_CSRMatrixDeleteZeros( 
      hypre_ParCSRMatrixDiag(matrix_C), 1e-14);
   if(diag_nozeros) 
   {
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
      hypre_ParCSRMatrixDiag(matrix_C) = diag_nozeros;
   }

   /* OFF-DIAG */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
   hypre_ParCSRMatrixOffd(matrix_C) = 
      hypre_CSRBlockMatrixConvertToCSRMatrix(offd);

   /* AB - added to delete zeros - this just deletes from data and j arrays */
   offd_nozeros = hypre_CSRMatrixDeleteZeros( 
      hypre_ParCSRMatrixOffd(matrix_C), 1e-14);
   if(offd_nozeros) 
   {
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
      hypre_ParCSRMatrixOffd(matrix_C) = offd_nozeros;
      removed = 1;
       
   }

   /* now convert the col_map_offd */
   for (i = 0; i < num_cols_offd; i++)
      for (j = 0; j < block_size; j++)
         hypre_ParCSRMatrixColMapOffd(matrix_C)[i*block_size + j] = 
            hypre_ParCSRBlockMatrixColMapOffd(matrix)[i]*block_size + j;
 
   /* if we deleted zeros, then it is possible that col_map_offd can be
      compressed as well - this requires some amount of work that could be skipped... */

   if (removed)
   {
      size_map =   num_cols_offd*block_size;
      counter = hypre_CTAlloc(HYPRE_Int, size_map);
      new_j_map = hypre_CTAlloc(HYPRE_Int, size_map);

      offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(matrix_C));
      col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix_C);
      
      size_j = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(matrix_C));
      /* mark which off_d entries are found in j */
      for (i=0; i < size_j; i++)
      {
         counter[offd_j[i]] = 1;
      }
      /*now find new numbering for columns (we will delete the 
        cols where counter = 0*/
      index = 0;
      for (i=0; i < size_map; i++)
      {
         if (counter[i]) new_j_map[i] = index++;
      }
      new_num_cols = index;
      /* if there are some col entries to remove: */ 
      if (!(index == size_map))
      {
         /* go thru j and adjust entries */         
         for (i=0; i < size_j; i++)
         {
            offd_j[i] = new_j_map[offd_j[i]];
         }
         /*now go thru col map and get rid of non-needed entries */ 
         new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols);
         index = 0;
         for (i=0; i < size_map; i++)  
         {
            if (counter[i]) 
            {
               new_col_map_offd[index++] = col_map_offd[i];
            }
         }
         /* set the new col map */ 
         hypre_TFree(col_map_offd);
         hypre_ParCSRMatrixColMapOffd(matrix_C) = new_col_map_offd;
         /* modify the number of cols */
         hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(matrix_C)) = new_num_cols;
      }
      hypre_TFree(new_j_map);
      hypre_TFree(counter);
      
   }
   
   hypre_ParCSRMatrixSetNumNonzeros( matrix_C );
   hypre_ParCSRMatrixSetDNumNonzeros( matrix_C );
   
   /* we will not copy the comm package */  
   hypre_ParCSRMatrixCommPkg(matrix_C) = NULL;

   return matrix_C;
}
Example #4
0
/*
  Function:  hypre_ParCSRMatrixEliminateAAe

                    (input)                  (output)

                / A_ii | A_ib \          / A_ii |  0   \
            A = | -----+----- |   --->   | -----+----- |
                \ A_bi | A_bb /          \   0  |  I   /


                                         /   0  |   A_ib   \
                                    Ae = | -----+--------- |
                                         \ A_bi | A_bb - I /

*/
void hypre_ParCSRMatrixEliminateAAe(hypre_ParCSRMatrix *A,
                                    hypre_ParCSRMatrix **Ae,
                                    HYPRE_Int num_rowscols_to_elim,
                                    HYPRE_Int *rowscols_to_elim)
{
    HYPRE_Int i, j, k;

    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
    HYPRE_Int A_diag_nrows  = hypre_CSRMatrixNumRows(A_diag);
    HYPRE_Int A_offd_ncols  = hypre_CSRMatrixNumCols(A_offd);

    *Ae = hypre_ParCSRMatrixCreate(hypre_ParCSRMatrixComm(A),
                                   hypre_ParCSRMatrixGlobalNumRows(A),
                                   hypre_ParCSRMatrixGlobalNumCols(A),
                                   hypre_ParCSRMatrixRowStarts(A),
                                   hypre_ParCSRMatrixColStarts(A),
                                   0, 0, 0);

    hypre_ParCSRMatrixSetRowStartsOwner(*Ae, 0);
    hypre_ParCSRMatrixSetColStartsOwner(*Ae, 0);

    hypre_CSRMatrix *Ae_diag = hypre_ParCSRMatrixDiag(*Ae);
    hypre_CSRMatrix *Ae_offd = hypre_ParCSRMatrixOffd(*Ae);
    HYPRE_Int Ae_offd_ncols;

    HYPRE_Int  num_offd_cols_to_elim;
    HYPRE_Int  *offd_cols_to_elim;

    HYPRE_Int  *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
    HYPRE_Int  *Ae_col_map_offd;

    HYPRE_Int  *col_mark;
    HYPRE_Int  *col_remap;

    /* figure out which offd cols should be eliminated */
    {
        hypre_ParCSRCommHandle *comm_handle;
        hypre_ParCSRCommPkg *comm_pkg;
        HYPRE_Int num_sends, *int_buf_data;
        HYPRE_Int index, start;

        HYPRE_Int *eliminate_row = hypre_CTAlloc(HYPRE_Int, A_diag_nrows);
        HYPRE_Int *eliminate_col = hypre_CTAlloc(HYPRE_Int, A_offd_ncols);

        /* make sure A has a communication package */
        comm_pkg = hypre_ParCSRMatrixCommPkg(A);
        if (!comm_pkg)
        {
            hypre_MatvecCommPkgCreate(A);
            comm_pkg = hypre_ParCSRMatrixCommPkg(A);
        }

        /* which of the local rows are to be eliminated */
        for (i = 0; i < A_diag_nrows; i++)
        {
            eliminate_row[i] = 0;
        }
        for (i = 0; i < num_rowscols_to_elim; i++)
        {
            eliminate_row[rowscols_to_elim[i]] = 1;
        }

        /* use a Matvec communication pattern to find (in eliminate_col)
           which of the local offd columns are to be eliminated */
        num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
        int_buf_data = hypre_CTAlloc(HYPRE_Int,
                                     hypre_ParCSRCommPkgSendMapStart(comm_pkg,
                                             num_sends));
        index = 0;
        for (i = 0; i < num_sends; i++)
        {
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            {
                k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j);
                int_buf_data[index++] = eliminate_row[k];
            }
        }
        comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg,
                      int_buf_data, eliminate_col);

        /* eliminate diagonal part, overlapping it with communication */
        hypre_CSRMatrixElimCreate(A_diag, Ae_diag,
                                  num_rowscols_to_elim, rowscols_to_elim,
                                  num_rowscols_to_elim, rowscols_to_elim,
                                  NULL);

        hypre_CSRMatrixEliminateRowsCols(A_diag, Ae_diag,
                                         num_rowscols_to_elim, rowscols_to_elim,
                                         num_rowscols_to_elim, rowscols_to_elim,
                                         1, NULL);
        hypre_CSRMatrixReorder(Ae_diag);

        /* finish the communication */
        hypre_ParCSRCommHandleDestroy(comm_handle);

        /* received eliminate_col[], count offd columns to eliminate */
        num_offd_cols_to_elim = 0;
        for (i = 0; i < A_offd_ncols; i++)
        {
            if (eliminate_col[i]) {
                num_offd_cols_to_elim++;
            }
        }

        offd_cols_to_elim = hypre_CTAlloc(HYPRE_Int, num_offd_cols_to_elim);

        /* get a list of offd column indices and coefs */
        num_offd_cols_to_elim = 0;
        for (i = 0; i < A_offd_ncols; i++)
        {
            if (eliminate_col[i])
            {
                offd_cols_to_elim[num_offd_cols_to_elim++] = i;
            }
        }

        hypre_TFree(int_buf_data);
        hypre_TFree(eliminate_row);
        hypre_TFree(eliminate_col);
    }

    /* eliminate the off-diagonal part */
    col_mark = hypre_CTAlloc(HYPRE_Int, A_offd_ncols);
    col_remap = hypre_CTAlloc(HYPRE_Int, A_offd_ncols);

    hypre_CSRMatrixElimCreate(A_offd, Ae_offd,
                              num_rowscols_to_elim, rowscols_to_elim,
                              num_offd_cols_to_elim, offd_cols_to_elim,
                              col_mark);

    for (i = k = 0; i < A_offd_ncols; i++)
    {
        if (col_mark[i]) {
            col_remap[i] = k++;
        }
    }

    hypre_CSRMatrixEliminateRowsCols(A_offd, Ae_offd,
                                     num_rowscols_to_elim, rowscols_to_elim,
                                     num_offd_cols_to_elim, offd_cols_to_elim,
                                     0, col_remap);

    /* create col_map_offd for Ae */
    Ae_offd_ncols = 0;
    for (i = 0; i < A_offd_ncols; i++)
    {
        if (col_mark[i]) {
            Ae_offd_ncols++;
        }
    }

    Ae_col_map_offd  = hypre_CTAlloc(HYPRE_Int, Ae_offd_ncols);

    Ae_offd_ncols = 0;
    for (i = 0; i < A_offd_ncols; i++)
    {
        if (col_mark[i])
        {
            Ae_col_map_offd[Ae_offd_ncols++] = A_col_map_offd[i];
        }
    }

    hypre_ParCSRMatrixColMapOffd(*Ae) = Ae_col_map_offd;
    hypre_CSRMatrixNumCols(Ae_offd) = Ae_offd_ncols;

    hypre_TFree(col_remap);
    hypre_TFree(col_mark);
    hypre_TFree(offd_cols_to_elim);

    hypre_ParCSRMatrixSetNumNonzeros(*Ae);
    hypre_MatvecCommPkgCreate(*Ae);
}