예제 #1
0
hypre_ParCSRMatrix *
hypre_ParCSRBlockMatrixCompress( hypre_ParCSRBlockMatrix *matrix )
{
   MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix);
   hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix);
   hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix);
   HYPRE_Int global_num_rows = hypre_ParCSRBlockMatrixGlobalNumRows(matrix);
   HYPRE_Int global_num_cols = hypre_ParCSRBlockMatrixGlobalNumCols(matrix);
   HYPRE_Int *row_starts = hypre_ParCSRBlockMatrixRowStarts(matrix);
   HYPRE_Int *col_starts = hypre_ParCSRBlockMatrixColStarts(matrix);
   HYPRE_Int num_cols_offd = hypre_CSRBlockMatrixNumCols(offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRBlockMatrixNumNonzeros(diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRBlockMatrixNumNonzeros(offd);

   hypre_ParCSRMatrix *matrix_C;

   HYPRE_Int i;

   matrix_C = hypre_ParCSRMatrixCreate(comm, global_num_rows, global_num_cols,
                                       row_starts,col_starts,num_cols_offd,num_nonzeros_diag,num_nonzeros_offd);
   hypre_ParCSRMatrixInitialize(matrix_C);

   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
   hypre_ParCSRMatrixDiag(matrix_C) = hypre_CSRBlockMatrixCompress(diag);
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
   hypre_ParCSRMatrixOffd(matrix_C) = hypre_CSRBlockMatrixCompress(offd);

   for(i = 0; i < num_cols_offd; i++)
      hypre_ParCSRMatrixColMapOffd(matrix_C)[i] = 
         hypre_ParCSRBlockMatrixColMapOffd(matrix)[i];
   return matrix_C;
}
예제 #2
0
HYPRE_Int
hypre_MatTCommPkgCreate ( hypre_ParCSRMatrix *A)
{
   hypre_ParCSRCommPkg	*comm_pkg;
   
   MPI_Comm             comm = hypre_ParCSRMatrixComm(A);
/*   hypre_MPI_Datatype         *recv_mpi_types;
   hypre_MPI_Datatype         *send_mpi_types;
*/
   HYPRE_Int			num_sends;
   HYPRE_Int			*send_procs;
   HYPRE_Int			*send_map_starts;
   HYPRE_Int			*send_map_elmts;
   HYPRE_Int			num_recvs;
   HYPRE_Int			*recv_procs;
   HYPRE_Int			*recv_vec_starts;
   
   HYPRE_Int  *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int  first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);
   HYPRE_Int  *col_starts = hypre_ParCSRMatrixColStarts(A);

   HYPRE_Int	ierr = 0;
   HYPRE_Int	num_rows_diag = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
   HYPRE_Int	num_cols_diag = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(A));
   HYPRE_Int	num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
   HYPRE_Int * row_starts = hypre_ParCSRMatrixRowStarts(A);

   hypre_MatTCommPkgCreate_core (
      comm, col_map_offd, first_col_diag, col_starts,
      num_rows_diag, num_cols_diag, num_cols_offd, row_starts,
                                  hypre_ParCSRMatrixFirstColDiag(A),
                                  hypre_ParCSRMatrixColMapOffd(A),
                                  hypre_CSRMatrixI( hypre_ParCSRMatrixDiag(A) ),
                                  hypre_CSRMatrixJ( hypre_ParCSRMatrixDiag(A) ),
                                  hypre_CSRMatrixI( hypre_ParCSRMatrixOffd(A) ),
      hypre_CSRMatrixJ( hypre_ParCSRMatrixOffd(A) ),
      1,
      &num_recvs, &recv_procs, &recv_vec_starts,
      &num_sends, &send_procs, &send_map_starts,
      &send_map_elmts
      );

   comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1);

   hypre_ParCSRCommPkgComm(comm_pkg) = comm;

   hypre_ParCSRCommPkgNumRecvs(comm_pkg) = num_recvs;
   hypre_ParCSRCommPkgRecvProcs(comm_pkg) = recv_procs;
   hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) = recv_vec_starts;
   hypre_ParCSRCommPkgNumSends(comm_pkg) = num_sends;
   hypre_ParCSRCommPkgSendProcs(comm_pkg) = send_procs;
   hypre_ParCSRCommPkgSendMapStarts(comm_pkg) = send_map_starts;
   hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = send_map_elmts;

   hypre_ParCSRMatrixCommPkgT(A) = comm_pkg;

   return ierr;
}
예제 #3
0
HYPRE_Int
hypre_ParCSRMatrix_dof_func_offd(
   hypre_ParCSRMatrix    *A,
   HYPRE_Int                    num_functions,
   HYPRE_Int                   *dof_func,
   HYPRE_Int                  **dof_func_offd )
{
   hypre_ParCSRCommPkg     *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_ParCSRCommHandle  *comm_handle;
   hypre_CSRMatrix    *A_offd          = hypre_ParCSRMatrixOffd(A);

   HYPRE_Int 		       num_cols_offd = 0;
   HYPRE_Int                 Solve_err_flag = 0;
   HYPRE_Int			num_sends;
   HYPRE_Int		       *int_buf_data;
   HYPRE_Int			index, start, i, j;

   num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
   *dof_func_offd = NULL;
   if (num_cols_offd)
   {
        if (num_functions > 1)
	   *dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
   }


  /*-------------------------------------------------------------------
    * Get the dof_func data for the off-processor columns
    *-------------------------------------------------------------------*/

   if (!comm_pkg)
   {
	hypre_MatvecCommPkgCreate(A);
	comm_pkg = hypre_ParCSRMatrixCommPkg(A); 
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   if (num_functions > 1)
   {
      int_buf_data = hypre_CTAlloc(HYPRE_Int,hypre_ParCSRCommPkgSendMapStart(comm_pkg,
						num_sends));
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
	 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
	 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
		int_buf_data[index++] 
		 = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
	
      comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, 
	*dof_func_offd);

      hypre_ParCSRCommHandleDestroy(comm_handle);   
      hypre_TFree(int_buf_data);
   }

   return(Solve_err_flag);
}
예제 #4
0
파일: hypre_parcsr.cpp 프로젝트: jdahm/mfem
/* Based on hypre_ParCSRMatrixMatvec in par_csr_matvec.c */
void hypre_ParCSRMatrixBooleanMatvec(hypre_ParCSRMatrix *A,
                                     HYPRE_Bool alpha,
                                     HYPRE_Bool *x,
                                     HYPRE_Bool beta,
                                     HYPRE_Bool *y)
{
    hypre_ParCSRCommHandle *comm_handle;
    hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
    hypre_CSRMatrix   *diag   = hypre_ParCSRMatrixDiag(A);
    hypre_CSRMatrix   *offd   = hypre_ParCSRMatrixOffd(A);

    HYPRE_Int          num_cols_offd = hypre_CSRMatrixNumCols(offd);
    HYPRE_Int          num_sends, i, j, index;

    HYPRE_Bool        *x_tmp, *x_buf;

    x_tmp = hypre_CTAlloc(HYPRE_Bool, num_cols_offd);

    /*---------------------------------------------------------------------
     * If there exists no CommPkg for A, a CommPkg is generated using
     * equally load balanced partitionings
     *--------------------------------------------------------------------*/
    if (!comm_pkg)
    {
        hypre_MatvecCommPkgCreate(A);
        comm_pkg = hypre_ParCSRMatrixCommPkg(A);
    }

    num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
    x_buf = hypre_CTAlloc(HYPRE_Bool,
                          hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));

    index = 0;
    for (i = 0; i < num_sends; i++)
    {
        j = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
        for ( ; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
        {
            x_buf[index++] = x[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)];
        }
    }

    comm_handle = hypre_ParCSRCommHandleCreate_bool(1, comm_pkg, x_buf, x_tmp);

    hypre_CSRMatrixBooleanMatvec(diag, alpha, x, beta, y);

    hypre_ParCSRCommHandleDestroy(comm_handle);

    if (num_cols_offd)
    {
        hypre_CSRMatrixBooleanMatvec(offd, alpha, x_tmp, 1, y);
    }

    hypre_TFree(x_buf);
    hypre_TFree(x_tmp);
}
예제 #5
0
 /* coarse (marked >=0) rows of P copied from C Both matrices have the same sizes. */
void hypre_ParCSRMatrixCopy_C( hypre_ParCSRMatrix * P,
                               hypre_ParCSRMatrix * C, HYPRE_Int * CF_marker )
{
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);

   double          *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int             *C_diag_i = hypre_CSRMatrixI(C_diag);
   double          *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int             *C_offd_i = hypre_CSRMatrixI(C_offd);
   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int	num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   HYPRE_Int	num_rows_offd_C = hypre_CSRMatrixNumRows(C_offd);
   HYPRE_Int	num_cols_offd_C = hypre_CSRMatrixNumCols(C_offd);

   HYPRE_Int i1, m;

   for ( i1= 0; i1 < num_rows_diag_C; i1++ )
   {
      if ( CF_marker[i1] >= 0 )  /* Coarse rows only */
      {
         for ( m=C_diag_i[i1]; m<C_diag_i[i1+1]; ++m )
         {
            P_diag_data[m] = C_diag_data[m];
         }
      }
   }
   if ( num_cols_offd_C )
      for ( i1= 0; i1 < num_rows_offd_C; i1++ )
      {
         if ( CF_marker[i1] >= 0 )  /* Coarse rows only */
         {
            for ( m=C_offd_i[i1]; m<C_offd_i[i1+1]; ++m )
            {
               P_offd_data[m] = C_offd_data[m];
            }
         }
      }

}
예제 #6
0
void RowsWithColumn_original
( HYPRE_Int * rowmin, HYPRE_Int * rowmax, HYPRE_Int column, hypre_ParCSRMatrix * A )
/* Finds rows of A which have a nonzero at the given (global) column number.
   Sets rowmin to the minimum (local) row number of such rows, and rowmax
   to the max.  If there are no such rows, will return rowmax<0<=rowmin */
{
   hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int * mat_i, * mat_j;
   HYPRE_Int i, j, num_rows;
   HYPRE_Int firstColDiag;
   HYPRE_Int * colMapOffd;

   mat_i = hypre_CSRMatrixI(diag);
   mat_j = hypre_CSRMatrixJ(diag);
   num_rows = hypre_CSRMatrixNumRows(diag);
   firstColDiag = hypre_ParCSRMatrixFirstColDiag(A);
   *rowmin = num_rows;
   *rowmax = -1;

   for ( i=0; i<num_rows; ++i ) {
      /* global number: row = i + firstRowIndex;*/
      for ( j=mat_i[i]; j<mat_i[i+1]; ++j ) {
         if ( mat_j[j]+firstColDiag==column ) {
            /* row i (local row number) has column mat_j[j] (local column number) */
            *rowmin = i<*rowmin ? i : *rowmin;
            *rowmax = i>*rowmax ? i : *rowmax;
            break;
         }
      }
   }
   mat_i = hypre_CSRMatrixI(offd);
   mat_j = hypre_CSRMatrixJ(offd);
   num_rows = hypre_CSRMatrixNumRows(offd);
   colMapOffd = hypre_ParCSRMatrixColMapOffd(A);
   for ( i=0; i<num_rows; ++i ) {
      /* global number: row = i + firstRowIndex;*/
      for ( j=mat_i[i]; j<mat_i[i+1]; ++j ) {
         if ( colMapOffd[ mat_j[j] ]==column ) {
            /* row i (local row number) has column mat_j[j] (local column number) */
            *rowmin = i<*rowmin ? i : *rowmin;
            *rowmax = i>*rowmax ? i : *rowmax;
            break;
         }
      }
   }

/*      global col no.:  mat_j[j]+hypre_ParCSRMatrixFirstColDiag(A)
                      or hypre_ParCSRMatrixColMapOffd(A)[ mat_j[j] ]
        global row no.: i + hypre_ParCSRMatrixFirstRowIndex(A)
*/

}
예제 #7
0
HYPRE_Int AmgCGCBoundaryFix (hypre_ParCSRMatrix *S,HYPRE_Int *CF_marker,HYPRE_Int *CF_marker_offd)
  /* Checks whether an interpolation is possible for a fine grid point with strong couplings.
   * Required after CGC coarsening
   * ========================================================================================
   * S : the strength matrix
   * CF_marker, CF_marker_offd : the coarse/fine markers
   * ========================================================================================*/
{
  HYPRE_Int mpirank,i,j,has_c_pt,ierr=0;
  hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag (S);
  hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd (S);
  HYPRE_Int *S_i = hypre_CSRMatrixI(S_diag);
  HYPRE_Int *S_j = hypre_CSRMatrixJ(S_diag);
  HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
  HYPRE_Int *S_offd_j = NULL;
  HYPRE_Int num_variables = hypre_CSRMatrixNumRows (S_diag);
  HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (S_offd);
  HYPRE_Int added_cpts=0;
  MPI_Comm comm = hypre_ParCSRMatrixComm(S);

  hypre_MPI_Comm_rank (comm,&mpirank);
  if (num_cols_offd) {
      S_offd_j = hypre_CSRMatrixJ(S_offd);
  }
  
  for (i=0;i<num_variables;i++) {
    if (S_offd_i[i]==S_offd_i[i+1] || CF_marker[i] == C_PT) continue;
    has_c_pt=0;

    /* fine grid point with strong connections across the boundary */
    for (j=S_i[i];j<S_i[i+1];j++) 
      if (CF_marker[S_j[j]] == C_PT) {has_c_pt=1; break;}
    if (has_c_pt) continue;

    for (j=S_offd_i[i];j<S_offd_i[i+1];j++) 
      if (CF_marker_offd[S_offd_j[j]] == C_PT) {has_c_pt=1; break;}
    if (has_c_pt) continue;

    /* all points i is strongly coupled to are fine: make i C_PT */
    CF_marker[i] = C_PT;
#if 0
    hypre_printf ("Processor %d: added point %d in AmgCGCBoundaryFix\n",mpirank,i);
#endif
    added_cpts++;
  }
#if 0
  if (added_cpts)  hypre_printf ("Processor %d: added %d points in AmgCGCBoundaryFix\n",mpirank,added_cpts);
  fflush(stdout);
#endif
  return(ierr);
}
예제 #8
0
  /* fine (marked <0 ) rows of Pnew set to 0 */
void  hypre_ParCSRMatrixZero_F( hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker )
{
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);

   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int	num_rows_offd_P = hypre_CSRMatrixNumRows(P_offd);
   HYPRE_Int	num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);
   HYPRE_Int i1,  m;

   for ( i1= 0; i1 < num_rows_diag_P; i1++ )
   {
      if ( CF_marker[i1] < 0 )  /* Fine rows only */
      {
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            P_diag_data[m] = 0;
         }
      }
   }
   if ( num_cols_offd_P )
      for ( i1= 0; i1 < num_rows_offd_P; i1++ )
      {
         if ( CF_marker[i1] < 0 )  /* Fine rows only */
         {
            for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
            {
               P_offd_data[m] = 0;
            }
         }
      }

}
예제 #9
0
/******************************************************************************
 *
 * hypre_IJMatrixSetOffDiagRowSizesPETSc
 * sets offd_i inside the offd part of the ParCSRMatrix,
 * requires exact sizes for offd
 *
 *****************************************************************************/
HYPRE_Int
hypre_IJMatrixSetOffDiagRowSizesPETSc(hypre_IJMatrix *matrix,
			      	      HYPRE_Int	      *sizes)
{
   HYPRE_Int local_num_rows;
   HYPRE_Int i;
   hypre_ParCSRMatrix *par_matrix;
   hypre_CSRMatrix *offd;
   HYPRE_Int *offd_i;
   par_matrix = hypre_IJMatrixLocalStorage(matrix);
   if (!par_matrix)
      return -1;
   
   offd =  hypre_ParCSRMatrixOffd(par_matrix);
   offd_i =  hypre_CSRMatrixI(offd);
   local_num_rows = hypre_CSRMatrixNumRows(offd);
   if (!offd_i)
      offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
   for (i = 0; i < local_num_rows+1; i++)
      offd_i[i] = sizes[i];
   hypre_CSRMatrixI(offd) = offd_i;
   hypre_CSRMatrixNumNonzeros(offd) = offd_i[local_num_rows];
   return 0;
}
예제 #10
0
/* Delete any matrix entry C(i,j) for which the corresponding entry P(i,j) doesn't exist -
   but only for "fine" rows C(i)<0
   This is done as a purely local computation - C and P must have the same data distribution
   (among processors).
*/
void hypre_ParCSRMatrixDropEntries( hypre_ParCSRMatrix * C,
                                    hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker )
{
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   double          *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int             *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int             *C_diag_j = hypre_CSRMatrixJ(C_diag);
   double          *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int             *C_offd_i = hypre_CSRMatrixI(C_offd);
   HYPRE_Int             *C_offd_j = hypre_CSRMatrixJ(C_offd);
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int             *new_C_diag_i;
   HYPRE_Int             *new_C_offd_i;
   HYPRE_Int	num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   HYPRE_Int	num_rows_offd_C = hypre_CSRMatrixNumCols(C_offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(C_diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(C_offd);
   double vmax = 0.0;
   double vmin = 0.0;
   double v, old_sum, new_sum, scale;
   HYPRE_Int i1, m, m1d, m1o, jC, mP, keep;

   /* Repack the i,j,and data arrays of C so as to discard those elements for which
      there is no corresponding element in P.
      Elements of Coarse rows (CF_marker>=0) are always kept.
      The arrays are not re-allocated, so there will generally be unused space
      at the ends of the arrays. */
   new_C_diag_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_C+1 );
   new_C_offd_i = hypre_CTAlloc( HYPRE_Int, num_rows_offd_C+1 );
   m1d = C_diag_i[0];
   m1o = C_offd_i[0];
   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      old_sum = 0;
      new_sum = 0;
      for ( m=C_diag_i[i1]; m<C_diag_i[i1+1]; ++m )
      {
         v = C_diag_data[m];
         jC = C_diag_j[m];
         old_sum += v;
         /* Do we know anything about the order of P_diag_j?  It would be better
            not to search through it all here.  If we know nothing, some ordering or
            index scheme will be needed for efficiency (worth doing iff this function
            gets called at all ) (may2006: this function is no longer called) */
         keep=0;
         for ( mP=P_diag_i[i1]; mP<P_diag_i[i1+1]; ++mP )
         {
            if ( jC==P_diag_j[m] )
            {
               keep=1;
               break;
            }
         }
         if ( CF_marker[i1]>=0 || keep==1 )
         {  /* keep v in C */
            new_sum += v;
            C_diag_j[m1d] = C_diag_j[m];
            C_diag_data[m1d] = C_diag_data[m];
            ++m1d;
         }
         else
         {  /* discard v */
            --num_nonzeros_diag;
         }
      }
      for ( m=C_offd_i[i1]; m<C_offd_i[i1+1]; ++m )
      {
         v = C_offd_data[m];
         jC = C_diag_j[m];
         old_sum += v;
         keep=0;
         for ( mP=P_offd_i[i1]; mP<P_offd_i[i1+1]; ++mP )
         {
            if ( jC==P_offd_j[m] )
            {
               keep=1;
               break;
            }
         }
         if ( CF_marker[i1]>=0 || v>=vmax || v<=vmin )
         {  /* keep v in C */
            new_sum += v;
            C_offd_j[m1o] = C_offd_j[m];
            C_offd_data[m1o] = C_offd_data[m];
            ++m1o;
         }
         else
         {  /* discard v */
            --num_nonzeros_offd;
         }
      }

      new_C_diag_i[i1+1] = m1d;
      if ( i1<num_rows_offd_C ) new_C_offd_i[i1+1] = m1o;

      /* rescale to keep row sum the same */
      if (new_sum!=0) scale = old_sum/new_sum; else scale = 1.0;
      for ( m=new_C_diag_i[i1]; m<new_C_diag_i[i1+1]; ++m )
         C_diag_data[m] *= scale;
      if ( i1<num_rows_offd_C ) /* this test fails when there is no offd block */
         for ( m=new_C_offd_i[i1]; m<new_C_offd_i[i1+1]; ++m )
            C_offd_data[m] *= scale;

   }

   for ( i1 = 1; i1 <= num_rows_diag_C; i1++ )
   {
      C_diag_i[i1] = new_C_diag_i[i1];
      if ( i1<num_rows_offd_C ) C_offd_i[i1] = new_C_offd_i[i1];
   }
   hypre_TFree( new_C_diag_i );
   if ( num_rows_offd_C>0 ) hypre_TFree( new_C_offd_i );

   hypre_CSRMatrixNumNonzeros(C_diag) = num_nonzeros_diag;
   hypre_CSRMatrixNumNonzeros(C_offd) = num_nonzeros_offd;
   /*  SetNumNonzeros, SetDNumNonzeros are global, need hypre_MPI_Allreduce.
       I suspect, but don't know, that other parts of hypre do not assume that
       the correct values have been set.
     hypre_ParCSRMatrixSetNumNonzeros( C );
     hypre_ParCSRMatrixSetDNumNonzeros( C );*/
   hypre_ParCSRMatrixNumNonzeros( C ) = 0;
   hypre_ParCSRMatrixDNumNonzeros( C ) = 0.0;

}
예제 #11
0
hypre_ParCSRMatrix * hypre_ParMatmul_FC(
   hypre_ParCSRMatrix * A, hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker,
   HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd )
/* hypre_parMatmul_FC creates and returns the "Fine"-designated rows of the
   matrix product A*P.  A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC
   where nC is the number of coarse rows/columns, nF the number of fine
   rows/columns.  The size of C=A*P is (nC+nF)*nC, even though not all rows
   of C are actually computed.  If we were to construct a matrix consisting
   only of the computed rows of C, its size would be nF*nC.
   "Fine" is defined solely by the marker array, and for example could be
   a proper subset of the fine points of a multigrid hierarchy.
*/
{
   /* To compute a submatrix of C containing only the computed data, i.e.
      only "Fine" rows, we would have to do a lot of computational work,
      with a lot of communication.  The communication is because such a
      matrix would need global information that depends on which rows are
      "Fine".
   */

   MPI_Comm 	   comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   
   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int             *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int             *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   
   double          *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int             *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int             *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int	num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int	num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int	num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
   
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   
   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);

   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int		   *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
   
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);

   HYPRE_Int	first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_Int	last_col_diag_P;
   HYPRE_Int *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int	num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int	num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *C;
   HYPRE_Int		      *col_map_offd_C;
   HYPRE_Int		      *map_P_to_C;

   hypre_CSRMatrix *C_diag;

   double          *C_diag_data;
   HYPRE_Int             *C_diag_i;
   HYPRE_Int             *C_diag_j;

   hypre_CSRMatrix *C_offd;

   double          *C_offd_data=NULL;
   HYPRE_Int             *C_offd_i=NULL;
   HYPRE_Int             *C_offd_j=NULL;

   HYPRE_Int              C_diag_size;
   HYPRE_Int              C_offd_size;
   HYPRE_Int		    num_cols_offd_C = 0;
   
   hypre_CSRMatrix *Ps_ext;
   
   double          *Ps_ext_data;
   HYPRE_Int             *Ps_ext_i;
   HYPRE_Int             *Ps_ext_j;

   double          *P_ext_diag_data;
   HYPRE_Int             *P_ext_diag_i;
   HYPRE_Int             *P_ext_diag_j;
   HYPRE_Int              P_ext_diag_size;

   double          *P_ext_offd_data;
   HYPRE_Int             *P_ext_offd_i;
   HYPRE_Int             *P_ext_offd_j;
   HYPRE_Int              P_ext_offd_size;

   HYPRE_Int		   *P_marker;
   HYPRE_Int		   *temp;

   HYPRE_Int              i, j;
   HYPRE_Int              i1, i2, i3;
   HYPRE_Int              jj2, jj3;
   
   HYPRE_Int              jj_count_diag, jj_count_offd;
   HYPRE_Int              jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int              start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int		    n_rows_A_global, n_cols_A_global;
   HYPRE_Int		    n_rows_P_global, n_cols_P_global;
   HYPRE_Int              allsquare = 0;
   HYPRE_Int              cnt, cnt_offd, cnt_diag;
   HYPRE_Int 		    num_procs;
   HYPRE_Int 		    value;

   double           a_entry;
   double           a_b_product;
   
   n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P);

   if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P)
   {
	hypre_printf(" Error! Incompatible matrix dimensions!\n");
	return NULL;
   }
   /* if (num_rows_A==num_cols_P) allsquare = 1; */

   /*-----------------------------------------------------------------------
    *  Extract P_ext, i.e. portion of P that is stored on neighbor procs
    *  and needed locally for matrix matrix product 
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);

   if (num_procs > 1)
   {
       /*---------------------------------------------------------------------
    	* If there exists no CommPkg for A, a CommPkg is generated using
    	* equally load balanced partitionings within 
	* hypre_ParCSRMatrixExtractBExt
    	*--------------------------------------------------------------------*/
   	Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1);
   	Ps_ext_data = hypre_CSRMatrixData(Ps_ext);
   	Ps_ext_i    = hypre_CSRMatrixI(Ps_ext);
   	Ps_ext_j    = hypre_CSRMatrixJ(Ps_ext);
   }
   P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_diag_size = 0;
   P_ext_offd_size = 0;
   last_col_diag_P = first_col_diag_P + num_cols_diag_P -1;

   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
            P_ext_offd_size++;
         else
            P_ext_diag_size++;
      P_ext_diag_i[i+1] = P_ext_diag_size;
      P_ext_offd_i[i+1] = P_ext_offd_size;
   }

   if (P_ext_diag_size)
   {
      P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size);
      P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size);
   }
   if (P_ext_offd_size)
   {
      P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size);
      P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size);
   }

   cnt_offd = 0;
   cnt_diag = 0;
   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_j[cnt_offd] = Ps_ext_j[j];
            P_ext_offd_data[cnt_offd++] = Ps_ext_data[j];
         }
         else
         {
            P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P;
            P_ext_diag_data[cnt_diag++] = Ps_ext_data[j];
         }
   }

   if (num_procs > 1)
   {
      hypre_CSRMatrixDestroy(Ps_ext);
      Ps_ext = NULL;
   }

   cnt = 0;
   if (P_ext_offd_size || num_cols_offd_P)
   {
      temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P);
      for (i=0; i < P_ext_offd_size; i++)
         temp[i] = P_ext_offd_j[i];
      cnt = P_ext_offd_size;
      for (i=0; i < num_cols_offd_P; i++)
         temp[cnt++] = col_map_offd_P[i];
   }
   if (cnt)
   {
      qsort0(temp, 0, cnt-1);

      num_cols_offd_C = 1;
      value = temp[0];
      for (i=1; i < cnt; i++)
      {
         if (temp[i] > value)
         {
            value = temp[i];
            temp[num_cols_offd_C++] = value;
         }
      }
   }

   if (num_cols_offd_C)
        col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);

   for (i=0; i < num_cols_offd_C; i++)
      col_map_offd_C[i] = temp[i];

   if (P_ext_offd_size || num_cols_offd_P)
      hypre_TFree(temp);

   for (i=0 ; i < P_ext_offd_size; i++)
      P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C,
                                           P_ext_offd_j[i],
                                           num_cols_offd_C);
   if (num_cols_offd_P)
   {
      map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P);

      cnt = 0;
      for (i=0; i < num_cols_offd_C; i++)
         if (col_map_offd_C[i] == col_map_offd_P[cnt])
         {
            map_P_to_C[cnt++] = i;
            if (cnt == num_cols_offd_P) break;
         }
   }

   /*-----------------------------------------------------------------------
   *  Allocate marker array.
    *-----------------------------------------------------------------------*/

   P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C);

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }


/* no changes for the marked version above this point */
   /* This function call is the first pass: */
   hypre_ParMatmul_RowSizes_Marked(
      &C_diag_i, &C_offd_i, &P_marker,
      A_diag_i, A_diag_j, A_offd_i, A_offd_j,
      P_diag_i, P_diag_j, P_offd_i, P_offd_j,
      P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j,
      map_P_to_C,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A, allsquare,
      num_cols_diag_P, num_cols_offd_P,
      num_cols_offd_C, CF_marker, dof_func, dof_func_offd
      );

   /* The above call of hypre_ParMatmul_RowSizes_Marked computed
      two scalars: C_diag_size, C_offd_size,
      and two arrays: C_diag_i, C_offd_i
      ( P_marker is also computed, but only used internally )
   */

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/
 
   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;
   C_diag_data = hypre_CTAlloc(double, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   { 
   	C_offd_data = hypre_CTAlloc(double, C_offd_size);
   	C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   } 


   /*-----------------------------------------------------------------------
    *  Second Pass: Fill in C_diag_data and C_diag_j.
    *  Second Pass: Fill in C_offd_data and C_offd_j.
    *-----------------------------------------------------------------------*/

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }
   
   /*-----------------------------------------------------------------------
    *  Loop over interior c-points.
    *-----------------------------------------------------------------------*/
    
   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {

      if ( CF_marker[i1] < 0 )  /* i1 is a fine row */
         /* ... This and the coarse row code are the only parts between first pass
            and near the end where
            hypre_ParMatmul_FC is different from the regular hypre_ParMatmul */
      {

         /*--------------------------------------------------------------------
          *  Create diagonal entry, C_{i1,i1} 
          *--------------------------------------------------------------------*/

         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_offd.
          *-----------------------------------------------------------------*/
         
	 if (num_cols_offd_A)
	 {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];
               if( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] )
               {  /* interpolate only like "functions" */
                  a_entry = A_offd_data[jj2];
            
                  /*-----------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext.
                   *-----------------------------------------------------------*/

                  for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+P_ext_offd_j[jj3];
                     a_b_product = a_entry * P_ext_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                        C_offd_data[P_marker[i3]] += a_b_product;
                  }
                  for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
                  {
                     i3 = P_ext_diag_j[jj3];
                     a_b_product = a_entry * P_ext_diag_data[jj3];

                     if (P_marker[i3] < jj_row_begin_diag)
                     {
                        P_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                        C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               else
               {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                     different "functions".  As we haven't created an entry for
                     C(i1,i2), nothing needs to be done. */
               }

            }
         }

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];
            if( dof_func==NULL || dof_func[i1] == dof_func[i2] )
            {  /* interpolate only like "functions" */
               a_entry = A_diag_data[jj2];
            
               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_diag.
                *-----------------------------------------------------------*/

               for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
               {
                  i3 = P_diag_j[jj3];
                  a_b_product = a_entry * P_diag_data[jj3];
                  
                  /*--------------------------------------------------------
                   *  Check P_marker to see that C_{i1,i3} has not already
                   *  been accounted for. If it has not, create a new entry.
                   *  If it has, add new contribution.
                   *--------------------------------------------------------*/

                  if (P_marker[i3] < jj_row_begin_diag)
                  {
                     P_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               if (num_cols_offd_P)
	       {
                  for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]];
                     a_b_product = a_entry * P_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/

                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
            else
            {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                  different "functions".  As we haven't created an entry for
                  C(i1,i2), nothing needs to be done. */
            }
         }
      }
      else  /* i1 is a coarse row.*/
         /* Copy P coarse-row values to C.  This is useful if C is meant to
            become a replacement for P */
      {
	 if (num_cols_offd_P)
	 {
            for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++)
            {
               C_offd_j[jj_count_offd] = P_offd_j[jj_count_offd];
               C_offd_data[jj_count_offd] = P_offd_data[jj_count_offd];
               ++jj_count_offd;
            }
         }
         for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++)
         {
            C_diag_j[jj_count_diag] = P_diag_j[jj2];
            C_diag_data[jj_count_diag] = P_diag_data[jj2];
            ++jj_count_diag;
         }
      }
   }

   C = hypre_ParCSRMatrixCreate(
      comm, n_rows_A_global, n_cols_P_global,
      row_starts_A, col_starts_P, num_cols_offd_C, C_diag_size, C_offd_size );

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C,0);
   hypre_ParCSRMatrixSetColStartsOwner(C,0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data; 
   hypre_CSRMatrixI(C_diag) = C_diag_i; 
   hypre_CSRMatrixJ(C_diag) = C_diag_j; 

   C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrixI(C_offd) = C_offd_i; 
   hypre_ParCSRMatrixOffd(C) = C_offd;

   if (num_cols_offd_C)
   {
      hypre_CSRMatrixData(C_offd) = C_offd_data; 
      hypre_CSRMatrixJ(C_offd) = C_offd_j; 
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

   }

   /*-----------------------------------------------------------------------
    *  Free various arrays
    *-----------------------------------------------------------------------*/

   hypre_TFree(P_marker);   
   hypre_TFree(P_ext_diag_i);
   if (P_ext_diag_size)
   {
      hypre_TFree(P_ext_diag_j);
      hypre_TFree(P_ext_diag_data);
   }
   hypre_TFree(P_ext_offd_i);
   if (P_ext_offd_size)
   {
      hypre_TFree(P_ext_offd_j);
      hypre_TFree(P_ext_offd_data);
   }
   if (num_cols_offd_P) hypre_TFree(map_P_to_C);

   return C;
   
}
예제 #12
0
hypre_ParCSRMatrix *
hypre_ParCSRBlockMatrixConvertToParCSRMatrix(hypre_ParCSRBlockMatrix *matrix)
{
   MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix);
   hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix);
   hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix);
   HYPRE_Int block_size = hypre_ParCSRBlockMatrixBlockSize(matrix);
   HYPRE_Int global_num_rows = hypre_ParCSRBlockMatrixGlobalNumRows(matrix);
   HYPRE_Int global_num_cols = hypre_ParCSRBlockMatrixGlobalNumCols(matrix);
   HYPRE_Int *row_starts = hypre_ParCSRBlockMatrixRowStarts(matrix);
   HYPRE_Int *col_starts = hypre_ParCSRBlockMatrixColStarts(matrix);
   HYPRE_Int num_cols_offd = hypre_CSRBlockMatrixNumCols(offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRBlockMatrixNumNonzeros(diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRBlockMatrixNumNonzeros(offd);
 
   hypre_ParCSRMatrix *matrix_C;
   HYPRE_Int *matrix_C_row_starts;
   HYPRE_Int *matrix_C_col_starts;

   HYPRE_Int *counter, *new_j_map;
   HYPRE_Int size_j, size_map, index, new_num_cols, removed = 0;
   HYPRE_Int *offd_j, *col_map_offd, *new_col_map_offd;
   

   HYPRE_Int num_procs, i, j;

   hypre_CSRMatrix *diag_nozeros, *offd_nozeros;

   hypre_MPI_Comm_size(comm,&num_procs);

#ifdef HYPRE_NO_GLOBAL_PARTITION
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, 2);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, 2);
   for(i = 0; i < 2; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]*block_size;
      matrix_C_col_starts[i] = col_starts[i]*block_size;
   }
#else
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   for(i = 0; i < num_procs + 1; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]*block_size;
      matrix_C_col_starts[i] = col_starts[i]*block_size;
   }
#endif

   matrix_C = hypre_ParCSRMatrixCreate(comm, global_num_rows*block_size, 
                                       global_num_cols*block_size, matrix_C_row_starts, 
                                       matrix_C_col_starts, num_cols_offd*block_size, 
                                       num_nonzeros_diag*block_size*block_size, 
                                       num_nonzeros_offd*block_size*block_size);
   hypre_ParCSRMatrixInitialize(matrix_C);

   /* DIAG */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
   hypre_ParCSRMatrixDiag(matrix_C) = 
      hypre_CSRBlockMatrixConvertToCSRMatrix(diag);

   /* AB - added to delete zeros */
   diag_nozeros = hypre_CSRMatrixDeleteZeros( 
      hypre_ParCSRMatrixDiag(matrix_C), 1e-14);
   if(diag_nozeros) 
   {
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
      hypre_ParCSRMatrixDiag(matrix_C) = diag_nozeros;
   }

   /* OFF-DIAG */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
   hypre_ParCSRMatrixOffd(matrix_C) = 
      hypre_CSRBlockMatrixConvertToCSRMatrix(offd);

   /* AB - added to delete zeros - this just deletes from data and j arrays */
   offd_nozeros = hypre_CSRMatrixDeleteZeros( 
      hypre_ParCSRMatrixOffd(matrix_C), 1e-14);
   if(offd_nozeros) 
   {
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
      hypre_ParCSRMatrixOffd(matrix_C) = offd_nozeros;
      removed = 1;
       
   }

   /* now convert the col_map_offd */
   for (i = 0; i < num_cols_offd; i++)
      for (j = 0; j < block_size; j++)
         hypre_ParCSRMatrixColMapOffd(matrix_C)[i*block_size + j] = 
            hypre_ParCSRBlockMatrixColMapOffd(matrix)[i]*block_size + j;
 
   /* if we deleted zeros, then it is possible that col_map_offd can be
      compressed as well - this requires some amount of work that could be skipped... */

   if (removed)
   {
      size_map =   num_cols_offd*block_size;
      counter = hypre_CTAlloc(HYPRE_Int, size_map);
      new_j_map = hypre_CTAlloc(HYPRE_Int, size_map);

      offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(matrix_C));
      col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix_C);
      
      size_j = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(matrix_C));
      /* mark which off_d entries are found in j */
      for (i=0; i < size_j; i++)
      {
         counter[offd_j[i]] = 1;
      }
      /*now find new numbering for columns (we will delete the 
        cols where counter = 0*/
      index = 0;
      for (i=0; i < size_map; i++)
      {
         if (counter[i]) new_j_map[i] = index++;
      }
      new_num_cols = index;
      /* if there are some col entries to remove: */ 
      if (!(index == size_map))
      {
         /* go thru j and adjust entries */         
         for (i=0; i < size_j; i++)
         {
            offd_j[i] = new_j_map[offd_j[i]];
         }
         /*now go thru col map and get rid of non-needed entries */ 
         new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols);
         index = 0;
         for (i=0; i < size_map; i++)  
         {
            if (counter[i]) 
            {
               new_col_map_offd[index++] = col_map_offd[i];
            }
         }
         /* set the new col map */ 
         hypre_TFree(col_map_offd);
         hypre_ParCSRMatrixColMapOffd(matrix_C) = new_col_map_offd;
         /* modify the number of cols */
         hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(matrix_C)) = new_num_cols;
      }
      hypre_TFree(new_j_map);
      hypre_TFree(counter);
      
   }
   
   hypre_ParCSRMatrixSetNumNonzeros( matrix_C );
   hypre_ParCSRMatrixSetDNumNonzeros( matrix_C );
   
   /* we will not copy the comm package */  
   hypre_ParCSRMatrixCommPkg(matrix_C) = NULL;

   return matrix_C;
}
예제 #13
0
void hypre_BoomerAMGJacobiInterp_1( hypre_ParCSRMatrix * A,
                                    hypre_ParCSRMatrix ** P,
                                    hypre_ParCSRMatrix * S,
                                    HYPRE_Int * CF_marker, HYPRE_Int level,
                                    HYPRE_Real truncation_threshold,
                                    HYPRE_Real truncation_threshold_minus,
                                    HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd,
                                    HYPRE_Real weight_AF)
/* One step of Jacobi interpolation:
   A is the linear system.
   P is an interpolation matrix, input and output
   CF_marker identifies coarse and fine points
   If we imagine P and A as split into coarse and fine submatrices,

       [ AFF  AFC ]   [ AF ]            [ IFC ]
   A = [          ] = [    ] ,      P = [     ]
       [ ACF  ACC ]   [ AC ]            [ ICC ]
   (note that ICC is an identity matrix, applied to coarse points only)
   then this function computes

   IFCnew = IFCold - DFF(-1) * ( AFF*IFCold + AFC )
          = IFCold - DFF(-1) * AF * Pold)
   where DFF is the diagonal of AFF, (-1) represents the inverse, and
   where "old" denotes a value on entry to this function, "new" a returned value.

*/
{
   hypre_ParCSRMatrix * Pnew;
   hypre_ParCSRMatrix * C;
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(*P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(*P);
   HYPRE_Real      *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Real      *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   hypre_CSRMatrix *C_diag;
   hypre_CSRMatrix *C_offd;
   hypre_CSRMatrix *Pnew_diag;
   hypre_CSRMatrix *Pnew_offd;
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int i;
   HYPRE_Int Jnochanges=0, Jchanges, Pnew_num_nonzeros;
   HYPRE_Int CF_coarse=0;
   HYPRE_Int * J_marker = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P );
   HYPRE_Int nc, ncmax, ncmin, nc1;
   HYPRE_Int num_procs, my_id;
   MPI_Comm comm = hypre_ParCSRMatrixComm( A );
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   HYPRE_Int m, nmav, npav;
   HYPRE_Real PIi, PIimax, PIimin, PIimav, PIipav, randthresh;
   HYPRE_Real eps = 1.0e-17;
#endif
#ifdef HYPRE_JACINT_PRINT_MATRICES
   char filename[80];
   HYPRE_Int i_dummy, j_dummy;
   HYPRE_Int *base_i_ptr = &i_dummy;
   HYPRE_Int *base_j_ptr = &j_dummy;
#endif
#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   HYPRE_Int sample_rows[50], n_sample_rows=0, isamp;
#endif

   hypre_MPI_Comm_size(comm, &num_procs);   
   hypre_MPI_Comm_rank(comm,&my_id);


   for ( i=0; i<num_rows_diag_P; ++i )
   {
      J_marker[i] = CF_marker[i];
      if (CF_marker[i]>=0) ++CF_coarse;
   }
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1, P has %i+%i=%i nonzeros, local sum %e\n", my_id, level,
          hypre_CSRMatrixNumNonzeros(P_diag), hypre_CSRMatrixNumNonzeros(P_offd),
          hypre_CSRMatrixNumNonzeros(P_diag)+hypre_CSRMatrixNumNonzeros(P_offd),
          hypre_ParCSRMatrixLocalSumElts(*P) );
#endif

   /* row sum computations, for output */
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0;
   nmav=0, npav=0;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      PIi = 0;  /* i-th value of P*1, i.e. sum of row i of P */
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         PIi += P_diag_data[m];
      for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m )
         PIi += P_offd_data[m];
      if (CF_marker[i]<0)
      {
         PIimax = hypre_max( PIimax, PIi );
         PIimin = hypre_min( PIimin, PIi );
         if (PIi<=1-eps) { PIimav+=PIi; ++nmav; };
         if (PIi>=1+eps) { PIipav+=PIi; ++npav; };
      }
   }
   if ( nmav>0 ) PIimav = PIimav/nmav;
   if ( npav>0 ) PIipav = PIipav/npav;
   hypre_printf("%i %i P in max,min row sums %e %e\n", my_id, level, PIimax, PIimin );
#endif

   ncmax=0; ncmin=num_rows_diag_P; nc1=0;
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc<=1)
         {
            ++nc1;
         }
         ncmax = hypre_max( nc, ncmax );
         ncmin = hypre_min( nc, ncmin );
      }
#if 0
   /* a very agressive reduction in how much the Jacobi step does: */
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc>ncmin+1)
            /*if ( nc > ncmin + 0.5*(ncmax-ncmin) )*/
         {
            J_marker[i] = 1;
            ++Jnochanges;
         }
      }
#endif
   Jchanges = num_rows_diag_P - Jnochanges - CF_coarse;

#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   hypre_printf("some rows to be changed: ");
   randthresh = 15/(HYPRE_Real)Jchanges;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      if ( J_marker[i]<0 )
      {
         if ( ((HYPRE_Real)rand())/RAND_MAX < randthresh )
         {
            hypre_printf( "%i: ", i );
            for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
               hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] );
            hypre_printf(";  ");
            sample_rows[n_sample_rows] = i;
            ++n_sample_rows;
         }
      }
   }
   hypre_printf("\n");
#endif
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i P has %i rows, %i changeable, %i don't change-good, %i coarse\n",
          my_id, level, num_rows_diag_P, Jchanges, Jnochanges, CF_coarse );
   hypre_printf("%i %i min,max diag cols per row: %i, %i;  no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 );
#endif
#ifdef HYPRE_JACINT_PRINT_MATRICES
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )
   {
      hypre_sprintf( filename, "Ain%i", level );
      hypre_ParCSRMatrixPrintIJ( A,0,0,filename);
      hypre_sprintf( filename, "Sin%i", level );
      hypre_ParCSRMatrixPrintIJ( S,0,0,filename);
      hypre_sprintf( filename, "Pin%i", level );
      hypre_ParCSRMatrixPrintIJ( *P,0,0,filename);
   }
#endif

   C = hypre_ParMatmul_FC( A, *P, J_marker, dof_func, dof_func_offd );
   /* hypre_parMatmul_FC creates and returns C, a variation of the
      matrix product A*P in which only the "Fine"-designated rows have
      been computed.  (all columns are Coarse because all columns of P
      are).  "Fine" is defined solely by the marker array, and for
      example could be a proper subset of the fine points of a
      multigrid hierarchy.
      As a matrix, C is the size of A*P.  But only the marked rows have
      been computed.
   */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "C%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename);
#endif
   C_diag = hypre_ParCSRMatrixDiag(C);
   C_offd = hypre_ParCSRMatrixOffd(C);
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1 after matmul, C has %i+%i=%i nonzeros, local sum %e\n",
          my_id, level, hypre_CSRMatrixNumNonzeros(C_diag),
          hypre_CSRMatrixNumNonzeros(C_offd),
          hypre_CSRMatrixNumNonzeros(C_diag)+hypre_CSRMatrixNumNonzeros(C_offd),
          hypre_ParCSRMatrixLocalSumElts(C) );
#endif

   hypre_ParMatScaleDiagInv_F( C, A, weight_AF, J_marker );
   /* hypre_ParMatScaleDiagInv scales of its first argument by premultiplying with
      a submatrix of the inverse of the diagonal of its second argument.
      The marker array determines which diagonal elements are used.  The marker
      array should select exactly the right number of diagonal elements (the number
      of rows of AP_FC).
   */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "Cout%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )  hypre_ParCSRMatrixPrintIJ( C,0,0,filename);
#endif

   Pnew = hypre_ParMatMinus_F( *P, C, J_marker );
   /* hypre_ParMatMinus_F subtracts rows of its second argument from selected rows
      of its first argument.  The marker array determines which rows of the first
      argument are affected, and they should exactly correspond to all the rows
      of the second argument.
   */
   Pnew_diag = hypre_ParCSRMatrixDiag(Pnew);
   Pnew_offd = hypre_ParCSRMatrixOffd(Pnew);
   Pnew_num_nonzeros = hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd);
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1 after MatMinus, Pnew has %i+%i=%i nonzeros, local sum %e\n",
          my_id, level, hypre_CSRMatrixNumNonzeros(Pnew_diag),
          hypre_CSRMatrixNumNonzeros(Pnew_offd), Pnew_num_nonzeros,
          hypre_ParCSRMatrixLocalSumElts(Pnew) );
#endif

   /* Transfer ownership of col_starts from P to Pnew  ... */
   if ( hypre_ParCSRMatrixColStarts(*P) &&
        hypre_ParCSRMatrixColStarts(*P)==hypre_ParCSRMatrixColStarts(Pnew) )
   {
      if ( hypre_ParCSRMatrixOwnsColStarts(*P) && !hypre_ParCSRMatrixOwnsColStarts(Pnew) )
      {
         hypre_ParCSRMatrixSetColStartsOwner(*P,0);
         hypre_ParCSRMatrixSetColStartsOwner(Pnew,1);
      }
   }

   hypre_ParCSRMatrixDestroy( C );
   hypre_ParCSRMatrixDestroy( *P );

   /* Note that I'm truncating all the fine rows, not just the J-marked ones. */
#if 0
   if ( Pnew_num_nonzeros < 10000 )  /* a fixed number like this makes it no.procs.-depdendent */
   {  /* ad-hoc attempt to reduce zero-matrix problems seen in testing..*/
      truncation_threshold = 1.0e-6 * truncation_threshold; 
      truncation_threshold_minus = 1.0e-6 * truncation_threshold_minus;
  }
#endif
   hypre_BoomerAMGTruncateInterp( Pnew, truncation_threshold,
                                  truncation_threshold_minus, CF_marker );

   hypre_MatvecCommPkgCreate ( Pnew );


   *P = Pnew;

   P_diag = hypre_ParCSRMatrixDiag(*P);
   P_offd = hypre_ParCSRMatrixOffd(*P);
   P_diag_data = hypre_CSRMatrixData(P_diag);
   P_diag_i = hypre_CSRMatrixI(P_diag);
   P_diag_j = hypre_CSRMatrixJ(P_diag);
   P_offd_data = hypre_CSRMatrixData(P_offd);
   P_offd_i = hypre_CSRMatrixI(P_offd);

   /* row sum computations, for output */
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0;
   nmav=0, npav=0;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      PIi = 0;  /* i-th value of P*1, i.e. sum of row i of P */
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         PIi += P_diag_data[m];
      for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m )
         PIi += P_offd_data[m];
      if (CF_marker[i]<0)
      {
         PIimax = hypre_max( PIimax, PIi );
         PIimin = hypre_min( PIimin, PIi );
         if (PIi<=1-eps) { PIimav+=PIi; ++nmav; };
         if (PIi>=1+eps) { PIipav+=PIi; ++npav; };
      }
   }
   if ( nmav>0 ) PIimav = PIimav/nmav;
   if ( npav>0 ) PIipav = PIipav/npav;
   hypre_printf("%i %i P out max,min row sums %e %e\n", my_id, level, PIimax, PIimin );
#endif

#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   hypre_printf("some changed rows: ");
   for ( isamp=0; isamp<n_sample_rows; ++isamp )
   {
      i = sample_rows[isamp];
      hypre_printf( "%i: ", i );
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] );
      hypre_printf(";  ");
   }
   hypre_printf("\n");
#endif
   ncmax=0; ncmin=num_rows_diag_P; nc1=0;
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc<=1) ++nc1;
         ncmax = hypre_max( nc, ncmax );
         ncmin = hypre_min( nc, ncmin );
      }
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i P has %i rows, %i changeable, %i too good, %i coarse\n",
          my_id, level, num_rows_diag_P, num_rows_diag_P-Jnochanges-CF_coarse, Jnochanges, CF_coarse );
   hypre_printf("%i %i min,max diag cols per row: %i, %i;  no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 );

   hypre_printf("%i %i Jacobi_Interp_1 after truncation (%e), Pnew has %i+%i=%i nonzeros, local sum %e\n",
          my_id, level, truncation_threshold,
          hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd),
          hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd),
          hypre_ParCSRMatrixLocalSumElts(Pnew) );
#endif

   /* Programming Notes:
      1. Judging by around line 299 of par_interp.c, they typical use of CF_marker
      is that CF_marker>=0 means Coarse, CF_marker<0 means Fine.
   */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "Pout%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )  hypre_ParCSRMatrixPrintIJ( *P,0,0,filename);
#endif

   hypre_TFree( J_marker );
      
}
예제 #14
0
HYPRE_Int
hypre_ParCSRMatrixMatvec( HYPRE_Complex       alpha,
                          hypre_ParCSRMatrix *A,
                          hypre_ParVector    *x,
                          HYPRE_Complex       beta,
                          hypre_ParVector    *y )
{
   hypre_ParCSRCommHandle **comm_handle;
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix   *diag   = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix   *offd   = hypre_ParCSRMatrixOffd(A);
   hypre_Vector      *x_local  = hypre_ParVectorLocalVector(x);   
   hypre_Vector      *y_local  = hypre_ParVectorLocalVector(y);   
   HYPRE_Int          num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int          num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   hypre_Vector      *x_tmp;
   HYPRE_Int          x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int          y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int          num_vectors = hypre_VectorNumVectors(x_local);
   HYPRE_Int          num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int          ierr = 0;
   HYPRE_Int          num_sends, i, j, jv, index, start;

   HYPRE_Int          vecstride = hypre_VectorVectorStride( x_local );
   HYPRE_Int          idxstride = hypre_VectorIndexStride( x_local );

   HYPRE_Complex     *x_tmp_data, **x_buf_data;
   HYPRE_Complex     *x_local_data = hypre_VectorData(x_local);

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  ParMatvec returns ierr = 11 if
    *  length of X doesn't equal the number of columns of A,
    *  ierr = 12 if the length of Y doesn't equal the number of rows
    *  of A, and ierr = 13 if both are true.
    *
    *  Because temporary vectors are often used in ParMatvec, none of 
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/
 
   hypre_assert( idxstride>0 );

   if (num_cols != x_size)
      ierr = 11;

   if (num_rows != y_size)
      ierr = 12;

   if (num_cols != x_size && num_rows != y_size)
      ierr = 13;

   hypre_assert( hypre_VectorNumVectors(y_local)==num_vectors );

   if ( num_vectors==1 )
      x_tmp = hypre_SeqVectorCreate( num_cols_offd );
   else
   {
      hypre_assert( num_vectors>1 );
      x_tmp = hypre_SeqMultiVectorCreate( num_cols_offd, num_vectors );
   }
   hypre_SeqVectorInitialize(x_tmp);
   x_tmp_data = hypre_VectorData(x_tmp);
   
   comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors);

   /*---------------------------------------------------------------------
    * If there exists no CommPkg for A, a CommPkg is generated using
    * equally load balanced partitionings
    *--------------------------------------------------------------------*/
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A); 
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   x_buf_data = hypre_CTAlloc( HYPRE_Complex*, num_vectors );
   for ( jv=0; jv<num_vectors; ++jv )
      x_buf_data[jv] = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart
                                     (comm_pkg, num_sends));

   if ( num_vectors==1 )
   {
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            x_buf_data[0][index++] 
               = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
   }
   else
      for ( jv=0; jv<num_vectors; ++jv )
      {
         index = 0;
         for (i = 0; i < num_sends; i++)
         {
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
               x_buf_data[jv][index++] 
                  = x_local_data[
                     jv*vecstride +
                     idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j) ];
         }
      }

   hypre_assert( idxstride==1 );
   /* ... The assert is because the following loop only works for 'column'
      storage of a multivector. This needs to be fixed to work more generally,
      at least for 'row' storage. This in turn, means either change CommPkg so
      num_sends is no.zones*no.vectors (not no.zones) or, less dangerously, put
      a stride in the logic of CommHandleCreate (stride either from a new arg or
      a new variable inside CommPkg).  Or put the num_vector iteration inside
      CommHandleCreate (perhaps a new multivector variant of it).
   */
   for ( jv=0; jv<num_vectors; ++jv )
   {
      comm_handle[jv] = hypre_ParCSRCommHandleCreate
         ( 1, comm_pkg, x_buf_data[jv], &(x_tmp_data[jv*num_cols_offd]) );
   }

   hypre_CSRMatrixMatvec( alpha, diag, x_local, beta, y_local);
   
   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_ParCSRCommHandleDestroy(comm_handle[jv]);
      comm_handle[jv] = NULL;
   }
   hypre_TFree(comm_handle);

   if (num_cols_offd) hypre_CSRMatrixMatvec( alpha, offd, x_tmp, 1.0, y_local);    

   hypre_SeqVectorDestroy(x_tmp);
   x_tmp = NULL;
   for ( jv=0; jv<num_vectors; ++jv ) hypre_TFree(x_buf_data[jv]);
   hypre_TFree(x_buf_data);
  
   return ierr;
}
예제 #15
0
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvec_FF
 *--------------------------------------------------------------------------*/
                                                                                    HYPRE_Int
hypre_ParCSRMatrixMatvec_FF( HYPRE_Complex       alpha,
                             hypre_ParCSRMatrix *A,
                             hypre_ParVector    *x,
                             HYPRE_Complex       beta,
                             hypre_ParVector    *y,
                             HYPRE_Int          *CF_marker,
                             HYPRE_Int           fpt )
{
   MPI_Comm                comm = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommHandle *comm_handle;
   hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix        *diag   = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix        *offd   = hypre_ParCSRMatrixOffd(A);
   hypre_Vector           *x_local  = hypre_ParVectorLocalVector(x);
   hypre_Vector           *y_local  = hypre_ParVectorLocalVector(y);
   HYPRE_Int               num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int               num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   hypre_Vector      *x_tmp;
   HYPRE_Int          x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int          y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int          num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int          ierr = 0;
   HYPRE_Int          num_sends, i, j, index, start, num_procs;
   HYPRE_Int         *int_buf_data = NULL;
   HYPRE_Int         *CF_marker_offd = NULL;

   HYPRE_Complex     *x_tmp_data = NULL;
   HYPRE_Complex     *x_buf_data = NULL;
   HYPRE_Complex     *x_local_data = hypre_VectorData(x_local);
   /*---------------------------------------------------------------------
    *  Check for size compatibility.  ParMatvec returns ierr = 11 if
    *  length of X doesn't equal the number of columns of A,
    *  ierr = 12 if the length of Y doesn't equal the number of rows
    *  of A, and ierr = 13 if both are true.
    *
    *  Because temporary vectors are often used in ParMatvec, none of
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm,&num_procs);

   if (num_cols != x_size)
      ierr = 11;

   if (num_rows != y_size)
      ierr = 12;

   if (num_cols != x_size && num_rows != y_size)
      ierr = 13;

   if (num_procs > 1)
   {
      if (num_cols_offd)
      {
         x_tmp = hypre_SeqVectorCreate( num_cols_offd );
         hypre_SeqVectorInitialize(x_tmp);
         x_tmp_data = hypre_VectorData(x_tmp);
      }

      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings
       *--------------------------------------------------------------------*/
      if (!comm_pkg)
      {
         hypre_MatvecCommPkgCreate(A);
         comm_pkg = hypre_ParCSRMatrixCommPkg(A);
      }

      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      if (num_sends)
         x_buf_data = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart
                                    (comm_pkg, num_sends));

      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            x_buf_data[index++]
               = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data, x_tmp_data );
   }
   hypre_CSRMatrixMatvec_FF( alpha, diag, x_local, beta, y_local, CF_marker,
                             CF_marker, fpt);

   if (num_procs > 1)
   {
      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      if (num_sends)
         int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart
                                      (comm_pkg, num_sends));
      if (num_cols_offd) CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            int_buf_data[index++]
               = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate(11,comm_pkg,int_buf_data,CF_marker_offd );

      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      if (num_cols_offd) hypre_CSRMatrixMatvec_FF( alpha, offd, x_tmp, 1.0, y_local,
                                                   CF_marker, CF_marker_offd, fpt);

      hypre_SeqVectorDestroy(x_tmp);
      x_tmp = NULL;
      hypre_TFree(x_buf_data);
      hypre_TFree(int_buf_data);
      hypre_TFree(CF_marker_offd);
   }

   return ierr;
}
예제 #16
0
HYPRE_Int
hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix    *A,
                            HYPRE_Int                    num_functions,
                            HYPRE_Int                   *dof_func,
                            HYPRE_Int                    option,
                            HYPRE_Int                    diag_option,     
                            hypre_ParCSRMatrix   **AN_ptr)
{
   MPI_Comm 	       comm            = hypre_ParCSRMatrixComm(A);
   hypre_CSRMatrix    *A_diag          = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int                *A_diag_i        = hypre_CSRMatrixI(A_diag);
   double             *A_diag_data     = hypre_CSRMatrixData(A_diag);


   hypre_CSRMatrix    *A_offd          = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int                *A_offd_i        = hypre_CSRMatrixI(A_offd);
   double             *A_offd_data     = hypre_CSRMatrixData(A_offd);
   HYPRE_Int                *A_diag_j        = hypre_CSRMatrixJ(A_diag);
   HYPRE_Int                *A_offd_j        = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int 		      *row_starts      = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int 		      *col_map_offd    = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int                 num_variables   = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int 		       num_nonzeros_offd = 0;
   HYPRE_Int 		       num_cols_offd = 0;
                  
   hypre_ParCSRMatrix *AN;
   hypre_CSRMatrix    *AN_diag;
   HYPRE_Int                *AN_diag_i;
   HYPRE_Int                *AN_diag_j;
   double             *AN_diag_data; 
   hypre_CSRMatrix    *AN_offd;
   HYPRE_Int                *AN_offd_i;
   HYPRE_Int                *AN_offd_j;
   double             *AN_offd_data; 
   HYPRE_Int		      *col_map_offd_AN;
   HYPRE_Int		      *new_col_map_offd;
   HYPRE_Int		      *row_starts_AN;
   HYPRE_Int		       AN_num_nonzeros_diag = 0;
   HYPRE_Int		       AN_num_nonzeros_offd = 0;
   HYPRE_Int		       num_cols_offd_AN;
   HYPRE_Int		       new_num_cols_offd;
                 
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   HYPRE_Int		       num_sends;
   HYPRE_Int		       num_recvs;
   HYPRE_Int		      *send_procs;
   HYPRE_Int		      *send_map_starts;
   HYPRE_Int		      *send_map_elmts;
   HYPRE_Int		      *new_send_map_elmts;
   HYPRE_Int		      *recv_procs;
   HYPRE_Int		      *recv_vec_starts;

   hypre_ParCSRCommPkg *comm_pkg_AN;
   HYPRE_Int		      *send_procs_AN;
   HYPRE_Int		      *send_map_starts_AN;
   HYPRE_Int		      *send_map_elmts_AN;
   HYPRE_Int		      *recv_procs_AN;
   HYPRE_Int		      *recv_vec_starts_AN;

   HYPRE_Int                 i, j, k, k_map;
                      
   HYPRE_Int                 ierr = 0;

   HYPRE_Int		       index, row;
   HYPRE_Int		       start_index;
   HYPRE_Int		       num_procs;
   HYPRE_Int		       node, cnt;
   HYPRE_Int		       mode;
   HYPRE_Int		       new_send_elmts_size;

   HYPRE_Int		       global_num_nodes;
   HYPRE_Int		       num_nodes;
   HYPRE_Int		       num_fun2;
   HYPRE_Int		      *map_to_node;
   HYPRE_Int		      *map_to_map;
   HYPRE_Int		      *counter;

   double sum;
   double *data;
   

   hypre_MPI_Comm_size(comm,&num_procs);

   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   mode = fabs(option);

   comm_pkg_AN = NULL;
   col_map_offd_AN = NULL;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2);

   for (i=0; i < 2; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
	  hypre_printf("nodes not properly aligned or incomplete info!\n");
	  return (87);
      }
   }
   
   global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions;


#else
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1);

  for (i=0; i < num_procs+1; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
	  hypre_printf("nodes not properly aligned or incomplete info!\n");
	  return (87);
      }
   }
   
   global_num_nodes = row_starts_AN[num_procs];

#endif

 
   num_nodes =  num_variables/num_functions;
   num_fun2 = num_functions*num_functions;

   map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables);
   AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);
   counter = hypre_CTAlloc(HYPRE_Int, num_nodes);
   for (i=0; i < num_variables; i++)
      map_to_node[i] = i/num_functions;
   for (i=0; i < num_nodes; i++)
      counter[i] = -1;

   AN_num_nonzeros_diag = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      AN_diag_i[i] = AN_num_nonzeros_diag;
      for (j=0; j < num_functions; j++)
      {
	 for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	 {
	    k_map = map_to_node[A_diag_j[k]];
	    if (counter[k_map] < i)
	    {
	       counter[k_map] = i;
	       AN_num_nonzeros_diag++;
	    }
	 }
	 row++;
      }
   }
   AN_diag_i[num_nodes] = AN_num_nonzeros_diag;

   AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag);	
   AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag);	

   AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag);
   hypre_CSRMatrixI(AN_diag) = AN_diag_i;
   hypre_CSRMatrixJ(AN_diag) = AN_diag_j;
   hypre_CSRMatrixData(AN_diag) = AN_diag_data;
       
   for (i=0; i < num_nodes; i++)
      counter[i] = -1;
   index = 0;
   start_index = 0;
   row = 0;

   switch (mode)
   {
      case 1:  /* frobenius norm */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = A_diag_data[k]*A_diag_data[k];
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += 
				A_diag_data[k]*A_diag_data[k];
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
            AN_diag_data[i] = sqrt(AN_diag_data[i]);

      }
      break;
      
      case 2:  /* sum of abs. value of all elements in each block */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = fabs(A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += fabs(A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
            AN_diag_data[i] /= num_fun2;
      }
      break;

      case 3:  /* largest element of each block (sets true value - not abs. value) */
      {

         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
      	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
      	       {
      	          k_map = map_to_node[A_diag_j[k]];
      	          if (counter[k_map] < start_index)
      	          {
      	             counter[k_map] = index;
      	             AN_diag_j[index] = k_map;
      	             AN_diag_data[index] = A_diag_data[k];
      	             index++;
      	          }
      	          else
      	          {
      	             if (fabs(A_diag_data[k]) > 
				fabs(AN_diag_data[counter[k_map]]))
      	                AN_diag_data[counter[k_map]] = A_diag_data[k];
      	          }
      	       }
      	       row++;
            }
            start_index = index;
         }
      }
      break;

      case 4:  /* inf. norm (row-sum)  */
      {

         data = hypre_CTAlloc(double, AN_num_nonzeros_diag*num_functions);

         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             data[index*num_functions + j] = fabs(A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             data[(counter[k_map])*num_functions + j] += fabs(A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
         {
            AN_diag_data[i]  = data[i*num_functions];
            
            for (j=1; j< num_functions; j++)
            {
               AN_diag_data[i]  = hypre_max( AN_diag_data[i],data[i*num_functions+j]);
            }
         }
         hypre_TFree(data);
      
      }
      break;

      case 6:  /* sum of all elements in each block */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = (A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += (A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
      }
      break;

   }

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (DO MORE BELOW) */
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i]; 
         sum = 0.0;
         for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++)
         {
            sum += AN_diag_data[k];
            
         }
         AN_diag_data[index] = -sum;
      }
      
   }
   else if (diag_option == 2)
   {
      
      /*  make all diagonal entries negative */
      /* the diagonal is the first element listed in each row - */
      
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i];
         AN_diag_data[index] = - AN_diag_data[index];
      }
   }






   num_nonzeros_offd = A_offd_i[num_variables];
   AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);

   num_cols_offd_AN = 0;

   if (comm_pkg)
   {
      comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
      hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm;
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends;
      num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs;
      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      send_procs_AN = NULL;
      send_map_elmts_AN = NULL;
      if (num_sends) 
      {
         send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends);
         send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]);
      }
      send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1);
      recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
      recv_procs_AN = NULL;
      if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs);
      for (i=0; i < num_sends; i++)
         send_procs_AN[i] = send_procs[i];
      for (i=0; i < num_recvs; i++)
         recv_procs_AN[i] = recv_procs[i];

      send_map_starts_AN[0] = 0;
      cnt = 0;
      for (i=0; i < num_sends; i++)
      {
	 k_map = send_map_starts[i];
	 if (send_map_starts[i+1]-k_map)
            send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions;
         for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++)
         {
            node = send_map_elmts[j]/num_functions;
            if (node > send_map_elmts_AN[cnt-1])
	       send_map_elmts_AN[cnt++] = node; 
         }
         send_map_starts_AN[i+1] = cnt;
      }
      hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN;
   }

   num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
   if (num_cols_offd)
   {
      if (num_cols_offd > num_variables)
      {
         hypre_TFree(map_to_node);
         map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd);
      }

      num_cols_offd_AN = 1;
      map_to_node[0] = col_map_offd[0]/num_functions;
      for (i=1; i < num_cols_offd; i++)
      {
         map_to_node[i] = col_map_offd[i]/num_functions;
         if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++;
      }
      
      if (num_cols_offd_AN > num_nodes)
      {
         hypre_TFree(counter);
         counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      }

      map_to_map = NULL;
      col_map_offd_AN = NULL;
      map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      col_map_offd_AN[0] = map_to_node[0];
      recv_vec_starts_AN[0] = 0;
      cnt = 1;
      for (i=0; i < num_recvs; i++)
      {
         for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         {
            node = map_to_node[j];
	    if (node > col_map_offd_AN[cnt-1])
	    {
	       col_map_offd_AN[cnt++] = node; 
	    }
	    map_to_map[j] = cnt-1;
         }
         recv_vec_starts_AN[i+1] = cnt;
      }

      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;

      AN_num_nonzeros_offd = 0;
      row = 0;
      for (i=0; i < num_nodes; i++)
      {
         AN_offd_i[i] = AN_num_nonzeros_offd;
         for (j=0; j < num_functions; j++)
         {
	    for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	    {
	       k_map = map_to_map[A_offd_j[k]];
	       if (counter[k_map] < i)
	       {
	          counter[k_map] = i;
	          AN_num_nonzeros_offd++;
	       }
	    }
	    row++;
         }
      }
      AN_offd_i[num_nodes] = AN_num_nonzeros_offd;
   }

       
   AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN,	
		AN_num_nonzeros_offd);
   hypre_CSRMatrixI(AN_offd) = AN_offd_i;
   if (AN_num_nonzeros_offd)
   {
      AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd);	
      AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd);	
      hypre_CSRMatrixJ(AN_offd) = AN_offd_j;
      hypre_CSRMatrixData(AN_offd) = AN_offd_data;
   
      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;
      index = 0;
      row = 0;
      AN_offd_i[0] = 0;
      start_index = 0;
      switch (mode)
      {
         case 1: /* frobenius norm */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	          {
	             k_map = map_to_map[A_offd_j[k]];
	             if (counter[k_map] < start_index)
	             {
	                counter[k_map] = index;
	                AN_offd_j[index] = k_map;
	                AN_offd_data[index] = A_offd_data[k]*A_offd_data[k];
	                index++;
	             }
	             else
	             {
	                AN_offd_data[counter[k_map]] += 
				A_offd_data[k]*A_offd_data[k];
	             }
	          }
	          row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
	       AN_offd_data[i] = sqrt(AN_offd_data[i]);
         }
         break;
      
         case 2:  /* sum of abs. value of all elements in block */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	          {
	             k_map = map_to_map[A_offd_j[k]];
	             if (counter[k_map] < start_index)
	             {
	                counter[k_map] = index;
	                AN_offd_j[index] = k_map;
	                AN_offd_data[index] = fabs(A_offd_data[k]);
	                index++;
	             }
	             else
	             {
	                AN_offd_data[counter[k_map]] += fabs(A_offd_data[k]);
	             }
	          }
	          row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
               AN_offd_data[i] /= num_fun2;
         }
         break;

         case 3: /* largest element in each block (not abs. value ) */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
      	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
      	          {
      	             k_map = map_to_map[A_offd_j[k]];
      	             if (counter[k_map] < start_index)
      	             {
      	                counter[k_map] = index;
      	                AN_offd_j[index] = k_map;
      	                AN_offd_data[index] = A_offd_data[k];
      	                index++;
      	             }
      	             else
      	             {
      	                if (fabs(A_offd_data[k]) > 
				fabs(AN_offd_data[counter[k_map]]))
      	                   AN_offd_data[counter[k_map]] = A_offd_data[k];
      	             }
      	          }
      	          row++;
               }
               start_index = index;
            }
         }
         break;
         
         case 4:  /* inf. norm (row-sum)  */
         {
            
            data = hypre_CTAlloc(double, AN_num_nonzeros_offd*num_functions);
            
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
                  for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
                  {
                     k_map = map_to_map[A_offd_j[k]];
                     if (counter[k_map] < start_index)
                     {
                        counter[k_map] = index;
                        AN_offd_j[index] = k_map;
                        data[index*num_functions + j] = fabs(A_offd_data[k]);
                        index++;
                     }
                     else
                     {
                        data[(counter[k_map])*num_functions + j] += fabs(A_offd_data[k]);
                     }
                  }
                  row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
            {
               AN_offd_data[i]  = data[i*num_functions];
               
               for (j=1; j< num_functions; j++)
               {
                  AN_offd_data[i]  = hypre_max( AN_offd_data[i],data[i*num_functions+j]);
               }
            }
            hypre_TFree(data);
            
         }
         break;
         
         case 6:  /* sum of value of all elements in block */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
                  for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
                  {
                     k_map = map_to_map[A_offd_j[k]];
                     if (counter[k_map] < start_index)
                     {
                        counter[k_map] = index;
                        AN_offd_j[index] = k_map;
                        AN_offd_data[index] = (A_offd_data[k]);
                        index++;
                     }
                     else
                     {
                        AN_offd_data[counter[k_map]] += (A_offd_data[k]);
                     }
                  }
                  row++;
               }
               start_index = index;
            }
            
         }
         break;
      }
   
      hypre_TFree(map_to_map);
   }

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (here we are adding the 
         off_diag contribution)*/
      /* the diagonal is the first element listed in each row of AN_diag_data - */
      for (i=0; i < num_nodes; i++)
      {
         sum = 0.0;
         for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++)
         {
            sum += AN_offd_data[k];
            
         }
         index = AN_diag_i[i];/* location of diag entry in data */ 
         AN_diag_data[index] -= sum; /* subtract from current value */
      }
      
   }

    
   AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes,
		row_starts_AN, row_starts_AN, num_cols_offd_AN,
		AN_num_nonzeros_diag, AN_num_nonzeros_offd);

   /* we already created the diag and offd matrices - so we don't need the ones
      created above */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN));
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN));
   hypre_ParCSRMatrixDiag(AN) = AN_diag;
   hypre_ParCSRMatrixOffd(AN) = AN_offd;


   hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN;
   hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN;

   new_num_cols_offd = num_functions*num_cols_offd_AN;

   if (new_num_cols_offd > num_cols_offd)
   {
      new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd);
      cnt = 0;
      for (i=0; i < num_cols_offd_AN; i++)
      {
	 for (j=0; j < num_functions; j++)
         {
 	    new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j;
         }
      }
      cnt = 0;
      for (i=0; i < num_cols_offd; i++)
      {
         while (col_map_offd[i] >  new_col_map_offd[cnt])
            cnt++;
         col_map_offd[i] = cnt++;
      }
      for (i=0; i < num_recvs+1; i++)
      {
         recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i];
      }

      for (i=0; i < num_nonzeros_offd; i++)
      {
         j = A_offd_j[i];
	 A_offd_j[i] = col_map_offd[j];
      }
      hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd;
      hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd;
      hypre_TFree(col_map_offd);
   }
 
   hypre_TFree(map_to_node);
   new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions;

   if (new_send_elmts_size > send_map_starts[num_sends])
   {
      new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size);
      cnt = 0;
      send_map_starts[0] = 0;
      for (i=0; i < num_sends; i++)
      {
         send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions;
         for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++)
	 {
            for (k=0; k < num_functions; k++)
	       new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k;
	 }
      }
      hypre_TFree(send_map_elmts);
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts;
   }
 
   *AN_ptr        = AN;

   hypre_TFree(counter);

   return (ierr);
}
예제 #17
0
void hypre_BoomerAMGTruncateInterp( hypre_ParCSRMatrix *P,
                                    HYPRE_Real eps, HYPRE_Real dlt,
                                    HYPRE_Int * CF_marker )
/* Truncate the interpolation matrix P, but only in rows for which the
   marker is <0.  Truncation means that an element P(i,j) is set to 0 if
   P(i,j)>0 and P(i,j)<eps*max( P(i,j) )  or if
   P(i,j)>0 and P(i,j)<dlt*max( -P(i,j) )  or if
   P(i,j)<0 and P(i,j)>dlt*min( -P(i,j) )  or if
   P(i,j)<0 and P(i,j)>eps*min( P(i,j) )
      ( 0<eps,dlt<1, typically 0.1=dlt<eps=0.2, )
   The min and max are only computed locally, as I'm guessing that there isn't
   usually much to be gained (in the way of improved performance) by getting
   them perfectly right.
*/

/* The function hypre_BoomerAMGInterpTruncation in par_interp.c is
   very similar.  It looks at fabs(value) rather than separately
   dealing with value<0 and value>0 as recommended by Klaus Stuben,
   thus as this function does.  In this function, only "marked" rows
   are affected.  Lastly, in hypre_BoomerAMGInterpTruncation, if any
   element gets discarded, it reallocates arrays to the new size.
*/
{
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Real      *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Real      *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int             *new_P_diag_i;
   HYPRE_Int             *new_P_offd_i;
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int	num_rows_offd_P = hypre_CSRMatrixNumRows(P_offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(P_diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(P_offd);
#if 0
   MPI_Comm comm = hypre_ParCSRMatrixComm( P );
   HYPRE_Real vmax1, vmin1;
#endif
   HYPRE_Real vmax = 0.0;
   HYPRE_Real vmin = 0.0;
   HYPRE_Real v, old_sum, new_sum, scale, wmax, wmin;
   HYPRE_Int i1, m, m1d, m1o;

   /* compute vmax = eps*max(P(i,j)), vmin = eps*min(P(i,j)) */
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            v = P_diag_data[m];
            vmax = hypre_max( v, vmax );
            vmin = hypre_min( v, vmin );
         }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
         {
            v = P_offd_data[m];
            vmax = hypre_max( v, vmax );
            vmin = hypre_min( v, vmin );
         }
   }
#if 0
   /* This can make max,min global so results don't depend on no. processors
      We don't want this except for testing, or maybe this could be put
      someplace better.  I don't like adding communication here, for a minor reason.
   */
   vmax1 = vmax; vmin1 = vmin;
   hypre_MPI_Allreduce( &vmax1, &vmax, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm );
   hypre_MPI_Allreduce( &vmin1, &vmin, 1, HYPRE_MPI_REAL, hypre_MPI_MIN, comm );
#endif
   if ( vmax <= 0.0 ) vmax =  1.0;  /* make sure no v is v>vmax if no v is v>0 */
   if ( vmin >= 0.0 ) vmin = -1.0;  /* make sure no v is v<vmin if no v is v<0 */
   wmax = - dlt * vmin;
   wmin = - dlt * vmax;
   vmax *= eps;
   vmin *= eps;

   /* Repack the i,j,and data arrays so as to discard the small elements of P.
      Elements of Coarse rows (CF_marker>=0) are always kept.
      The arrays are not re-allocated, so there will generally be unused space
      at the ends of the arrays. */
   new_P_diag_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P+1 );
   new_P_offd_i = hypre_CTAlloc( HYPRE_Int, num_rows_offd_P+1 );
   m1d = P_diag_i[0];
   m1o = P_offd_i[0];
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      old_sum = 0;
      new_sum = 0;
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
      {
         v = P_diag_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_diag_j[m1d] = P_diag_j[m];
            P_diag_data[m1d] = P_diag_data[m];
            ++m1d;
         }
         else
         {  /* discard v */
            --num_nonzeros_diag;
         }
      }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
      {
         v = P_offd_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_offd_j[m1o] = P_offd_j[m];
            P_offd_data[m1o] = P_offd_data[m];
            ++m1o;
         }
         else
         {  /* discard v */
            --num_nonzeros_offd;
         }
      }

      new_P_diag_i[i1+1] = m1d;
      if ( i1<num_rows_offd_P ) new_P_offd_i[i1+1] = m1o;

      /* rescale to keep row sum the same */
      if (new_sum!=0) scale = old_sum/new_sum; else scale = 1.0;
      for ( m=new_P_diag_i[i1]; m<new_P_diag_i[i1+1]; ++m )
         P_diag_data[m] *= scale;
      if ( i1<num_rows_offd_P ) /* this test fails when there is no offd block */
         for ( m=new_P_offd_i[i1]; m<new_P_offd_i[i1+1]; ++m )
            P_offd_data[m] *= scale;

   }

   for ( i1 = 1; i1 <= num_rows_diag_P; i1++ )
   {
      P_diag_i[i1] = new_P_diag_i[i1];
      if ( i1<=num_rows_offd_P && num_nonzeros_offd>0 ) P_offd_i[i1] = new_P_offd_i[i1];
   }
   hypre_TFree( new_P_diag_i );
   if ( num_rows_offd_P>0 ) hypre_TFree( new_P_offd_i );

   hypre_CSRMatrixNumNonzeros(P_diag) = num_nonzeros_diag;
   hypre_CSRMatrixNumNonzeros(P_offd) = num_nonzeros_offd;
   hypre_ParCSRMatrixSetDNumNonzeros( P );
   hypre_ParCSRMatrixSetNumNonzeros( P );

}
예제 #18
0
void hypre_ParMatScaleDiagInv_F(
   hypre_ParCSRMatrix * C, hypre_ParCSRMatrix * A, double weight, HYPRE_Int * CF_marker )
   /* hypre_ParMatScaleDiagInv scales certain rows of its first
    * argument by premultiplying with a submatrix of the inverse of
    * the diagonal of its second argument; and _also_ multiplying by the scalar
    * third argument.
    * The marker array determines rows are changed and which diagonal elements
    * are used.
    */
{
   /*
     If A=(Aij),C=(Cik), i&j in Fine+Coarse, k in Coarse, we want
        new Cik = (1/aii)*Cik, for Fine i only, all k.
     Unlike a matmul, this computation is purely local, only the diag
     blocks are involved.
   */

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);

   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int             *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int             *A_diag_j = hypre_CSRMatrixJ(A_diag);
   double          *C_diag_data = hypre_CSRMatrixData(C_diag);
   double          *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int             *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int             *C_offd_i = hypre_CSRMatrixI(C_offd);


   HYPRE_Int	num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   HYPRE_Int	num_cols_offd_C = hypre_CSRMatrixNumCols(C_offd);

   HYPRE_Int              i1, i2;
   HYPRE_Int              jj2, jj3;
   double           a_entry;

   /*-----------------------------------------------------------------------
    *  Loop over C_diag rows.
    *-----------------------------------------------------------------------*/
    
   for (i1 = 0; i1 < num_rows_diag_C; i1++)
   {
      
      if ( CF_marker[i1] < 0 )  /* Fine data only */
      {

         /*-----------------------------------------------------------------
          *  Loop over A_diag data
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];
            if ( i1==i2 )  /* diagonal of A only */
            {
               a_entry = A_diag_data[jj2] * weight;
            
               /*-----------------------------------------------------------
                *  Loop over entries in current row of C_diag.
                *-----------------------------------------------------------*/

               for (jj3 = C_diag_i[i2]; jj3 < C_diag_i[i2+1]; jj3++)
               {
                  C_diag_data[jj3] = C_diag_data[jj3] / a_entry;
                  
               }
               /*-----------------------------------------------------------
                *  Loop over entries in current row of C_offd.
                *-----------------------------------------------------------*/

               if ( num_cols_offd_C )
               {
                  for (jj3 = C_offd_i[i2]; jj3 < C_offd_i[i2+1]; jj3++)
                  {
                     C_offd_data[jj3] = C_offd_data[jj3] / a_entry;
                  }
               }
            }
         }
      }
   }

}
예제 #19
0
HYPRE_ParCSRMatrix
GenerateRotate7pt( MPI_Comm comm,
                   HYPRE_Int      nx,
                   HYPRE_Int      ny,
                   HYPRE_Int      P,
                   HYPRE_Int      Q,
                   HYPRE_Int      p,
                   HYPRE_Int      q,
                   HYPRE_Real   alpha,
                   HYPRE_Real   eps )
{
    hypre_ParCSRMatrix *A;
    hypre_CSRMatrix *diag;
    hypre_CSRMatrix *offd;

    HYPRE_Int    *diag_i;
    HYPRE_Int    *diag_j;
    HYPRE_Real *diag_data;

    HYPRE_Int    *offd_i;
    HYPRE_Int    *offd_j;
    HYPRE_Real *offd_data;

    HYPRE_Real *value;
    HYPRE_Real ac, bc, cc, s, c, pi, x;
    HYPRE_Int *global_part;
    HYPRE_Int ix, iy;
    HYPRE_Int cnt, o_cnt;
    HYPRE_Int local_num_rows;
    HYPRE_Int *col_map_offd;
    HYPRE_Int *work;
    HYPRE_Int row_index;
    HYPRE_Int i,j;

    HYPRE_Int nx_local, ny_local;
    HYPRE_Int nx_size, ny_size;
    HYPRE_Int num_cols_offd;
    HYPRE_Int grid_size;

    HYPRE_Int *nx_part;
    HYPRE_Int *ny_part;

    HYPRE_Int num_procs, my_id;
    HYPRE_Int P_busy, Q_busy;

    hypre_MPI_Comm_size(comm,&num_procs);
    hypre_MPI_Comm_rank(comm,&my_id);

    grid_size = nx*ny;

    value = hypre_CTAlloc(HYPRE_Real,4);
    pi = 4.0*atan(1.0);
    x = pi*alpha/180.0;
    s = sin(x);
    c = cos(x);
    ac = -(c*c + eps*s*s);
    bc = 2.0*(1.0 - eps)*s*c;
    cc = -(s*s + eps*c*c);
    value[0] = -2*(2*ac+bc+2*cc);
    value[1] = 2*ac+bc;
    value[2] = bc+2*cc;
    value[3] = -bc;

    hypre_GeneratePartitioning(nx,P,&nx_part);
    hypre_GeneratePartitioning(ny,Q,&ny_part);

    global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1);

    global_part[0] = 0;
    cnt = 1;
    for (iy = 0; iy < Q; iy++)
    {
        ny_size = ny_part[iy+1]-ny_part[iy];
        for (ix = 0; ix < P; ix++)
        {
            nx_size = nx_part[ix+1] - nx_part[ix];
            global_part[cnt] = global_part[cnt-1];
            global_part[cnt++] += nx_size*ny_size;
        }
    }

    nx_local = nx_part[p+1] - nx_part[p];
    ny_local = ny_part[q+1] - ny_part[q];

    my_id = q*P + p;
    num_procs = P*Q;

    local_num_rows = nx_local*ny_local;
    diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
    offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

    P_busy = hypre_min(nx,P);
    Q_busy = hypre_min(ny,Q);

    num_cols_offd = 0;
    if (p) num_cols_offd += ny_local;
    if (p < P_busy-1) num_cols_offd += ny_local;
    if (q) num_cols_offd += nx_local;
    if (q < Q_busy-1) num_cols_offd += nx_local;
    if (p && q) num_cols_offd++;
    if (p && q < Q_busy-1 ) num_cols_offd++;
    if (p < P_busy-1 && q ) num_cols_offd++;
    if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++;

    if (!local_num_rows) num_cols_offd = 0;

    col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

    cnt = 0;
    o_cnt = 0;
    diag_i[0] = 0;
    offd_i[0] = 0;
    for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
    {
        for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
        {
            cnt++;
            o_cnt++;
            diag_i[cnt] = diag_i[cnt-1];
            offd_i[o_cnt] = offd_i[o_cnt-1];
            diag_i[cnt]++;
            if (iy > ny_part[q])
            {
                diag_i[cnt]++;
                if (ix > nx_part[p])
                {
                    diag_i[cnt]++;
                }
                else
                {
                    if (ix)
                        offd_i[o_cnt]++;
                }
            }
            else
            {
                if (iy)
                {
                    offd_i[o_cnt]++;
                    if (ix > nx_part[p])
                    {
                        offd_i[o_cnt]++;
                    }
                    else if (ix)
                    {
                        offd_i[o_cnt]++;
                    }
                }
            }
            if (ix > nx_part[p])
                diag_i[cnt]++;
            else
            {
                if (ix)
                {
                    offd_i[o_cnt]++;
                }
            }
            if (ix+1 < nx_part[p+1])
                diag_i[cnt]++;
            else
            {
                if (ix+1 < nx)
                {
                    offd_i[o_cnt]++;
                }
            }
            if (iy+1 < ny_part[q+1])
            {
                diag_i[cnt]++;
                if (ix < nx_part[p+1]-1)
                {
                    diag_i[cnt]++;
                }
                else
                {
                    if (ix+1 < nx)
                        offd_i[o_cnt]++;
                }
            }
            else
            {
                if (iy+1 < ny)
                {
                    offd_i[o_cnt]++;
                    if (ix < nx_part[p+1]-1)
                    {
                        offd_i[o_cnt]++;
                    }
                    else if (ix < nx-1)
                    {
                        offd_i[o_cnt]++;
                    }
                }
            }
        }
    }

    diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
    diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]);

    if (num_procs > 1)
    {
        offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
        offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]);
    }

    row_index = 0;
    cnt = 0;
    o_cnt = 0;
    for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
    {
        for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
        {
            diag_j[cnt] = row_index;
            diag_data[cnt++] = value[0];
            if (iy > ny_part[q])
            {
                if (ix > nx_part[p])
                {
                    diag_j[cnt] = row_index-nx_local-1 ;
                    diag_data[cnt++] = value[3];
                }
                else
                {
                    if (ix)
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
                diag_j[cnt] = row_index-nx_local;
                diag_data[cnt++] = value[2];
            }
            else
            {
                if (iy)
                {
                    if (ix > nx_part[p])
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p,q-1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    else if (ix)
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q-1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    offd_j[o_cnt] = hypre_map2(ix,iy-1,p,q-1,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[2];
                }
            }
            if (ix > nx_part[p])
            {
                diag_j[cnt] = row_index-1;
                diag_data[cnt++] = value[1];
            }
            else
            {
                if (ix)
                {
                    offd_j[o_cnt] = hypre_map2(ix-1,iy,p-1,q,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[1];
                }
            }
            if (ix+1 < nx_part[p+1])
            {
                diag_j[cnt] = row_index+1;
                diag_data[cnt++] = value[1];
            }
            else
            {
                if (ix+1 < nx)
                {
                    offd_j[o_cnt] = hypre_map2(ix+1,iy,p+1,q,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[1];
                }
            }
            if (iy+1 < ny_part[q+1])
            {
                diag_j[cnt] = row_index+nx_local;
                diag_data[cnt++] = value[2];
                if (ix < nx_part[p+1]-1)
                {
                    diag_j[cnt] = row_index+nx_local+1 ;
                    diag_data[cnt++] = value[3];
                }
                else
                {
                    if (ix+1 < nx)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
            }
            else
            {
                if (iy+1 < ny)
                {
                    offd_j[o_cnt] = hypre_map2(ix,iy+1,p,q+1,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[2];
                    if (ix < nx_part[p+1]-1)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p,q+1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    else if (ix < nx-1)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q+1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
            }
            row_index++;
        }
    }

    if (num_procs > 1)
    {
        work = hypre_CTAlloc(HYPRE_Int,o_cnt);

        for (i=0; i < o_cnt; i++)
            work[i] = offd_j[i];

        qsort0(work, 0, o_cnt-1);

        col_map_offd[0] = work[0];
        cnt = 0;
        for (i=0; i < o_cnt; i++)
        {
            if (work[i] > col_map_offd[cnt])
            {
                cnt++;
                col_map_offd[cnt] = work[i];
            }
        }

        for (i=0; i < o_cnt; i++)
        {
            for (j=0; j < num_cols_offd; j++)
            {
                if (offd_j[i] == col_map_offd[j])
                {
                    offd_j[i] = j;
                    break;
                }
            }
        }

        hypre_TFree(work);
    }

    A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size,
                                 global_part, global_part, num_cols_offd,
                                 diag_i[local_num_rows],
                                 offd_i[local_num_rows]);

    hypre_ParCSRMatrixColMapOffd(A) = col_map_offd;

    diag = hypre_ParCSRMatrixDiag(A);
    hypre_CSRMatrixI(diag) = diag_i;
    hypre_CSRMatrixJ(diag) = diag_j;
    hypre_CSRMatrixData(diag) = diag_data;

    offd = hypre_ParCSRMatrixOffd(A);
    hypre_CSRMatrixI(offd) = offd_i;
    if (num_cols_offd)
    {
        hypre_CSRMatrixJ(offd) = offd_j;
        hypre_CSRMatrixData(offd) = offd_data;
    }

    hypre_TFree(nx_part);
    hypre_TFree(ny_part);
    hypre_TFree(value);

    return (HYPRE_ParCSRMatrix) A;
}
예제 #20
0
HYPRE_Int hypre_seqAMGSetup( hypre_ParAMGData *amg_data,
                      HYPRE_Int p_level,
                      HYPRE_Int coarse_threshold)


{

   /* Par Data Structure variables */
   hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data);

   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(Par_A_array[0]); 
   MPI_Comm 	      new_comm, seq_comm;

   hypre_ParCSRMatrix   *A_seq = NULL;
   hypre_CSRMatrix  *A_seq_diag;
   hypre_CSRMatrix  *A_seq_offd;
   hypre_ParVector   *F_seq = NULL;
   hypre_ParVector   *U_seq = NULL;

   hypre_ParCSRMatrix *A;

   HYPRE_Int               **dof_func_array;   
   HYPRE_Int                num_procs, my_id;

   HYPRE_Int                not_finished_coarsening;
   HYPRE_Int                level;

   HYPRE_Solver  coarse_solver;

   /* misc */
   dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data);

   /*MPI Stuff */
   hypre_MPI_Comm_size(comm, &num_procs);   
   hypre_MPI_Comm_rank(comm,&my_id);
  
   /*initial */
   level = p_level;
   
   not_finished_coarsening = 1;
  
   /* convert A at this level to sequential */
   A = Par_A_array[level];

   {
      double *A_seq_data = NULL;
      HYPRE_Int *A_seq_i = NULL;
      HYPRE_Int *A_seq_offd_i = NULL;
      HYPRE_Int *A_seq_j = NULL;

      double *A_tmp_data = NULL;
      HYPRE_Int *A_tmp_i = NULL;
      HYPRE_Int *A_tmp_j = NULL;

      HYPRE_Int *info, *displs, *displs2;
      HYPRE_Int i, j, size, num_nonzeros, total_nnz, cnt;
  
      hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
      hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
      HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
      HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
      HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
      HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
      HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
      double *A_diag_data = hypre_CSRMatrixData(A_diag);
      double *A_offd_data = hypre_CSRMatrixData(A_offd);
      HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
      HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);

      hypre_MPI_Group orig_group, new_group; 
      HYPRE_Int *ranks, new_num_procs, *row_starts;

      info = hypre_CTAlloc(HYPRE_Int, num_procs);

      hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

      ranks = hypre_CTAlloc(HYPRE_Int, num_procs);

      new_num_procs = 0;
      for (i=0; i < num_procs; i++)
         if (info[i]) 
         {
            ranks[new_num_procs] = i;
            info[new_num_procs++] = info[i];
         }

      MPI_Comm_group(comm, &orig_group);
      hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group);
      MPI_Comm_create(comm, new_group, &new_comm);
      hypre_MPI_Group_free(&new_group);
      hypre_MPI_Group_free(&orig_group);

      if (num_rows)
      {
         /* alloc space in seq data structure only for participating procs*/
         HYPRE_BoomerAMGCreate(&coarse_solver);
         HYPRE_BoomerAMGSetMaxRowSum(coarse_solver,
		hypre_ParAMGDataMaxRowSum(amg_data)); 
         HYPRE_BoomerAMGSetStrongThreshold(coarse_solver,
		hypre_ParAMGDataStrongThreshold(amg_data)); 
         HYPRE_BoomerAMGSetCoarsenType(coarse_solver,
		hypre_ParAMGDataCoarsenType(amg_data)); 
         HYPRE_BoomerAMGSetInterpType(coarse_solver,
		hypre_ParAMGDataInterpType(amg_data)); 
         HYPRE_BoomerAMGSetTruncFactor(coarse_solver, 
		hypre_ParAMGDataTruncFactor(amg_data)); 
         HYPRE_BoomerAMGSetPMaxElmts(coarse_solver, 
		hypre_ParAMGDataPMaxElmts(amg_data)); 
	 if (hypre_ParAMGDataUserRelaxType(amg_data) > -1) 
            HYPRE_BoomerAMGSetRelaxType(coarse_solver, 
		hypre_ParAMGDataUserRelaxType(amg_data)); 
         HYPRE_BoomerAMGSetRelaxOrder(coarse_solver, 
		hypre_ParAMGDataRelaxOrder(amg_data)); 
         HYPRE_BoomerAMGSetRelaxWt(coarse_solver, 
		hypre_ParAMGDataUserRelaxWeight(amg_data)); 
	 if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1) 
            HYPRE_BoomerAMGSetNumSweeps(coarse_solver, 
		hypre_ParAMGDataUserNumSweeps(amg_data)); 
         HYPRE_BoomerAMGSetNumFunctions(coarse_solver, 
		hypre_ParAMGDataNumFunctions(amg_data)); 
         HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1); 
         HYPRE_BoomerAMGSetTol(coarse_solver, 0); 

         /* Create CSR Matrix, will be Diag part of new matrix */
         A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows+1);

         A_tmp_i[0] = 0;
         for (i=1; i < num_rows+1; i++)
            A_tmp_i[i] = A_diag_i[i]-A_diag_i[i-1]+A_offd_i[i]-A_offd_i[i-1];

         num_nonzeros = A_offd_i[num_rows]+A_diag_i[num_rows];

         A_tmp_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
         A_tmp_data = hypre_CTAlloc(double, num_nonzeros);

         cnt = 0;
         for (i=0; i < num_rows; i++)
         {
            for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++)
	    {
	       A_tmp_j[cnt] = A_diag_j[j]+first_row_index;
	       A_tmp_data[cnt++] = A_diag_data[j];
	    }
            for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++)
	    {
	       A_tmp_j[cnt] = col_map_offd[A_offd_j[j]];
	       A_tmp_data[cnt++] = A_offd_data[j];
	    }
         }

         displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
         displs[0] = 0;
         for (i=1; i < new_num_procs+1; i++)
            displs[i] = displs[i-1]+info[i-1];
         size = displs[new_num_procs];
  
         A_seq_i = hypre_CTAlloc(HYPRE_Int, size+1);
         A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1);

         hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, 
			displs, HYPRE_MPI_INT, new_comm );

         displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);

         A_seq_i[0] = 0;
         displs2[0] = 0;
         for (j=1; j < displs[1]; j++)
            A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
         for (i=1; i < new_num_procs; i++)
         {
            for (j=displs[i]; j < displs[i+1]; j++)
            {
               A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
            }
         }
         A_seq_i[size] = A_seq_i[size]+A_seq_i[size-1];
         displs2[new_num_procs] = A_seq_i[size];
         for (i=1; i < new_num_procs+1; i++)
         {
            displs2[i] = A_seq_i[displs[i]];
            info[i-1] = displs2[i] - displs2[i-1];
         }

         total_nnz = displs2[new_num_procs];
         A_seq_j = hypre_CTAlloc(HYPRE_Int, total_nnz);
         A_seq_data = hypre_CTAlloc(double, total_nnz);

         hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT,
                       A_seq_j, info, displs2,
                       HYPRE_MPI_INT, new_comm );

         hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, hypre_MPI_DOUBLE,
                       A_seq_data, info, displs2,
                       hypre_MPI_DOUBLE, new_comm );

         hypre_TFree(displs);
         hypre_TFree(displs2);
         hypre_TFree(A_tmp_i);
         hypre_TFree(A_tmp_j);
         hypre_TFree(A_tmp_data);
   
         row_starts = hypre_CTAlloc(HYPRE_Int,2);
         row_starts[0] = 0; 
         row_starts[1] = size;
 
         /* Create 1 proc communicator */
         seq_comm = hypre_MPI_COMM_SELF;

         A_seq = hypre_ParCSRMatrixCreate(seq_comm,size,size,
					  row_starts, row_starts,
						0,total_nnz,0); 

         A_seq_diag = hypre_ParCSRMatrixDiag(A_seq);
         A_seq_offd = hypre_ParCSRMatrixOffd(A_seq);

         hypre_CSRMatrixData(A_seq_diag) = A_seq_data;
         hypre_CSRMatrixI(A_seq_diag) = A_seq_i;
         hypre_CSRMatrixJ(A_seq_diag) = A_seq_j;
         hypre_CSRMatrixI(A_seq_offd) = A_seq_offd_i;

         F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         hypre_ParVectorOwnsPartitioning(F_seq) = 0;
         hypre_ParVectorOwnsPartitioning(U_seq) = 0;
         hypre_ParVectorInitialize(F_seq);
         hypre_ParVectorInitialize(U_seq);

         hypre_BoomerAMGSetup(coarse_solver,A_seq,F_seq,U_seq);

         hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver;
         hypre_ParAMGDataACoarse(amg_data) = A_seq;
         hypre_ParAMGDataFCoarse(amg_data) = F_seq;
         hypre_ParAMGDataUCoarse(amg_data) = U_seq;
         hypre_ParAMGDataNewComm(amg_data) = new_comm;
      }
      hypre_TFree(info);
      hypre_TFree(ranks);
   }
 
   return 0;
   
   
}
예제 #21
0
HYPRE_Int
hypre_SchwarzSetup(void               *schwarz_vdata,
                   hypre_ParCSRMatrix *A,
                   hypre_ParVector    *f,
                   hypre_ParVector    *u         )
{

   hypre_SchwarzData   *schwarz_data = schwarz_vdata;
   HYPRE_Int *dof_func;
   double *scale;
   hypre_CSRMatrix *domain_structure;
   hypre_CSRMatrix *A_boundary;
   hypre_ParVector *Vtemp;

   HYPRE_Int *pivots = NULL;

   HYPRE_Int variant = hypre_SchwarzDataVariant(schwarz_data);
   HYPRE_Int domain_type = hypre_SchwarzDataDomainType(schwarz_data);
   HYPRE_Int overlap = hypre_SchwarzDataOverlap(schwarz_data);
   HYPRE_Int num_functions = hypre_SchwarzDataNumFunctions(schwarz_data);
   double relax_weight = hypre_SchwarzDataRelaxWeight(schwarz_data);
   HYPRE_Int use_nonsymm = hypre_SchwarzDataUseNonSymm(schwarz_data);
   

   dof_func = hypre_SchwarzDataDofFunc(schwarz_data);

   Vtemp = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
			hypre_ParCSRMatrixGlobalNumRows(A),
			hypre_ParCSRMatrixRowStarts(A));
   hypre_ParVectorSetPartitioningOwner(Vtemp,0);
   hypre_ParVectorInitialize(Vtemp);
   hypre_SchwarzDataVtemp(schwarz_data) = Vtemp;

   if (variant > 1)
   {
      hypre_ParAMGCreateDomainDof(A,
                                  domain_type, overlap,
                                  num_functions, dof_func,
                                  &domain_structure, &pivots, use_nonsymm);

      if (variant == 2)
      {
         hypre_ParGenerateScale(A, domain_structure, relax_weight,
		&scale);
         hypre_SchwarzDataScale(schwarz_data) = scale;
      }
      else
      {
         hypre_ParGenerateHybridScale(A, domain_structure, &A_boundary, &scale);
         hypre_SchwarzDataScale(schwarz_data) = scale;
         if (hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A)))
            hypre_SchwarzDataABoundary(schwarz_data) = A_boundary;
         else
            hypre_SchwarzDataABoundary(schwarz_data) = NULL;
      }
   }
   else
   {
      hypre_AMGCreateDomainDof (hypre_ParCSRMatrixDiag(A),
                                domain_type, overlap,
                                num_functions, dof_func,
                                &domain_structure, &pivots, use_nonsymm);
      if (variant == 1)
      {
         hypre_GenerateScale(domain_structure, 
		hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A)),
		relax_weight, &scale);
         hypre_SchwarzDataScale(schwarz_data) = scale;
      }
   }

   hypre_SchwarzDataDomainStructure(schwarz_data) = domain_structure;
   hypre_SchwarzDataPivots(schwarz_data) = pivots;

   return hypre_error_flag;

}
예제 #22
0
HYPRE_Int AmgCGCGraphAssemble (hypre_ParCSRMatrix *S,HYPRE_Int *vertexrange,HYPRE_Int *CF_marker,HYPRE_Int *CF_marker_offd,HYPRE_Int coarsen_type,
			 HYPRE_IJMatrix *ijG)
/* assemble a graph representing the connections between the grids
 * ================================================================================================
 * S : the strength matrix
 * vertexrange : the parallel layout of the candidate coarse grid vertices
 * CF_marker, CF_marker_offd : the coarse/fine markers 
 * coarsen_type : the coarsening type
 * ijG : the created graph
 * ================================================================================================*/
{
  HYPRE_Int ierr=0;
  HYPRE_Int i,/* ii,*/ip,j,jj,m,n,p;
  HYPRE_Int mpisize,mpirank;

  HYPRE_Real weight;

  MPI_Comm comm = hypre_ParCSRMatrixComm(S);
/*   hypre_MPI_Status status; */

  HYPRE_IJMatrix ijmatrix;
  hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag (S);
  hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd (S);
/*   HYPRE_Int *S_i = hypre_CSRMatrixI(S_diag); */
/*   HYPRE_Int *S_j = hypre_CSRMatrixJ(S_diag); */
  HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
  HYPRE_Int *S_offd_j = NULL;
  HYPRE_Int num_variables = hypre_CSRMatrixNumRows (S_diag);
  HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (S_offd);
  HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd (S);
  HYPRE_Int pointrange_start,pointrange_end;
  HYPRE_Int *pointrange,*pointrange_nonlocal,*pointrange_strong=NULL;
  HYPRE_Int vertexrange_start,vertexrange_end;
  HYPRE_Int *vertexrange_strong= NULL;
  HYPRE_Int *vertexrange_nonlocal;
  HYPRE_Int num_recvs,num_recvs_strong;
  HYPRE_Int *recv_procs,*recv_procs_strong=NULL;
  HYPRE_Int /* *zeros,*rownz,*/*rownz_diag,*rownz_offd;
  HYPRE_Int nz;
  HYPRE_Int nlocal;
  HYPRE_Int one=1;

  hypre_ParCSRCommPkg    *comm_pkg    = hypre_ParCSRMatrixCommPkg (S);
 

  hypre_MPI_Comm_size (comm,&mpisize);
  hypre_MPI_Comm_rank (comm,&mpirank);

  /* determine neighbor processors */
  num_recvs = hypre_ParCSRCommPkgNumRecvs (comm_pkg);
  recv_procs = hypre_ParCSRCommPkgRecvProcs (comm_pkg);
  pointrange = hypre_ParCSRMatrixRowStarts (S);
  pointrange_nonlocal = hypre_CTAlloc  (HYPRE_Int, 2*num_recvs);
  vertexrange_nonlocal = hypre_CTAlloc (HYPRE_Int, 2*num_recvs);
#ifdef HYPRE_NO_GLOBAL_PARTITION
  {
    HYPRE_Int num_sends  =  hypre_ParCSRCommPkgNumSends (comm_pkg);
    HYPRE_Int *send_procs =  hypre_ParCSRCommPkgSendProcs (comm_pkg);
    HYPRE_Int *int_buf_data   = hypre_CTAlloc (HYPRE_Int,4*num_sends);
    HYPRE_Int *int_buf_data2  = int_buf_data + 2*num_sends;
    hypre_MPI_Request *sendrequest,*recvrequest;

    nlocal = vertexrange[1] - vertexrange[0];
    pointrange_start = pointrange[0];
    pointrange_end   = pointrange[1];
    vertexrange_start = vertexrange[0];
    vertexrange_end   = vertexrange[1];
    sendrequest = hypre_CTAlloc (hypre_MPI_Request,2*(num_sends+num_recvs));
    recvrequest = sendrequest+2*num_sends;

    for (i=0;i<num_recvs;i++) {
      hypre_MPI_Irecv (pointrange_nonlocal+2*i,2,HYPRE_MPI_INT,recv_procs[i],tag_pointrange,comm,&recvrequest[2*i]);
      hypre_MPI_Irecv (vertexrange_nonlocal+2*i,2,HYPRE_MPI_INT,recv_procs[i],tag_vertexrange,comm,&recvrequest[2*i+1]);
    }
    for (i=0;i<num_sends;i++) {
      int_buf_data[2*i] = pointrange_start;
      int_buf_data[2*i+1] = pointrange_end;
      int_buf_data2[2*i] = vertexrange_start;
      int_buf_data2[2*i+1] = vertexrange_end;
      hypre_MPI_Isend (int_buf_data+2*i,2,HYPRE_MPI_INT,send_procs[i],tag_pointrange,comm,&sendrequest[2*i]);
      hypre_MPI_Isend (int_buf_data2+2*i,2,HYPRE_MPI_INT,send_procs[i],tag_vertexrange,comm,&sendrequest[2*i+1]);
    }
    hypre_MPI_Waitall (2*(num_sends+num_recvs),sendrequest,hypre_MPI_STATUSES_IGNORE);
    hypre_TFree (int_buf_data);
    hypre_TFree (sendrequest);
  }
#else
  nlocal = vertexrange[mpirank+1] - vertexrange[mpirank];
  pointrange_start = pointrange[mpirank];
  pointrange_end   = pointrange[mpirank+1];
  vertexrange_start = vertexrange[mpirank];
  vertexrange_end   = vertexrange[mpirank+1];
  for (i=0;i<num_recvs;i++) {
    pointrange_nonlocal[2*i] = pointrange[recv_procs[i]];
    pointrange_nonlocal[2*i+1] = pointrange[recv_procs[i]+1];
    vertexrange_nonlocal[2*i] = vertexrange[recv_procs[i]];
    vertexrange_nonlocal[2*i+1] = vertexrange[recv_procs[i]+1];
  }  
#endif
  /* now we have the array recv_procs. However, it may contain too many entries as it is 
     inherited from A. We now have to determine the subset which contains only the
     strongly connected neighbors */
  if (num_cols_offd) {
    S_offd_j = hypre_CSRMatrixJ(S_offd);
  
    recv_procs_strong = hypre_CTAlloc (HYPRE_Int,num_recvs);
    memset (recv_procs_strong,0,num_recvs*sizeof(HYPRE_Int));
    /* don't forget to shorten the pointrange and vertexrange arrays accordingly */
    pointrange_strong = hypre_CTAlloc (HYPRE_Int,2*num_recvs);
    memset (pointrange_strong,0,2*num_recvs*sizeof(HYPRE_Int));
    vertexrange_strong = hypre_CTAlloc (HYPRE_Int,2*num_recvs);
    memset (vertexrange_strong,0,2*num_recvs*sizeof(HYPRE_Int));
    
    for (i=0;i<num_variables;i++)
      for (j=S_offd_i[i];j<S_offd_i[i+1];j++) {
	jj = col_map_offd[S_offd_j[j]];
	for (p=0;p<num_recvs;p++) /* S_offd_j is NOT sorted! */
	  if (jj >= pointrange_nonlocal[2*p] && jj < pointrange_nonlocal[2*p+1]) break;
#if 0
	hypre_printf ("Processor %d, remote point %d on processor %d\n",mpirank,jj,recv_procs[p]);
#endif
	recv_procs_strong [p]=1;
      }
    
    for (p=0,num_recvs_strong=0;p<num_recvs;p++) {
      if (recv_procs_strong[p]) {
	recv_procs_strong[num_recvs_strong]=recv_procs[p];
	pointrange_strong[2*num_recvs_strong] = pointrange_nonlocal[2*p];
	pointrange_strong[2*num_recvs_strong+1] = pointrange_nonlocal[2*p+1];
	vertexrange_strong[2*num_recvs_strong] = vertexrange_nonlocal[2*p];
	vertexrange_strong[2*num_recvs_strong+1] = vertexrange_nonlocal[2*p+1];
	num_recvs_strong++;
      }
    }
  }
  else num_recvs_strong=0;

  hypre_TFree (pointrange_nonlocal);
  hypre_TFree (vertexrange_nonlocal);

  rownz_diag = hypre_CTAlloc (HYPRE_Int,2*nlocal);
  rownz_offd = rownz_diag + nlocal;
  for (p=0,nz=0;p<num_recvs_strong;p++) {
    nz += vertexrange_strong[2*p+1]-vertexrange_strong[2*p];
  }
  for (m=0;m<nlocal;m++) {
    rownz_diag[m]=nlocal-1;
    rownz_offd[m]=nz;
  }
 
  
 
  HYPRE_IJMatrixCreate(comm, vertexrange_start, vertexrange_end-1, vertexrange_start, vertexrange_end-1, &ijmatrix);
  HYPRE_IJMatrixSetObjectType(ijmatrix, HYPRE_PARCSR);
  HYPRE_IJMatrixSetDiagOffdSizes (ijmatrix, rownz_diag, rownz_offd);
  HYPRE_IJMatrixInitialize(ijmatrix);
  hypre_TFree (rownz_diag);

  /* initialize graph */
  weight = -1;
  for (m=vertexrange_start;m<vertexrange_end;m++) {
    for (p=0;p<num_recvs_strong;p++) {
      for (n=vertexrange_strong[2*p];n<vertexrange_strong[2*p+1];n++) {
	ierr = HYPRE_IJMatrixAddToValues (ijmatrix,1,&one,&m,&n,&weight);
#if 0
	if (ierr) hypre_printf ("Processor %d: error %d while initializing graphs at (%d, %d)\n",mpirank,ierr,m,n);
#endif
      }
    }
  }
  
  /* weight graph */
  for (i=0;i<num_variables;i++) {

    for (j=S_offd_i[i];j<S_offd_i[i+1];j++) {
      jj = S_offd_j[j]; /* jj is not a global index!!! */
      /* determine processor */
      for (p=0;p<num_recvs_strong;p++) 
	if (col_map_offd[jj] >= pointrange_strong[2*p] && col_map_offd[jj] < pointrange_strong[2*p+1]) break;
      ip=recv_procs_strong[p];
      /* loop over all coarse grids constructed on this processor domain */
      for (m=vertexrange_start;m<vertexrange_end;m++) {
	/* loop over all coarse grids constructed on neighbor processor domain */
	for (n=vertexrange_strong[2*p];n<vertexrange_strong[2*p+1];n++) {
	  /* coarse grid counting inside gridpartition->local/gridpartition->nonlocal starts with one
	     while counting inside range starts with zero */
	  if (CF_marker[i]-1==m && CF_marker_offd[jj]-1==n)
	    /* C-C-coupling */
	    weight = -1;
	  else if ( (CF_marker[i]-1==m && (CF_marker_offd[jj]==0 || CF_marker_offd[jj]-1!=n) )
		   || ( (CF_marker[i]==0 || CF_marker[i]-1!=m) && CF_marker_offd[jj]-1==n ) )
	    /* C-F-coupling */
	    weight = 0;
	  else weight = -8; /* F-F-coupling */
	  ierr = HYPRE_IJMatrixAddToValues (ijmatrix,1,&one,&m,&n,&weight);
#if 0
	  if (ierr) hypre_printf ("Processor %d: error %d while adding %lf to entry (%d, %d)\n",mpirank,ierr,weight,m,n);
#endif
	}
      }
    }
  }

  /* assemble */
  HYPRE_IJMatrixAssemble (ijmatrix);
  /*if (num_recvs_strong) {*/
    hypre_TFree (recv_procs_strong); 
    hypre_TFree (pointrange_strong);
    hypre_TFree (vertexrange_strong);
  /*} */

  *ijG = ijmatrix;
  return (ierr);
}
예제 #23
0
HYPRE_Int AmgCGCPrepare (hypre_ParCSRMatrix *S,HYPRE_Int nlocal,HYPRE_Int *CF_marker,HYPRE_Int **CF_marker_offd,HYPRE_Int coarsen_type,HYPRE_Int **vrange)
/* assemble a graph representing the connections between the grids
 * ================================================================================================
 * S : the strength matrix
 * nlocal : the number of locally created coarse grids
 * CF_marker, CF_marker_offd : the coare/fine markers
 * coarsen_type : the coarsening type
 * vrange : the ranges of the vertices representing coarse grids
 * ================================================================================================*/
{
  HYPRE_Int ierr=0;
  HYPRE_Int mpisize,mpirank;
  HYPRE_Int num_sends;
  HYPRE_Int *vertexrange=NULL;
  HYPRE_Int vstart,vend;
  HYPRE_Int *int_buf_data;
  HYPRE_Int start;
  HYPRE_Int i,ii,j;
  HYPRE_Int num_variables = hypre_CSRMatrixNumRows (hypre_ParCSRMatrixDiag(S));
  HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (hypre_ParCSRMatrixOffd (S));

  MPI_Comm comm = hypre_ParCSRMatrixComm(S);
/*   hypre_MPI_Status status; */

  hypre_ParCSRCommPkg    *comm_pkg    = hypre_ParCSRMatrixCommPkg (S);
  hypre_ParCSRCommHandle *comm_handle;


  hypre_MPI_Comm_size (comm,&mpisize);
  hypre_MPI_Comm_rank (comm,&mpirank);

  if (!comm_pkg) {
    hypre_MatvecCommPkgCreate (S);
    comm_pkg = hypre_ParCSRMatrixCommPkg (S);
  }
  num_sends = hypre_ParCSRCommPkgNumSends (comm_pkg);

  if (coarsen_type % 2 == 0) nlocal++; /* even coarsen_type means allow_emptygrids */
#ifdef HYPRE_NO_GLOBAL_PARTITION
   {
      HYPRE_Int scan_recv;
      
      vertexrange = hypre_CTAlloc(HYPRE_Int,2);
      hypre_MPI_Scan(&nlocal, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
      /* first point in my range */ 
      vertexrange[0] = scan_recv - nlocal;
      /* first point in next proc's range */
      vertexrange[1] = scan_recv;
      vstart = vertexrange[0];
      vend   = vertexrange[1];
   }
#else
  vertexrange = hypre_CTAlloc (HYPRE_Int,mpisize+1);
 
  hypre_MPI_Allgather (&nlocal,1,HYPRE_MPI_INT,vertexrange+1,1,HYPRE_MPI_INT,comm);
  vertexrange[0]=0;
  for (i=2;i<=mpisize;i++) vertexrange[i]+=vertexrange[i-1];
  vstart = vertexrange[mpirank];
  vend   = vertexrange[mpirank+1];
#endif

  /* Note: vstart uses 0-based indexing, while CF_marker uses 1-based indexing */
  if (coarsen_type % 2 == 1) { /* see above */
    for (i=0;i<num_variables;i++)
      if (CF_marker[i]>0)
	CF_marker[i]+=vstart;
  }
  else {
/*      hypre_printf ("processor %d: empty grid allowed\n",mpirank);  */
    for (i=0;i<num_variables;i++) {
      if (CF_marker[i]>0)
	CF_marker[i]+=vstart+1; /* add one because vertexrange[mpirank]+1 denotes the empty grid.
				   Hence, vertexrange[mpirank]+2 is the first coarse grid denoted in
				   global indices, ... */
    }
  }

  /* exchange data */
  *CF_marker_offd = hypre_CTAlloc (HYPRE_Int,num_cols_offd);
  int_buf_data = hypre_CTAlloc (HYPRE_Int,hypre_ParCSRCommPkgSendMapStart (comm_pkg,num_sends));

  for (i=0,ii=0;i<num_sends;i++) {
    start = hypre_ParCSRCommPkgSendMapStart (comm_pkg,i);
    for (j=start;j<hypre_ParCSRCommPkgSendMapStart (comm_pkg,i+1);j++)
      int_buf_data [ii++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
  }

  if (mpisize>1) {
    comm_handle = hypre_ParCSRCommHandleCreate (11,comm_pkg,int_buf_data,*CF_marker_offd);
    hypre_ParCSRCommHandleDestroy (comm_handle);
  }
  hypre_TFree (int_buf_data);
  *vrange=vertexrange;
  return (ierr);
}
예제 #24
0
/**************************************************************
 *
 *      CGC Coarsening routine
 *
 **************************************************************/
HYPRE_Int
hypre_BoomerAMGCoarsenCGCb( hypre_ParCSRMatrix    *S,
                            hypre_ParCSRMatrix    *A,
                            HYPRE_Int                    measure_type,
                            HYPRE_Int                    coarsen_type,
			    HYPRE_Int                    cgc_its,
                            HYPRE_Int                    debug_flag,
                            HYPRE_Int                  **CF_marker_ptr)
{
   MPI_Comm         comm          = hypre_ParCSRMatrixComm(S);
   hypre_ParCSRCommPkg   *comm_pkg      = hypre_ParCSRMatrixCommPkg(S);
   hypre_ParCSRCommHandle *comm_handle;
   hypre_CSRMatrix *S_diag        = hypre_ParCSRMatrixDiag(S);
   hypre_CSRMatrix *S_offd        = hypre_ParCSRMatrixOffd(S);
   HYPRE_Int             *S_i           = hypre_CSRMatrixI(S_diag);
   HYPRE_Int             *S_j           = hypre_CSRMatrixJ(S_diag);
   HYPRE_Int             *S_offd_i      = hypre_CSRMatrixI(S_offd);
   HYPRE_Int             *S_offd_j;
   HYPRE_Int              num_variables = hypre_CSRMatrixNumRows(S_diag);
   HYPRE_Int              num_cols_offd = hypre_CSRMatrixNumCols(S_offd);
                  
   hypre_CSRMatrix *S_ext;
   HYPRE_Int             *S_ext_i;
   HYPRE_Int             *S_ext_j;
                 
   hypre_CSRMatrix *ST;
   HYPRE_Int             *ST_i;
   HYPRE_Int             *ST_j;
                 
   HYPRE_Int             *CF_marker;
   HYPRE_Int             *CF_marker_offd=NULL;
   HYPRE_Int              ci_tilde = -1;
   HYPRE_Int              ci_tilde_mark = -1;

   HYPRE_Int             *measure_array;
   HYPRE_Int             *measure_array_master;
   HYPRE_Int             *graph_array;
   HYPRE_Int 	           *int_buf_data=NULL;
   /*HYPRE_Int 	           *ci_array=NULL;*/

   HYPRE_Int              i, j, k, l, jS;
   HYPRE_Int		    ji, jj, index;
   HYPRE_Int		    set_empty = 1;
   HYPRE_Int		    C_i_nonempty = 0;
   HYPRE_Int		    num_nonzeros;
   HYPRE_Int		    num_procs, my_id;
   HYPRE_Int		    num_sends = 0;
   HYPRE_Int		    first_col, start;
   HYPRE_Int		    col_0, col_n;

   hypre_LinkList   LoL_head;
   hypre_LinkList   LoL_tail;

   HYPRE_Int             *lists, *where;
   HYPRE_Int              measure, new_meas;
   HYPRE_Int              num_left;
   HYPRE_Int              nabor, nabor_two;

   HYPRE_Int              ierr = 0;
   HYPRE_Int              use_commpkg_A = 0;
   HYPRE_Real	    wall_time;

   HYPRE_Int              measure_max; /* BM Aug 30, 2006: maximal measure, needed for CGC */

   if (coarsen_type < 0) coarsen_type = -coarsen_type;

   /*-------------------------------------------------------
    * Initialize the C/F marker, LoL_head, LoL_tail  arrays
    *-------------------------------------------------------*/

   LoL_head = NULL;
   LoL_tail = NULL;
   lists = hypre_CTAlloc(HYPRE_Int, num_variables);
   where = hypre_CTAlloc(HYPRE_Int, num_variables);

#if 0 /* debugging */
   char  filename[256];
   FILE *fp;
   HYPRE_Int   iter = 0;
#endif

   /*--------------------------------------------------------------
    * Compute a CSR strength matrix, S.
    *
    * For now, the "strength" of dependence/influence is defined in
    * the following way: i depends on j if
    *     aij > hypre_max (k != i) aik,    aii < 0
    * or
    *     aij < hypre_min (k != i) aik,    aii >= 0
    * Then S_ij = 1, else S_ij = 0.
    *
    * NOTE: the entries are negative initially, corresponding
    * to "unaccounted-for" dependence.
    *----------------------------------------------------------------*/

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   if (!comm_pkg)
   {
        use_commpkg_A = 1;
        comm_pkg = hypre_ParCSRMatrixCommPkg(A); 
   }

   if (!comm_pkg)
   {
        hypre_MatvecCommPkgCreate(A);
        comm_pkg = hypre_ParCSRMatrixCommPkg(A); 
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);

   if (num_cols_offd) S_offd_j = hypre_CSRMatrixJ(S_offd);

   jS = S_i[num_variables];

   ST = hypre_CSRMatrixCreate(num_variables, num_variables, jS);
   ST_i = hypre_CTAlloc(HYPRE_Int,num_variables+1);
   ST_j = hypre_CTAlloc(HYPRE_Int,jS);
   hypre_CSRMatrixI(ST) = ST_i;
   hypre_CSRMatrixJ(ST) = ST_j;

   /*----------------------------------------------------------
    * generate transpose of S, ST
    *----------------------------------------------------------*/

   for (i=0; i <= num_variables; i++)
      ST_i[i] = 0;
 
   for (i=0; i < jS; i++)
   {
	 ST_i[S_j[i]+1]++;
   }
   for (i=0; i < num_variables; i++)
   {
      ST_i[i+1] += ST_i[i];
   }
   for (i=0; i < num_variables; i++)
   {
      for (j=S_i[i]; j < S_i[i+1]; j++)
      {
	 index = S_j[j];
       	 ST_j[ST_i[index]] = i;
       	 ST_i[index]++;
      }
   }      
   for (i = num_variables; i > 0; i--)
   {
      ST_i[i] = ST_i[i-1];
   }
   ST_i[0] = 0;

   /*----------------------------------------------------------
    * Compute the measures
    *
    * The measures are given by the row sums of ST.
    * Hence, measure_array[i] is the number of influences
    * of variable i.
    * correct actual measures through adding influences from
    * neighbor processors
    *----------------------------------------------------------*/

   measure_array_master = hypre_CTAlloc(HYPRE_Int, num_variables);
   measure_array = hypre_CTAlloc(HYPRE_Int, num_variables);

   for (i = 0; i < num_variables; i++)
   {
      measure_array_master[i] = ST_i[i+1]-ST_i[i];
   }

   if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) 
		&& num_procs > 1)
   {
      if (use_commpkg_A)
         S_ext      = hypre_ParCSRMatrixExtractBExt(S,A,0);
      else
         S_ext      = hypre_ParCSRMatrixExtractBExt(S,S,0);
      S_ext_i    = hypre_CSRMatrixI(S_ext);
      S_ext_j    = hypre_CSRMatrixJ(S_ext);
      num_nonzeros = S_ext_i[num_cols_offd];
      first_col = hypre_ParCSRMatrixFirstColDiag(S);
      col_0 = first_col-1;
      col_n = col_0+num_variables;
      if (measure_type)
      {
	 for (i=0; i < num_nonzeros; i++)
         {
	    index = S_ext_j[i] - first_col;
	    if (index > -1 && index < num_variables)
		measure_array_master[index]++;
         } 
      } 
   }

   /*---------------------------------------------------
    * Loop until all points are either fine or coarse.
    *---------------------------------------------------*/

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

   /* first coarsening phase */

  /*************************************************************
   *
   *   Initialize the lists
   *
   *************************************************************/

   CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables);
   
   num_left = 0;
   for (j = 0; j < num_variables; j++)
   {
     if ((S_i[j+1]-S_i[j])== 0 &&
	 (S_offd_i[j+1]-S_offd_i[j]) == 0)
     {
       CF_marker[j] = SF_PT;
       measure_array_master[j] = 0;
     }
     else
     {
       CF_marker[j] = UNDECIDED; 
       /*        num_left++; */ /* BM May 19, 2006: see below*/
     }
   } 

   if (coarsen_type==22) {
     /* BM Sep 8, 2006: allow_emptygrids only if the following holds for all points j: 
        (a) the point has no strong connections at all, OR
        (b) the point has a strong connection across a boundary */
     for (j=0;j<num_variables;j++)
       if (S_i[j+1]>S_i[j] && S_offd_i[j+1] == S_offd_i[j]) {coarsen_type=21;break;}
   }

   for (l = 1; l <= cgc_its; l++)
   {
     LoL_head = NULL;
     LoL_tail = NULL;
     num_left = 0;  /* compute num_left before each RS coarsening loop */
     memcpy (measure_array,measure_array_master,num_variables*sizeof(HYPRE_Int));
     memset (lists,0,sizeof(HYPRE_Int)*num_variables);
     memset (where,0,sizeof(HYPRE_Int)*num_variables);

     for (j = 0; j < num_variables; j++) 
     {    
       measure = measure_array[j];
       if (CF_marker[j] != SF_PT)  
       {
	 if (measure > 0)
	 {
	   enter_on_lists(&LoL_head, &LoL_tail, measure, j, lists, where);
	   num_left++; /* compute num_left before each RS coarsening loop */
	 }
	 else if (CF_marker[j] == 0) /* increase weight of strongly coupled neighbors only 
					if j is not conained in a previously constructed coarse grid.
					Reason: these neighbors should start with the same initial weight
					in each CGC iteration.                    BM Aug 30, 2006 */
					
	 {
	   if (measure < 0) hypre_printf("negative measure!\n");
/* 	   CF_marker[j] = f_pnt; */
	   for (k = S_i[j]; k < S_i[j+1]; k++)
	   {
	     nabor = S_j[k];
/*  	     if (CF_marker[nabor] != SF_PT)  */
 	     if (CF_marker[nabor] == 0)  /* BM Aug 30, 2006: don't alter weights of points 
 					    contained in other candidate coarse grids */ 
	     {
	       if (nabor < j)
	       {
		 new_meas = measure_array[nabor];
		 if (new_meas > 0)
		   remove_point(&LoL_head, &LoL_tail, new_meas, 
				nabor, lists, where);
		 else num_left++; /* BM Aug 29, 2006 */
		 
		 new_meas = ++(measure_array[nabor]);
		 enter_on_lists(&LoL_head, &LoL_tail, new_meas,
				nabor, lists, where);
	       }
	       else
	       {
		 new_meas = ++(measure_array[nabor]);
	       }
	     }
	   }
	   /* 	   --num_left; */ /* BM May 19, 2006 */
         }
       }
     }

     /* BM Aug 30, 2006: first iteration: determine maximal weight */
     if (num_left && l==1) measure_max = measure_array[LoL_head->head]; 
     /* BM Aug 30, 2006: break CGC iteration if no suitable 
	starting point is available any more */
     if (!num_left || measure_array[LoL_head->head]<measure_max) {
       while (LoL_head) {
	 hypre_LinkList list_ptr = LoL_head;
	 LoL_head = LoL_head->next_elt;
	 dispose_elt (list_ptr);
       }
       break;
     }

   /****************************************************************
    *
    *  Main loop of Ruge-Stueben first coloring pass.
    *
    *  WHILE there are still points to classify DO:
    *        1) find first point, i,  on list with max_measure
    *           make i a C-point, remove it from the lists
    *        2) For each point, j,  in S_i^T,
    *           a) Set j to be an F-point
    *           b) For each point, k, in S_j
    *                  move k to the list in LoL with measure one
    *                  greater than it occupies (creating new LoL
    *                  entry if necessary)
    *        3) For each point, j,  in S_i,
    *                  move j to the list in LoL with measure one
    *                  smaller than it occupies (creating new LoL
    *                  entry if necessary)
    *
    ****************************************************************/

     while (num_left > 0)
     {
       index = LoL_head -> head;
/*         index = LoL_head -> tail;  */

/*        CF_marker[index] = C_PT; */
       CF_marker[index] = l;  /* BM Aug 18, 2006 */
       measure = measure_array[index];
       measure_array[index] = 0;
       measure_array_master[index] = 0; /* BM May 19: for CGC */
       --num_left;
      
       remove_point(&LoL_head, &LoL_tail, measure, index, lists, where);
  
       for (j = ST_i[index]; j < ST_i[index+1]; j++)
       {
         nabor = ST_j[j];
/*          if (CF_marker[nabor] == UNDECIDED) */
	 if (measure_array[nabor]>0) /* undecided point */
	 {
	   /* 	   CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */
	   measure = measure_array[nabor];
	   measure_array[nabor]=0;

	   remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where);
	   --num_left;
	   
	   for (k = S_i[nabor]; k < S_i[nabor+1]; k++)
           {
	     nabor_two = S_j[k];
/* 	     if (CF_marker[nabor_two] == UNDECIDED) */
	     if (measure_array[nabor_two]>0) /* undecided point */
             {
	       measure = measure_array[nabor_two];
	       remove_point(&LoL_head, &LoL_tail, measure, 
			    nabor_two, lists, where);
	       
	       new_meas = ++(measure_array[nabor_two]);
	       
	       enter_on_lists(&LoL_head, &LoL_tail, new_meas,
			      nabor_two, lists, where);
	     }
	   }
         }
       }
       for (j = S_i[index]; j < S_i[index+1]; j++)
       {
         nabor = S_j[j];
/*          if (CF_marker[nabor] == UNDECIDED) */
	 if (measure_array[nabor]>0) /* undecided point */
         {
	   measure = measure_array[nabor];
	   
	   remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where);
	   
	   measure_array[nabor] = --measure;
	   
	   if (measure > 0)
	     enter_on_lists(&LoL_head, &LoL_tail, measure, nabor, 
			    lists, where);
	   else
	   {
/* 	     CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */
	     --num_left;

	     for (k = S_i[nabor]; k < S_i[nabor+1]; k++)
             {
	       nabor_two = S_j[k];
/* 	       if (CF_marker[nabor_two] == UNDECIDED) */
	       if (measure_array[nabor_two]>0)
               {
		 new_meas = measure_array[nabor_two];
		 remove_point(&LoL_head, &LoL_tail, new_meas, 
			      nabor_two, lists, where);
		 
		 new_meas = ++(measure_array[nabor_two]);
                 
		 enter_on_lists(&LoL_head, &LoL_tail, new_meas,
				nabor_two, lists, where);
	       }
	     }
	   }
         }
       }
     }
     if (LoL_head) hypre_printf ("Linked list not empty! head: %d\n",LoL_head->head);
   }
   l--; /* BM Aug 15, 2006 */

   hypre_TFree(measure_array);
   hypre_TFree(measure_array_master);
   hypre_CSRMatrixDestroy(ST);

   if (debug_flag == 3)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d    Coarsen 1st pass = %f\n",
                     my_id, wall_time); 
   }

   hypre_TFree(lists);
   hypre_TFree(where);
   
     if (num_procs>1) {
       if (debug_flag == 3)  wall_time = time_getWallclockSeconds();
       hypre_BoomerAMGCoarsenCGC (S,l,coarsen_type,CF_marker);
       
       if (debug_flag == 3)  { 
	 wall_time = time_getWallclockSeconds() - wall_time; 
	 hypre_printf("Proc = %d    Coarsen CGC = %f\n", 
		my_id, wall_time);  
       } 
     }
     else {
       /* the first candiate coarse grid is the coarse grid */ 
       for (j=0;j<num_variables;j++) {
	 if (CF_marker[j]==1) CF_marker[j]=C_PT;
	 else CF_marker[j]=F_PT;
       }
     }

   /* BM May 19, 2006:
      Set all undecided points to be fine grid points. */
   for (j=0;j<num_variables;j++)
     if (!CF_marker[j]) CF_marker[j]=F_PT;

   /*---------------------------------------------------
    * Initialize the graph array
    *---------------------------------------------------*/

   graph_array = hypre_CTAlloc(HYPRE_Int, num_variables);

   for (i = 0; i < num_variables; i++)
   {
      graph_array[i] = -1;
   }

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

      for (i=0; i < num_variables; i++)
      {
	 if (ci_tilde_mark != i) ci_tilde = -1;
         if (CF_marker[i] == -1)
         {
   	    for (ji = S_i[i]; ji < S_i[i+1]; ji++)
   	    {
   	       j = S_j[ji];
   	       if (CF_marker[j] > 0)
   	          graph_array[j] = i;
    	    }
   	    for (ji = S_i[i]; ji < S_i[i+1]; ji++)
   	    {
   	       j = S_j[ji];
   	       if (CF_marker[j] == -1)
   	       {
   	          set_empty = 1;
   	          for (jj = S_i[j]; jj < S_i[j+1]; jj++)
   	          {
   		     index = S_j[jj];
   		     if (graph_array[index] == i)
   		     {
   		        set_empty = 0;
   		        break;
   		     }
   	          }
   	          if (set_empty)
   	          {
   		     if (C_i_nonempty)
   		     {
   		        CF_marker[i] = 1;
   		        if (ci_tilde > -1)
   		        {
   			   CF_marker[ci_tilde] = -1;
   		           ci_tilde = -1;
   		        }
   	    		C_i_nonempty = 0;
   		        break;
   		     }
   		     else
   		     {
   		        ci_tilde = j;
   		        ci_tilde_mark = i;
   		        CF_marker[j] = 1;
   		        C_i_nonempty = 1;
		        i--;
		        break;
		     }
	          }
	       }
	    }
	 }
      }

   if (debug_flag == 3 && coarsen_type != 2)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d    Coarsen 2nd pass = %f\n",
                       my_id, wall_time); 
   }

   /* third pass, check boundary fine points for coarse neighbors */

      /*------------------------------------------------
       * Exchange boundary data for CF_marker
       *------------------------------------------------*/

      if (debug_flag == 3) wall_time = time_getWallclockSeconds();
    
      CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
                                                   num_sends));
    
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
        start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
        for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
                int_buf_data[index++]
                 = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
    
      if (num_procs > 1)
      {
      comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
        CF_marker_offd);
    
      hypre_ParCSRCommHandleDestroy(comm_handle);
      }
      AmgCGCBoundaryFix (S,CF_marker,CF_marker_offd);
      if (debug_flag == 3)
      {
         wall_time = time_getWallclockSeconds() - wall_time;
         hypre_printf("Proc = %d    CGC boundary fix = %f\n",
                       my_id, wall_time); 
      }

   /*---------------------------------------------------
    * Clean up and return
    *---------------------------------------------------*/

   /*if (coarsen_type != 1)
   { */  
     if (CF_marker_offd) hypre_TFree(CF_marker_offd);  /* BM Aug 21, 2006 */
     if (int_buf_data) hypre_TFree(int_buf_data); /* BM Aug 21, 2006 */
     /*if (ci_array) hypre_TFree(ci_array);*/ /* BM Aug 21, 2006 */
   /*} */   
   hypre_TFree(graph_array);
   if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) 
		&& num_procs > 1)
   	hypre_CSRMatrixDestroy(S_ext); 
   
   *CF_marker_ptr   = CF_marker;
   
   return (ierr);
}
예제 #25
0
hypre_ParCSRMatrix * hypre_ParMatMinus_F(
   hypre_ParCSRMatrix * P, hypre_ParCSRMatrix * C, HYPRE_Int * CF_marker )
/* hypre_ParMatMinus_F subtracts selected rows of its second argument
   from selected rows of its first argument.  The marker array
   determines which rows are affected - those for which CF_marker<0.
   The result is returned as a new matrix.
*/
{
   /*
     If P=(Pik),C=(Cik), i in Fine+Coarse, k in Coarse, we want
        new Pik = Pik - Cik, for Fine i only, all k.
     This computation is purely local.
   */
   /* This is _not_ a general-purpose matrix subtraction function.
      This is written for an interpolation problem where it is known that C(i,k)
      exists whenever P(i,k) does (because C=A*P where A has nonzero diagonal elements).
   */

   hypre_ParCSRMatrix *Pnew;
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrix *Pnew_diag;
   hypre_CSRMatrix *Pnew_offd;

   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int             *P_col_map_offd = hypre_ParCSRMatrixColMapOffd( P );
   double          *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int             *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int             *C_diag_j = hypre_CSRMatrixJ(C_diag);
   double          *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int             *C_offd_i = hypre_CSRMatrixI(C_offd);
   HYPRE_Int             *C_offd_j = hypre_CSRMatrixJ(C_offd);
   HYPRE_Int             *C_col_map_offd = hypre_ParCSRMatrixColMapOffd( C );
   HYPRE_Int             *Pnew_diag_i;
   HYPRE_Int             *Pnew_diag_j;
   double          *Pnew_diag_data;
   HYPRE_Int             *Pnew_offd_i;
   HYPRE_Int             *Pnew_offd_j;
   double          *Pnew_offd_data;
   HYPRE_Int             *Pnew_j2m;
   HYPRE_Int             *Pnew_col_map_offd;

   HYPRE_Int	num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   /* HYPRE_Int	num_rows_offd_C = hypre_CSRMatrixNumRows(C_offd); */
   HYPRE_Int	num_cols_offd_C = hypre_CSRMatrixNumCols(C_offd);
   HYPRE_Int	num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);
   HYPRE_Int  num_cols_offd_Pnew, num_rows_offd_Pnew;
   
   HYPRE_Int              i1, jmin, jmax, jrange, jrangem1;
   HYPRE_Int              j, m, mc, mp, jc, jp, jP, jC, jg, jCg, jPg;
   double           dc, dp;

/*   Pnew = hypre_ParCSRMatrixCompleteClone( C );*/

   Pnew = hypre_ParCSRMatrixUnion( C, P );
;
   hypre_ParCSRMatrixZero_F( Pnew, CF_marker );  /* fine rows of Pnew set to 0 */
   hypre_ParCSRMatrixCopy_C( Pnew, C, CF_marker ); /* coarse rows of Pnew copied from C (or P) */
   /* ...Zero_F may not be needed depending on how Pnew is made */
   Pnew_diag = hypre_ParCSRMatrixDiag(Pnew);
   Pnew_offd = hypre_ParCSRMatrixOffd(Pnew);
   Pnew_diag_i = hypre_CSRMatrixI(Pnew_diag);
   Pnew_diag_j = hypre_CSRMatrixJ(Pnew_diag);
   Pnew_offd_i = hypre_CSRMatrixI(Pnew_offd);
   Pnew_offd_j = hypre_CSRMatrixJ(Pnew_offd);
   Pnew_diag_data = hypre_CSRMatrixData(Pnew_diag);
   Pnew_offd_data = hypre_CSRMatrixData(Pnew_offd);
   Pnew_col_map_offd = hypre_ParCSRMatrixColMapOffd( Pnew );
   num_rows_offd_Pnew = hypre_CSRMatrixNumRows(Pnew_offd);
   num_cols_offd_Pnew = hypre_CSRMatrixNumCols(Pnew_offd);


   /* Find the j-ranges, needed to allocate a "reverse lookup" array. */
   /* This is the max j - min j over P and Pnew (which here is a copy of C).
      Only the diag block is considered. */
   /* For scalability reasons (jrange can get big) this won't work for the offd block.
      Also, indexing is more complicated in the offd block (c.f. col_map_offd).
      It's not clear, though whether the "quadratic" algorithm I'm using for the offd
      block is really any slower than the more complicated "linear" algorithm here. */
   jrange = 0;
   jrangem1=-1;
   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_diag)>0 )  /* only Fine rows matter */
      {
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         jmax = Pnew_diag_j[ Pnew_diag_i[i1+1]-1 ];
         jrangem1 = jmax-jmin;
         jrange = hypre_max(jrange,jrangem1+1);
         /* If columns (of a given row) were in increasing order, the above would be sufficient.
            If not, the following would be necessary (and sufficient) */
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         jmax = Pnew_diag_j[ Pnew_diag_i[i1] ];
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         jrangem1 = jmax-jmin;
         jrange = hypre_max(jrange,jrangem1+1);
      }
   }


   /*-----------------------------------------------------------------------
    *  Loop over Pnew_diag rows.  Construct a temporary reverse array:
    *  If j is a column number, Pnew_j2m[j] is the array index for j, i.e.
    *  Pnew_diag_j[ Pnew_j2m[j] ] = j
    *-----------------------------------------------------------------------*/

   Pnew_j2m = hypre_CTAlloc( HYPRE_Int, jrange );

   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_diag)>0 )  /* Fine data only */
      {
         /* just needed for an assertion below... */
         for ( j=0; j<jrange; ++j ) Pnew_j2m[j] = -1;
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
            /* If columns (of a given row) were in increasing order, the above line would be sufficient.
               If not, the following loop would have to be added (or store the jmin computed above )*/
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
         }
         for ( m = Pnew_diag_i[i1]; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            hypre_assert( j-jmin>=0 );
            hypre_assert( j-jmin<jrange );
            Pnew_j2m[ j-jmin ] = m;
         }

         /*-----------------------------------------------------------------------
          *  Loop over C_diag data for the current row.
          *  Subtract each C data entry from the corresponding Pnew entry.
          *-----------------------------------------------------------------------*/

         for ( mc=C_diag_i[i1]; mc<C_diag_i[i1+1]; ++mc )
         {
            jc = C_diag_j[mc];
            dc = C_diag_data[mc];
            m = Pnew_j2m[jc-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] -= dc;
         }

         /*-----------------------------------------------------------------------
          *  Loop over P_diag data for the current row.
          *  Add each P data entry from the corresponding Pnew entry.
          *-----------------------------------------------------------------------*/

         for ( mp=P_diag_i[i1]; mp<P_diag_i[i1+1]; ++mp )
         {
            jp = P_diag_j[mp];
            dp = P_diag_data[mp];
            m = Pnew_j2m[jp-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] += dp;
         }
      }
   }

         /*-----------------------------------------------------------------------
          * Repeat for the offd block.
          *-----------------------------------------------------------------------*/

   for ( i1 = 0; i1 < num_rows_offd_Pnew; i1++ )
   {
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_offd)>0 )  /* Fine data only */
      {
         if ( num_cols_offd_Pnew )
         {
            /*  This is a simple quadratic algorithm.  If necessary I may try
               to implement the ideas used on the diag block later. */
            for ( m = Pnew_offd_i[i1]; m<Pnew_offd_i[i1+1]; ++m )
            {
               j = Pnew_offd_j[m];
               jg = Pnew_col_map_offd[j];
               Pnew_offd_data[m] = 0;
               if ( num_cols_offd_C )
                  for ( mc=C_offd_i[i1]; mc<C_offd_i[i1+1]; ++mc )
                  {
                     jC = C_offd_j[mc];
                     jCg = C_col_map_offd[jC];
                     if ( jCg==jg ) Pnew_offd_data[m] -= C_offd_data[mc];
                  }
               if ( num_cols_offd_P )
                  for ( mp=P_offd_i[i1]; mp<P_offd_i[i1+1]; ++mp )
                  {
                     jP = P_offd_j[mp];
                     jPg = P_col_map_offd[jP];
                     if ( jPg==jg ) Pnew_offd_data[m] += P_offd_data[mp];
                  }
            }
         }
      }
   }


   hypre_TFree(Pnew_j2m);

   return Pnew;
}
예제 #26
0
파일: par_stats.c 프로젝트: Kyushick/sight
int
hypre_BoomerAMGSetupStats( void               *amg_vdata,
                        hypre_ParCSRMatrix *A         )
{
   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(A);   

   hypre_ParAMGData *amg_data = (hypre_ParAMGData*)amg_vdata;

   /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/

   /* Data Structure variables */

   hypre_ParCSRMatrix **A_array;
   hypre_ParCSRMatrix **P_array;

   hypre_CSRMatrix *A_diag;
   double          *A_diag_data;
   int             *A_diag_i;

   hypre_CSRMatrix *A_offd;   
   double          *A_offd_data;
   int             *A_offd_i;

   hypre_CSRMatrix *P_diag;
   double          *P_diag_data;
   int             *P_diag_i;

   hypre_CSRMatrix *P_offd;   
   double          *P_offd_data;
   int             *P_offd_i;


   int	    numrows;

   HYPRE_BigInt	    *row_starts;

 
   int      num_levels; 
   int      coarsen_type;
   int      interp_type;
   int      measure_type;
   double   global_nonzeros;

   double  *send_buff;
   double  *gather_buff;
 
   /* Local variables */

   int       level;
   int       j;
   HYPRE_BigInt fine_size;
 
   int       min_entries;
   int       max_entries;

   int       num_procs,my_id, num_threads;


   double    min_rowsum;
   double    max_rowsum;
   double    sparse;


   int       i;
   

   HYPRE_BigInt coarse_size;
   int       entries;

   double    avg_entries;
   double    rowsum;

   double    min_weight;
   double    max_weight;

   int       global_min_e;
   int       global_max_e;
   double    global_min_rsum;
   double    global_max_rsum;
   double    global_min_wt;
   double    global_max_wt;

   double  *num_coeffs;
   double  *num_variables;
   double   total_variables; 
   double   operat_cmplxty;
   double   grid_cmplxty;

   /* amg solve params */
   int      max_iter;
   int      cycle_type;    
   int     *num_grid_sweeps;  
   int     *grid_relax_type;   
   int      relax_order;
   int    **grid_relax_points; 
   double  *relax_weight;
   double  *omega;
   double   tol;


   int one = 1;
   int minus_one = -1;
   int zero = 0;
   int smooth_type;
   int smooth_num_levels;
   int agg_num_levels;
   /*int seq_cg = 0;*/
   
   /*if (seq_data)
      seq_cg = 1;*/


   MPI_Comm_size(comm, &num_procs);   
   MPI_Comm_rank(comm,&my_id);
   num_threads = hypre_NumThreads();

   if (my_id == 0)
      printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads);
   A_array = hypre_ParAMGDataAArray(amg_data);
   P_array = hypre_ParAMGDataPArray(amg_data);
   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   coarsen_type = hypre_ParAMGDataCoarsenType(amg_data);
   interp_type = hypre_ParAMGDataInterpType(amg_data);
   measure_type = hypre_ParAMGDataMeasureType(amg_data);
   smooth_type = hypre_ParAMGDataSmoothType(amg_data);
   smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data);
   agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data);


   /*----------------------------------------------------------
    * Get the amg_data data
    *----------------------------------------------------------*/

   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   max_iter   = hypre_ParAMGDataMaxIter(amg_data);
   cycle_type = hypre_ParAMGDataCycleType(amg_data);    
   num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data);  
   grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); 
   relax_order = hypre_ParAMGDataRelaxOrder(amg_data); 
   omega = hypre_ParAMGDataOmega(amg_data); 
   tol = hypre_ParAMGDataTol(amg_data);

   /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/

   send_buff     = hypre_CTAlloc(double, 6);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   gather_buff = hypre_CTAlloc(double,6);    
#else
   gather_buff = hypre_CTAlloc(double,6*num_procs);    
#endif

   if (my_id==0)
   {
      printf("\nBoomerAMG SETUP PARAMETERS:\n\n");
      printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data));
      printf(" Num levels = %d\n\n",num_levels);
      printf(" Strength Threshold = %f\n", 
                         hypre_ParAMGDataStrongThreshold(amg_data));
      printf(" Interpolation Truncation Factor = %f\n", 
                         hypre_ParAMGDataTruncFactor(amg_data));
      printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n", 
                         hypre_ParAMGDataMaxRowSum(amg_data));

      if (coarsen_type == 0)
      {
	printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n");
      }
      else if (abs(coarsen_type) == 1) 
      {
	printf(" Coarsening Type = Ruge\n");
      }
      else if (abs(coarsen_type) == 2) 
      {
	printf(" Coarsening Type = Ruge2B\n");
      }
      else if (abs(coarsen_type) == 3) 
      {
	printf(" Coarsening Type = Ruge3\n");
      }
      else if (abs(coarsen_type) == 4) 
      {
	printf(" Coarsening Type = Ruge 3c \n");
      }
      else if (abs(coarsen_type) == 5) 
      {
	printf(" Coarsening Type = Ruge relax special points \n");
      }
      else if (abs(coarsen_type) == 6) 
      {
	printf(" Coarsening Type = Falgout-CLJP \n");
      }
      else if (abs(coarsen_type) == 8) 
      {
	printf(" Coarsening Type = PMIS \n");
      }
      else if (abs(coarsen_type) == 10) 
      {
	printf(" Coarsening Type = HMIS \n");
      }
      else if (abs(coarsen_type) == 11) 
      {
	printf(" Coarsening Type = Ruge 1st pass only \n");
      }
      else if (abs(coarsen_type) == 9) 
      {
	printf(" Coarsening Type = PMIS fixed random \n");
      }
      else if (abs(coarsen_type) == 7) 
      {
	printf(" Coarsening Type = CLJP, fixed random \n");
      }
      if (coarsen_type > 0) 
      {
	printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n");
      }
      

      if (coarsen_type)
      	printf(" measures are determined %s\n\n", 
                  (measure_type ? "globally" : "locally"));

      if (agg_num_levels)
	printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels);

#ifdef HYPRE_NO_GLOBAL_PARTITION
      printf( "\n No global partition option chosen.\n\n");
#endif

      if (interp_type == 0)
      {
	printf(" Interpolation = modified classical interpolation\n");
      }
      else if (interp_type == 1) 
      {
	printf(" Interpolation = LS interpolation \n");
      }
      else if (interp_type == 2) 
      {
	printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n");
      }
      else if (interp_type == 3) 
      {
	printf(" Interpolation = direct interpolation with separation of weights\n");
      }
      else if (interp_type == 4) 
      {
	printf(" Interpolation = multipass interpolation\n");
      }
      else if (interp_type == 5) 
      {
	printf(" Interpolation = multipass interpolation with separation of weights\n");
      }
      else if (interp_type == 6) 
      {
	printf(" Interpolation = extended+i interpolation\n");
      }
      else if (interp_type == 7) 
      {
	printf(" Interpolation = extended+i interpolation (only when needed)\n");
      }
      else if (interp_type == 8) 
      {
	printf(" Interpolation = standard interpolation\n");
      }
      else if (interp_type == 9) 
      {
	printf(" Interpolation = standard interpolation with separation of weights\n");
      }
      else if (interp_type == 12) 
      {
	printf(" FF interpolation \n");
      }
      else if (interp_type == 13) 
      {
	printf(" FF1 interpolation \n");
      }

      {
         printf( "\nOperator Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                  nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev        rows   entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("==================================\n");
#else      
      printf("            nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev   rows  entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("============================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   num_coeffs = hypre_CTAlloc(double,num_levels);

   num_variables = hypre_CTAlloc(double,num_levels);

   for (level = 0; level < num_levels; level++)
   { 

      {
         A_diag = hypre_ParCSRMatrixDiag(A_array[level]);
         A_diag_data = hypre_CSRMatrixData(A_diag);
         A_diag_i = hypre_CSRMatrixI(A_diag);
         
         A_offd = hypre_ParCSRMatrixOffd(A_array[level]);   
         A_offd_data = hypre_CSRMatrixData(A_offd);
         A_offd_i = hypre_CSRMatrixI(A_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]);
         global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]);
         num_coeffs[level] = global_nonzeros;
         num_variables[level] = (double) fine_size;
         
         sparse = global_nonzeros /((double) fine_size * (double) fine_size);

         min_entries = 0;
         max_entries = 0;
         min_rowsum = 0.0;
         max_rowsum = 0.0;
         
         if (hypre_CSRMatrixNumRows(A_diag))
         {
            min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]);
            for (j = A_diag_i[0]; j < A_diag_i[1]; j++)
               min_rowsum += A_diag_data[j];
            for (j = A_offd_i[0]; j < A_offd_i[1]; j++)
               min_rowsum += A_offd_data[j];
            
            max_rowsum = min_rowsum;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++)
            {
               entries = (A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++)
                  rowsum += A_diag_data[i];
               
               for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++)
                  rowsum += A_offd_data[i];
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         }
         avg_entries = global_nonzeros / ((double) fine_size);
      }
      
#ifdef HYPRE_NO_GLOBAL_PARTITION       

       numrows = (int)(row_starts[1]-row_starts[0]);
       if (!numrows) /* if we don't have any rows, then don't have this count toward
                         min row sum or min num entries */
       {
          min_entries = 1000000;
          min_rowsum =  1.0e7;
       }
       
       send_buff[0] = - (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = - min_rowsum;
       send_buff[3] = max_rowsum;

       MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm);
       
       if (my_id ==0)
       {
          global_min_e = - gather_buff[0];
          global_max_e = gather_buff[1];
          global_min_rsum = - gather_buff[2];
          global_max_rsum = gather_buff[3];
#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }
       
#else

       send_buff[0] = (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = min_rowsum;
       send_buff[3] = max_rowsum;
       
       MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm);

       if (my_id == 0)
       {
          global_min_e = 1000000;
          global_max_e = 0;
          global_min_rsum = 1.0e7;
          global_max_rsum = 0.0;
          for (j = 0; j < num_procs; j++)
          {
             numrows = row_starts[j+1]-row_starts[j];
             if (numrows)
             {
                global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]);
                global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]);
             }
             global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]);
             global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]);
          }

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }

#endif

        
   }

       
   if (my_id == 0)
   {
      {
         printf( "\n\nInterpolation Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                             entries/row    min     max");
      printf("         row sums\n");
      printf("lev        rows x cols          min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("======================================\n");
#else      
      printf("                 entries/row    min     max");
      printf("         row sums\n");
      printf("lev  rows cols    min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("==========================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/


   for (level = 0; level < num_levels-1; level++)
   {
    
      {
         P_diag = hypre_ParCSRMatrixDiag(P_array[level]);
         P_diag_data = hypre_CSRMatrixData(P_diag);
         P_diag_i = hypre_CSRMatrixI(P_diag);
         
         P_offd = hypre_ParCSRMatrixOffd(P_array[level]);   
         P_offd_data = hypre_CSRMatrixData(P_offd);
         P_offd_i = hypre_CSRMatrixI(P_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]);
         coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]);
         global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]);
         
         min_weight = 1.0;
         max_weight = 0.0;
         max_rowsum = 0.0;
         min_rowsum = 0.0;
         min_entries = 0;
         max_entries = 0;
         
         if (hypre_CSRMatrixNumRows(P_diag))
         {
            if (hypre_CSRMatrixNumCols(P_diag)) min_weight = P_diag_data[0];
            for (j = P_diag_i[0]; j < P_diag_i[1]; j++)
            {
               min_weight = hypre_min(min_weight, P_diag_data[j]);
               if (P_diag_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_diag_data[j]);
               min_rowsum += P_diag_data[j];
            }
            for (j = P_offd_i[0]; j < P_offd_i[1]; j++)
            {        
               min_weight = hypre_min(min_weight, P_offd_data[j]); 
               if (P_offd_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_offd_data[j]);     
               min_rowsum += P_offd_data[j];
            }
            
            max_rowsum = min_rowsum;
            
            min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]); 
            max_entries = 0;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++)
            {
               entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_diag_data[i]);
                  if (P_diag_data[i] != 1.0)
                     max_weight = hypre_max(max_weight, P_diag_data[i]);
                  rowsum += P_diag_data[i];
               }
               
               for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_offd_data[i]);
                  if (P_offd_data[i] != 1.0) 
                     max_weight = hypre_max(max_weight, P_offd_data[i]);
                  rowsum += P_offd_data[i];
               }
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         
         }
         avg_entries = ((double) global_nonzeros) / ((double) fine_size);
      }

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum =  1.0e7;
         min_weight = 1.0e7;
       }
       
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = - min_weight;
      send_buff[5] = max_weight;

      MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id == 0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = -gather_buff[2];
         global_max_rsum = gather_buff[3];
         global_min_wt = -gather_buff[4];
         global_max_wt = gather_buff[5];

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
          printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }


#else
      
      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = min_weight;
      send_buff[5] = max_weight;
      
      MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm);
      
      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         global_min_wt = 1.0e7;
         global_max_wt = 0.0;
         
         for (j = 0; j < num_procs; j++)
         {
            numrows = row_starts[j+1] - row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]);
               global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]);
            global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]);
         }
         
#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }

#endif

   }


   total_variables = 0;
   operat_cmplxty = 0;
   for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++)
   {
      operat_cmplxty +=  num_coeffs[j] / num_coeffs[0];
      total_variables += num_variables[j];
   }
   if (num_variables[0] != 0)
      grid_cmplxty = total_variables / num_variables[0];
 
   if (my_id == 0 )
   {
      printf("\n\n     Complexity:    grid = %f\n",grid_cmplxty);
      printf("                operator = %f\n",operat_cmplxty);
   }

   if (my_id == 0) printf("\n\n");

   if (my_id == 0)
   { 
      printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n");
      printf( "  Maximum number of cycles:         %d \n",max_iter);
      printf( "  Stopping Tolerance:               %e \n",tol); 
      printf( "  Cycle type (1 = V, 2 = W, etc.):  %d\n\n", cycle_type);
      printf( "  Relaxation Parameters:\n");
      printf( "   Visiting Grid:                     down   up  coarse\n");
      printf( "            Number of partial sweeps: %4d   %2d  %4d \n",
              num_grid_sweeps[1],
              num_grid_sweeps[2],num_grid_sweeps[3]);
      printf( "   Type 0=Jac, 3=hGS, 6=hSGS, 9=GE:   %4d   %2d  %4d \n",
              grid_relax_type[1],
              grid_relax_type[2],grid_relax_type[3]);
#if 1 /* TO DO: may not want this to print if CG in the coarse grid */
      printf( "   Point types, partial sweeps (1=C, -1=F):\n");
      if (grid_relax_points)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", grid_relax_points[1][j]);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", grid_relax_points[2][j]);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", grid_relax_points[3][j]);
         printf( "\n\n");
      }
      else if (relax_order == 1)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d  %2d", one, minus_one);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d  %2d", minus_one, one);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
      else 
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
#endif
      if (smooth_type == 6)
         for (j=0; j < smooth_num_levels; j++)
            printf( " Schwarz Relaxation Weight %f level %d\n",
			hypre_ParAMGDataSchwarzRlxWeight(amg_data),j);
      for (j=0; j < num_levels; j++)
         if (relax_weight[j] != 1)
	       printf( " Relaxation Weight %f level %d\n",relax_weight[j],j);
      for (j=0; j < num_levels; j++)
         if (omega[j] != 1)
               printf( " Outer relaxation weight %f level %d\n",omega[j],j);
   }


   /*if (seq_cg) 
   {
      hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0], 
                             operat_cmplxty, grid_cmplxty );
   }*/
   




   hypre_TFree(num_coeffs);
   hypre_TFree(num_variables);
   hypre_TFree(send_buff);
   hypre_TFree(gather_buff);
   
   return(0);
}  
예제 #27
0
파일: driver_matvec.c 프로젝트: LLNL/COGENT
HYPRE_Int main( HYPRE_Int   argc, char *argv[] )
{
   hypre_ParCSRMatrix      *par_matrix, *g_matrix, **submatrices;
   hypre_CSRMatrix         *A_diag, *A_offd;
   hypre_CSRBlockMatrix    *diag;
   hypre_CSRBlockMatrix    *offd;
   hypre_ParCSRBlockMatrix *par_blk_matrix, *par_blk_matrixT, *rap_matrix;
   hypre_Vector        *x_local;
   hypre_Vector        *y_local;
   hypre_ParVector     *x;
   hypre_ParVector     *y;
   HYPRE_Solver        gmres_solver, precon;
   HYPRE_Int                 *diag_i, *diag_j, *offd_i, *offd_j;
   HYPRE_Int                 *diag_i2, *diag_j2, *offd_i2, *offd_j2;
   double              *diag_d, *diag_d2, *offd_d, *offd_d2;
   HYPRE_Int		       mypid, local_size, nprocs;
   HYPRE_Int		       global_num_rows, global_num_cols, num_cols_offd;
   HYPRE_Int		       num_nonzeros_diag, num_nonzeros_offd, *colMap;
   HYPRE_Int 		       ii, jj, kk, row, col, nnz, *indices, *colMap2;
   double 	       *data, ddata, *y_data;
   HYPRE_Int 		       *row_starts, *col_starts, *rstarts, *cstarts;
   HYPRE_Int 		       *row_starts2, *col_starts2;
   HYPRE_Int                 block_size=2, bnnz=4, *index_set;
   FILE                *fp;

   /* --------------------------------------------- */
   /* Initialize MPI                                */
   /* --------------------------------------------- */

   hypre_MPI_Init(&argc, &argv);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mypid);
   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs);

   /* build and fetch matrix */
   MyBuildParLaplacian9pt((HYPRE_ParCSRMatrix *) &par_matrix);
   global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix);
   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix);
   row_starts = hypre_ParCSRMatrixRowStarts(par_matrix);
   col_starts = hypre_ParCSRMatrixColStarts(par_matrix);
   A_diag = hypre_ParCSRMatrixDiag(par_matrix);
   A_offd = hypre_ParCSRMatrixOffd(par_matrix);
   num_cols_offd     = hypre_CSRMatrixNumCols(A_offd);
   num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(A_diag);
   num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(A_offd);

   /* --------------------------------------------- */
   /* build vector and apply matvec                 */
   /* --------------------------------------------- */

   x = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_cols,col_starts);
   hypre_ParVectorSetPartitioningOwner(x,0);
   hypre_ParVectorInitialize(x);
   x_local = hypre_ParVectorLocalVector(x);
   data    = hypre_VectorData(x_local);
   local_size = col_starts[mypid+1] - col_starts[mypid];
   for (ii = 0; ii < local_size; ii++) data[ii] = 1.0;
   y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_rows,row_starts);
   hypre_ParVectorSetPartitioningOwner(y,0);
   hypre_ParVectorInitialize(y);
   hypre_ParCSRMatrixMatvec (1.0, par_matrix, x, 0.0, y);
   ddata = hypre_ParVectorInnerProd(y, y);
   if (mypid == 0) hypre_printf("y inner product = %e\n", ddata);
   hypre_ParVectorDestroy(x);
   hypre_ParVectorDestroy(y);

   /* --------------------------------------------- */
   /* build block matrix                            */
   /* --------------------------------------------- */

   rstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1);
   for (ii = 0; ii <= nprocs; ii++) rstarts[ii] = row_starts[ii];
   cstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1);
   for (ii = 0; ii <= nprocs; ii++) cstarts[ii] = col_starts[ii];

   par_blk_matrix = hypre_ParCSRBlockMatrixCreate(hypre_MPI_COMM_WORLD,block_size,
                          global_num_rows, global_num_cols, rstarts,
                          cstarts, num_cols_offd, num_nonzeros_diag,
                          num_nonzeros_offd);
   colMap  = hypre_ParCSRMatrixColMapOffd(par_matrix);
   if (num_cols_offd > 0) colMap2 = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
   else                   colMap2 = NULL;
   for (ii = 0; ii < num_cols_offd; ii++) colMap2[ii] = colMap[ii];
   hypre_ParCSRBlockMatrixColMapOffd(par_blk_matrix) = colMap2;
   diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix));
   diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix));
   diag_d = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix));
   diag = hypre_ParCSRBlockMatrixDiag(par_blk_matrix);
   diag_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1);
   diag_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag);
   diag_d2 = hypre_CTAlloc(double, num_nonzeros_diag*bnnz);
   for (ii = 0; ii <= local_size; ii++) diag_i2[ii] = diag_i[ii];
   for (ii = 0; ii < num_nonzeros_diag; ii++) diag_j2[ii] = diag_j[ii];
   hypre_CSRBlockMatrixI(diag) = diag_i2;
   hypre_CSRBlockMatrixJ(diag) = diag_j2;
   for (ii = 0; ii < num_nonzeros_diag; ii++)
   {
      for (jj = 0; jj < block_size; jj++)
         for (kk = 0; kk < block_size; kk++)
         {
            if (jj <= kk)
               diag_d2[ii*bnnz+jj*block_size+kk] = diag_d[ii];
            else
               diag_d2[ii*bnnz+jj*block_size+kk] = 0.0;
         }
   }
   hypre_CSRBlockMatrixData(diag) = diag_d2;

   offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix));
   offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix));
   offd_d = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix));
   offd   = hypre_ParCSRBlockMatrixOffd(par_blk_matrix);
   offd_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1);
   for (ii = 0; ii <= local_size; ii++) offd_i2[ii] = offd_i[ii];
   hypre_CSRBlockMatrixI(offd) = offd_i2;
   if (num_cols_offd)
   {
      offd_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd);
      for (ii = 0; ii < num_nonzeros_offd; ii++) offd_j2[ii] = offd_j[ii];
      hypre_CSRBlockMatrixJ(offd) = offd_j2;
      offd_d2 = hypre_CTAlloc(double, num_nonzeros_offd*bnnz);
      for (ii = 0; ii < num_nonzeros_offd; ii++)
      {
         for (jj = 0; jj < block_size; jj++)
            for (kk = 0; kk < block_size; kk++)
            {
               if (jj <= kk)
                  offd_d2[ii*bnnz+jj*block_size+kk] = offd_d[ii];
               else
                  offd_d2[ii*bnnz+jj*block_size+kk] = 0.0;
            }
      }
      hypre_CSRBlockMatrixData(offd) = offd_d2;
   }
   else
   {
예제 #28
0
HYPRE_Int
hypre_ParCSRMatrixMatvecT( HYPRE_Complex       alpha,
                           hypre_ParCSRMatrix *A,
                           hypre_ParVector    *x,
                           HYPRE_Complex       beta,
                           hypre_ParVector    *y )
{
   hypre_ParCSRCommHandle **comm_handle;
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix     *diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix     *offd = hypre_ParCSRMatrixOffd(A);
   hypre_Vector        *x_local = hypre_ParVectorLocalVector(x);
   hypre_Vector        *y_local = hypre_ParVectorLocalVector(y);
   hypre_Vector        *y_tmp;
   HYPRE_Int            vecstride = hypre_VectorVectorStride( y_local );
   HYPRE_Int            idxstride = hypre_VectorIndexStride( y_local );
   HYPRE_Complex       *y_tmp_data, **y_buf_data;
   HYPRE_Complex       *y_local_data = hypre_VectorData(y_local);

   HYPRE_Int         num_rows  = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int         num_cols  = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int         num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int         x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int         y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int         num_vectors = hypre_VectorNumVectors(y_local);

   HYPRE_Int         i, j, jv, index, start, num_sends;

   HYPRE_Int         ierr  = 0;

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  MatvecT returns ierr = 1 if
    *  length of X doesn't equal the number of rows of A,
    *  ierr = 2 if the length of Y doesn't equal the number of 
    *  columns of A, and ierr = 3 if both are true.
    *
    *  Because temporary vectors are often used in MatvecT, none of 
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/
 
   if (num_rows != x_size)
      ierr = 1;

   if (num_cols != y_size)
      ierr = 2;

   if (num_rows != x_size && num_cols != y_size)
      ierr = 3;
   /*-----------------------------------------------------------------------
    *-----------------------------------------------------------------------*/

   comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors);

   if ( num_vectors==1 )
   {
      y_tmp = hypre_SeqVectorCreate(num_cols_offd);
   }
   else
   {
      y_tmp = hypre_SeqMultiVectorCreate(num_cols_offd,num_vectors);
   }
   hypre_SeqVectorInitialize(y_tmp);

   /*---------------------------------------------------------------------
    * If there exists no CommPkg for A, a CommPkg is generated using
    * equally load balanced partitionings
    *--------------------------------------------------------------------*/
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A); 
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   y_buf_data = hypre_CTAlloc( HYPRE_Complex*, num_vectors );
   for ( jv=0; jv<num_vectors; ++jv )
      y_buf_data[jv] = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart
                                     (comm_pkg, num_sends));
   y_tmp_data = hypre_VectorData(y_tmp);
   y_local_data = hypre_VectorData(y_local);

   hypre_assert( idxstride==1 ); /* only 'column' storage of multivectors
                                  * implemented so far */

   if (num_cols_offd) hypre_CSRMatrixMatvecT(alpha, offd, x_local, 0.0, y_tmp);

   for ( jv=0; jv<num_vectors; ++jv )
   {
      /* this is where we assume multivectors are 'column' storage */
      comm_handle[jv] = hypre_ParCSRCommHandleCreate
         ( 2, comm_pkg, &(y_tmp_data[jv*num_cols_offd]), y_buf_data[jv] );
   }

   hypre_CSRMatrixMatvecT(alpha, diag, x_local, beta, y_local);

   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_ParCSRCommHandleDestroy(comm_handle[jv]);
      comm_handle[jv] = NULL;
   }
   hypre_TFree(comm_handle);

   if ( num_vectors==1 )
   {
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            y_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]
               += y_buf_data[0][index++];
      }
   }
   else
      for ( jv=0; jv<num_vectors; ++jv )
      {
         index = 0;
         for (i = 0; i < num_sends; i++)
         {
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
               y_local_data[ jv*vecstride +
                             idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j) ]
                  += y_buf_data[jv][index++];
         }
      }
        
   hypre_SeqVectorDestroy(y_tmp);
   y_tmp = NULL;
   for ( jv=0; jv<num_vectors; ++jv ) hypre_TFree(y_buf_data[jv]);
   hypre_TFree(y_buf_data);

   return ierr;
}
예제 #29
0
HYPRE_Int
hypre_BoomerAMGCreateScalarCFS(hypre_ParCSRMatrix    *SN,
                       HYPRE_Int                   *CFN_marker,
                       HYPRE_Int                   *col_offd_SN_to_AN,
                       HYPRE_Int                    num_functions,
                       HYPRE_Int                    nodal,
                       HYPRE_Int                    data,
                       HYPRE_Int                  **dof_func_ptr,
                       HYPRE_Int                  **CF_marker_ptr,
                       HYPRE_Int                  **col_offd_S_to_A_ptr,
                       hypre_ParCSRMatrix   **S_ptr)
{
   MPI_Comm	       comm = hypre_ParCSRMatrixComm(SN);
   hypre_ParCSRMatrix *S;
   hypre_CSRMatrix    *S_diag;
   HYPRE_Int		      *S_diag_i;
   HYPRE_Int		      *S_diag_j;
   double	      *S_diag_data;
   hypre_CSRMatrix    *S_offd;
   HYPRE_Int		      *S_offd_i;
   HYPRE_Int		      *S_offd_j;
   double	      *S_offd_data;
   HYPRE_Int		      *row_starts_S;
   HYPRE_Int		      *col_starts_S;
   HYPRE_Int		      *row_starts_SN = hypre_ParCSRMatrixRowStarts(SN);
   HYPRE_Int		      *col_starts_SN = hypre_ParCSRMatrixColStarts(SN);
   hypre_CSRMatrix    *SN_diag = hypre_ParCSRMatrixDiag(SN);
   HYPRE_Int		      *SN_diag_i = hypre_CSRMatrixI(SN_diag);
   HYPRE_Int		      *SN_diag_j = hypre_CSRMatrixJ(SN_diag);
   double	      *SN_diag_data;
   hypre_CSRMatrix    *SN_offd = hypre_ParCSRMatrixOffd(SN);
   HYPRE_Int		      *SN_offd_i = hypre_CSRMatrixI(SN_offd);
   HYPRE_Int		      *SN_offd_j = hypre_CSRMatrixJ(SN_offd);
   double	      *SN_offd_data;
   HYPRE_Int		      *CF_marker;
   HYPRE_Int		      *col_map_offd_SN = hypre_ParCSRMatrixColMapOffd(SN);
   HYPRE_Int		      *col_map_offd_S;
   HYPRE_Int		      *dof_func;
   HYPRE_Int		       num_nodes = hypre_CSRMatrixNumRows(SN_diag);
   HYPRE_Int		       num_variables;
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(SN);
   HYPRE_Int		       num_sends;
   HYPRE_Int		       num_recvs;
   HYPRE_Int		      *send_procs;
   HYPRE_Int		      *send_map_starts;
   HYPRE_Int		      *send_map_elmts;
   HYPRE_Int		      *recv_procs;
   HYPRE_Int		      *recv_vec_starts;
   hypre_ParCSRCommPkg *comm_pkg_S;
   HYPRE_Int		      *send_procs_S;
   HYPRE_Int		      *send_map_starts_S;
   HYPRE_Int		      *send_map_elmts_S;
   HYPRE_Int		      *recv_procs_S;
   HYPRE_Int		      *recv_vec_starts_S;
   HYPRE_Int		      *col_offd_S_to_A = NULL;
   
   HYPRE_Int		       num_coarse_nodes;
   HYPRE_Int		       i,j,k,k1,jj,cnt;
   HYPRE_Int		       row, start, end;
   HYPRE_Int		       num_procs;
   HYPRE_Int		       num_cols_offd_SN = hypre_CSRMatrixNumCols(SN_offd);
   HYPRE_Int		       num_cols_offd_S;
   HYPRE_Int		       SN_num_nonzeros_diag;
   HYPRE_Int		       SN_num_nonzeros_offd;
   HYPRE_Int		       S_num_nonzeros_diag;
   HYPRE_Int		       S_num_nonzeros_offd;
   HYPRE_Int		       global_num_vars;
   HYPRE_Int		       global_num_cols;
   HYPRE_Int		       global_num_nodes;
   HYPRE_Int		       ierr = 0;
 
   hypre_MPI_Comm_size(comm, &num_procs);
 
   num_variables = num_functions*num_nodes;
   CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables);

   if (nodal < 0)
   {
      cnt = 0;
      num_coarse_nodes = 0;
      for (i=0; i < num_nodes; i++)
      {
	 if (CFN_marker[i] == 1) num_coarse_nodes++;
         for (j=0; j < num_functions; j++)
	    CF_marker[cnt++] = CFN_marker[i];
      }

      dof_func = hypre_CTAlloc(HYPRE_Int,num_coarse_nodes*num_functions);
      cnt = 0;
      for (i=0; i < num_nodes; i++)
      {
	 if (CFN_marker[i] == 1)
	 {
	    for (k=0; k < num_functions; k++)
	       dof_func[cnt++] = k;
	 }
      }
      *dof_func_ptr = dof_func;
   }
   else
   {
      cnt = 0;
      for (i=0; i < num_nodes; i++)
         for (j=0; j < num_functions; j++)
	    CF_marker[cnt++] = CFN_marker[i];
   }

   *CF_marker_ptr = CF_marker;


#ifdef HYPRE_NO_GLOBAL_PARTITION
   row_starts_S = hypre_CTAlloc(HYPRE_Int,2);
   for (i=0; i < 2; i++)
      row_starts_S[i] = num_functions*row_starts_SN[i];

   if (row_starts_SN != col_starts_SN)
   {
      col_starts_S = hypre_CTAlloc(HYPRE_Int,2);
      for (i=0; i < 2; i++)
         col_starts_S[i] = num_functions*col_starts_SN[i];
   }
   else
   {
      col_starts_S = row_starts_S;
   }
#else
   row_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1);
   for (i=0; i < num_procs+1; i++)
      row_starts_S[i] = num_functions*row_starts_SN[i];

   if (row_starts_SN != col_starts_SN)
   {
      col_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1);
      for (i=0; i < num_procs+1; i++)
         col_starts_S[i] = num_functions*col_starts_SN[i];
   }
   else
   {
      col_starts_S = row_starts_S;
   }
#endif


   SN_num_nonzeros_diag = SN_diag_i[num_nodes];
   SN_num_nonzeros_offd = SN_offd_i[num_nodes];
 
   global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(SN);
   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(SN)*num_functions;
 
   global_num_vars = global_num_nodes*num_functions;
   S_num_nonzeros_diag = num_functions*SN_num_nonzeros_diag;
   S_num_nonzeros_offd = num_functions*SN_num_nonzeros_offd;
   num_cols_offd_S = num_functions*num_cols_offd_SN;
   S = hypre_ParCSRMatrixCreate(comm, global_num_vars, global_num_cols,
		row_starts_S, col_starts_S, num_cols_offd_S,
		S_num_nonzeros_diag, S_num_nonzeros_offd);

   S_diag = hypre_ParCSRMatrixDiag(S);
   S_offd = hypre_ParCSRMatrixOffd(S);
   S_diag_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_offd_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_diag_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_diag);
   hypre_CSRMatrixI(S_diag) = S_diag_i;
   hypre_CSRMatrixJ(S_diag) = S_diag_j;
   if (data) 
   {
      SN_diag_data = hypre_CSRMatrixData(SN_diag);
      S_diag_data = hypre_CTAlloc(double, S_num_nonzeros_diag);
      hypre_CSRMatrixData(S_diag) = S_diag_data;
      if (num_cols_offd_S)
      {
         SN_offd_data = hypre_CSRMatrixData(SN_offd);
         S_offd_data = hypre_CTAlloc(double, S_num_nonzeros_offd);
         hypre_CSRMatrixData(S_offd) = S_offd_data;
      }

   }
   hypre_CSRMatrixI(S_offd) = S_offd_i;

   if (comm_pkg)
   {
      comm_pkg_S = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
      hypre_ParCSRCommPkgComm(comm_pkg_S) = comm;
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      hypre_ParCSRCommPkgNumSends(comm_pkg_S) = num_sends;
      num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_S) = num_recvs;
      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      send_procs_S = NULL;
      send_map_elmts_S = NULL;
      if (num_sends) 
      {
         send_procs_S = hypre_CTAlloc(HYPRE_Int,num_sends);
         send_map_elmts_S = hypre_CTAlloc(HYPRE_Int,
		num_functions*send_map_starts[num_sends]);
      }
      send_map_starts_S = hypre_CTAlloc(HYPRE_Int,num_sends+1);
      recv_vec_starts_S = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
      recv_procs_S = NULL;
      if (num_recvs) recv_procs_S = hypre_CTAlloc(HYPRE_Int,num_recvs);
      send_map_starts_S[0] = 0;
      for (i=0; i < num_sends; i++)
      {
         send_procs_S[i] = send_procs[i];
         send_map_starts_S[i+1] = num_functions*send_map_starts[i+1];
      }
      recv_vec_starts_S[0] = 0;
      for (i=0; i < num_recvs; i++)
      {
         recv_procs_S[i] = recv_procs[i];
         recv_vec_starts_S[i+1] = num_functions*recv_vec_starts[i+1];
      }

      cnt = 0;
      for (i=0; i < send_map_starts[num_sends]; i++)
      {
	 k1 = num_functions*send_map_elmts[i];
         for (j=0; j < num_functions; j++)
         {
	    send_map_elmts_S[cnt++] = k1+j;
         }
      }
      hypre_ParCSRCommPkgSendProcs(comm_pkg_S) = send_procs_S;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_S) = send_map_starts_S;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_S) = send_map_elmts_S;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_S) = recv_procs_S;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_S) = recv_vec_starts_S;
      hypre_ParCSRMatrixCommPkg(S) = comm_pkg_S;
   }

   if (num_cols_offd_S)
   {
      S_offd_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_offd);
      hypre_CSRMatrixJ(S_offd) = S_offd_j;

      col_map_offd_S = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);

      cnt = 0;
      for (i=0; i < num_cols_offd_SN; i++)
      {
         k1 = col_map_offd_SN[i]*num_functions;
         for (j=0; j < num_functions; j++)
            col_map_offd_S[cnt++] = k1+j;
      }
      hypre_ParCSRMatrixColMapOffd(S) = col_map_offd_S;
   }
   
   if (col_offd_SN_to_AN)
   {
      col_offd_S_to_A = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);

      cnt = 0;
      for (i=0; i < num_cols_offd_SN; i++)
      {
         k1 = col_offd_SN_to_AN[i]*num_functions;
         for (j=0; j < num_functions; j++)
            col_offd_S_to_A[cnt++] = k1+j;
      }
      *col_offd_S_to_A_ptr = col_offd_S_to_A;
   }
   


   cnt = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      row++;
      start = cnt;
      for (j=SN_diag_i[i]; j < SN_diag_i[i+1]; j++)
      {
         jj = SN_diag_j[j];
	 if (data) S_diag_data[cnt] = SN_diag_data[j];
	 S_diag_j[cnt++] = jj*num_functions;
      }
      end = cnt;
      S_diag_i[row] = cnt;
      for (k1=1; k1 < num_functions; k1++)
      {
         row++;
	 for (k=start; k < end; k++)
	 {
	    if (data) S_diag_data[cnt] = S_diag_data[k];
	    S_diag_j[cnt++] = S_diag_j[k]+k1;
	 }
         S_diag_i[row] = cnt;
      }
   } 

   cnt = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      row++;
      start = cnt;
      for (j=SN_offd_i[i]; j < SN_offd_i[i+1]; j++)
      {
         jj = SN_offd_j[j];
	 if (data) S_offd_data[cnt] = SN_offd_data[j];
	 S_offd_j[cnt++] = jj*num_functions;
      }
      end = cnt;
      S_offd_i[row] = cnt;
      for (k1=1; k1 < num_functions; k1++)
      {
         row++;
	 for (k=start; k < end; k++)
	 {
	    if (data) S_offd_data[cnt] = S_offd_data[k];
	    S_offd_j[cnt++] = S_offd_j[k]+k1;
	 }
         S_offd_i[row] = cnt;
      }
   } 

   *S_ptr = S; 

   return (ierr);
}
예제 #30
0
hypre_ParCSRBlockMatrix *
hypre_ParCSRBlockMatrixConvertFromParCSRMatrix(hypre_ParCSRMatrix *matrix,
                                               HYPRE_Int matrix_C_block_size )
{
   MPI_Comm comm = hypre_ParCSRMatrixComm(matrix);
   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(matrix);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix);
   HYPRE_Int global_num_rows = hypre_ParCSRMatrixGlobalNumRows(matrix);
   HYPRE_Int global_num_cols = hypre_ParCSRMatrixGlobalNumCols(matrix);
   HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(matrix);
   HYPRE_Int *col_starts = hypre_ParCSRMatrixColStarts(matrix);
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(matrix);
   HYPRE_Int *map_to_node=NULL, *counter=NULL, *col_in_j_map=NULL;
   HYPRE_Int *matrix_C_col_map_offd = NULL;
   
   HYPRE_Int matrix_C_num_cols_offd;
   HYPRE_Int matrix_C_num_nonzeros_offd;
   HYPRE_Int num_rows, num_nodes;
   
   HYPRE_Int *offd_i        = hypre_CSRMatrixI(offd);
   HYPRE_Int *offd_j        = hypre_CSRMatrixJ(offd);
   HYPRE_Complex * offd_data = hypre_CSRMatrixData(offd);

   hypre_ParCSRBlockMatrix *matrix_C;
   HYPRE_Int *matrix_C_row_starts;
   HYPRE_Int *matrix_C_col_starts;
   hypre_CSRBlockMatrix *matrix_C_diag;
   hypre_CSRBlockMatrix *matrix_C_offd;

   HYPRE_Int *matrix_C_offd_i=NULL, *matrix_C_offd_j = NULL;
   HYPRE_Complex *matrix_C_offd_data = NULL;
   
   HYPRE_Int num_procs, i, j, k, k_map, count, index, start_index, pos, row;
   
   hypre_MPI_Comm_size(comm,&num_procs);

#ifdef HYPRE_NO_GLOBAL_PARTITION
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, 2);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, 2);
   for(i = 0; i < 2; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]/matrix_C_block_size;
      matrix_C_col_starts[i] = col_starts[i]/matrix_C_block_size;
   }
#else
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   for(i = 0; i < num_procs + 1; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]/matrix_C_block_size;
      matrix_C_col_starts[i] = col_starts[i]/matrix_C_block_size;
   }
#endif

   /************* create the diagonal part ************/
   matrix_C_diag = hypre_CSRBlockMatrixConvertFromCSRMatrix(diag, 
                                                            matrix_C_block_size);
 
   /*******  the offd part *******************/

   /* can't use the same function for the offd part - because this isn't square
      and the offd j entries aren't global numbering (have to consider the offd
      map) - need to look at col_map_offd first */

   /* figure out the new number of offd columns (num rows is same as diag) */
   num_cols_offd = hypre_CSRMatrixNumCols(offd);
   num_rows = hypre_CSRMatrixNumRows(diag);
   num_nodes =  num_rows/matrix_C_block_size;
   
   matrix_C_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes + 1);

   matrix_C_num_cols_offd = 0;
   matrix_C_offd_i[0] = 0;
   matrix_C_num_nonzeros_offd = 0;

   if (num_cols_offd)
   {
      map_to_node = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      matrix_C_num_cols_offd = 1;
      map_to_node[0] = col_map_offd[0]/matrix_C_block_size;
      for (i=1; i < num_cols_offd; i++)
      {
         map_to_node[i] = col_map_offd[i]/matrix_C_block_size;
         if (map_to_node[i] > map_to_node[i-1]) matrix_C_num_cols_offd++;
      }

      matrix_C_col_map_offd = hypre_CTAlloc(HYPRE_Int, matrix_C_num_cols_offd);
      col_in_j_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

      matrix_C_col_map_offd[0] = map_to_node[0];
      col_in_j_map[0] = 0;
      count = 1;
      j = 1;
       
      /* fill in the col_map_off_d - these are global numbers.  Then we need to
         map these to j entries (these have local numbers) */
      for (i=1; i < num_cols_offd; i++)
      {
         if (map_to_node[i] > map_to_node[i-1])
         {
            matrix_C_col_map_offd[count++] = map_to_node[i];
         }
         col_in_j_map[j++] = count - 1;
      }
      
      /* now figure the nonzeros */   
      matrix_C_num_nonzeros_offd = 0;
      counter = hypre_CTAlloc(HYPRE_Int, matrix_C_num_cols_offd);
      for (i=0; i < matrix_C_num_cols_offd; i++)
         counter[i] = -1;

      for (i=0; i < num_nodes; i++) /* for each block row */
      {
         matrix_C_offd_i[i] = matrix_C_num_nonzeros_offd;
         for (j=0; j < matrix_C_block_size; j++)
         {
            row = i*matrix_C_block_size+j;
            for (k=offd_i[row]; k < offd_i[row+1]; k++) /* go through single row */
            {
               k_map = col_in_j_map[offd_j[k]]; /*nodal col - see if this has
                                                  been in this block row (i)
                                                  already*/
               
               if (counter[k_map] < i) /* not yet counted for this nodal row */
               {
                  counter[k_map] = i;
                  matrix_C_num_nonzeros_offd++;
               }
            }
         }
      }
      /* fill in final i entry */
      matrix_C_offd_i[num_nodes] = matrix_C_num_nonzeros_offd;
   }

   /* create offd matrix */
   matrix_C_offd = hypre_CSRBlockMatrixCreate(matrix_C_block_size, num_nodes,
                                              matrix_C_num_cols_offd,   
                                              matrix_C_num_nonzeros_offd);

   /* assign i */
   hypre_CSRBlockMatrixI(matrix_C_offd) = matrix_C_offd_i;
   

   /* create (and allocate j and data) */
   if (matrix_C_num_nonzeros_offd)
   {
      matrix_C_offd_j = hypre_CTAlloc(HYPRE_Int, matrix_C_num_nonzeros_offd);   
      matrix_C_offd_data =
         hypre_CTAlloc(HYPRE_Complex,
                       matrix_C_num_nonzeros_offd*matrix_C_block_size*
                       matrix_C_block_size);  
      hypre_CSRBlockMatrixJ(matrix_C_offd) = matrix_C_offd_j;
      hypre_CSRMatrixData(matrix_C_offd) = matrix_C_offd_data;
   
      for (i=0; i < matrix_C_num_cols_offd; i++)
         counter[i] = -1;

      index = 0; /*keep track of entry in matrix_C_offd_j*/
      start_index = 0;
      for (i=0; i < num_nodes; i++) /* for each block row */
      {
         
         for (j=0; j < matrix_C_block_size; j++) /* for each row in block */
         {
            row = i*matrix_C_block_size+j;
            for (k=offd_i[row]; k < offd_i[row+1]; k++) /* go through single row's cols */
            {
               k_map = col_in_j_map[offd_j[k]]; /*nodal col  for off_d */
               if (counter[k_map] < start_index) /* not yet counted for this nodal row */
               {
                  counter[k_map] = index;
                  matrix_C_offd_j[index] = k_map;
                  /*copy the data: which position (corresponds to j array) + which row + which col */                
                  pos =  (index * matrix_C_block_size * matrix_C_block_size) + (j * matrix_C_block_size) + 
                     col_map_offd[offd_j[k]]%matrix_C_block_size;
                  matrix_C_offd_data[pos] = offd_data[k];
                  index ++;
               }
               else  /* this col has already been listed for this row */
               {

                  /*copy the data: which position (corresponds to j array) + which row + which col */                
                  pos =  (counter[k_map]* matrix_C_block_size * matrix_C_block_size) + (j * matrix_C_block_size) + 
                     col_map_offd[offd_j[k]]%matrix_C_block_size;
                  matrix_C_offd_data[pos] = offd_data[k];
               }
            }
         }
         start_index = index; /* first index for current nodal row */
      }
   }

   /* *********create the new matrix  *************/
   matrix_C = hypre_ParCSRBlockMatrixCreate(comm, matrix_C_block_size, 
                                            global_num_rows/matrix_C_block_size, 
                                            global_num_cols/matrix_C_block_size, matrix_C_row_starts, 
                                            matrix_C_col_starts, matrix_C_num_cols_offd, 
                                            hypre_CSRBlockMatrixNumNonzeros(matrix_C_diag), 
                                            matrix_C_num_nonzeros_offd);

   /* use the diag and off diag matrices we have already created */
   hypre_CSRBlockMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
   hypre_ParCSRBlockMatrixDiag(matrix_C) = matrix_C_diag;
   hypre_CSRBlockMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
   hypre_ParCSRBlockMatrixOffd(matrix_C) = matrix_C_offd;

   hypre_ParCSRMatrixColMapOffd(matrix_C) = matrix_C_col_map_offd;

   /* *********don't bother to copy the comm_pkg *************/
   
   hypre_ParCSRBlockMatrixCommPkg(matrix_C) = NULL;
 
   /* CLEAN UP !!!! */
   hypre_TFree(map_to_node);
   hypre_TFree(col_in_j_map);
   hypre_TFree(counter);

   return matrix_C;
}