Esempio n. 1
0
int
hypre_BoomerAMGIndepSetInit( hypre_ParCSRMatrix *S,
                          double             *measure_array ,
                          int   seq_rand)
{
   hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
   MPI_Comm         comm = hypre_ParCSRMatrixComm(S);
   int              S_num_nodes = hypre_CSRMatrixNumRows(S_diag);
   HYPRE_BigInt     i;
   int              j, my_id;
   int              ierr = 0;

   MPI_Comm_rank(comm,&my_id);
   j = 2747+my_id;
   if (seq_rand) j = 2747;
   hypre_SeedRand(j);
   if (seq_rand)
   {
      for (i = 0; i < hypre_ParCSRMatrixFirstRowIndex(S); i++)
	hypre_Rand(); 
   }
   for (j = 0; j < S_num_nodes; j++)
   {
      measure_array[j] += hypre_Rand();
   }

   return (ierr);
}
/*
  Assume that we are given a fine and coarse topology and the
  coarse degrees of freedom (DOFs) have been chosen. Assume also,
  that the global interpolation matrix dof_DOF has a prescribed
  nonzero pattern. Then, the fine degrees of freedom can be split
  into 4 groups (here "i" stands for "interior"):

  NODEidof - dofs which are interpolated only from the DOF
             in one coarse vertex
  EDGEidof - dofs which are interpolated only from the DOFs
             in one coarse edge
  FACEidof - dofs which are interpolated only from the DOFs
             in one coarse face
  ELEMidof - dofs which are interpolated only from the DOFs
             in one coarse element

  The interpolation operator dof_DOF can be build in 4 steps, by
  consequently filling-in the rows corresponding to the above groups.
  The code below uses harmonic extension to extend the interpolation
  from one group to the next.
*/
HYPRE_Int hypre_ND1AMGeInterpolation (hypre_ParCSRMatrix       * Aee,
                                hypre_ParCSRMatrix       * ELEM_idof,
                                hypre_ParCSRMatrix       * FACE_idof,
                                hypre_ParCSRMatrix       * EDGE_idof,
                                hypre_ParCSRMatrix       * ELEM_FACE,
                                hypre_ParCSRMatrix       * ELEM_EDGE,
                                HYPRE_Int                  num_OffProcRows,
                                hypre_MaxwellOffProcRow ** OffProcRows,
                                hypre_IJMatrix           * IJ_dof_DOF)
{
   HYPRE_Int ierr = 0;

   HYPRE_Int  i, j, k;
   HYPRE_Int *offproc_rnums, *swap;

   hypre_ParCSRMatrix * dof_DOF = hypre_IJMatrixObject(IJ_dof_DOF);
   hypre_ParCSRMatrix * ELEM_DOF = ELEM_EDGE;
   hypre_ParCSRMatrix * ELEM_FACEidof;
   hypre_ParCSRMatrix * ELEM_EDGEidof;
   hypre_CSRMatrix *A, *P;
   HYPRE_Int numELEM = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(ELEM_EDGE));

   HYPRE_Int getrow_ierr;
   HYPRE_Int three_dimensional_problem;

   MPI_Comm comm= hypre_ParCSRMatrixComm(Aee);
   HYPRE_Int      myproc;

   hypre_MPI_Comm_rank(comm, &myproc);

#if 0
   hypre_IJMatrix * ij_dof_DOF = hypre_CTAlloc(hypre_IJMatrix, 1);
   /* Convert dof_DOF to IJ matrix, so we can use AddToValues */
   hypre_IJMatrixComm(ij_dof_DOF) = hypre_ParCSRMatrixComm(dof_DOF);
   hypre_IJMatrixRowPartitioning(ij_dof_DOF) =
      hypre_ParCSRMatrixRowStarts(dof_DOF);
   hypre_IJMatrixColPartitioning(ij_dof_DOF) =
      hypre_ParCSRMatrixColStarts(dof_DOF);
   hypre_IJMatrixObject(ij_dof_DOF) = dof_DOF;
   hypre_IJMatrixAssembleFlag(ij_dof_DOF) = 1;
#endif

  /* sort the offproc rows to get quicker comparison for later */
   if (num_OffProcRows)
   {
      offproc_rnums= hypre_TAlloc(HYPRE_Int, num_OffProcRows);
      swap         = hypre_TAlloc(HYPRE_Int, num_OffProcRows);
      for (i= 0; i< num_OffProcRows; i++)
      {
         offproc_rnums[i]=(OffProcRows[i] -> row);
         swap[i]         = i;
      }
   }

   if (num_OffProcRows > 1)
   {
      hypre_qsort2i(offproc_rnums, swap, 0, num_OffProcRows-1);
   }

   if (FACE_idof == EDGE_idof)
      three_dimensional_problem = 0;
   else
      three_dimensional_problem = 1;

   /* ELEM_FACEidof = ELEM_FACE x FACE_idof */
   if (three_dimensional_problem)
      ELEM_FACEidof = hypre_ParMatmul(ELEM_FACE, FACE_idof);

   /* ELEM_EDGEidof = ELEM_EDGE x EDGE_idof */
   ELEM_EDGEidof = hypre_ParMatmul(ELEM_EDGE, EDGE_idof);

   /* Loop over local coarse elements */
   k = hypre_ParCSRMatrixFirstRowIndex(ELEM_EDGE);
   for (i = 0; i < numELEM; i++, k++)
   {
      HYPRE_Int size1, size2;
      HYPRE_Int *col_ind0, *col_ind1, *col_ind2;

      HYPRE_Int num_DOF, *DOF0, *DOF;
      HYPRE_Int num_idof, *idof0, *idof;
      HYPRE_Int num_bdof, *bdof;

      double *boolean_data;

      /* Determine the coarse DOFs */
      hypre_ParCSRMatrixGetRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data);
      DOF= hypre_TAlloc(HYPRE_Int, num_DOF);
      for (j= 0; j< num_DOF; j++)
      {
         DOF[j]= DOF0[j];
      }
      hypre_ParCSRMatrixRestoreRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data);

      qsort0(DOF,0,num_DOF-1);

      /* Find the fine dofs interior for the current coarse element */
      hypre_ParCSRMatrixGetRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data);
      idof= hypre_TAlloc(HYPRE_Int, num_idof);
      for (j= 0; j< num_idof; j++)
      {
         idof[j]= idof0[j];
      }
      hypre_ParCSRMatrixRestoreRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data);

      /* Sort the interior dofs according to their global number */
      qsort0(idof,0,num_idof-1);

      /* Find the fine dofs on the boundary of the current coarse element */
      if (three_dimensional_problem)
      {
         hypre_ParCSRMatrixGetRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data);
         col_ind1= hypre_TAlloc(HYPRE_Int, size1);
         for (j= 0; j< size1; j++)
         {
            col_ind1[j]= col_ind0[j];
         }
         hypre_ParCSRMatrixRestoreRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data);
      }
      else
         size1 = 0;

      hypre_ParCSRMatrixGetRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data);
      col_ind2= hypre_TAlloc(HYPRE_Int, size2);
      for (j= 0; j< size2; j++)
      {
         col_ind2[j]= col_ind0[j];
      }
      hypre_ParCSRMatrixRestoreRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data);

      /* Merge and sort the boundary dofs according to their global number */
      num_bdof = size1 + size2;
      bdof = hypre_CTAlloc(HYPRE_Int, num_bdof);
      if (three_dimensional_problem)
         memcpy(bdof, col_ind1, size1*sizeof(HYPRE_Int));
      memcpy(bdof+size1, col_ind2, size2*sizeof(HYPRE_Int));

      qsort0(bdof,0,num_bdof-1);

      /* A = extract_rows(Aee, idof) */
      A = hypre_CSRMatrixCreate (num_idof, num_idof + num_bdof,
                                 num_idof * (num_idof + num_bdof));
      hypre_CSRMatrixInitialize(A);
      {
         HYPRE_Int *I = hypre_CSRMatrixI(A);
         HYPRE_Int *J = hypre_CSRMatrixJ(A);
         double *data = hypre_CSRMatrixData(A);

         HYPRE_Int *tmp_J;
         double *tmp_data;

         I[0] = 0;
         for (j = 0; j < num_idof; j++)
         {
            getrow_ierr= hypre_ParCSRMatrixGetRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data);
            if (getrow_ierr <0)
               hypre_printf("getrow Aee off proc[%d] = \n",myproc);
            memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
            memcpy(data, tmp_data, I[j+1]*sizeof(double));
            J+= I[j+1];
            data+= I[j+1];
            hypre_ParCSRMatrixRestoreRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data);
            I[j+1] += I[j];
         }
      }

      /* P = extract_rows(dof_DOF, idof+bdof) */
      P = hypre_CSRMatrixCreate (num_idof + num_bdof, num_DOF,
                                 (num_idof + num_bdof) * num_DOF);
      hypre_CSRMatrixInitialize(P);
      {
         HYPRE_Int *I = hypre_CSRMatrixI(P);
         HYPRE_Int *J = hypre_CSRMatrixJ(P);
         double *data = hypre_CSRMatrixData(P);
         HYPRE_Int     m;

         HYPRE_Int *tmp_J;
         double *tmp_data;
     
         I[0] = 0;
         for (j = 0; j < num_idof; j++)
         {
            getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data);
            if (getrow_ierr >= 0)
            {
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data);
               I[j+1] += I[j];
            }
            else    /* row offproc */
            {
               hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data);
              /* search for OffProcRows */
               m= 0;
               while (m < num_OffProcRows)
               {
                  if (offproc_rnums[m] == idof[j])
                  { 
                     break;
                  }
                  else
                  {
                     m++;
                  }
               }
               I[j+1]= (OffProcRows[swap[m]] -> ncols);
               tmp_J = (OffProcRows[swap[m]] -> cols);
               tmp_data= (OffProcRows[swap[m]] -> data);
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               I[j+1] += I[j];
            }

         }
         for ( ; j < num_idof + num_bdof; j++)
         {
            getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data);
            if (getrow_ierr >= 0)
            {
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data);
               I[j+1] += I[j];
            }
            else    /* row offproc */
            {
               hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data);
              /* search for OffProcRows */
               m= 0;
               while (m < num_OffProcRows)
               {
                  if (offproc_rnums[m] == bdof[j-num_idof])
                  {
                     break;
                  }
                  else
                  {
                     m++;
                  }
               }
               if (m>= num_OffProcRows)hypre_printf("here the mistake\n");
               I[j+1]= (OffProcRows[swap[m]] -> ncols);
               tmp_J = (OffProcRows[swap[m]] -> cols);
               tmp_data= (OffProcRows[swap[m]] -> data);
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               I[j+1] += I[j];
            }
         }
      }

      /* Pi = Aii^{-1} Aib Pb */
      hypre_HarmonicExtension (A, P, num_DOF, DOF,
                               num_idof, idof, num_bdof, bdof);

      /* Insert Pi in dof_DOF */
      {
         HYPRE_Int * ncols = hypre_CTAlloc(HYPRE_Int, num_idof);

         for (j = 0; j < num_idof; j++)
            ncols[j] = num_DOF;

         hypre_IJMatrixAddToValuesParCSR (IJ_dof_DOF,
                                          num_idof, ncols, idof,
                                          hypre_CSRMatrixJ(P),
                                          hypre_CSRMatrixData(P));

         hypre_TFree(ncols);
      }

      hypre_TFree(DOF);
      hypre_TFree(idof);
      if (three_dimensional_problem)
      {
         hypre_TFree(col_ind1);
      }
      hypre_TFree(col_ind2);
      hypre_TFree(bdof);

      hypre_CSRMatrixDestroy(A);
      hypre_CSRMatrixDestroy(P);
   }

#if 0
   hypre_TFree(ij_dof_DOF);
#endif

   if (three_dimensional_problem)
      hypre_ParCSRMatrixDestroy(ELEM_FACEidof);
   hypre_ParCSRMatrixDestroy(ELEM_EDGEidof);

   if (num_OffProcRows)
   {
      hypre_TFree(offproc_rnums);
      hypre_TFree(swap);
   }

   return ierr;
}
Esempio n. 3
0
HYPRE_Int hypre_seqAMGSetup( hypre_ParAMGData *amg_data,
                      HYPRE_Int p_level,
                      HYPRE_Int coarse_threshold)


{

   /* Par Data Structure variables */
   hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data);

   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(Par_A_array[0]); 
   MPI_Comm 	      new_comm, seq_comm;

   hypre_ParCSRMatrix   *A_seq = NULL;
   hypre_CSRMatrix  *A_seq_diag;
   hypre_CSRMatrix  *A_seq_offd;
   hypre_ParVector   *F_seq = NULL;
   hypre_ParVector   *U_seq = NULL;

   hypre_ParCSRMatrix *A;

   HYPRE_Int               **dof_func_array;   
   HYPRE_Int                num_procs, my_id;

   HYPRE_Int                not_finished_coarsening;
   HYPRE_Int                level;

   HYPRE_Solver  coarse_solver;

   /* misc */
   dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data);

   /*MPI Stuff */
   hypre_MPI_Comm_size(comm, &num_procs);   
   hypre_MPI_Comm_rank(comm,&my_id);
  
   /*initial */
   level = p_level;
   
   not_finished_coarsening = 1;
  
   /* convert A at this level to sequential */
   A = Par_A_array[level];

   {
      double *A_seq_data = NULL;
      HYPRE_Int *A_seq_i = NULL;
      HYPRE_Int *A_seq_offd_i = NULL;
      HYPRE_Int *A_seq_j = NULL;

      double *A_tmp_data = NULL;
      HYPRE_Int *A_tmp_i = NULL;
      HYPRE_Int *A_tmp_j = NULL;

      HYPRE_Int *info, *displs, *displs2;
      HYPRE_Int i, j, size, num_nonzeros, total_nnz, cnt;
  
      hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
      hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
      HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
      HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
      HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
      HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
      HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
      double *A_diag_data = hypre_CSRMatrixData(A_diag);
      double *A_offd_data = hypre_CSRMatrixData(A_offd);
      HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
      HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);

      hypre_MPI_Group orig_group, new_group; 
      HYPRE_Int *ranks, new_num_procs, *row_starts;

      info = hypre_CTAlloc(HYPRE_Int, num_procs);

      hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

      ranks = hypre_CTAlloc(HYPRE_Int, num_procs);

      new_num_procs = 0;
      for (i=0; i < num_procs; i++)
         if (info[i]) 
         {
            ranks[new_num_procs] = i;
            info[new_num_procs++] = info[i];
         }

      MPI_Comm_group(comm, &orig_group);
      hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group);
      MPI_Comm_create(comm, new_group, &new_comm);
      hypre_MPI_Group_free(&new_group);
      hypre_MPI_Group_free(&orig_group);

      if (num_rows)
      {
         /* alloc space in seq data structure only for participating procs*/
         HYPRE_BoomerAMGCreate(&coarse_solver);
         HYPRE_BoomerAMGSetMaxRowSum(coarse_solver,
		hypre_ParAMGDataMaxRowSum(amg_data)); 
         HYPRE_BoomerAMGSetStrongThreshold(coarse_solver,
		hypre_ParAMGDataStrongThreshold(amg_data)); 
         HYPRE_BoomerAMGSetCoarsenType(coarse_solver,
		hypre_ParAMGDataCoarsenType(amg_data)); 
         HYPRE_BoomerAMGSetInterpType(coarse_solver,
		hypre_ParAMGDataInterpType(amg_data)); 
         HYPRE_BoomerAMGSetTruncFactor(coarse_solver, 
		hypre_ParAMGDataTruncFactor(amg_data)); 
         HYPRE_BoomerAMGSetPMaxElmts(coarse_solver, 
		hypre_ParAMGDataPMaxElmts(amg_data)); 
	 if (hypre_ParAMGDataUserRelaxType(amg_data) > -1) 
            HYPRE_BoomerAMGSetRelaxType(coarse_solver, 
		hypre_ParAMGDataUserRelaxType(amg_data)); 
         HYPRE_BoomerAMGSetRelaxOrder(coarse_solver, 
		hypre_ParAMGDataRelaxOrder(amg_data)); 
         HYPRE_BoomerAMGSetRelaxWt(coarse_solver, 
		hypre_ParAMGDataUserRelaxWeight(amg_data)); 
	 if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1) 
            HYPRE_BoomerAMGSetNumSweeps(coarse_solver, 
		hypre_ParAMGDataUserNumSweeps(amg_data)); 
         HYPRE_BoomerAMGSetNumFunctions(coarse_solver, 
		hypre_ParAMGDataNumFunctions(amg_data)); 
         HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1); 
         HYPRE_BoomerAMGSetTol(coarse_solver, 0); 

         /* Create CSR Matrix, will be Diag part of new matrix */
         A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows+1);

         A_tmp_i[0] = 0;
         for (i=1; i < num_rows+1; i++)
            A_tmp_i[i] = A_diag_i[i]-A_diag_i[i-1]+A_offd_i[i]-A_offd_i[i-1];

         num_nonzeros = A_offd_i[num_rows]+A_diag_i[num_rows];

         A_tmp_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
         A_tmp_data = hypre_CTAlloc(double, num_nonzeros);

         cnt = 0;
         for (i=0; i < num_rows; i++)
         {
            for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++)
	    {
	       A_tmp_j[cnt] = A_diag_j[j]+first_row_index;
	       A_tmp_data[cnt++] = A_diag_data[j];
	    }
            for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++)
	    {
	       A_tmp_j[cnt] = col_map_offd[A_offd_j[j]];
	       A_tmp_data[cnt++] = A_offd_data[j];
	    }
         }

         displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
         displs[0] = 0;
         for (i=1; i < new_num_procs+1; i++)
            displs[i] = displs[i-1]+info[i-1];
         size = displs[new_num_procs];
  
         A_seq_i = hypre_CTAlloc(HYPRE_Int, size+1);
         A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1);

         hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, 
			displs, HYPRE_MPI_INT, new_comm );

         displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);

         A_seq_i[0] = 0;
         displs2[0] = 0;
         for (j=1; j < displs[1]; j++)
            A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
         for (i=1; i < new_num_procs; i++)
         {
            for (j=displs[i]; j < displs[i+1]; j++)
            {
               A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
            }
         }
         A_seq_i[size] = A_seq_i[size]+A_seq_i[size-1];
         displs2[new_num_procs] = A_seq_i[size];
         for (i=1; i < new_num_procs+1; i++)
         {
            displs2[i] = A_seq_i[displs[i]];
            info[i-1] = displs2[i] - displs2[i-1];
         }

         total_nnz = displs2[new_num_procs];
         A_seq_j = hypre_CTAlloc(HYPRE_Int, total_nnz);
         A_seq_data = hypre_CTAlloc(double, total_nnz);

         hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT,
                       A_seq_j, info, displs2,
                       HYPRE_MPI_INT, new_comm );

         hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, hypre_MPI_DOUBLE,
                       A_seq_data, info, displs2,
                       hypre_MPI_DOUBLE, new_comm );

         hypre_TFree(displs);
         hypre_TFree(displs2);
         hypre_TFree(A_tmp_i);
         hypre_TFree(A_tmp_j);
         hypre_TFree(A_tmp_data);
   
         row_starts = hypre_CTAlloc(HYPRE_Int,2);
         row_starts[0] = 0; 
         row_starts[1] = size;
 
         /* Create 1 proc communicator */
         seq_comm = hypre_MPI_COMM_SELF;

         A_seq = hypre_ParCSRMatrixCreate(seq_comm,size,size,
					  row_starts, row_starts,
						0,total_nnz,0); 

         A_seq_diag = hypre_ParCSRMatrixDiag(A_seq);
         A_seq_offd = hypre_ParCSRMatrixOffd(A_seq);

         hypre_CSRMatrixData(A_seq_diag) = A_seq_data;
         hypre_CSRMatrixI(A_seq_diag) = A_seq_i;
         hypre_CSRMatrixJ(A_seq_diag) = A_seq_j;
         hypre_CSRMatrixI(A_seq_offd) = A_seq_offd_i;

         F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         hypre_ParVectorOwnsPartitioning(F_seq) = 0;
         hypre_ParVectorOwnsPartitioning(U_seq) = 0;
         hypre_ParVectorInitialize(F_seq);
         hypre_ParVectorInitialize(U_seq);

         hypre_BoomerAMGSetup(coarse_solver,A_seq,F_seq,U_seq);

         hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver;
         hypre_ParAMGDataACoarse(amg_data) = A_seq;
         hypre_ParAMGDataFCoarse(amg_data) = F_seq;
         hypre_ParAMGDataUCoarse(amg_data) = U_seq;
         hypre_ParAMGDataNewComm(amg_data) = new_comm;
      }
      hypre_TFree(info);
      hypre_TFree(ranks);
   }
 
   return 0;
   
   
}
Esempio n. 4
0
HYPRE_Int hypre_BlockTridiagSetup(void *data, hypre_ParCSRMatrix *A,
                            hypre_ParVector *b, hypre_ParVector *x) 
{
   HYPRE_Int                i, j, *index_set1, print_level, nsweeps, relax_type;
   HYPRE_Int                nrows, nrows1, nrows2, start1, start2, *index_set2;
   HYPRE_Int                count, ierr;
   double             threshold;
   hypre_ParCSRMatrix **submatrices;
   HYPRE_Solver       precon1;
   HYPRE_Solver       precon2;
   HYPRE_IJVector     ij_u1, ij_u2, ij_f1, ij_f2;
   hypre_ParVector    *vector;
   MPI_Comm           comm;
   hypre_BlockTridiagData *b_data = (hypre_BlockTridiagData *) data;

   HYPRE_ParCSRMatrixGetComm((HYPRE_ParCSRMatrix) A, &comm);
   index_set1 = b_data->index_set1;
   nrows1 = index_set1[0];
   nrows  = hypre_ParCSRMatrixNumRows(A);
   nrows2 = nrows - nrows1;
   b_data->index_set2 = hypre_CTAlloc(HYPRE_Int, nrows2+1);
   index_set2 = b_data->index_set2;
   index_set2[0] = nrows2;
   count = 1;
   for (i = 0; i < index_set1[1]; i++) index_set2[count++] = i;
   for (i = 1; i < nrows1; i++) 
      for (j = index_set1[i]+1; j < index_set1[i+1]; j++) 
         index_set2[count++] = j;
   for (i = index_set1[nrows1]+1; i < nrows; i++) index_set2[count++] = i;

   submatrices = hypre_CTAlloc(hypre_ParCSRMatrix *, 4);
   hypre_ParCSRMatrixExtractSubmatrices(A, index_set1, &submatrices);

   nrows1 = hypre_ParCSRMatrixNumRows(submatrices[0]);
   nrows2 = hypre_ParCSRMatrixNumRows(submatrices[3]); 
   start1 = hypre_ParCSRMatrixFirstRowIndex(submatrices[0]);
   start2 = hypre_ParCSRMatrixFirstRowIndex(submatrices[3]);
   HYPRE_IJVectorCreate(comm, start1, start1+nrows1-1, &ij_u1);
   HYPRE_IJVectorSetObjectType(ij_u1, HYPRE_PARCSR);
   ierr  = HYPRE_IJVectorInitialize(ij_u1);
   ierr += HYPRE_IJVectorAssemble(ij_u1);
   hypre_assert(!ierr);
   HYPRE_IJVectorCreate(comm, start1, start1+nrows1-1, &ij_f1);
   HYPRE_IJVectorSetObjectType(ij_f1, HYPRE_PARCSR);
   ierr  = HYPRE_IJVectorInitialize(ij_f1);
   ierr += HYPRE_IJVectorAssemble(ij_f1);
   hypre_assert(!ierr);
   HYPRE_IJVectorCreate(comm, start2, start2+nrows2-1, &ij_u2);
   HYPRE_IJVectorSetObjectType(ij_u2, HYPRE_PARCSR);
   ierr  = HYPRE_IJVectorInitialize(ij_u2);
   ierr += HYPRE_IJVectorAssemble(ij_u2);
   hypre_assert(!ierr);
   HYPRE_IJVectorCreate(comm, start2, start2+nrows1-1, &ij_f2);
   HYPRE_IJVectorSetObjectType(ij_f2, HYPRE_PARCSR);
   ierr  = HYPRE_IJVectorInitialize(ij_f2);
   ierr += HYPRE_IJVectorAssemble(ij_f2);
   hypre_assert(!ierr);
   HYPRE_IJVectorGetObject(ij_f1, (void **) &vector);
   b_data->F1 = vector;
   HYPRE_IJVectorGetObject(ij_u1, (void **) &vector);
   b_data->U1 = vector;
   HYPRE_IJVectorGetObject(ij_f2, (void **) &vector);
   b_data->F2 = vector;
   HYPRE_IJVectorGetObject(ij_u2, (void **) &vector);
   b_data->U2 = vector;

   print_level = b_data->print_level;
   threshold   = b_data->threshold;
   nsweeps     = b_data->num_sweeps;
   relax_type  = b_data->relax_type;
   threshold = b_data->threshold;
   HYPRE_BoomerAMGCreate(&precon1);
   HYPRE_BoomerAMGSetMaxIter(precon1, 1);
   HYPRE_BoomerAMGSetCycleType(precon1, 1);
   HYPRE_BoomerAMGSetPrintLevel(precon1, print_level);
   HYPRE_BoomerAMGSetMaxLevels(precon1, 25);
   HYPRE_BoomerAMGSetMeasureType(precon1, 0);
   HYPRE_BoomerAMGSetCoarsenType(precon1, 0);
   HYPRE_BoomerAMGSetStrongThreshold(precon1, threshold);
   HYPRE_BoomerAMGSetNumFunctions(precon1, 1);
   HYPRE_BoomerAMGSetNumSweeps(precon1, nsweeps);
   HYPRE_BoomerAMGSetRelaxType(precon1, relax_type);
   hypre_BoomerAMGSetup(precon1, submatrices[0], b_data->U1, b_data->F1);

   HYPRE_BoomerAMGCreate(&precon2);
   HYPRE_BoomerAMGSetMaxIter(precon2, 1);
   HYPRE_BoomerAMGSetCycleType(precon2, 1);
   HYPRE_BoomerAMGSetPrintLevel(precon2, print_level);
   HYPRE_BoomerAMGSetMaxLevels(precon2, 25);
   HYPRE_BoomerAMGSetMeasureType(precon2, 0);
   HYPRE_BoomerAMGSetCoarsenType(precon2, 0);
   HYPRE_BoomerAMGSetMeasureType(precon2, 1);
   HYPRE_BoomerAMGSetStrongThreshold(precon2, threshold);
   HYPRE_BoomerAMGSetNumFunctions(precon2, 1);
   HYPRE_BoomerAMGSetNumSweeps(precon2, nsweeps);
   HYPRE_BoomerAMGSetRelaxType(precon2, relax_type);
   hypre_BoomerAMGSetup(precon2, submatrices[3], NULL, NULL);

   b_data->precon1 = precon1;
   b_data->precon2 = precon2;

   b_data->A11 = submatrices[0];
   hypre_ParCSRMatrixDestroy(submatrices[1]);
   b_data->A21 = submatrices[2];
   b_data->A22 = submatrices[3];

   hypre_TFree(submatrices);
   return (0);
}
/*--------------------------------------------------------------------------
 * hypre_SStructSharedDOF_ParcsrMatRowsComm
 *   Given a sstruct_grid & parcsr matrix with rows corresponding to the
 *   sstruct_grid, determine and extract the rows that must be communicated.
 *   These rows are for shared dof that geometrically lie on processor 
 *   boundaries but internally are stored on one processor.
 *   Algo:
 *       for each cellbox
 *         RECVs:
 *          i)  stretch the cellbox to the variable box
 *          ii) in the appropriate (dof-dependent) direction, take the
 *              boundary and boxman_intersect to extract boxmanentries
 *              that contain these boundary edges.
 *          iii)loop over the boxmanentries and see if they belong
 *              on this proc or another proc
 *                 a) if belong on another proc, these are the recvs:
 *                    count and prepare the communication buffers and
 *                    values.
 *          
 *         SENDs:
 *          i)  form layer of cells that is one layer off cellbox
 *              (stretches in the appropriate direction)
 *          ii) boxman_intersect with the cellgrid boxman
 *          iii)loop over the boxmanentries and see if they belong
 *              on this proc or another proc
 *                 a) if belong on another proc, these are the sends:
 *                    count and prepare the communication buffers and
 *                    values.
 *
 * Note: For the recv data, the dof can come from only one processor.
 *       For the send data, the dof can go to more than one processor 
 *       (the same dof is on the boundary of several cells).
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid    *grid,
                                          hypre_ParCSRMatrix   *A,
                                          HYPRE_Int            *num_offprocrows_ptr,
                                          hypre_MaxwellOffProcRow ***OffProcRows_ptr)
{
   MPI_Comm             A_comm= hypre_ParCSRMatrixComm(A);
   MPI_Comm          grid_comm= hypre_SStructGridComm(grid);

   HYPRE_Int       matrix_type= HYPRE_PARCSR;

   HYPRE_Int            nparts= hypre_SStructGridNParts(grid);
   HYPRE_Int            ndim  = hypre_SStructGridNDim(grid);

   hypre_SStructGrid     *cell_ssgrid;

   hypre_SStructPGrid    *pgrid;
   hypre_StructGrid      *cellgrid;
   hypre_BoxArray        *cellboxes;
   hypre_Box             *box, *cellbox, vbox, boxman_entry_box;

   hypre_Index            loop_size, start;
   HYPRE_Int              loopi, loopj, loopk;
   HYPRE_Int              start_rank, end_rank, rank; 

   HYPRE_Int              i, j, k, m, n, t, part, var, nvars;

   HYPRE_SStructVariable *vartypes;
   HYPRE_Int              nbdry_slabs;
   hypre_BoxArray        *recv_slabs, *send_slabs;
   hypre_Index            varoffset;

   hypre_BoxManager     **boxmans, *cell_boxman;
   hypre_BoxManEntry    **boxman_entries, *entry;
   HYPRE_Int              nboxman_entries;

   hypre_Index            ishift, jshift, kshift, zero_index;
   hypre_Index            ilower, iupper, index;

   HYPRE_Int              proc, nprocs, myproc;
   HYPRE_Int             *SendToProcs, *RecvFromProcs;
   HYPRE_Int            **send_RowsNcols;       /* buffer for rows & ncols */
   HYPRE_Int             *send_RowsNcols_alloc; 
   HYPRE_Int             *send_ColsData_alloc;
   HYPRE_Int             *tot_nsendRowsNcols, *tot_sendColsData;
   double               **vals;  /* buffer for cols & data */

   HYPRE_Int             *col_inds;
   double                *values;

   hypre_MPI_Request           *requests;
   hypre_MPI_Status            *status;
   HYPRE_Int            **rbuffer_RowsNcols;
   double               **rbuffer_ColsData;
   HYPRE_Int              num_sends, num_recvs;

   hypre_MaxwellOffProcRow **OffProcRows;
   HYPRE_Int                *starts;

   HYPRE_Int              ierr= 0;

   hypre_MPI_Comm_rank(A_comm, &myproc);
   hypre_MPI_Comm_size(grid_comm, &nprocs);

   start_rank= hypre_ParCSRMatrixFirstRowIndex(A);
   end_rank  = hypre_ParCSRMatrixLastRowIndex(A);

   hypre_SetIndex(ishift, 1, 0, 0);
   hypre_SetIndex(jshift, 0, 1, 0);
   hypre_SetIndex(kshift, 0, 0, 1);
   hypre_SetIndex(zero_index, 0, 0, 0);

  /* need a cellgrid boxman to determine the send boxes -> only the cell dofs
     are unique so a boxman intersect can be used to get the edges that
     must be sent. */
   HYPRE_SStructGridCreate(grid_comm, ndim, nparts, &cell_ssgrid);
   vartypes= hypre_CTAlloc(HYPRE_SStructVariable, 1);
   vartypes[0]= HYPRE_SSTRUCT_VARIABLE_CELL;

   for (i= 0; i< nparts; i++)
   {
      pgrid= hypre_SStructGridPGrid(grid, i);
      cellgrid= hypre_SStructPGridCellSGrid(pgrid);

      cellboxes= hypre_StructGridBoxes(cellgrid);
      hypre_ForBoxI(j, cellboxes)
      {
         box= hypre_BoxArrayBox(cellboxes, j);
         HYPRE_SStructGridSetExtents(cell_ssgrid, i,
                                     hypre_BoxIMin(box), hypre_BoxIMax(box));
      }
      HYPRE_SStructGridSetVariables(cell_ssgrid, i, 1, vartypes);
   }
Esempio n. 6
0
hypre_CSRMatrix *
hypre_ParCSRMatrixExtractAExt( hypre_ParCSRMatrix *A,
                               HYPRE_Int data,
                               HYPRE_Int ** pA_ext_row_map )
{
    /* Note that A's role as the first factor in A*A^T is used only
       through ...CommPkgT(A), which basically says which rows of A
       (columns of A^T) are needed.  In all the other places where A
       serves as an input, it is through its role as A^T, the matrix
       whose data needs to be passed between processors. */
    MPI_Comm comm = hypre_ParCSRMatrixComm(A);
    HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);
    HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);
    HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);

    hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkgT(A);
    /* ... CommPkgT(A) should identify all rows of A^T needed for A*A^T (that is
     * generally a bigger set than ...CommPkg(A), the rows of B needed for A*B) */
    HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
    HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
    HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
    HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
    HYPRE_Int *send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);

    hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A);

    HYPRE_Int *diag_i = hypre_CSRMatrixI(diag);
    HYPRE_Int *diag_j = hypre_CSRMatrixJ(diag);
    HYPRE_Complex *diag_data = hypre_CSRMatrixData(diag);

    hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A);

    HYPRE_Int *offd_i = hypre_CSRMatrixI(offd);
    HYPRE_Int *offd_j = hypre_CSRMatrixJ(offd);
    HYPRE_Complex *offd_data = hypre_CSRMatrixData(offd);

    HYPRE_Int num_cols_A, num_nonzeros;
    HYPRE_Int num_rows_A_ext;

    hypre_CSRMatrix *A_ext;

    HYPRE_Int *A_ext_i;
    HYPRE_Int *A_ext_j;
    HYPRE_Complex *A_ext_data;

    num_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);
    num_rows_A_ext = recv_vec_starts[num_recvs];

    hypre_ParCSRMatrixExtractBExt_Arrays
    ( &A_ext_i, &A_ext_j, &A_ext_data, pA_ext_row_map,
      &num_nonzeros,
      data, 1, comm, comm_pkg,
      num_cols_A, num_recvs, num_sends,
      first_col_diag, first_row_index,
      recv_vec_starts, send_map_starts, send_map_elmts,
      diag_i, diag_j, offd_i, offd_j, col_map_offd,
      diag_data, offd_data
    );

    A_ext = hypre_CSRMatrixCreate(num_rows_A_ext,num_cols_A,num_nonzeros);
    hypre_CSRMatrixI(A_ext) = A_ext_i;
    hypre_CSRMatrixJ(A_ext) = A_ext_j;
    if (data) hypre_CSRMatrixData(A_ext) = A_ext_data;

    return A_ext;
}
Esempio n. 7
0
hypre_ParCSRMatrix *hypre_ParCSRAAt( hypre_ParCSRMatrix  *A )
{
    MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

    hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);

    HYPRE_Complex   *A_diag_data = hypre_CSRMatrixData(A_diag);
    HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
    HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);

    hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);

    HYPRE_Complex   *A_offd_data = hypre_CSRMatrixData(A_offd);
    HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
    HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);
    HYPRE_Int       *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
    HYPRE_Int       *A_ext_row_map;

    HYPRE_Int       *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
    HYPRE_Int        num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
    HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

    hypre_ParCSRMatrix *C;
    HYPRE_Int          *col_map_offd_C;

    hypre_CSRMatrix *C_diag;

    HYPRE_Complex   *C_diag_data;
    HYPRE_Int       *C_diag_i;
    HYPRE_Int       *C_diag_j;

    hypre_CSRMatrix *C_offd;

    HYPRE_Complex   *C_offd_data=NULL;
    HYPRE_Int       *C_offd_i=NULL;
    HYPRE_Int       *C_offd_j=NULL;
    HYPRE_Int       *new_C_offd_j;

    HYPRE_Int        C_diag_size;
    HYPRE_Int        C_offd_size;
    HYPRE_Int        last_col_diag_C;
    HYPRE_Int        num_cols_offd_C;

    hypre_CSRMatrix *A_ext;

    HYPRE_Complex   *A_ext_data;
    HYPRE_Int       *A_ext_i;
    HYPRE_Int       *A_ext_j;
    HYPRE_Int        num_rows_A_ext=0;

    HYPRE_Int        first_row_index_A = hypre_ParCSRMatrixFirstRowIndex(A);
    HYPRE_Int        first_col_diag_A = hypre_ParCSRMatrixFirstColDiag(A);
    HYPRE_Int       *B_marker;

    HYPRE_Int        i;
    HYPRE_Int        i1, i2, i3;
    HYPRE_Int        jj2, jj3;

    HYPRE_Int        jj_count_diag, jj_count_offd;
    HYPRE_Int        jj_row_begin_diag, jj_row_begin_offd;
    HYPRE_Int        start_indexing = 0; /* start indexing for C_data at 0 */
    HYPRE_Int        count;
    HYPRE_Int        n_rows_A, n_cols_A;

    HYPRE_Complex    a_entry;
    HYPRE_Complex    a_b_product;

    HYPRE_Complex    zero = 0.0;

    n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
    n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);

    if (n_cols_A != n_rows_A)
    {
        hypre_printf(" Error! Incompatible matrix dimensions!\n");
        return NULL;
    }
    /*-----------------------------------------------------------------------
     *  Extract A_ext, i.e. portion of A that is stored on neighbor procs
     *  and needed locally for A^T in the matrix matrix product A*A^T
     *-----------------------------------------------------------------------*/

    if (num_rows_diag_A != n_rows_A)
    {
        /*---------------------------------------------------------------------
         * If there exists no CommPkg for A, a CommPkg is generated using
         * equally load balanced partitionings
         *--------------------------------------------------------------------*/
        if (!hypre_ParCSRMatrixCommPkg(A))
        {
            hypre_MatTCommPkgCreate(A);
        }

        A_ext = hypre_ParCSRMatrixExtractAExt( A, 1, &A_ext_row_map );
        A_ext_data = hypre_CSRMatrixData(A_ext);
        A_ext_i    = hypre_CSRMatrixI(A_ext);
        A_ext_j    = hypre_CSRMatrixJ(A_ext);
        num_rows_A_ext = hypre_CSRMatrixNumRows(A_ext);
    }
    /*-----------------------------------------------------------------------
     *  Allocate marker array.
     *-----------------------------------------------------------------------*/

    B_marker = hypre_CTAlloc(HYPRE_Int, num_rows_diag_A+num_rows_A_ext );

    /*-----------------------------------------------------------------------
     *  Initialize some stuff.
     *-----------------------------------------------------------------------*/

    for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 )
    {
        B_marker[i1] = -1;
    }


    hypre_ParAat_RowSizes(
        &C_diag_i, &C_offd_i, B_marker,
        A_diag_i, A_diag_j,
        A_offd_i, A_offd_j, A_col_map_offd,
        A_ext_i, A_ext_j, A_ext_row_map,
        &C_diag_size, &C_offd_size,
        num_rows_diag_A, num_cols_offd_A,
        num_rows_A_ext,
        first_col_diag_A, first_row_index_A
    );

#if 0
    /* debugging output: */
    hypre_printf("A_ext_row_map (%i):",num_rows_A_ext);
    for ( i1=0; i1<num_rows_A_ext; ++i1 ) hypre_printf(" %i",A_ext_row_map[i1] );
    hypre_printf("\nC_diag_i (%i):",C_diag_size);
    for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_diag_i[i1] );
    hypre_printf("\nC_offd_i (%i):",C_offd_size);
    for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_offd_i[i1] );
    hypre_printf("\n");
#endif

    /*-----------------------------------------------------------------------
     *  Allocate C_diag_data and C_diag_j arrays.
     *  Allocate C_offd_data and C_offd_j arrays.
     *-----------------------------------------------------------------------*/

    last_col_diag_C = first_row_index_A + num_rows_diag_A - 1;
    C_diag_data = hypre_CTAlloc(HYPRE_Complex, C_diag_size);
    C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
    if (C_offd_size)
    {
        C_offd_data = hypre_CTAlloc(HYPRE_Complex, C_offd_size);
        C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
    }

    /*-----------------------------------------------------------------------
     *  Second Pass: Fill in C_diag_data and C_diag_j.
     *  Second Pass: Fill in C_offd_data and C_offd_j.
     *-----------------------------------------------------------------------*/

    /*-----------------------------------------------------------------------
     *  Initialize some stuff.
     *-----------------------------------------------------------------------*/

    jj_count_diag = start_indexing;
    jj_count_offd = start_indexing;
    for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 )
    {
        B_marker[i1] = -1;
    }

    /*-----------------------------------------------------------------------
     *  Loop over interior c-points.
     *-----------------------------------------------------------------------*/

    for (i1 = 0; i1 < num_rows_diag_A; i1++)
    {

        /*--------------------------------------------------------------------
         *  Create diagonal entry, C_{i1,i1}
         *--------------------------------------------------------------------*/

        B_marker[i1] = jj_count_diag;
        jj_row_begin_diag = jj_count_diag;
        jj_row_begin_offd = jj_count_offd;
        C_diag_data[jj_count_diag] = zero;
        C_diag_j[jj_count_diag] = i1;
        jj_count_diag++;

        /*-----------------------------------------------------------------
         *  Loop over entries in row i1 of A_offd.
         *-----------------------------------------------------------------*/

        /* There are 3 CSRMatrix or CSRBooleanMatrix objects here:
           ext*ext, ext*diag, and ext*offd belong to another processor.
           diag*offd and offd*diag don't count - never share a column by definition.
           So we have to do 4 cases:
           diag*ext, offd*ext, diag*diag, and offd*offd.
        */

        for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
        {
            i2 = A_diag_j[jj2];
            a_entry = A_diag_data[jj2];

            /* diag*ext */
            /*-----------------------------------------------------------
             *  Loop over entries (columns) i3 in row i2 of (A_ext)^T
             *  That is, rows i3 having a column i2 of A_ext.
             *  For now, for each row i3 of A_ext we crudely check _all_
             *  columns to see whether one matches i2.
             *  For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2)
             *  to C(i1,i3) .  This contributes to both the diag and offd
             *  blocks of C.
             *-----------------------------------------------------------*/

            for ( i3=0; i3<num_rows_A_ext; i3++ ) {
                for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) {
                    if ( A_ext_j[jj3]==i2+first_col_diag_A ) {
                        /* row i3, column i2 of A_ext; or,
                           row i2, column i3 of (A_ext)^T */

                        a_b_product = a_entry * A_ext_data[jj3];

                        /*--------------------------------------------------------
                         *  Check B_marker to see that C_{i1,i3} has not already
                         *  been accounted for. If it has not, create a new entry.
                         *  If it has, add new contribution.
                         *--------------------------------------------------------*/

                        if ( A_ext_row_map[i3] < first_row_index_A ||
                                A_ext_row_map[i3] > last_col_diag_C ) { /* offd */
                            if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) {
                                B_marker[i3+num_rows_diag_A] = jj_count_offd;
                                C_offd_data[jj_count_offd] = a_b_product;
                                C_offd_j[jj_count_offd] = i3;
                                jj_count_offd++;
                            }
                            else
                                C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                        }
                        else {                                              /* diag */
                            if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) {
                                B_marker[i3+num_rows_diag_A] = jj_count_diag;
                                C_diag_data[jj_count_diag] = a_b_product;
                                C_diag_j[jj_count_diag] = i3-first_col_diag_A;
                                jj_count_diag++;
                            }
                            else
                                C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                        }
                    }
                }
            }
        }

        if (num_cols_offd_A)
        {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
                i2 = A_offd_j[jj2];
                a_entry = A_offd_data[jj2];

                /* offd * ext */
                /*-----------------------------------------------------------
                 *  Loop over entries (columns) i3 in row i2 of (A_ext)^T
                 *  That is, rows i3 having a column i2 of A_ext.
                 *  For now, for each row i3 of A_ext we crudely check _all_
                 *  columns to see whether one matches i2.
                 *  For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2)
                 *  to C(i1,i3) .  This contributes to both the diag and offd
                 *  blocks of C.
                 *-----------------------------------------------------------*/

                for ( i3=0; i3<num_rows_A_ext; i3++ ) {
                    for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) {
                        if ( A_ext_j[jj3]==A_col_map_offd[i2] ) {
                            /* row i3, column i2 of A_ext; or,
                               row i2, column i3 of (A_ext)^T */

                            a_b_product = a_entry * A_ext_data[jj3];

                            /*--------------------------------------------------------
                             *  Check B_marker to see that C_{i1,i3} has not already
                             *  been accounted for. If it has not, create a new entry.
                             *  If it has, add new contribution.
                             *--------------------------------------------------------*/

                            if ( A_ext_row_map[i3] < first_row_index_A ||
                                    A_ext_row_map[i3] > last_col_diag_C ) { /* offd */
                                if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) {
                                    B_marker[i3+num_rows_diag_A] = jj_count_offd;
                                    C_offd_data[jj_count_offd] = a_b_product;
                                    C_offd_j[jj_count_offd] = i3;
                                    jj_count_offd++;
                                }
                                else
                                    C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                            }
                            else {                                              /* diag */
                                if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) {
                                    B_marker[i3+num_rows_diag_A] = jj_count_diag;
                                    C_diag_data[jj_count_diag] = a_b_product;
                                    C_diag_j[jj_count_diag] = i3-first_row_index_A;
                                    jj_count_diag++;
                                }
                                else
                                    C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                            }
                        }
                    }
                }
            }
        }

        /* diag * diag */
        /*-----------------------------------------------------------------
         *  Loop over entries (columns) i2 in row i1 of A_diag.
         *  For each such column we will find the contributions of the
         *  corresponding rows i2 of A^T to C=A*A^T .  Now we only look
         *  at the local part of A^T - with columns (rows of A) living
         *  on this processor.
         *-----------------------------------------------------------------*/

        for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
        {
            i2 = A_diag_j[jj2];
            a_entry = A_diag_data[jj2];

            /*-----------------------------------------------------------
             *  Loop over entries (columns) i3 in row i2 of A^T
             *  That is, rows i3 having a column i2 of A (local part).
             *  For now, for each row i3 of A we crudely check _all_
             *  columns to see whether one matches i2.
             *  This i3-loop is for the diagonal block of A.
             *  It contributes to the diagonal block of C.
             *  For each entry (i2,i3) of A^T,  add A(i1,i2)*A(i3,i2)
             *  to C(i1,i3)
             *-----------------------------------------------------------*/
            for ( i3=0; i3<num_rows_diag_A; i3++ ) {
                for ( jj3=A_diag_i[i3]; jj3<A_diag_i[i3+1]; jj3++ ) {
                    if ( A_diag_j[jj3]==i2 ) {
                        /* row i3, column i2 of A; or,
                           row i2, column i3 of A^T */
                        a_b_product = a_entry * A_diag_data[jj3];

                        /*--------------------------------------------------------
                         *  Check B_marker to see that C_{i1,i3} has not already
                         *  been accounted for. If it has not, mark it and increment
                         *  counter.
                         *--------------------------------------------------------*/
                        if (B_marker[i3] < jj_row_begin_diag)
                        {
                            B_marker[i3] = jj_count_diag;
                            C_diag_data[jj_count_diag] = a_b_product;
                            C_diag_j[jj_count_diag] = i3;
                            jj_count_diag++;
                        }
                        else
                        {
                            C_diag_data[B_marker[i3]] += a_b_product;
                        }
                    }
                }
            } /* end of i3 loop */
        } /* end of third i2 loop */

        /* offd * offd */
        /*-----------------------------------------------------------
         *  Loop over offd columns i2 of A in A*A^T.  Then
         *  loop over offd entries (columns) i3 in row i2 of A^T
         *  That is, rows i3 having a column i2 of A (local part).
         *  For now, for each row i3 of A we crudely check _all_
         *  columns to see whether one matches i2.
         *  This i3-loop is for the off-diagonal block of A.
         *  It contributes to the diag block of C.
         *  For each entry (i2,i3) of A^T, add A*A^T to C
         *-----------------------------------------------------------*/
        if (num_cols_offd_A) {

            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
                i2 = A_offd_j[jj2];
                a_entry = A_offd_data[jj2];

                for ( i3=0; i3<num_rows_diag_A; i3++ ) {
                    /* ... note that num_rows_diag_A == num_rows_offd_A */
                    for ( jj3=A_offd_i[i3]; jj3<A_offd_i[i3+1]; jj3++ ) {
                        if ( A_offd_j[jj3]==i2 ) {
                            /* row i3, column i2 of A; or,
                               row i2, column i3 of A^T */
                            a_b_product = a_entry * A_offd_data[jj3];

                            /*--------------------------------------------------------
                             *  Check B_marker to see that C_{i1,i3} has not already
                             *  been accounted for. If it has not, create a new entry.
                             *  If it has, add new contribution
                             *--------------------------------------------------------*/

                            if (B_marker[i3] < jj_row_begin_diag)
                            {
                                B_marker[i3] = jj_count_diag;
                                C_diag_data[jj_count_diag] = a_b_product;
                                C_diag_j[jj_count_diag] = i3;
                                jj_count_diag++;
                            }
                            else
                            {
                                C_diag_data[B_marker[i3]] += a_b_product;
                            }
                        }
                    }
                }  /* end of last i3 loop */
            }     /* end of if (num_cols_offd_A) */

        }        /* end of fourth and last i2 loop */
#if 0          /* debugging printout */
        hypre_printf("end of i1 loop: i1=%i jj_count_diag=%i\n", i1, jj_count_diag );
        hypre_printf("  C_diag_j=");
        for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%i ",C_diag_j[jj3]);
        hypre_printf("  C_diag_data=");
        for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%f ",C_diag_data[jj3]);
        hypre_printf("\n");
        hypre_printf("  C_offd_j=");
        for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%i ",C_offd_j[jj3]);
        hypre_printf("  C_offd_data=");
        for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%f ",C_offd_data[jj3]);
        hypre_printf("\n");
        hypre_printf( "  B_marker =" );
        for ( it=0; it<num_rows_diag_A+num_rows_A_ext; ++it )
            hypre_printf(" %i", B_marker[it] );
        hypre_printf( "\n" );
#endif
    }           /* end of i1 loop */

    /*-----------------------------------------------------------------------
     *  Delete 0-columns in C_offd, i.e. generate col_map_offd and reset
     *  C_offd_j.  Note that (with the indexing we have coming into this
     *  block) col_map_offd_C[i3]==A_ext_row_map[i3].
     *-----------------------------------------------------------------------*/

    for ( i=0; i<num_rows_diag_A+num_rows_A_ext; ++i )
        B_marker[i] = -1;
    for ( i=0; i<C_offd_size; i++ )
        B_marker[ C_offd_j[i] ] = -2;

    count = 0;
    for (i=0; i < num_rows_diag_A + num_rows_A_ext; i++) {
        if (B_marker[i] == -2) {
            B_marker[i] = count;
            count++;
        }
    }
    num_cols_offd_C = count;

    if (num_cols_offd_C) {
        col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);
        new_C_offd_j = hypre_CTAlloc(HYPRE_Int,C_offd_size);
        /* ... a bit big, but num_cols_offd_C is too small.  It might be worth
           computing the correct size, which is sum( no. columns in row i, over all rows i )
        */

        for (i=0; i < C_offd_size; i++) {
            new_C_offd_j[i] = B_marker[C_offd_j[i]];
            col_map_offd_C[ new_C_offd_j[i] ] = A_ext_row_map[ C_offd_j[i] ];
        }

        hypre_TFree(C_offd_j);
        C_offd_j = new_C_offd_j;

    }

    /*----------------------------------------------------------------
     * Create C
     *----------------------------------------------------------------*/

    C = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_rows_A, row_starts_A,
                                 row_starts_A, num_cols_offd_C,
                                 C_diag_size, C_offd_size);

    /* Note that C does not own the partitionings */
    hypre_ParCSRMatrixSetRowStartsOwner(C,0);
    hypre_ParCSRMatrixSetColStartsOwner(C,0);

    C_diag = hypre_ParCSRMatrixDiag(C);
    hypre_CSRMatrixData(C_diag) = C_diag_data;
    hypre_CSRMatrixI(C_diag) = C_diag_i;
    hypre_CSRMatrixJ(C_diag) = C_diag_j;

    if (num_cols_offd_C)
    {
        C_offd = hypre_ParCSRMatrixOffd(C);
        hypre_CSRMatrixData(C_offd) = C_offd_data;
        hypre_CSRMatrixI(C_offd) = C_offd_i;
        hypre_CSRMatrixJ(C_offd) = C_offd_j;
        hypre_ParCSRMatrixOffd(C) = C_offd;
        hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

    }
    else
        hypre_TFree(C_offd_i);

    /*-----------------------------------------------------------------------
     *  Free B_ext and marker array.
     *-----------------------------------------------------------------------*/

    if (num_cols_offd_A)
    {
        hypre_CSRMatrixDestroy(A_ext);
        A_ext = NULL;
    }
    hypre_TFree(B_marker);
    if ( num_rows_diag_A != n_rows_A )
        hypre_TFree(A_ext_row_map);

    return C;

}
Esempio n. 8
0
int
main(int argc, char *argv[])
{
    GRID *g;
    DOF *u_h;
    MAT *A, *A0, *B;
    MAP *map;
    INT i;
    size_t nnz, mem, mem_peak;
    VEC *x, *y0, *y1, *y2;
    double t0, t1, dnz, dnz1, mflops, mop;
    char *fn = "../test/cube.dat";
    FLOAT mem_max = 300;
    INT refine = 0;

    phgOptionsRegisterFilename("-mesh_file", "Mesh file", (char **)&fn);
    phgOptionsRegisterInt("-loop_count", "Loop count", &loop_count);
    phgOptionsRegisterInt("-refine", "Refinement level", &refine);
    phgOptionsRegisterFloat("-mem_max", "Maximum memory", &mem_max);

    phgInit(&argc, &argv);
    g = phgNewGrid(-1);
    if (!phgImport(g, fn, FALSE))
	phgError(1, "can't read file \"%s\".\n", fn);
    phgRefineAllElements(g, refine);
    u_h = phgDofNew(g, DOF_DEFAULT, 1, "u_h", DofNoAction);

    while (TRUE) {
	phgPrintf("\n");
	if (phgBalanceGrid(g, 1.2, 1, NULL, 0.))
	    phgPrintf("Repartition mesh, %d submeshes, load imbalance: %lg\n",
			g->nprocs, (double)g->lif);
	map = phgMapCreate(u_h, NULL);
	A = phgMapCreateMat(map, map);
	A->handle_bdry_eqns = TRUE;
	build_matrix(A, u_h);
	phgMatAssemble(A);

	/* Note: A is unsymmetric (A' != A) if boundary entries not removed */
	phgMatRemoveBoundaryEntries(A);

#if 0
	/* test block matrix operation */
	A0 = phgMatCreateBlockMatrix(g->comm, 1, 1, &A, NULL);
#else
	A0 = A;
#endif

	phgPrintf("%d DOF, %d elems, %d submeshes, matrix size: %d, LIF: %lg\n",
			DofGetDataCountGlobal(u_h), g->nleaf_global,
			g->nprocs, A->rmap->nglobal, (double)g->lif);

	/* test PHG mat-vec multiply */
	x = phgMapCreateVec(A->cmap, 1);
	y1 = phgMapCreateVec(A->rmap, 1);
	phgVecRandomize(x, 123);
	phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1);

	phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	t0 = phgGetTime(NULL);
	for (i = 0; i < loop_count; i++) {
	    phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1);
	}
	t1 = phgGetTime(NULL);
	mflops = phgPerfGetMflops(g, NULL, NULL);
	y0 = phgVecCopy(y1, NULL);
	nnz = A->nnz_d + A->nnz_o;
#if USE_MPI
	dnz1 = nnz;
	MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	dnz = nnz;
#endif
	mop = loop_count * (dnz + dnz - A->rmap->nlocal) * 1e-6;

	phgPrintf("\n");
	t1 -= t0;
	phgPrintf("   PHG:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF)\n",
			t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops);

	/* test trans(A)*x */
	phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	t0 = phgGetTime(NULL);
	for (i = 0; i < loop_count; i++) {
	    phgMatVec(MAT_OP_T, 1.0, A0, x, 0.0, &y1);
	}
	t1 = phgGetTime(NULL);
	mflops = phgPerfGetMflops(g, NULL, NULL);
	t1 -= t0;
	phgPrintf("  A'*x:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		  "err: %le\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	/* time A * trans(A) */
	phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	t0 = phgGetTime(NULL);
	B = phgMatMat(MAT_OP_N, MAT_OP_N, 1.0, A, A, 0.0, NULL);
	t1 = phgGetTime(NULL);
	mflops = phgPerfGetMflops(g, NULL, NULL);
	nnz = B->nnz_d + B->nnz_o;
#if USE_MPI
	dnz1 = nnz;
	MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	dnz = nnz;
#endif
	/* compare B*x <--> A*A*x */
	y2 = phgMatVec(MAT_OP_N, 1.0, B, x, 0.0, NULL);
	phgMatVec(MAT_OP_N, 1.0, A0, y0, 0.0, &y1);
	phgMatDestroy(&B);
	t1 -= t0;
	phgPrintf("   A*A:  time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
		  t1, dnz, mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));

#if USE_PETSC
	{
	    Mat ma, mb;
	    MatInfo info;
	    Vec va, vb, vc;
	    PetscScalar *vec;

	    ma = phgPetscCreateMatAIJ(A);
	    MatGetVecs(ma, PETSC_NULL, &va);
	    VecDuplicate(va, &vb);
	    VecGetArray(va, &vec);
	    memcpy(vec, x->data, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(va, &vec);
	    MatMult(ma, va, vb);
	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		MatMult(ma, va, vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    VecGetArray(vb, &vec);
	    memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vb, &vec);

	    MatGetInfo(ma, MAT_GLOBAL_SUM, &info);
	    /*phgPrintf("    --------------------------------------------"
		      "-------------------------\n");*/
	    phgPrintf("\n");
	    t1 -= t0;
	    dnz = info.nz_used;
	    phgPrintf(" PETSc:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		MatMultTranspose(ma, va, vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    VecGetArray(vb, &vec);
	    memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vb, &vec);
	    t1 -= t0;
	    phgPrintf("  A'*x:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		(double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    MatMatMult(ma, ma, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &mb);
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    t1 -= t0;
	    MatGetInfo(mb, MAT_GLOBAL_SUM, &info);
	    dnz = info.nz_used;
	    VecDuplicate(va, &vc);
	    /* compare B*x <--> A*A*x */
	    MatMult(ma, vb, vc);
	    MatMult(mb, va, vb);
	    VecGetArray(vb, &vec);
	    memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vb, &vec);
	    VecGetArray(vc, &vec);
	    memcpy(y2->data, vec, x->map->nlocal * sizeof(*vec));
	    VecRestoreArray(vc, &vec);
	    phgPrintf("   A*A:  time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
		  t1, dnz, mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));

	    phgPetscMatDestroy(&mb);
	    phgPetscMatDestroy(&ma);
	    phgPetscVecDestroy(&va);
	    phgPetscVecDestroy(&vb);
	    phgPetscVecDestroy(&vc);
	}
#endif	/* USE_PETSC */

#if USE_HYPRE
	{
	    HYPRE_IJMatrix ma;
	    HYPRE_IJVector va, vb, vc;
	    HYPRE_ParCSRMatrix  par_ma;
	    hypre_ParCSRMatrix  *par_mb;
	    HYPRE_ParVector	par_va, par_vb, par_vc;
	    HYPRE_Int offset, *ni, start, end;
	    assert(sizeof(INT)==sizeof(int) && sizeof(FLOAT)==sizeof(double));
	    setup_hypre_mat(A, &ma);
	    ni = phgAlloc(2 * A->rmap->nlocal * sizeof(*ni));
	    offset = A->cmap->partition[A->cmap->rank];
	    for (i = 0; i < A->rmap->nlocal; i++)
		ni[i] = i + offset;
	    HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1,
				 &va);
	    HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1,
				 &vb);
	    HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1,
				 &vc);
	    HYPRE_IJVectorSetObjectType(va, HYPRE_PARCSR);
	    HYPRE_IJVectorSetObjectType(vb, HYPRE_PARCSR);
	    HYPRE_IJVectorSetObjectType(vc, HYPRE_PARCSR);
	    HYPRE_IJVectorSetMaxOffProcElmts(va, 0);
	    HYPRE_IJVectorSetMaxOffProcElmts(vb, 0);
	    HYPRE_IJVectorSetMaxOffProcElmts(vc, 0);
	    HYPRE_IJVectorInitialize(va);
	    HYPRE_IJVectorInitialize(vb);
	    HYPRE_IJVectorInitialize(vc);
	    HYPRE_IJMatrixGetObject(ma, (void **)(void *)&par_ma);
	    HYPRE_IJVectorGetObject(va, (void **)(void *)&par_va);
	    HYPRE_IJVectorGetObject(vb, (void **)(void *)&par_vb);
	    HYPRE_IJVectorGetObject(vc, (void **)(void *)&par_vc);
	    HYPRE_IJVectorSetValues(va, A->cmap->nlocal, ni, (double *)x->data);
	    HYPRE_IJVectorAssemble(va);
	    HYPRE_IJVectorAssemble(vb);
	    HYPRE_IJVectorAssemble(vc);

	    HYPRE_IJMatrixGetRowCounts(ma, A->cmap->nlocal,
					ni, ni + A->rmap->nlocal);
	    for (i = 0, nnz = 0; i < A->rmap->nlocal; i++)
		nnz += ni[A->rmap->nlocal + i];
#if USE_MPI
	    dnz1 = nnz;
	    MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	    dnz = nnz;
#endif

	    HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb);
	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data);
	    /*phgPrintf("    --------------------------------------------"
		      "-------------------------\n");*/
	    phgPrintf("\n");
	    t1 -= t0;
	    phgPrintf(" HYPRE:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		(double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    for (i = 0; i < loop_count; i++) {
		HYPRE_ParCSRMatrixMatvecT(1.0, par_ma, par_va, 0.0, par_vb);
	    }
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data);
	    t1 -= t0;
	    phgPrintf("  A'*x:  time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
		      "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops,
		(double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

	    phgPerfGetMflops(g, NULL, NULL);	/* reset flops counter */
	    t0 = phgGetTime(NULL);
	    /* Note: 'HYPRE_ParCSRMatrix' is currently typedef'ed to
	     *	     'hypre_ParCSRMatrix *' */
	    par_mb = hypre_ParMatmul((hypre_ParCSRMatrix *)par_ma,
					(hypre_ParCSRMatrix *)par_ma);
	    t1 = phgGetTime(NULL);
	    mflops = phgPerfGetMflops(g, NULL, NULL);
	    start = hypre_ParCSRMatrixFirstRowIndex(par_mb);
	    end = hypre_ParCSRMatrixLastRowIndex(par_mb) + 1;
	    for (i = start, nnz = 0; i < end; i++) {
		HYPRE_Int ncols;
		hypre_ParCSRMatrixGetRow(par_mb, i, &ncols, NULL, NULL);
		hypre_ParCSRMatrixRestoreRow(par_mb, i, &ncols, NULL, NULL);
		nnz += ncols;
	    }
#if USE_MPI
	    dnz1 = nnz;
	    MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
	    dnz = nnz;
#endif
	    /* compare B*x <--> A*A*x */
	    HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_vb, 0.0, par_vc);
	    HYPRE_ParCSRMatrixMatvec(1.0, (void *)par_mb, par_va, 0.0, par_vb);
	    HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data);
	    HYPRE_IJVectorGetValues(vc, A->rmap->nlocal, ni, (double*)y2->data);
	    hypre_ParCSRMatrixDestroy((par_mb));
	    t1 -= t0;
	    phgPrintf("   A*A:  time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
		  t1, dnz, mflops,
		 (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));

	    phgFree(ni);
	    HYPRE_IJMatrixDestroy(ma);
	    HYPRE_IJVectorDestroy(va);
	    HYPRE_IJVectorDestroy(vb);
	    HYPRE_IJVectorDestroy(vc);
	}
#endif	/* USE_HYPRE */

	if (A0 != A)
	    phgMatDestroy(&A0);
#if 0
if (A->rmap->nglobal > 1000) {
    VEC *v = phgMapCreateVec(A->rmap, 3);
    for (i = 0; i < v->map->nlocal; i++) {
	v->data[i + 0 * v->map->nlocal] = 1 * (i + v->map->partition[g->rank]);
	v->data[i + 1 * v->map->nlocal] = 2 * (i + v->map->partition[g->rank]);
	v->data[i + 2 * v->map->nlocal] = 3 * (i + v->map->partition[g->rank]);
    }
    phgMatDumpMATLAB(A, "A", "A.m");
    phgVecDumpMATLAB(v, "v", "v.m");
    phgFinalize();
    exit(0);
}
#endif
	phgMatDestroy(&A);
	phgVecDestroy(&x);
	phgVecDestroy(&y0);
	phgVecDestroy(&y1);
	phgVecDestroy(&y2);
	phgMapDestroy(&map);
	mem = phgMemoryUsage(g, &mem_peak);
	dnz = mem / (1024.0 * 1024.0);
	dnz1 = mem_peak / (1024.0 * 1024.0);
	/*phgPrintf("    --------------------------------------------"
		  "-------------------------\n");*/
	phgPrintf("\n");
	phgPrintf("  Memory: current %0.4lgMB, peak %0.4lgMB\n", dnz, dnz1);
#if 0
{
    static int loop_count = 0;
    if (++loop_count == 4)
	break;
}
#endif
	if (mem_peak > 1024 * (size_t)1024 * mem_max)
	    break;
	phgRefineAllElements(g, 1);
    }
    phgDofFree(&u_h);
    phgFreeGrid(&g);
    phgFinalize();

    return 0;
}
Esempio n. 9
0
HYPRE_Int
hypre_ParCSRMatrixToParChordMatrix(
   hypre_ParCSRMatrix *Ap,
   MPI_Comm comm,
   hypre_ParChordMatrix **pAc )
{
   HYPRE_Int * row_starts = hypre_ParCSRMatrixRowStarts(Ap);
   HYPRE_Int * col_starts = hypre_ParCSRMatrixColStarts(Ap);
   hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(Ap);
   hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(Ap);
   HYPRE_Int * offd_j = hypre_CSRMatrixJ(offd);
   HYPRE_Int * diag_j = hypre_CSRMatrixJ(diag);
   HYPRE_Int * col_map_offd = hypre_ParCSRMatrixColMapOffd(Ap);
   HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(Ap);

   hypre_ParChordMatrix * Ac;
   hypre_NumbersNode * rdofs, * offd_cols_me;
   hypre_NumbersNode ** offd_cols;
   HYPRE_Int ** offd_col_array;
   HYPRE_Int * len_offd_col_array, * offd_col_array_me;
   HYPRE_Int len_offd_col_array_me;
   HYPRE_Int num_idofs, num_rdofs, j_local, j_global, row_global;
   HYPRE_Int i, j, jj, p, pto, q, qto, my_id, my_q, row, ireq;
   HYPRE_Int num_inprocessors, num_toprocessors, num_procs, len_num_rdofs_toprocessor;
   HYPRE_Int *inprocessor, *toprocessor, *pcr, *qcr, *num_inchords, *chord, *chordto;
   HYPRE_Int *inproc, *toproc, *num_rdofs_toprocessor;
   HYPRE_Int **inchord_idof, **inchord_rdof, **rdof_toprocessor;
   double **inchord_data;
   double data;
   HYPRE_Int *first_index_idof, *first_index_rdof;
   hypre_MPI_Request * request;
   hypre_MPI_Status * status;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);
   num_idofs = row_starts[my_id+1] - row_starts[my_id];
   num_rdofs = col_starts[my_id+1] - col_starts[my_id];

   hypre_ParChordMatrixCreate( pAc, comm, num_idofs, num_rdofs );
   Ac = *pAc;

/* The following block sets Inprocessor:
   On each proc. my_id, we find the columns in the offd and diag blocks
   (global no.s).  The columns are rdofs (contrary to what I wrote in
   ChordMatrix.txt).
   For each such col/rdof r, find the proc. p which owns row/idof r.
   We set the temporary array pcr[p]=1 for such p.
   An MPI all-to-all will exchange such arrays so my_id's array qcr has
   qcr[q]=1 iff, on proc. q, pcr[my_id]=1.  In other words, qcr[q]=1 if
   my_id owns a row/idof i which is the same as a col/rdof owned by q.
   Collect all such q's into in the array Inprocessor.
   While constructing pcr, we also construct pj such that for any index jj
   into offd_j,offd_data, pj[jj] is the processor which owns jj as a row (idof)
   (the number jj is local to this processor).
   */
   pcr = hypre_CTAlloc( HYPRE_Int, num_procs );
   qcr = hypre_CTAlloc( HYPRE_Int, num_procs );
   for ( p=0; p<num_procs; ++p ) pcr[p]=0;
   for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(offd); ++jj ) {
      j_local = offd_j[jj];
      j_global =  col_map_offd[j_local];
      for ( p=0; p<num_procs; ++p ) {
         if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) {
            pcr[p]=1;
/* not used yet...            pj[jj] = p;*/
            break;
         }
      }
   }
   /*   jjd = jj; ...not used yet */

   /* pcr[my_id] = 1; ...for square matrices (with nonzero diag block)
      this one line  would do the job of the following nested loop.
      For non-square matrices, the data distribution is too arbitrary. */
   for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(diag); ++jj ) {
      j_local = diag_j[jj];
      j_global = j_local + first_col_diag;
      for ( p=0; p<num_procs; ++p ) {
         if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) {
            pcr[p]=1;
/* not used yet...            pj[jj+jjd] = p;*/
            break;
         }
      }
   }


   /* Now pcr[p]=1 iff my_id owns a col/rdof r which proc. p owns as a row/idof */
   hypre_MPI_Alltoall( pcr, 1, HYPRE_MPI_INT, qcr, 1, HYPRE_MPI_INT, comm );
   /* Now qcr[q]==1 if my_id owns a row/idof i which is a col/rdof of proc. q
    The array of such q's is the array Inprocessor. */

   num_inprocessors = 0;
   for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) ++num_inprocessors;
   inprocessor = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   p = 0;
   for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) inprocessor[ p++ ] = q;
   num_toprocessors = 0;
   for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) ++num_toprocessors;
   toprocessor = hypre_CTAlloc( HYPRE_Int, num_toprocessors );
   p = 0;
   for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) toprocessor[ p++ ] = q;

   hypre_ParChordMatrixNumInprocessors(Ac) = num_inprocessors;
   hypre_ParChordMatrixInprocessor(Ac) = inprocessor;
   hypre_ParChordMatrixNumToprocessors(Ac) = num_toprocessors;
   hypre_ParChordMatrixToprocessor(Ac) = toprocessor;
   hypre_TFree( qcr );

   /* FirstIndexIdof[p] is the global index of proc. p's row 0 */
   /* FirstIndexRdof[p] is the global index of proc. p's col 0 */
   /* Fir FirstIndexIdof, we copy the array row_starts rather than its pointers,
      because the chord matrix will think it's free to delete FirstIndexIdof */
   /* col_starts[p] contains the global index of the first column
      in the diag block of p.  But for first_index_rdof we want the global
      index of the first column in p (whether that's in the diag or offd block).
      So it's more involved than row/idof: we also check the offd block, and
      have to do a gather to get first_index_rdof for every proc. on every proc. */
   first_index_idof = hypre_CTAlloc( HYPRE_Int, num_procs+1 );
   first_index_rdof = hypre_CTAlloc( HYPRE_Int, num_procs+1 );
   for ( p=0; p<=num_procs; ++p ) {
      first_index_idof[p] = row_starts[p];
      first_index_rdof[p] = col_starts[p];
   };
   if ( hypre_CSRMatrixNumRows(offd) > 0  && hypre_CSRMatrixNumCols(offd) > 0 )
      first_index_rdof[my_id] =
         col_starts[my_id]<col_map_offd[0] ? col_starts[my_id] : col_map_offd[0];
   hypre_MPI_Allgather( &first_index_rdof[my_id], 1, HYPRE_MPI_INT,
                  first_index_rdof, 1, HYPRE_MPI_INT, comm );

   /* Set num_inchords: num_inchords[p] is no. chords on my_id connected to p.
      Set each chord (idof,jdof,data).
      We go through each matrix element in the diag block, find what processor
      owns its column no. as a row, then update num_inchords[p], inchord_idof[p],
      inchord_rdof[p], inchord_data[p].
   */

   inchord_idof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   inchord_rdof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   inchord_data = hypre_CTAlloc( double*, num_inprocessors );
   num_inchords = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   chord = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   chordto = hypre_CTAlloc( HYPRE_Int, num_toprocessors );
   num_rdofs = 0;
   for ( q=0; q<num_inprocessors; ++q ) num_inchords[q] = 0;
   my_q = -1;
   for ( q=0; q<num_inprocessors; ++q ) if ( inprocessor[q]==my_id ) my_q = q;
   hypre_assert( my_q>=0 );

   /* diag block: first count chords (from my_id to my_id),
      then set them from diag block's CSR data structure */
   num_idofs = hypre_CSRMatrixNumRows(diag);
   rdofs = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) {
      for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(diag)[i];
         hypre_NumbersEnter( rdofs, j_local );
         ++num_inchords[my_q];
      }
   };
   num_rdofs = hypre_NumbersNEntered( rdofs );
   inchord_idof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] );
   inchord_rdof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] );
   inchord_data[my_q] = hypre_CTAlloc( double, num_inchords[my_q] );
   chord[0] = 0;
   for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) {
      for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(diag)[i];
         data = hypre_CSRMatrixData(diag)[i];
         inchord_idof[my_q][chord[0]] = row;
         /* Here We need to convert from j_local - a column local to
            the diag of this proc., to a j which is local only to this
            processor - a column (rdof) numbering scheme to be shared by the
            diag and offd blocks...  */
         j_global = j_local + hypre_ParCSRMatrixColStarts(Ap)[my_q];
         j = j_global - first_index_rdof[my_q];
         inchord_rdof[my_q][chord[0]] = j;
         inchord_data[my_q][chord[0]] = data;
         hypre_assert( chord[0] < num_inchords[my_q] );
         ++chord[0];
      }
   };
   hypre_NumbersDeleteNode(rdofs);


   /* offd block: */

   /* >>> offd_cols_me duplicates rdofs */
   offd_cols_me = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) {
      for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(offd)[i];
         j_global =  col_map_offd[j_local];
         hypre_NumbersEnter( offd_cols_me, j_global );
      }
   }
   offd_col_array = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   len_offd_col_array = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   offd_col_array_me = hypre_NumbersArray( offd_cols_me );
   len_offd_col_array_me = hypre_NumbersNEntered( offd_cols_me );
   request = hypre_CTAlloc(hypre_MPI_Request, 2*num_procs );
   ireq = 0;
   for ( q=0; q<num_inprocessors; ++q )
      hypre_MPI_Irecv( &len_offd_col_array[q], 1, HYPRE_MPI_INT,
                 inprocessor[q], 0, comm, &request[ireq++] );
   for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) {
      hypre_MPI_Isend( &len_offd_col_array_me, 1, HYPRE_MPI_INT, p, 0, comm, &request[ireq++] );
   }
   status = hypre_CTAlloc(hypre_MPI_Status, ireq );
   hypre_MPI_Waitall( ireq, request, status );
   hypre_TFree(status);
   ireq = 0;
   for ( q=0; q<num_inprocessors; ++q )
      offd_col_array[q] = hypre_CTAlloc( HYPRE_Int, len_offd_col_array[q] );
   for ( q=0; q<num_inprocessors; ++q )
      hypre_MPI_Irecv( offd_col_array[q], len_offd_col_array[q], HYPRE_MPI_INT,
                 inprocessor[q], 0, comm, &request[ireq++] );
   for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) {
      hypre_MPI_Isend( offd_col_array_me, len_offd_col_array_me,
                 HYPRE_MPI_INT, p, 0, comm, &request[ireq++] );
   }
   status = hypre_CTAlloc(hypre_MPI_Status, ireq );
   hypre_MPI_Waitall( ireq, request, status );
   hypre_TFree(request);
   hypre_TFree(status);
   offd_cols = hypre_CTAlloc( hypre_NumbersNode *, num_inprocessors );
   for ( q=0; q<num_inprocessors; ++q ) {
      offd_cols[q] = hypre_NumbersNewNode();
      for ( i=0; i<len_offd_col_array[q]; ++i )
         hypre_NumbersEnter( offd_cols[q], offd_col_array[q][i] );
   }

   len_num_rdofs_toprocessor = 1 + hypre_CSRMatrixI(offd)
      [hypre_CSRMatrixNumRows(offd)];
   inproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   toproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   num_rdofs_toprocessor = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   for ( qto=0; qto<len_num_rdofs_toprocessor; ++qto ) {
      inproc[qto] = -1;
      toproc[qto] = -1;
      num_rdofs_toprocessor[qto] = 0;
   };
   rdofs = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) {
      for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) {
         j_local = hypre_CSRMatrixJ(offd)[i];
         j_global =  col_map_offd[j_local];
         hypre_NumbersEnter( rdofs, j_local );
         
         /* TO DO: find faster ways to do the two processor lookups below.*/
         /* Find a processor p (local index q) from the inprocessor list,
            which owns the column(rdof) whichis the same as this processor's
            row(idof) row. Update num_inchords for p.
            Save q as inproc[i] for quick recall later.  It represents
            an inprocessor (not unique) connected to a chord i.
         */
         inproc[i] = -1;
         for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) {
            p = inprocessor[q];
            if ( hypre_NumbersQuery( offd_cols[q],
                                     row+hypre_ParCSRMatrixFirstRowIndex(Ap) )
                 == 1 ) {
               /* row is one of the offd columns of p */
               ++num_inchords[q];
               inproc[i] = q;
               break;
            }
         }
         if ( inproc[i]<0 ) {
            /* For square matrices, we would have found the column in some
               other processor's offd.  But for non-square matrices it could
               exist only in some other processor's diag...*/
            /* Note that all data in a diag block is stored.  We don't check
               whether the value of a data entry is zero. */
            for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) {
               p = inprocessor[q];
               row_global = row+hypre_ParCSRMatrixFirstRowIndex(Ap);
               if ( row_global>=col_starts[p] &&
                    row_global< col_starts[p+1] ) {
                  /* row is one of the diag columns of p */
                  ++num_inchords[q];
                  inproc[i] = q;
                  break;
               }
            }  
         }
         hypre_assert( inproc[i]>=0 );

         /* Find the processor pto (local index qto) from the toprocessor list,
            which owns the row(idof) which is the  same as this processor's
            column(rdof) j_global. Update num_rdofs_toprocessor for pto.
            Save pto as toproc[i] for quick recall later. It represents
            the toprocessor connected to a chord i. */
         for ( qto=0; qto<num_toprocessors; ++qto ) {
            pto = toprocessor[qto];
            if ( j_global >= row_starts[pto] && j_global<row_starts[pto+1] ) {
               hypre_assert( qto < len_num_rdofs_toprocessor );
               ++num_rdofs_toprocessor[qto];
               /* ... an overestimate, as if two chords share an rdof, that
                  rdof will be counted twice in num_rdofs_toprocessor.
                  It can be fixed up later.*/
               toproc[i] = qto;
               break;
            }
         }
      }
   };
   num_rdofs += hypre_NumbersNEntered(rdofs);
   hypre_NumbersDeleteNode(rdofs);

   for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) {
      inchord_idof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] );
      inchord_rdof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] );
      inchord_data[q] = hypre_CTAlloc( double, num_inchords[q] );
      chord[q] = 0;
   };