Ejemplo n.º 1
0
/*
 * Diagnostic check of a Reitzinger hierarchy.
 *
 * For every level i from ml->ML_finest_level down to, but not including,
 * ml->ML_coarsest_level, a random vector v (scaled to unit 2-norm) is pushed
 * first through Tmat_array[i] and then through ml->Amat[i].  The 2-norm of
 * the result is reported as ||S*T*v||.  Nothing is verified automatically;
 * the norms are only printed, and only when the ML print level exceeds 5.
 * NOTE(review): presumably a norm near zero is the expected outcome (the
 * banner calls this a null space check) -- confirm with the caller.
 *
 *   ml            multigrid object supplying the level range, the per-level
 *                 operators (ml->Amat) and the communicator (ml->comm)
 *   Tmat_array    per-level operators, indexed by level number
 *   incr_or_decr  level ordering; ML_INCREASING is not supported
 *
 * Returns 1 (without checking anything) for ML_INCREASING, 0 otherwise.
 */
int ML_Reitzinger_Check_Hierarchy(ML *ml, ML_Operator **Tmat_array, int incr_or_decr)
{
  int i,j;
  int finest_level, coarsest_level;
  int verbose;
  ML_Operator *Amat, *Tmat;
  double *randvec, *result, *result1;
  double dnorm;

  finest_level = ml->ML_finest_level;
  coarsest_level = ml->ML_coarsest_level;

  if (incr_or_decr == ML_INCREASING) {
    if (ml->comm->ML_mypid == 0) {
      printf("ML_Reitzinger_Check_Hierarchy: ML_INCREASING is not supported ");
      printf(" at this time.  Not checking hierarchy.\n");
    }
    return 1;
  }

  /* All diagnostic output comes from process 0 only.  The banner used to be */
  /* printed by every process, unlike the other messages in this routine.    */
  verbose = (ML_Get_PrintLevel() > 5) && (ml->comm->ML_mypid == 0);

  if (verbose) {
    printf("ML_Reitzinger_Check_Hierarchy: Checking null space\n");
  }

  for (i=finest_level; i>coarsest_level; i--) {

     Amat = ml->Amat+i;
     Tmat = Tmat_array[i];

     /* normalized random vector */
     randvec = (double *) ML_allocate(Tmat->invec_leng * sizeof(double) );
     ML_random_vec(randvec,Tmat->invec_leng, ml->comm);
     dnorm = sqrt( ML_gdot(Tmat->invec_leng, randvec, randvec, ml->comm) );
     for (j=0; j<Tmat->invec_leng; j++) randvec[j] /=  dnorm;

     /* result holds T*v and is sized as the input of Amat; result1 holds */
     /* Amat*(T*v).                                                       */
     result = (double *) ML_allocate(Amat->invec_leng * sizeof(double) );
     result1 = (double *) ML_allocate(Amat->outvec_leng * sizeof(double) );

     ML_Operator_Apply(Tmat, Tmat->invec_leng, randvec,
                       Tmat->outvec_leng, result);
     ML_Operator_Apply(Amat, Amat->invec_leng, result,
                       Amat->outvec_leng, result1);

     /* ML_gdot is called by every process; only the printing is restricted. */
     dnorm = sqrt( ML_gdot(Amat->outvec_leng, result1, result1, ml->comm) );
     if (verbose) {
       printf("Level %d: for random v,  ||S*T*v|| = %15.10e\n",i,dnorm);
     }

     ML_free(randvec);
     ML_free(result);
     ML_free(result1);
  }
  if (verbose) printf("\n");

  return 0;

}
Ejemplo n.º 2
0
// ====================================================================== 
int GetPrintLevel()
{
  // A process whose GetMyPID() is non-zero always reports 0; the remaining
  // process forwards whatever ML_Get_PrintLevel() returns.
  return GetMyPID() ? 0 : ML_Get_PrintLevel();
}
Ejemplo n.º 3
0
// ================================================ ====== ==== ==== == = 
/*
 * Creates and configures an ML aggregate object from the parameter list and
 * runs ML_Aggregate_Coarsen on TMT_ML.
 *
 *   MLAggr         [out] aggregate object created by ML_Aggregate_Create and
 *                        configured from the "aggregation: ..." options
 *   P              [out] operator created with ML_Operator_Create and handed
 *                        to ML_Aggregate_Coarsen
 *   TMT_ML         [in]  operator handed to ML_Aggregate_Coarsen
 *   NumAggregates  [out] value returned by ML_Aggregate_Coarsen on this process
 *
 * Returns 0 on success.  Failures are reported through ML_CHK_ERR with code
 * -2 (no aggregates found) or -5 (no prolongator); presumably the macro
 * returns that code to the caller -- confirm against its definition.
 */
int ML_Epetra::FaceMatrixFreePreconditioner::NodeAggregate(ML_Aggregate_Struct *&MLAggr,ML_Operator *&P,ML_Operator* TMT_ML,int &NumAggregates){
  /* Pull Teuchos Options */
  string CoarsenType = List_.get("aggregation: type", "Uncoupled");
  double Threshold   = List_.get("aggregation: threshold", 0.0);
  int    NodesPerAggr = List_.get("aggregation: nodes per aggregate",
                                  ML_Aggregate_Get_OptimalNumberOfNodesPerAggregate());

  string PrintMsg_ = "FMFP (Level 0): ";

  ML_Aggregate_Create(&MLAggr);
  ML_Aggregate_Set_MaxLevels(MLAggr, 2);
  ML_Aggregate_Set_StartLevel(MLAggr, 0);
  ML_Aggregate_Set_Threshold(MLAggr, Threshold);
  ML_Aggregate_Set_MaxCoarseSize(MLAggr,1);
  MLAggr->cur_level = 0;
  ML_Aggregate_Set_Reuse(MLAggr);
  MLAggr->keep_agg_information = 1;
  P = ML_Operator_Create(ml_comm_);

  /* Process Teuchos Options */
  if (CoarsenType == "Uncoupled")
    ML_Aggregate_Set_CoarsenScheme_Uncoupled(MLAggr);
  else if (CoarsenType == "Uncoupled-MIS"){
    ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr);
  }
  else if (CoarsenType == "METIS"){
    ML_Aggregate_Set_CoarsenScheme_METIS(MLAggr);
    ML_Aggregate_Set_NodesPerAggr(0, MLAggr, 0, NodesPerAggr);
  }/*end if*/
  else {
    /* Unknown scheme names fall back to Uncoupled-MIS after a warning on process 0. */
    if(!Comm_->MyPID()) printf("FMFP: Unsupported (1,1) block aggregation type(%s), resetting to uncoupled-mis\n",CoarsenType.c_str());
    ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr);
  }

  /* Aggregate Nodes */
  /* The ML print level is forced to 10 for the coarsening call only and */
  /* restored to its previous value right afterwards.                     */
  int printlevel=ML_Get_PrintLevel();
  ML_Set_PrintLevel(10);
  NumAggregates = ML_Aggregate_Coarsen(MLAggr, TMT_ML, &P, ml_comm_);
  ML_Set_PrintLevel(printlevel);

  if (NumAggregates == 0){
    cerr << "Found 0 aggregates, perhaps the problem is too small." << endl;
    ML_CHK_ERR(-2);
  }/*end if*/
  else if(very_verbose_) {
    /* P is only validated at the end of this routine, so it must not be  */
    /* dereferenced blindly here; -1 is printed when no operator exists.  */
    const int p_invec_leng = (P != 0) ? P->invec_leng : -1;
    printf("[%d] FMFP: %d aggregates created invec_leng=%d\n",Comm_->MyPID(),NumAggregates,p_invec_leng);
  }

  /* The global sum is taken before the P check so that a process with a */
  /* missing P still takes part in the SumAll call.                      */
  int globalAggs;
  Comm_->SumAll(&NumAggregates,&globalAggs,1);
  if( verbose_ && !Comm_->MyPID()) {
    std::cout << PrintMsg_ << "Aggregation threshold = " << Threshold << std::endl;
    std::cout << PrintMsg_ << "Global aggregates = " << globalAggs << std::endl;
    //ML_Aggregate_Print_Complexity(MLAggr);
  }


  if(P==0) {fprintf(stderr,"%s","ERROR: No tentative prolongator found\n");ML_CHK_ERR(-5);}
  return 0;
}
Ejemplo n.º 4
0
// Emits the progress line "Amesos (level <level>) : Building <what>" on
// process 0, and only when the ML print level is greater than 2.
static void print_out(const Epetra_Comm& Comm, const int level, const char* what)
{
  // Nothing to report on other processes or at low verbosity.
  if (Comm.MyPID() != 0 || ML_Get_PrintLevel() <= 2)
    return;

  // TFLOP builds go through printf, everything else through std::cout.
#ifdef TFLOP
  printf("Amesos (level %d) : Building %s\n", level, what);
#else
  std::cout << "Amesos (level " << level << ") : Building " << what << "\n";
#endif
}
Ejemplo n.º 5
0
void ML_rap(ML_Operator *Rmat, ML_Operator *Amat,
            ML_Operator *Pmat, ML_Operator *Result, int matrix_type)
{
  int         max_per_proc, i, j, N_input_vector;
   ML_Operator *APmat, *RAPmat, *Pcomm, *RAPcomm, *APcomm, *AP2comm, *tptr;
   ML_CommInfoOP *getrow_comm;
   double      *scales = NULL;
#  ifdef ML_TIMING
   double tpre,tmult,tpost,ttotal;
#  endif

   /* Check that N_input_vector is reasonable */

#  ifdef ML_TIMING
   tpre = GetClock();
   ttotal = GetClock();
#  endif

   N_input_vector = Pmat->invec_leng;
   getrow_comm = Pmat->getrow->pre_comm;
   if ( getrow_comm != NULL) {
      for (i = 0; i < getrow_comm->N_neighbors; i++) {
         for (j = 0; j < getrow_comm->neighbors[i].N_send; j++) {
            if (getrow_comm->neighbors[i].send_list[j] >= N_input_vector) {
              printf("(%d) Error: N_input_vector (%d) argument to rap() is not \n", Amat->comm->ML_mypid,N_input_vector);
              printf("(%d) Error: larger than %dth element (%d) sent to node %d\n", Amat->comm->ML_mypid,j+1,
                     getrow_comm->neighbors[i].send_list[j],
                     getrow_comm->neighbors[i].ML_id);
printf("(%d) Error: Amat(%d,%d)  Rmat(%d,%d)  Pmat(%d,%d)\n",
                Amat->comm->ML_mypid,
                Amat->outvec_leng,Amat->invec_leng,
                Rmat->outvec_leng,Rmat->invec_leng,
                Pmat->outvec_leng,Pmat->invec_leng);
fflush(stdout);
              exit(1);
            }
         }
      }
   }


   ML_create_unique_col_id(N_input_vector, &(Pmat->getrow->loc_glob_map),
                           getrow_comm, &max_per_proc, Pmat->comm);
   Pmat->getrow->use_loc_glob_map = ML_YES;



   if (Amat->getrow->pre_comm != NULL)
      ML_exchange_rows( Pmat, &Pcomm, Amat->getrow->pre_comm);
   else Pcomm = Pmat;

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : A * P begins...\n");
#endif

#  ifdef ML_TIMING
   tpre = GetClock() - tpre;
   tmult = GetClock();
#  endif

   ML_matmat_mult(Amat, Pcomm , &APmat);

#  ifdef ML_TIMING
   tmult = GetClock() - tmult;
   tpost = GetClock();
#  endif

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : A * P ends.\n");
#endif

   ML_free(Pmat->getrow->loc_glob_map); Pmat->getrow->loc_glob_map = NULL;
   Pmat->getrow->use_loc_glob_map = ML_NO;
   if (Amat->getrow->pre_comm != NULL) {
      tptr = Pcomm;
      while ( (tptr!= NULL) && (tptr->sub_matrix != Pmat))
         tptr = tptr->sub_matrix;
      if (tptr != NULL) tptr->sub_matrix = NULL;
      ML_RECUR_CSR_MSRdata_Destroy(Pcomm);
      ML_Operator_Destroy(&Pcomm);
   }

   if (Amat->getrow->post_comm != NULL) {
      ML_exchange_rows(APmat, &APcomm, Amat->getrow->post_comm);
   }
   else APcomm = APmat;

   /* Take into account any scaling in Amat */

   if (Rmat->from != NULL)
      ML_DVector_GetDataPtr(Rmat->from->Amat_Normalization,&scales);
   if (scales != NULL)
      ML_Scale_CSR(APcomm, scales, 0);

   if (Rmat->getrow->pre_comm != NULL)
      ML_exchange_rows( APcomm, &AP2comm, Rmat->getrow->pre_comm);
   else AP2comm = APcomm;

#  ifdef ML_TIMING
   tpost = GetClock() - tpost;
   if ( Pmat->comm->ML_mypid == 0 && ML_Get_PrintLevel() > 5) {
     int level=-1;
     if (Amat->from != NULL)
       level = Amat->from->levelnum-1;
     printf("Timing summary (in seconds) for product RAP on level %d\n", level);
     printf("     (level %d) RAP right: pre-multiply communication time    = %3.2e\n", level, tpre);
     printf("     (level %d) RAP right: multiply time                      = %3.2e\n", level, tmult);
     printf("     (level %d) RAP right: post-multiply communication time   = %3.2e\n", level, tpost);
   }
#  endif

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : R * AP begins...\n");
#endif

#  ifdef ML_TIMING
   tmult = GetClock();
#  endif
   ML_matmat_mult(Rmat,AP2comm, &RAPmat);

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : R * AP ends.\n");
#endif

   ML_RECUR_CSR_MSRdata_Destroy(AP2comm);
   ML_Operator_Destroy(&AP2comm);

#  ifdef ML_TIMING
   tmult = GetClock()-tmult;
   tpost = GetClock();
#  endif

   if (Rmat->getrow->post_comm != NULL)
      ML_exchange_rows( RAPmat, &RAPcomm, Rmat->getrow->post_comm);
   else RAPcomm = RAPmat;

   scales = NULL;
   if (Rmat->to != NULL)
      ML_DVector_GetDataPtr(Rmat->to->Amat_Normalization,&scales);
   if (scales != NULL)
      ML_Scale_CSR(RAPcomm, scales, 1);

   RAPcomm->num_PDEs = Amat->num_PDEs;
   RAPcomm->num_rigid = Amat->num_rigid;
   if (matrix_type == ML_MSR_MATRIX)
     ML_back_to_local(RAPcomm, Result, max_per_proc);
   else if (matrix_type == ML_CSR_MATRIX)
     ML_back_to_csrlocal(RAPcomm, Result, max_per_proc);
   else if (matrix_type == ML_EpetraCRS_MATRIX)
#ifdef ML_WITH_EPETRA
     ML_back_to_epetraCrs(RAPcomm, Result, Rmat, Pmat);
#else
     pr_error("ML_RAP: ML_EpetraCRS_MATRIX requires epetra to be compiled in.\n");
#endif
   else pr_error("ML_RAP: Unknown matrix type\n");
Ejemplo n.º 6
0
/*
 * Gather memory statistics for this process and print a min/avg/max summary.
 *
 * fmt and the variable arguments are formatted printf-style into a static
 * label that is printed with the summary.  Special cases:
 *   - fmt == NULL : nothing is measured; the current label is returned.
 *   - fmt == ""   : the measurements are taken (which initializes the cached
 *                   total memory) but nothing is printed.
 *
 * The numbers come from heap_info() on ML_TFLOP builds, otherwise from
 * /proc/meminfo when it can be opened, otherwise from mallinfo().  With
 * ML_MPI the per-process values are reduced onto rank 0 of MPI_COMM_WORLD.
 * The summary is printed where id == 0 and the ML print level is above 0.
 *
 * Returns the static label string, or NULL when the label cannot be
 * allocated or when the library is built without ML_MEMORY_CHK.
 */
char * ML_memory_check(char *fmt, ... )
{
#ifdef ML_MEMORY_CHK
   size_t fragments=0;
   int total_free=0, largest_free=0, total_used=0;
   int total_swap=0, total_swap_free=0, total_swap_used=0;
   static double start_time = -1.;
   double elapsed_time;
   int id, nnodes, i;
   ml_IntLoc isrcvec[ML_NIntStats],imaxvec[ML_NIntStats],
        iminvec[ML_NIntStats];
   int isrcvec_copy[ML_NIntStats];
   int iavgvec[ML_NIntStats];
   ml_DblLoc dsrcvec[ML_NDblStats],dmaxvec[ML_NDblStats],
        dminvec[ML_NDblStats];
   double dsrcvec_copy[ML_NDblStats];
   double davgvec[ML_NDblStats];
   static char *ml_memory_label = NULL;
   va_list ap;
#ifdef  ML_TFLOP
   unsigned long ultotal_free=0, ullargest_free=0, ultotal_used=0;
#else
   struct mallinfo M;
   static int ml_total_mem = 0;
#endif
   FILE *fid;
#  define ml_meminfo_size 23
#  define ml_memory_label_size 200
   int haveMemInfo=0, overflowDetected = 0;
   char method[80] = "";  /* stays empty if no branch below names a method */
   int mypid=0;

  /* allocate space for string that is printed with memory information */

  if (ml_memory_label == NULL) {
    ml_memory_label = (char *) malloc(sizeof(char)*ml_memory_label_size);
                                    /* THIS MALLOC NEEDS TO STAY A */
                                    /* MALLOC AND NOT AN ML_ALLOCATE */
    if (ml_memory_label == NULL) return(NULL);
    ml_memory_label[0] = '\0';
  }

  /* if fmt is NULL just return the current string associated with */
  /* the memory printing. The idea is that a low level function    */
  /* can use this to get the string, append any additional info    */
  /* and use this when it invokes this routine a second time.      */
  if (fmt == NULL) return(ml_memory_label);

  /* Take variable argument and transform it to the string that is */
  /* printed with memory statistics.  The write is bounded by the  */
  /* label size, so an over-long label is truncated instead of     */
  /* overrunning the buffer.                                       */

  va_start(ap, fmt);
  vsnprintf(ml_memory_label, ml_memory_label_size, fmt, ap);
  va_end(ap);


  /* elapsed time is measured from the first call that gets this far */
  elapsed_time = GetClock();
  if (start_time == -1.) start_time = elapsed_time;
  elapsed_time = elapsed_time - start_time;

#ifdef ML_TFLOP
   /* Memory statistics for Red Storm.  FYI, heap_info returns bytes. */
#ifndef NO_HEAPINFO
   heap_info(&fragments, &ultotal_free, &ullargest_free, &ultotal_used);
#ifdef ML_MPI
   MPI_Comm_rank(MPI_COMM_WORLD,&mypid);
#endif
   total_free=(int) (ultotal_free / (1024*1024));
   largest_free= (int) (ullargest_free / (1024*1024));
   total_used = (int) (ultotal_used / (1024*1024));
   sprintf(method,"Using heap_info()");
#else
   total_free=0;
   largest_free=0;
   total_used=0;
#endif
#else
   /*
      Memory statistics for all other platforms, via the system call mallinfo()
      and reading file /proc/meminfo, which is available under most Linux OS's.
   */

   M = mallinfo();

   fid = fopen("/proc/meminfo","r");
   if (fid != NULL) {
     char str[80], units[10];
     int k;
     /* Only the first ml_meminfo_size entries are examined.  The field */
     /* widths keep an unexpected token from overrunning str or units.  */
     for (i=0; i< ml_meminfo_size; i++) {
       if (fscanf(fid,"%79s%d%9s", str, &k,units) == 3) {
         if (strcmp(str,"MemTotal:") == 0 && (ml_total_mem==0))
            ml_total_mem = k/1024;
         if (strcmp(str,"MemFree:") == 0)  {total_free = k/1024; }
         if (strcmp(str,"SwapTotal:") == 0)  {total_swap = k/1024; }
         if (strcmp(str,"SwapFree:") == 0)  {total_swap_free = k/1024; }
       }
     }
     fclose(fid);
     total_used = ml_total_mem - total_free;
     total_swap_used = total_swap - total_swap_free;
     sprintf(method,"Using /proc/meminfo");
     haveMemInfo = 1;
   }

   /* If /proc/meminfo doesn't exist, use mallinfo() instead. */
   if ( !haveMemInfo )
   {
     if (ml_total_mem == 0) ml_total_mem = ML_MaxAllocatableSize();
     if (M.hblkhd < 0) { /* try to fix overflow */
       double delta = fabs(((double) INT_MIN) - ((double) M.hblkhd)) + 1;
       total_used = (int) ( (((double) INT_MAX) + delta) / (1024*1024) );
       overflowDetected = 1;
     }
     /* Ignore this field upon overflow because I don't know how to handle it */
     if (M.uordblks > 0) total_used += M.uordblks / (1024*1024);
     total_free = ml_total_mem - total_used;
     sprintf(method,"Using mallinfo()");
   }
   fragments = M.ordblks + M.hblks;

   largest_free = -1;
#endif /*ifdef ML_TFLOP*/

   /* Only print if fmt string is not empty */
   /* This allows for an initialization of  */
   /* ml_total_mem without any printing     */
   if (strlen(fmt) == 0)    return(ml_memory_label);


   /*isrcvec[0].value = fragments; */
   isrcvec[0].value = 0;
   isrcvec[1].value = total_free;
   isrcvec[2].value = largest_free;
   isrcvec[3].value = total_used;
   isrcvec[4].value = total_free + total_used;
   /* Percentages are stored in tenths of a percent.  A zero denominator  */
   /* (no memory figures, or no swap configured) reports 0 rather than    */
   /* converting an infinite or NaN quotient to int.                      */
   isrcvec[5].value = 0;
   if (total_free + total_used != 0)
      isrcvec[5].value = (int) ( ((double)total_used*1000) /
                           ((double)(total_free+total_used)) );
   isrcvec[6].value = total_swap_free;
   isrcvec[7].value = total_swap_used;
   isrcvec[8].value = total_swap;
   isrcvec[9].value = 0;
   if (total_swap != 0)
      isrcvec[9].value = (int) ( ((double)total_swap_used*1000) /
                           ((double)(total_swap)) );
   dsrcvec[0].value = elapsed_time;
   dsrcvec[1].value = fragments;

#ifdef ML_MPI
   for (i =0; i < ML_NIntStats; i++)
      MPI_Comm_rank(MPI_COMM_WORLD,&(isrcvec[i].rank));
   for (i =0; i < ML_NDblStats; i++)
      MPI_Comm_rank(MPI_COMM_WORLD,&(dsrcvec[i].rank));
#endif

   for (i =0; i < ML_NIntStats; i++) isrcvec_copy[i] = isrcvec[i].value;
   for (i =0; i < ML_NDblStats; i++) dsrcvec_copy[i] = dsrcvec[i].value;

   nnodes = 1;
   id = 0;
#ifdef ML_MPI
   MPI_Comm_rank(MPI_COMM_WORLD,&id);
   MPI_Comm_size(MPI_COMM_WORLD,&nnodes);
   MPI_Reduce(isrcvec,imaxvec,ML_NIntStats,MPI_2INT,MPI_MAXLOC,0,MPI_COMM_WORLD);
   MPI_Reduce(isrcvec,iminvec,ML_NIntStats,MPI_2INT,MPI_MINLOC,0,MPI_COMM_WORLD);
   MPI_Reduce(isrcvec_copy,iavgvec,ML_NIntStats,MPI_INT,MPI_SUM,0,MPI_COMM_WORLD);
   MPI_Reduce(dsrcvec,dmaxvec,ML_NDblStats,MPI_DOUBLE_INT,MPI_MAXLOC,0,MPI_COMM_WORLD);
   MPI_Reduce(dsrcvec,dminvec,ML_NDblStats,MPI_DOUBLE_INT,MPI_MINLOC,0,MPI_COMM_WORLD);
   MPI_Reduce(dsrcvec_copy,davgvec,ML_NDblStats,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
   /* The receive slot is only filled on the root, so seed it with the */
   /* local flag to keep the copy below well defined on other ranks.   */
   i = overflowDetected;
   MPI_Reduce(&overflowDetected,&i,1,MPI_INT,MPI_MAX,0,MPI_COMM_WORLD);
   overflowDetected = i;
#else
   /* Serial build: this process is the min, the max and the average, */
   /* and the rank columns of the report refer to process id (0).     */
   for (i =0; i < ML_NIntStats; i++) {
     imaxvec[i].value = isrcvec[i].value;
     imaxvec[i].rank  = id;
     iminvec[i].value = isrcvec[i].value;
     iminvec[i].rank  = id;
     iavgvec[i] = isrcvec[i].value;
   }
   for (i =0; i < ML_NDblStats; i++) {
     dmaxvec[i].value = dsrcvec[i].value;
     dmaxvec[i].rank  = id;
     dminvec[i].value = dsrcvec[i].value;
     dminvec[i].rank  = id;
     davgvec[i] = dsrcvec[i].value;
   }
#endif
/* uncomment lines below if you want individual processor information */
/*
   printf("%s(%d): blks = %ld, free = %ld, max free = %ld, used = %ld, total = %ld, %% used = %e, time = %e\n",
	  ml_memory_label,id,fragments, total_free, largest_free, total_used,
	  total_free+total_used,
          ((double)total_used)/((double)(total_free+total_used)),elapsed_time);
*/

   if (id == 0 && ML_Get_PrintLevel() > 0) {
     /* the reduced sums become averages over the participating processes */
     for (i =0; i < ML_NIntStats; i++)
       iavgvec[i] = (int) (iavgvec[i]/((double) nnodes));
     for (i =0; i < ML_NDblStats; i++)
       davgvec[i] = davgvec[i] / nnodes;
     printf("-------------------------------------------------------------\n");
     printf("Summary Heap data (Mbytes) at %s\n",ml_memory_label);
     printf("%s\n",method);
     if (overflowDetected)
        printf("*WARNING* mallinfo() counter overflow detected\n");
     printf("                       avg           min             max\n");
     printf("-------------------------------------------------------------\n");
     printf(" blks       %11d %11d (%5d) %11d (%5d) %s\n",
            (int) davgvec[1], (int) dminvec[1].value, dminvec[1].rank,
            (int) dmaxvec[1].value, dmaxvec[1].rank, ml_memory_label);
     printf(" free       %11d %11d (%5d) %11d (%5d) %s\n",
            iavgvec[1], iminvec[1].value, iminvec[1].rank,
            imaxvec[1].value, imaxvec[1].rank, ml_memory_label);
     /* largest_free is set to -1 when it is not available */
     if (iavgvec[2] != -1)
       printf(" max free   %11d %11d (%5d) %11d (%5d) %s\n",
              iavgvec[2], iminvec[2].value, iminvec[2].rank,
              imaxvec[2].value, imaxvec[2].rank, ml_memory_label);
     printf(" used       %11d %11d (%5d) %11d (%5d) %s\n",
              iavgvec[3], iminvec[3].value, iminvec[3].rank,
              imaxvec[3].value, imaxvec[3].rank, ml_memory_label);
     printf(" total      %11d %11d (%5d) %11d (%5d) %s\n",
              iavgvec[4], iminvec[4].value, iminvec[4].rank,
              imaxvec[4].value, imaxvec[4].rank, ml_memory_label);
     printf(" %% used       %9.1f   %9.1f (%5d)   %9.1f (%5d) %s\n",
            ((double)iavgvec[5])/10., ((double)iminvec[5].value)/10.,
            iminvec[5].rank,
            ((double)imaxvec[5].value)/10., imaxvec[5].rank, ml_memory_label);
     printf(" time         %9.1f   %9.1f (%5d)   %9.1f (%5d) %s\n",
            davgvec[0],dminvec[0].value,dminvec[0].rank,
            dmaxvec[0].value, dmaxvec[0].rank, ml_memory_label);
     if (haveMemInfo) {
       /* the max column reports the rank that owns the maximum */
       printf(" swap free  %11d %11d (%5d) %11d (%5d) %s\n",
              iavgvec[6], iminvec[6].value,iminvec[6].rank,
              imaxvec[6].value, imaxvec[6].rank, ml_memory_label);
       printf(" swap used  %11d %11d (%5d) %11d (%5d) %s\n",
                iavgvec[7], iminvec[7].value, iminvec[7].rank,
                imaxvec[7].value, imaxvec[7].rank, ml_memory_label);
       printf(" total swap %11d %11d (%5d) %11d (%5d) %s\n",
                iavgvec[8], iminvec[8].value, iminvec[8].rank,
                imaxvec[8].value, imaxvec[8].rank, ml_memory_label);
       printf(" %% swap used  %9.1f   %9.1f (%5d)   %9.1f (%5d) %s\n",
              ((double)iavgvec[9])/10., ((double)iminvec[9].value)/10.,
              iminvec[9].rank,
              ((double)imaxvec[9].value)/10., imaxvec[9].rank, ml_memory_label);
     }
   } /*if (id == 0 ... */
   return(ml_memory_label);
#else
   return(NULL);
#endif
} /*ML_memory_check*/
Ejemplo n.º 7
0
int ML_Aggregate_CoarsenUser(ML_Aggregate *ml_ag, ML_Operator *Amatrix, 
			      ML_Operator **Pmatrix, ML_Comm *comm)
{
  unsigned int nbytes, length;
  int     i, j,  k, Nrows, exp_Nrows;
  int     diff_level;
  int     aggr_count, index, mypid, num_PDE_eqns;
  int     *aggr_index = NULL, nullspace_dim;
  int     Ncoarse, count;
  int     *new_ia = NULL, *new_ja = NULL, new_Nrows;
  int     exp_Ncoarse;
  int     *aggr_cnt_array = NULL;
  int     level, index3, max_agg_size;
  int     **rows_in_aggs = NULL, lwork, info;
  double  *new_val = NULL, epsilon;
  double  *nullspace_vect = NULL, *qr_tmp = NULL;
  double  *tmp_vect = NULL, *work = NULL, *new_null = NULL;
  ML_SuperNode          *aggr_head = NULL, *aggr_curr, *supernode;
  struct ML_CSR_MSRdata *csr_data;
  int                   total_nz = 0;
  char str[80];

  int * graph_decomposition = NULL;
  ML_Aggregate_Viz_Stats * aggr_viz_and_stats;
  ML_Aggregate_Viz_Stats * grid_info;
  int Nprocs;
  char * unamalg_bdry = NULL;
  char* label;
  int N_dimensions;
  double* x_coord = NULL;
  double* y_coord = NULL;
  double* z_coord = NULL;

  /* ------------------- execution begins --------------------------------- */

  label =  ML_GetUserLabel();
  sprintf(str, "%s (level %d) :", label, ml_ag->cur_level);

  /* ============================================================= */
  /* get the machine information and matrix references             */
  /* ============================================================= */

  mypid                   = comm->ML_mypid;
  Nprocs                  = comm->ML_nprocs;
  epsilon                 = ml_ag->threshold;
  num_PDE_eqns            = ml_ag->num_PDE_eqns;
  nullspace_dim           = ml_ag->nullspace_dim;
  nullspace_vect          = ml_ag->nullspace_vect;
  Nrows                   = Amatrix->outvec_leng;

  if (mypid == 0 && 5 < ML_Get_PrintLevel()) {
    printf("%s num PDE eqns = %d\n",
           str,
           num_PDE_eqns);
  }

  /* ============================================================= */
  /* check the system size versus null dimension size              */
  /* ============================================================= */

  if ( Nrows % num_PDE_eqns != 0 )
  {
    printf("ML_Aggregate_CoarsenUser ERROR : Nrows must be multiples");
    printf(" of num_PDE_eqns.\n");
    exit(EXIT_FAILURE);
  }
  diff_level = ml_ag->max_levels - ml_ag->cur_level - 1;
  if ( diff_level > 0 ) num_PDE_eqns = nullspace_dim; /* ## 12/20/99 */

  /* ============================================================= */
  /* set up the threshold for weight-based coarsening              */
  /* ============================================================= */

  diff_level = ml_ag->begin_level - ml_ag->cur_level;
  if (diff_level == 0) 
    ml_ag->curr_threshold = ml_ag->threshold;
  epsilon = ml_ag->curr_threshold;
  ml_ag->curr_threshold *= 0.5;

  if (mypid == 0 && 7 < ML_Get_PrintLevel())
    printf("%s current eps = %e\n", str, epsilon);

  epsilon = epsilon * epsilon;

  ML_Operator_AmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);
  Nrows /= num_PDE_eqns;

  exp_Nrows = Nrows;

  /* ********************************************************************** */
  /* allocate memory for aggr_index, which will contain the decomposition   */
  /* ********************************************************************** */

  nbytes = (Nrows*num_PDE_eqns) * sizeof(int);

  if ( nbytes > 0 ) {
    ML_memory_alloc((void**) &aggr_index, nbytes, "ACJ");
    if( aggr_index == NULL ) {
      fprintf( stderr,
              "*ML*ERR* not enough memory for %d bytes\n"
              "*ML*ERR* (file %s, line %d)\n",
              nbytes,
              __FILE__,
              __LINE__ );
      exit( EXIT_FAILURE );
    }
  }
  else              aggr_index = NULL;

  for( i=0 ; i<Nrows*num_PDE_eqns ; i++ ) aggr_index[i] = -1;

  unamalg_bdry = (char *) ML_allocate( sizeof(char) * (Nrows+1) );

  if( unamalg_bdry == NULL ) {
    fprintf( stderr,
            "*ML*ERR* on proc %d, not enough space for %d bytes\n"
            "*ML*ERR* (file %s, line %d)\n",
            mypid,
            (int)sizeof(char) * Nrows,
            __FILE__,
            __LINE__ );
    exit( EXIT_FAILURE );
  }

  N_dimensions = ml_ag->N_dimensions;
  grid_info = (ML_Aggregate_Viz_Stats*) Amatrix->to->Grid->Grid;
  x_coord = grid_info->x;

  if (N_dimensions > 1 && x_coord)
    y_coord = grid_info->y;
  else
    y_coord = 0;
  if (N_dimensions > 2 && x_coord)
    z_coord = grid_info->z;
  else
    z_coord = 0;

  aggr_count = ML_GetUserPartitions(Amatrix,unamalg_bdry,
                                    epsilon,
                                    x_coord,y_coord,z_coord,
                                    aggr_index,&total_nz);

#ifdef ML_MPI
  MPI_Allreduce( &Nrows, &i, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
  MPI_Allreduce( &aggr_count, &j, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
#else
  i = Nrows;
  j = aggr_count;
#endif

  if( mypid == 0 && 7 < ML_Get_PrintLevel() ) {
    printf("%s Using %d (block) aggregates (globally)\n",
           str,
           j );
    printf("%s # (block) aggre/ # (block) rows = %8.5f %% ( = %d / %d)\n",
           str,
           100.0*j/i,
           j, i);
  }

  j = ML_gsum_int( aggr_count, comm );
  if (mypid == 0 && 7 < ML_Get_PrintLevel())  {
    printf("%s %d (block) aggregates (globally)\n",
           str, j );
  }   

  /* ********************************************************************** */
  /* I allocate room to copy aggr_index and pass this value to the user,    */
  /* who will be able to analyze and visualize this after the construction  */
  /* of the levels. This way, the only price we have to pay for stats and   */
  /* viz is essentially a little bit of memory.                             */
  /* this memory will be cleaned with the object ML_Aggregate ml_ag.        */
  /* I set the pointers using the ML_Aggregate_Info structure. This is      */
  /* allocated using ML_Aggregate_Info_Setup(ml,MaxNumLevels)               */
  /* ********************************************************************** */

  if (Amatrix->to->Grid->Grid != NULL) {

    graph_decomposition = (int *)ML_allocate(sizeof(int)*(Nrows+1));
    if( graph_decomposition == NULL ) {
      fprintf( stderr,
              "*ML*ERR* Not enough memory for %d bytes\n"
              "*ML*ERR* (file %s, line %d)\n",
              (int)sizeof(int)*Nrows,
              __FILE__,
              __LINE__ );
      exit( EXIT_FAILURE );
    }

    for( i=0 ; i<Nrows ; i++ ) graph_decomposition[i] = aggr_index[i];

    aggr_viz_and_stats = (ML_Aggregate_Viz_Stats *) (Amatrix->to->Grid->Grid);
    aggr_viz_and_stats->graph_decomposition = graph_decomposition;
    aggr_viz_and_stats->Nlocal = Nrows;
    aggr_viz_and_stats->Naggregates = aggr_count;
    aggr_viz_and_stats->local_or_global = ML_LOCAL_INDICES;
    aggr_viz_and_stats->is_filled = ML_YES;
    aggr_viz_and_stats->Amatrix = Amatrix;
  }

  /* ********************************************************************** */
  /* take the decomposition as created by METIS and form the aggregates     */
  /* ********************************************************************** */

  total_nz = ML_Comm_GsumInt( comm, total_nz);
  i = ML_Comm_GsumInt( comm, Nrows);

  if ( mypid == 0 && 7 < ML_Get_PrintLevel())
    printf("%s Total (block) nnz = %d ( = %5.2f/(block)row)\n",
           str,
           total_nz,1.0*total_nz/i);

  if ( ml_ag->operator_complexity == 0.0 ) {
    ml_ag->fine_complexity = total_nz;
    ml_ag->operator_complexity = total_nz;
  }
  else ml_ag->operator_complexity += total_nz;

  /* fix aggr_index for num_PDE_eqns > 1 */

  for (i = Nrows - 1; i >= 0; i-- ) {
    for (j = num_PDE_eqns-1; j >= 0; j--) {
      aggr_index[i*num_PDE_eqns+j] = aggr_index[i];
    }
  }

  if ( mypid == 0 && 8 < ML_Get_PrintLevel())
  {
    printf("Calling ML_Operator_UnAmalgamateAndDropWeak\n");
    fflush(stdout);
  }

  ML_Operator_UnAmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);

  Nrows      *= num_PDE_eqns;
  exp_Nrows  *= num_PDE_eqns;

  /* count the size of each aggregate */

  aggr_cnt_array = (int *) ML_allocate(sizeof(int)*(aggr_count+1));
  for (i = 0; i < aggr_count ; i++) aggr_cnt_array[i] = 0;
  for (i = 0; i < exp_Nrows; i++) {
    if (aggr_index[i] >= 0) {
      if( aggr_index[i] >= aggr_count ) {
        fprintf( stderr,
                "*ML*WRN* on process %d, something weird happened...\n"
                "*ML*WRN* node %d belong to aggregate %d (#aggr = %d)\n"
                "*ML*WRN* (file %s, line %d)\n",
                comm->ML_mypid,
                i,
                aggr_index[i],
                aggr_count,
                __FILE__,
                __LINE__ );
      } else {
        aggr_cnt_array[aggr_index[i]]++;
      }
    }
  }

  /* ============================================================= */
  /* Form tentative prolongator                                    */
  /* ============================================================= */

  Ncoarse = aggr_count;

  /* ============================================================= */
  /* check and copy aggr_index                                     */
  /* ------------------------------------------------------------- */

  level = ml_ag->cur_level;
  nbytes = (Nrows+1) * sizeof( int );
  ML_memory_alloc((void**) &(ml_ag->aggr_info[level]), nbytes, "AGl");
  count = aggr_count;
  for ( i = 0; i < Nrows; i+=num_PDE_eqns ) 
  {
    if ( aggr_index[i] >= 0 )
    {
      for ( j = 0; j < num_PDE_eqns; j++ ) 
        ml_ag->aggr_info[level][i+j] = aggr_index[i];
      if (aggr_index[i] >= count) count = aggr_index[i] + 1;
    }
    /*else
     *{
     *   printf("%d : CoarsenMIS error : aggr_index[%d] < 0\n",
     *          mypid,i);
     *   exit(1);
     *}*/
  }
  ml_ag->aggr_count[level] = count; /* for relaxing boundary points */ 

  /* ============================================================= */
  /* set up the new operator                                       */
  /* ------------------------------------------------------------- */

  new_Nrows = Nrows;
  exp_Ncoarse = Nrows;

  for ( i = 0; i < new_Nrows; i++ ) 
  {
    if ( aggr_index[i] >= exp_Ncoarse ) 
    {
      printf("*ML*WRN* index out of bound %d = %d(%d)\n",
             i, aggr_index[i], 
             exp_Ncoarse);
    }
  }
  nbytes = ( new_Nrows+1 ) * sizeof(int); 
  ML_memory_alloc((void**)&(new_ia), nbytes, "AIA");
  nbytes = ( new_Nrows+1)  * nullspace_dim * sizeof(int); 
  ML_memory_alloc((void**)&(new_ja), nbytes, "AJA");
  nbytes = ( new_Nrows+1)  * nullspace_dim * sizeof(double); 
  ML_memory_alloc((void**)&(new_val), nbytes, "AVA");
  for ( i = 0; i < new_Nrows*nullspace_dim; i++ ) new_val[i] = 0.0;

  /* ------------------------------------------------------------- */
  /* set up the space for storing the new null space               */
  /* ------------------------------------------------------------- */

  nbytes = (Ncoarse+1) * nullspace_dim * nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&(new_null),nbytes,"AGr");
  if( new_null == NULL ) {
    fprintf( stderr,
            "*ML*ERR* on process %d, not enough memory for %d bytes\n"
            "*ML*ERR* (file %s, line %d)\n",
            mypid,
            nbytes,
            __FILE__,
            __LINE__ );
    exit( EXIT_FAILURE );
  }

  for (i = 0; i < Ncoarse*nullspace_dim*nullspace_dim; i++) 
    new_null[i] = 0.0;

  /* ------------------------------------------------------------- */
  /* initialize the row pointer for the CSR prolongation operator  */
  /* (each row will have at most nullspace_dim nonzero entries)    */
  /* ------------------------------------------------------------- */

  for (i = 0; i <= Nrows; i++) new_ia[i] = i * nullspace_dim;

  /* trying this when a Dirichlet row is taken out */
  j = 0;
  new_ia[0] = 0;
  for (i = 0; i < Nrows; i++) {
    if (aggr_index[i] != -1) j += nullspace_dim;
    new_ia[i+1] = j;
  }

  /* ------------------------------------------------------------- */
  /* generate an array to store which aggregate has which rows.Then*/
  /* loop through the rows of A checking which aggregate each row  */
  /* is in, and adding it to the appropriate spot in rows_in_aggs  */
  /* ------------------------------------------------------------- */

  ML_memory_alloc((void**)&rows_in_aggs,aggr_count*sizeof(int*),"MLs");
  for (i = 0; i < aggr_count; i++) {
    nbytes = aggr_cnt_array[i]+1;
    rows_in_aggs[i] = (int *) ML_allocate(nbytes*sizeof(int));
    aggr_cnt_array[i] = 0;
    if (rows_in_aggs[i] == NULL)  {
      printf("*ML*ERR* couldn't allocate memory in CoarsenMETIS\n");
      exit(1);
    }
  }
  for (i = 0; i < exp_Nrows; i+=num_PDE_eqns) {
    if ( aggr_index[i] >= 0 && aggr_index[i] < aggr_count)
    {
      for (j = 0; j < num_PDE_eqns; j++)
      {
        index = aggr_cnt_array[aggr_index[i]]++; 
        rows_in_aggs[aggr_index[i]][index] = i + j;
      }
    }
  }

  /* ------------------------------------------------------------- */
  /* allocate work arrays for QR factorization                     */
  /* work and lwork are needed for lapack's QR routine.  These     */
  /* settings seemed easiest since I don't quite understand        */
  /* what they do, but may want to do something better here later  */
  /* ------------------------------------------------------------- */

  max_agg_size = 0;
  for (i = 0; i < aggr_count; i++) 
  {
    if (aggr_cnt_array[i] > max_agg_size) max_agg_size = aggr_cnt_array[i];
  }
  nbytes = max_agg_size * nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&qr_tmp, nbytes, "AGu");
  nbytes = nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&tmp_vect, nbytes, "AGv");

  lwork  = nullspace_dim;
  nbytes = nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&work, nbytes, "AGw");

  /* ------------------------------------------------------------- */
  /* perform block QR decomposition                                */
  /* ------------------------------------------------------------- */

  for (i = 0; i < aggr_count; i++) 
  {
    /* ---------------------------------------------------------- */
    /* set up the matrix we want to decompose into Q and R:       */
    /* ---------------------------------------------------------- */

    length = aggr_cnt_array[i];
    if (nullspace_vect == NULL) 
    {
      for (j = 0; j < (int) length; j++)
      {
        index = rows_in_aggs[i][j];

        for (k = 0; k < nullspace_dim; k++)
        {
          if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
            qr_tmp[k*length+j] = 0.;
          else
          {
            if (index % num_PDE_eqns == k) qr_tmp[k*length+j] = 1.0;
            else                           qr_tmp[k*length+j] = 0.0;
          }
        }
      }
    }
    else 
    {
      for (k = 0; k < nullspace_dim; k++)
      {
        for (j = 0; j < (int) length; j++)
        {
          index = rows_in_aggs[i][j];
          if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
            qr_tmp[k*length+j] = 0.;
          else {
            if (index < Nrows) {
              qr_tmp[k*length+j] = nullspace_vect[k*Nrows+index];
            }
            else {
              fprintf( stderr,
                      "*ML*ERR* in QR\n"
                      "*ML*ERR* (file %s, line %d)\n",
                      __FILE__,
                      __LINE__ );
              exit( EXIT_FAILURE );
            }
          }
        }
      }
    }

    /* ---------------------------------------------------------- */
    /* now calculate QR using an LAPACK routine                   */
    /* ---------------------------------------------------------- */

    if (aggr_cnt_array[i] >= nullspace_dim) {

      DGEQRF_F77(&(aggr_cnt_array[i]), &nullspace_dim, qr_tmp, 
                 &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
      if (info != 0)
        pr_error("ErrOr in CoarsenMIS : dgeqrf returned a non-zero %d %d\n",
                 aggr_cnt_array[i],i);

      if (work[0] > lwork) 
      {
        lwork=(int) work[0]; 
        ML_memory_free((void**) &work);
        ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGx");
      }
      else lwork=(int) work[0];

      /* ---------------------------------------------------------- */
      /* the upper triangle of qr_tmp is now R, so copy that into   */
      /* the new nullspace                                          */
      /* ---------------------------------------------------------- */

      for (j = 0; j < nullspace_dim; j++)
        for (k = j; k < nullspace_dim; k++)
          new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] = 
            qr_tmp[j+aggr_cnt_array[i]*k];

      /* ---------------------------------------------------------- */
      /* to get this block of P, need to run qr_tmp through another */
      /* LAPACK function:                                           */
      /* ---------------------------------------------------------- */

      if ( aggr_cnt_array[i] < nullspace_dim ){
        printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
               nullspace_dim);
        printf("ERROR : performing QR on a MxN matrix where M<N.\n");
      }
      DORGQR_F77(&(aggr_cnt_array[i]), &nullspace_dim, &nullspace_dim, 
                 qr_tmp, &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
      if (info != 0) {
        printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
               nullspace_dim);
        pr_error("Error in CoarsenMIS: dorgqr returned a non-zero\n");
      }

      if (work[0] > lwork) 
      {
        lwork=(int) work[0]; 
        ML_memory_free((void**) &work);
        ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGy");
      }
      else lwork=(int) work[0];

      /* ---------------------------------------------------------- */
      /* now copy Q over into the appropriate part of P:            */
      /* The rows of P get calculated out of order, so I assume the */
      /* Q is totally dense and use what I know of how big each Q   */
      /* will be to determine where in ia, ja, etc each nonzero in  */
      /* Q belongs.  If I did not assume this, I would have to keep */
      /* all of P in memory in order to determine where each entry  */
      /* should go                                                  */
      /* ---------------------------------------------------------- */

      for (j = 0; j < aggr_cnt_array[i]; j++)
      {
        index = rows_in_aggs[i][j];

        if ( index < Nrows )
        {
          index3 = new_ia[index];
          for (k = 0; k < nullspace_dim; k++) 
          {
            new_ja [index3+k] = i * nullspace_dim + k;
            new_val[index3+k] = qr_tmp[ k*aggr_cnt_array[i]+j];
          }
        }
        else 
        {
          fprintf( stderr,
                  "*ML*ERR* in QR: index out of bounds (%d - %d)\n",
                  index,
                  Nrows );
        }
      }
    }
    else {
      /* We have a small aggregate such that the QR factorization can not */
      /* be performed. Instead let us copy the null space from the fine   */
      /* into the coarse grid nullspace and put the identity for the      */
      /* prolongator????                                                  */
      for (j = 0; j < nullspace_dim; j++)
        for (k = 0; k < nullspace_dim; k++)
          new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] = 
            qr_tmp[j+aggr_cnt_array[i]*k];
      for (j = 0; j < aggr_cnt_array[i]; j++) {
        index = rows_in_aggs[i][j];
        index3 = new_ia[index];
        for (k = 0; k < nullspace_dim; k++) {
          new_ja [index3+k] = i * nullspace_dim + k;
          if (k == j) new_val[index3+k] = 1.;
          else new_val[index3+k] = 0.;
        }
      }
    }


  }

  ML_Aggregate_Set_NullSpace(ml_ag, num_PDE_eqns, nullspace_dim, 
                             new_null, Ncoarse*nullspace_dim);
  ML_memory_free( (void **) &new_null);

  /* ------------------------------------------------------------- */
  /* set up the csr_data data structure                            */
  /* ------------------------------------------------------------- */

  ML_memory_alloc((void**) &csr_data, sizeof(struct ML_CSR_MSRdata),"CSR");
  csr_data->rowptr  = new_ia;
  csr_data->columns = new_ja;
  csr_data->values  = new_val;

  ML_Operator_Set_ApplyFuncData( *Pmatrix, nullspace_dim*Ncoarse, Nrows, 
                                csr_data, Nrows, NULL, 0);
  (*Pmatrix)->data_destroy = ML_CSR_MSR_ML_memorydata_Destroy;
  (*Pmatrix)->getrow->pre_comm = ML_CommInfoOP_Create();
  (*Pmatrix)->max_nz_per_row = 1;

  ML_Operator_Set_Getrow((*Pmatrix), Nrows, CSR_getrow);
  ML_Operator_Set_ApplyFunc((*Pmatrix), CSR_matvec);
  (*Pmatrix)->max_nz_per_row = 1;
  /* this must be set so that the hierarchy generation does not abort early
     in adaptive SA */
  (*Pmatrix)->num_PDEs = nullspace_dim;

  /* ------------------------------------------------------------- */
  /* clean up                                                      */
  /* ------------------------------------------------------------- */

  ML_free(unamalg_bdry);
  ML_memory_free((void**)&aggr_index);
  ML_free(aggr_cnt_array);
  for (i = 0; i < aggr_count; i++) ML_free(rows_in_aggs[i]);
  ML_memory_free((void**)&rows_in_aggs);
  ML_memory_free((void**)&qr_tmp);
  ML_memory_free((void**)&tmp_vect);
  ML_memory_free((void**)&work);

  aggr_curr = aggr_head;
  while ( aggr_curr != NULL ) 
  {
    supernode = aggr_curr;
    aggr_curr = aggr_curr->next;
    if ( supernode->length > 0 ) ML_free( supernode->list );
    ML_free( supernode );
  }

  return Ncoarse*nullspace_dim;

} /* ML_Aggregate_CoarsenUser */
Ejemplo n.º 8
0
/*
 * Build an Amesos direct solver for the matrix stored at ml->Amat[curr_level]
 * and hand it back through Amesos_Handle->A_Base.
 *
 *   ml            - hierarchy owning the operator to factor
 *   curr_level    - index into ml->Amat of the operator to factor
 *   choice        - one of the ML_AMESOS_* solver selectors; unknown values
 *                   are treated like ML_AMESOS_KLU
 *   MaxProcs      - forwarded to Amesos as the "MaxProcs" parameter
 *   AddToDiag     - forwarded to Amesos as the "AddToDiag" parameter
 *   Amesos_Handle - receives the solver pointer (A_Base) and, in MPI builds,
 *                   the freeMpiComm flag
 *
 * Always returns 0.  The process is terminated with exit(EXIT_FAILURE) if no
 * Amesos solver can be created or if NumericFactorization() throws.  A
 * non-zero return code from NumericFactorization() is only reported (and the
 * matrix dumped when EpetraExt is available); the solver is still stored.
 *
 * Processes that own no rows (MPI builds, non-SuperLU_DIST solvers) do not
 * build a solver and get A_Base == 0.
 */
int ML_Amesos_Gen(ML *ml, int curr_level, int choice, int MaxProcs, 
                  double AddToDiag, Amesos_Handle_Type *Amesos_Handle)
{
# ifdef ML_MPI
  MPI_Comm  amesosComm;
# else
  int amesosComm=1; //TODO are these going to cause a problem w/o MPI?
# endif

  ML_Operator *Ke = &(ml->Amat[curr_level]);

  /* Sanity Checking - Zero Diagonals.                                   */
  /* For MSR-backed operators the first outvec_leng entries of `values`  */
  /* are overwritten with 1.0 wherever they are exactly zero.            */
  if (Ke->getrow->func_ptr == MSR_getrows) {
    struct ML_CSR_MSRdata * input_matrix = (struct ML_CSR_MSRdata *) ML_Get_MyGetrowData(Ke);
    double *val  = input_matrix->values;
    int N = Ke->outvec_leng;
    for(int i=0;i<N;i++)
      if(val[i] == 0.0)
        val[i]=1.0;
  }

  /* Decide which processes take part in the factorization.  For every   */
  /* solver except SuperLU_DIST the communicator is split so that only   */
  /* processes owning rows/columns participate; that new communicator is */
  /* flagged (freeMpiComm) so it can be released later.                  */
  int hasRows=1;
  if(choice != ML_AMESOS_SUPERLUDIST) {
#   ifdef ML_MPI
    hasRows = MPI_UNDEFINED;
    if (Ke->invec_leng > 0 || Ke->outvec_leng > 0) hasRows = 1;
    MPI_Comm_split(Ke->comm->USR_comm,hasRows,Ke->comm->ML_mypid,&amesosComm);
    Amesos_Handle->freeMpiComm = 1;
#   endif
  }
  else {
    amesosComm=Ke->comm->USR_comm;
    Amesos_Handle->freeMpiComm = 0;
  }

  if (hasRows == 1) {
    ML_Epetra::RowMatrix* Amesos_Matrix = 
      new ML_Epetra::RowMatrix(Ke, 0, false, amesosComm);
    assert (Amesos_Matrix != 0);
    
    int NumGlobalRows = Amesos_Matrix->NumGlobalRows();
    int NumGlobalNonzeros = Amesos_Matrix->NumGlobalNonzeros();

    // sanity check, coarse matrix should not be empty
    if( NumGlobalRows == 0 && Amesos_Matrix->Comm().MyPID() == 0 ) {
      std::cerr << std::endl;
      std::cerr << "ERROR : Coarse matrix has no rows!" << std::endl;
      std::cerr << std::endl;
    }
    if( NumGlobalNonzeros == 0 && Amesos_Matrix->Comm().MyPID() == 0 ) {
      std::cerr << std::endl;
      std::cerr << "ERROR : Coarse matrix has no nonzero elements!" << std::endl;
      std::cerr << std::endl;
    }

#   ifdef TFLOP
    if( Amesos_Matrix->Comm().MyPID() == 0 && ML_Get_PrintLevel() > 2 ) {
      printf("Amesos (level %d) : NumGlobalRows = %d\n",curr_level,NumGlobalRows);
      printf("Amesos (level %d) : NumGlobalNonzeros = %d\n",curr_level,NumGlobalNonzeros);
      /* "%%" prints a literal percent sign; the row count is squared in */
      /* double precision so it cannot overflow an int.                  */
      printf("Amesos (level %d) : fill-in = %f %%\n",curr_level,
             100.0*NumGlobalNonzeros/(1.0*NumGlobalRows*NumGlobalRows));
    }
#   else
    if( Amesos_Matrix->Comm().MyPID() == 0 && ML_Get_PrintLevel() > 2 ) {
      std::cout << "Amesos (level " << curr_level
         << ") : NumGlobalRows = "
         << NumGlobalRows << std::endl;
      std::cout << "Amesos (level " << curr_level
         << ") : NumGlobalNonzeros = "
         << NumGlobalNonzeros << std::endl;
      std::cout << "Amesos (level " << curr_level
         << ") : Fill-in = "
         << 100.0*NumGlobalNonzeros/(1.0*NumGlobalRows*NumGlobalRows)
         << " %" << std::endl;
    }

#   endif
    
    Epetra_LinearProblem *Amesos_LinearProblem = new Epetra_LinearProblem;
    Amesos_LinearProblem->SetOperator(Amesos_Matrix); 

    Teuchos::ParameterList AmesosList;

    AmesosList.set("MaxProcs",MaxProcs);
    AmesosList.set("AddToDiag", AddToDiag);

    if( ML_Get_PrintLevel() > 10 ) {
      AmesosList.set("PrintTiming",true);
      AmesosList.set("OutputLevel",1);
    }

    // don't use iterative refinement for Superludist only
    Teuchos::ParameterList & SuperludistList = AmesosList.sublist("Superludist");
    SuperludistList.set("IterRefine","NO");

    Amesos_BaseSolver* A_Base;
    Amesos A_Factory;
    const Epetra_Comm& Comm = Amesos_Matrix->Comm();

    switch (choice) {

    case ML_AMESOS_LAPACK:
      print_out(Comm, curr_level, "LAPACK");
      A_Base = A_Factory.Create("Amesos_Lapack", *Amesos_LinearProblem);
      break;

    case ML_AMESOS_UMFPACK:
      print_out(Comm, curr_level, "UMFPACK");
      /* Request UMFPACK itself; if it was not compiled in, Create()     */
      /* yields 0 and the KLU/LAPACK fallback below takes over.          */
      A_Base = A_Factory.Create("Amesos_Umfpack", *Amesos_LinearProblem);
      break;

    case ML_AMESOS_SUPERLUDIST:
      print_out(Comm, curr_level, "SuperLU_DIST");
      A_Base = A_Factory.Create("Amesos_Superludist", *Amesos_LinearProblem);
      
      break;

    case ML_AMESOS_SUPERLU:
      print_out(Comm, curr_level, "SuperLU");
      A_Base = A_Factory.Create("Amesos_Superlu", *Amesos_LinearProblem);
      
      break;

    case ML_AMESOS_SCALAPACK:
      print_out(Comm, curr_level, "ScaLAPACK");
      A_Base = A_Factory.Create("Amesos_Scalapack", *Amesos_LinearProblem);
      
      break;

    case ML_AMESOS_MUMPS:
      print_out(Comm, curr_level, "MUMPS");
      A_Base = A_Factory.Create("Amesos_Mumps", *Amesos_LinearProblem);
      break;

    case ML_AMESOS_KLU:
    default:
      print_out(Comm, curr_level, "KLU");
      A_Base = A_Factory.Create("Amesos_Klu", *Amesos_LinearProblem);
      break;
    }

    // may happen the desired solver is not available. KLU is almost
    // always compiled, so try this first. If not, then LAPACK is
    // the last choice before quitting
    if (A_Base == 0) 
    {
      if (choice != ML_AMESOS_KLU)
      {
        if (Amesos_Matrix->Comm().MyPID() == 0 && ML_Get_PrintLevel() > 2)
        {
          std::cerr << "Amesos (level " << curr_level
               << ") : This coarse solver is not available." << std::endl;
          std::cerr << "Amesos (level " << curr_level
               << ") : Now re-building with KLU" << std::endl;
        }
        A_Base = A_Factory.Create("Amesos_Klu", *Amesos_LinearProblem);
      }
      if (A_Base == 0) 
      {
        if (Amesos_Matrix->Comm().MyPID() == 0) 
        {
          std::cerr << "Amesos (level " << curr_level
               << ") : This coarse solver is not available." << std::endl;
          std::cerr << "Amesos (level " << curr_level
               << ") : Now re-building with LAPACK" << std::endl;
        }
        A_Base = A_Factory.Create("Amesos_Lapack", *Amesos_LinearProblem);
        if (A_Base == 0) 
        {
          if (Amesos_Matrix->Comm().MyPID() == 0) 
          {
            std::cerr << "*ML*ERR* no Amesos solver is available!" << std::endl;
          }
          exit( EXIT_FAILURE );
        }
      }
    }

    A_Base->SetParameters(AmesosList);

    Epetra_Time Time(Amesos_Matrix->Comm());

    /* Time the numeric factorization; any exception is fatal. */
    Time.ResetStartTime();
    int rv;
    try{rv=A_Base->NumericFactorization();}
    catch(...) {
      if (Amesos_Matrix->Comm().MyPID() == 0) 
        printf("\n*** * ML_Amesos_Gen: exception thrown from Amesos_BaseSolver->NumericFactorization(). * ***\n\n");
      exit( EXIT_FAILURE );
    }
    double Time2 = Time.ElapsedTime();
    
    /* A non-zero return code is reported but not treated as fatal here. */
    if(rv){
      if(!Amesos_Matrix->Comm().MyPID())
       printf("ERROR: Amesos NumericFactorization failed... dumping relevant matrix for post-mortem\n");
#      ifdef HAVE_ML_EPETRAEXT
       EpetraExt::RowMatrixToMatlabFile("amesos-failure.dat",*Amesos_Matrix);
#      endif
    }

    Level__ = -1;

#   ifdef TFLOP
    if( Amesos_Matrix->Comm().MyPID() == 0 && ML_Get_PrintLevel()>2 ) {
      Level__ = curr_level;
      printf("Amesos (level %d) : Time for factorization = %f (s)\n",curr_level,Time2);
    }
#   else
    if( Amesos_Matrix->Comm().MyPID() == 0 && ML_Get_PrintLevel()>2 ) {
      Level__ = curr_level;
      std::cout << "Amesos (level " << curr_level << ") : Time for factorization  = "
         << Time2 << " (s)" << std::endl;
    }
#   endif
    
    // those are very simple timing for solution
    TimeForSolve__ = 0.0;
    NumSolves__ = 0;
    
    Amesos_Handle->A_Base = (void *) A_Base ;

  } //if (hasRows==1)
  else
    Amesos_Handle->A_Base = 0;

  return 0;
} //ML_Amesos_Gen()
Ejemplo n.º 9
0
// ================================================ ====== ==== ==== == =
/*
 * Aggregate the nodes of A using ML and return the resulting prolongator.
 *
 *   A             - matrix to aggregate; wrapped (not copied) in a temporary
 *                   ML_Operator for the duration of the call
 *   List          - options: "ML output"/"output", "aggregation: type",
 *                   "aggregation: threshold",
 *                   "aggregation: nodes per aggregate",
 *                   "aggregation: aux: enable|threshold|max levels"
 *   ml_comm       - ML communicator used to create the operators
 *   PrintMsg      - prefix put in front of every diagnostic line
 *   MLAggr        - [out] newly created aggregate object
 *   P             - [out] newly created operator filled by ML_Aggregate_Coarsen
 *   NumAggregates - [out] value returned by ML_Aggregate_Coarsen
 *
 * Returns 0 on success.  When no aggregates are found the error is reported
 * through ML_CHK_ERR(-2) after the temporary wrapper has been released.
 *
 * Verbosity: output level >= 15 enables both verbose and very_verbose,
 * output level > 5 enables verbose only, anything else is silent.
 */
int ML_Epetra::RefMaxwell_Aggregate_Nodes(const Epetra_CrsMatrix & A, Teuchos::ParameterList & List, ML_Comm * ml_comm, std::string PrintMsg,
					  ML_Aggregate_Struct *& MLAggr,ML_Operator *&P, int &NumAggregates){

  /* Output level */
  bool verbose = false, very_verbose = false;
  int OutputLevel = List.get("ML output", -47);
  if(OutputLevel == -47) OutputLevel = List.get("output", 1);
  /* The branches must be mutually exclusive, otherwise the "> 5" test   */
  /* switches very_verbose back off for levels >= 15.                    */
  if(OutputLevel >= 15)     very_verbose = verbose = true;
  else if(OutputLevel > 5)  verbose = true;

  /* Wrap A in a ML_Operator */
  ML_Operator* A_ML = ML_Operator_Create(ml_comm);
  ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&A),A_ML);

 /* Pull Teuchos Options */
  std::string CoarsenType  = List.get("aggregation: type", "Uncoupled");
  double Threshold    = List.get("aggregation: threshold", 0.0);
  int    NodesPerAggr = List.get("aggregation: nodes per aggregate",
                                  ML_Aggregate_Get_OptimalNumberOfNodesPerAggregate());
  bool UseAux         = List.get("aggregation: aux: enable",false);
  double AuxThreshold = List.get("aggregation: aux: threshold",0.0);
  int  MaxAuxLevels   = List.get("aggregation: aux: max levels",10);


  ML_Aggregate_Create(&MLAggr);
  ML_Aggregate_Set_MaxLevels(MLAggr, 2);
  ML_Aggregate_Set_StartLevel(MLAggr, 0);
  ML_Aggregate_Set_Threshold(MLAggr, Threshold);
  ML_Aggregate_Set_MaxCoarseSize(MLAggr,1);
  MLAggr->cur_level = 0;
  ML_Aggregate_Set_Reuse(MLAggr);
  MLAggr->keep_agg_information = 1;
  P = ML_Operator_Create(ml_comm);

  /* Process Teuchos Options */
  if (CoarsenType == "Uncoupled")
    ML_Aggregate_Set_CoarsenScheme_Uncoupled(MLAggr);
  else if (CoarsenType == "Uncoupled-MIS"){
    ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr);
  }
  else if (CoarsenType == "METIS"){
    ML_Aggregate_Set_CoarsenScheme_METIS(MLAggr);
    ML_Aggregate_Set_NodesPerAggr(0, MLAggr, 0, NodesPerAggr);
  }/*end if*/
  else {
    if(!A.Comm().MyPID()) printf("%s Unsupported (1,1) block aggregation type(%s), resetting to uncoupled-mis\n",PrintMsg.c_str(),CoarsenType.c_str());
    ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr);
  }

  /* Setup Aux Data */
  if(UseAux) {
    A_ML->aux_data->enable=1;
    A_ML->aux_data->threshold=AuxThreshold;
    A_ML->aux_data->max_level=MaxAuxLevels;
    ML_Init_Aux(A_ML,List);
    if(verbose && !A.Comm().MyPID()) {
      printf("%s Using auxiliary matrix\n",PrintMsg.c_str());
      printf("%s aux threshold = %e\n",PrintMsg.c_str(),A_ML->aux_data->threshold);
    }
  }

  /* Aggregate Nodes (the ML print level is raised temporarily when verbose) */
  int printlevel=ML_Get_PrintLevel();
  if(verbose) ML_Set_PrintLevel(10);
  NumAggregates = ML_Aggregate_Coarsen(MLAggr,A_ML, &P, ml_comm);
  if(verbose) ML_Set_PrintLevel(printlevel);

  const bool no_aggregates = (NumAggregates == 0);
  if (no_aggregates){
    std::cerr << "Found 0 aggregates, perhaps the problem is too small." << std::endl;
  }/*end if*/
  else if(very_verbose) printf("[%d] %s %d aggregates created invec_leng=%d\n",A.Comm().MyPID(),PrintMsg.c_str(),NumAggregates,P->invec_leng);

  /* Statistics are only gathered on the success path. */
  if(verbose && !no_aggregates){
    int globalAggs=0;
    A.Comm().SumAll(&NumAggregates,&globalAggs,1);
    if(!A.Comm().MyPID()) {
      printf("%s Aggregation threshold = %e\n",PrintMsg.c_str(),Threshold);
      printf("%s Global aggregates     = %d\n",PrintMsg.c_str(),globalAggs);

    }
  }

  /* Cleanup -- done on every path so the wrapper operator is never leaked */
  ML_qr_fix_Destroy();
  if(UseAux) ML_Finalize_Aux(A_ML);
  ML_Operator_Destroy(&A_ML);

  /* NOTE(review): ML_CHK_ERR is presumed to report the code and return it */
  /* from this function; it is invoked only after the cleanup above.       */
  if (no_aggregates){
    ML_CHK_ERR(-2);
  }

  return 0;
}