/*
 * Numerically check the edge-element hierarchy level by level.
 *
 * For every level i from ml->ML_finest_level down to (but not including)
 * ml->ML_coarsest_level, a normalized random vector v is pushed through
 * Tmat_array[i] and then through ml->Amat[i]; the 2-norm of the result is
 * reported as "||S*T*v||" on process 0 when the ML print level exceeds 5.
 *
 * Parameters:
 *   ml            - hierarchy whose operators (ml->Amat) are checked.
 *   Tmat_array    - per-level operators, indexed like ml->Amat.
 *   incr_or_decr  - level ordering; ML_INCREASING is not supported.
 *
 * Returns 1 (after printing a notice on process 0) when incr_or_decr is
 * ML_INCREASING, and 0 otherwise.
 *
 * All diagnostic output is restricted to process 0, including the initial
 * "Checking null space" banner.
 */
int ML_Reitzinger_Check_Hierarchy(ML *ml, ML_Operator **Tmat_array, int incr_or_decr)
{
  int i, j;
  int finest_level, coarsest_level;
  int verbose;
  ML_Operator *Amat, *Tmat;
  double *randvec, *result, *result1;
  double dnorm;

  finest_level   = ml->ML_finest_level;
  coarsest_level = ml->ML_coarsest_level;

  if (incr_or_decr == ML_INCREASING) {
    if (ml->comm->ML_mypid == 0) {
      printf("ML_Reitzinger_Check_Hierarchy: ML_INCREASING is not supported ");
      printf(" at this time. Not checking hierarchy.\n");
    }
    return 1;
  }

  /* Only process 0 reports, and only at print levels above 5. */
  verbose = (ML_Get_PrintLevel() > 5) && (ml->comm->ML_mypid == 0);

  if (verbose) {
    printf("ML_Reitzinger_Check_Hierarchy: Checking null space\n");
  }

  for (i = finest_level; i > coarsest_level; i--) {
    Amat = ml->Amat + i;
    Tmat = Tmat_array[i];

    /* normalized random vector */
    randvec = (double *) ML_allocate(Tmat->invec_leng * sizeof(double) );
    ML_random_vec(randvec, Tmat->invec_leng, ml->comm);
    dnorm = sqrt( ML_gdot(Tmat->invec_leng, randvec, randvec, ml->comm) );
    /* Skip the scaling for a zero vector rather than filling it with NaNs. */
    if (dnorm != 0.0) {
      for (j = 0; j < Tmat->invec_leng; j++) randvec[j] /= dnorm;
    }

    result  = (double *) ML_allocate(Amat->invec_leng * sizeof(double) );
    result1 = (double *) ML_allocate(Amat->outvec_leng * sizeof(double) );

    /* result = T*v, result1 = S*(T*v) */
    ML_Operator_Apply(Tmat, Tmat->invec_leng, randvec,
                      Tmat->outvec_leng, result);
    ML_Operator_Apply(Amat, Amat->invec_leng, result,
                      Amat->outvec_leng, result1);

    /* ML_gdot is called on every process, not just the one that prints. */
    dnorm = sqrt( ML_gdot(Amat->outvec_leng, result1, result1, ml->comm) );
    if (verbose) {
      printf("Level %d: for random v, ||S*T*v|| = %15.10e\n", i, dnorm);
    }

    ML_free(randvec);
    ML_free(result);
    ML_free(result1);
  }

  if (verbose) printf("\n");

  return 0;
}
// ====================================================================== int GetPrintLevel() { if (GetMyPID()) return(0); else return(ML_Get_PrintLevel()); }
// ================================================ ====== ==== ==== == = int ML_Epetra::FaceMatrixFreePreconditioner::NodeAggregate(ML_Aggregate_Struct *&MLAggr,ML_Operator *&P,ML_Operator* TMT_ML,int &NumAggregates){ /* Pull Teuchos Options */ string CoarsenType = List_.get("aggregation: type", "Uncoupled"); double Threshold = List_.get("aggregation: threshold", 0.0); int NodesPerAggr = List_.get("aggregation: nodes per aggregate", ML_Aggregate_Get_OptimalNumberOfNodesPerAggregate()); string PrintMsg_ = "FMFP (Level 0): "; ML_Aggregate_Create(&MLAggr); ML_Aggregate_Set_MaxLevels(MLAggr, 2); ML_Aggregate_Set_StartLevel(MLAggr, 0); ML_Aggregate_Set_Threshold(MLAggr, Threshold); ML_Aggregate_Set_MaxCoarseSize(MLAggr,1); MLAggr->cur_level = 0; ML_Aggregate_Set_Reuse(MLAggr); MLAggr->keep_agg_information = 1; P = ML_Operator_Create(ml_comm_); /* Process Teuchos Options */ if (CoarsenType == "Uncoupled") ML_Aggregate_Set_CoarsenScheme_Uncoupled(MLAggr); else if (CoarsenType == "Uncoupled-MIS"){ ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr); } else if (CoarsenType == "METIS"){ ML_Aggregate_Set_CoarsenScheme_METIS(MLAggr); ML_Aggregate_Set_NodesPerAggr(0, MLAggr, 0, NodesPerAggr); }/*end if*/ else { if(!Comm_->MyPID()) printf("FMFP: Unsupported (1,1) block aggregation type(%s), resetting to uncoupled-mis\n",CoarsenType.c_str()); ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr); } /* Aggregate Nodes */ int printlevel=ML_Get_PrintLevel(); ML_Set_PrintLevel(10); NumAggregates = ML_Aggregate_Coarsen(MLAggr, TMT_ML, &P, ml_comm_); ML_Set_PrintLevel(printlevel); if (NumAggregates == 0){ cerr << "Found 0 aggregates, perhaps the problem is too small." 
<< endl; ML_CHK_ERR(-2); }/*end if*/ else if(very_verbose_) printf("[%d] FMFP: %d aggregates created invec_leng=%d\n",Comm_->MyPID(),NumAggregates,P->invec_leng); int globalAggs; Comm_->SumAll(&NumAggregates,&globalAggs,1); if( verbose_ && !Comm_->MyPID()) { std::cout << PrintMsg_ << "Aggregation threshold = " << Threshold << std::endl; std::cout << PrintMsg_ << "Global aggregates = " << globalAggs << std::endl; //ML_Aggregate_Print_Complexity(MLAggr); } if(P==0) {fprintf(stderr,"%s","ERROR: No tentative prolongator found\n");ML_CHK_ERR(-5);} return 0; }
/*
 * Announce which Amesos solver is being built for a level.
 * Prints only on the process whose Comm.MyPID() is 0, and only when
 * ML_Get_PrintLevel() is greater than 2.  Uses printf when TFLOP is
 * defined and std::cout otherwise.
 */
static void print_out(const Epetra_Comm& Comm, const int level, const char* what)
{
  /* && keeps the original short-circuit: the print level is only    */
  /* queried on process 0.                                            */
  const bool announce = (Comm.MyPID() == 0) && (ML_Get_PrintLevel() > 2);
  if (!announce) {
    return;
  }
#ifdef TFLOP
  printf("Amesos (level %d) : Building %s\n", level, what);
#else
  std::cout << "Amesos (level " << level << ") : Building " << what << "\n";
#endif
}
void ML_rap(ML_Operator *Rmat, ML_Operator *Amat, ML_Operator *Pmat, ML_Operator *Result, int matrix_type) { int max_per_proc, i, j, N_input_vector; ML_Operator *APmat, *RAPmat, *Pcomm, *RAPcomm, *APcomm, *AP2comm, *tptr; ML_CommInfoOP *getrow_comm; double *scales = NULL; # ifdef ML_TIMING double tpre,tmult,tpost,ttotal; # endif /* Check that N_input_vector is reasonable */ # ifdef ML_TIMING tpre = GetClock(); ttotal = GetClock(); # endif N_input_vector = Pmat->invec_leng; getrow_comm = Pmat->getrow->pre_comm; if ( getrow_comm != NULL) { for (i = 0; i < getrow_comm->N_neighbors; i++) { for (j = 0; j < getrow_comm->neighbors[i].N_send; j++) { if (getrow_comm->neighbors[i].send_list[j] >= N_input_vector) { printf("(%d) Error: N_input_vector (%d) argument to rap() is not \n", Amat->comm->ML_mypid,N_input_vector); printf("(%d) Error: larger than %dth element (%d) sent to node %d\n", Amat->comm->ML_mypid,j+1, getrow_comm->neighbors[i].send_list[j], getrow_comm->neighbors[i].ML_id); printf("(%d) Error: Amat(%d,%d) Rmat(%d,%d) Pmat(%d,%d)\n", Amat->comm->ML_mypid, Amat->outvec_leng,Amat->invec_leng, Rmat->outvec_leng,Rmat->invec_leng, Pmat->outvec_leng,Pmat->invec_leng); fflush(stdout); exit(1); } } } } ML_create_unique_col_id(N_input_vector, &(Pmat->getrow->loc_glob_map), getrow_comm, &max_per_proc, Pmat->comm); Pmat->getrow->use_loc_glob_map = ML_YES; if (Amat->getrow->pre_comm != NULL) ML_exchange_rows( Pmat, &Pcomm, Amat->getrow->pre_comm); else Pcomm = Pmat; #ifdef DEBUG if ( Pmat->comm->ML_mypid == 0 ) printf("ML_rap : A * P begins...\n"); #endif # ifdef ML_TIMING tpre = GetClock() - tpre; tmult = GetClock(); # endif ML_matmat_mult(Amat, Pcomm , &APmat); # ifdef ML_TIMING tmult = GetClock() - tmult; tpost = GetClock(); # endif #ifdef DEBUG if ( Pmat->comm->ML_mypid == 0 ) printf("ML_rap : A * P ends.\n"); #endif ML_free(Pmat->getrow->loc_glob_map); Pmat->getrow->loc_glob_map = NULL; Pmat->getrow->use_loc_glob_map = ML_NO; if (Amat->getrow->pre_comm != NULL) { tptr = 
Pcomm; while ( (tptr!= NULL) && (tptr->sub_matrix != Pmat)) tptr = tptr->sub_matrix; if (tptr != NULL) tptr->sub_matrix = NULL; ML_RECUR_CSR_MSRdata_Destroy(Pcomm); ML_Operator_Destroy(&Pcomm); } if (Amat->getrow->post_comm != NULL) { ML_exchange_rows(APmat, &APcomm, Amat->getrow->post_comm); } else APcomm = APmat; /* Take into account any scaling in Amat */ if (Rmat->from != NULL) ML_DVector_GetDataPtr(Rmat->from->Amat_Normalization,&scales); if (scales != NULL) ML_Scale_CSR(APcomm, scales, 0); if (Rmat->getrow->pre_comm != NULL) ML_exchange_rows( APcomm, &AP2comm, Rmat->getrow->pre_comm); else AP2comm = APcomm; # ifdef ML_TIMING tpost = GetClock() - tpost; if ( Pmat->comm->ML_mypid == 0 && ML_Get_PrintLevel() > 5) { int level=-1; if (Amat->from != NULL) level = Amat->from->levelnum-1; printf("Timing summary (in seconds) for product RAP on level %d\n", level); printf(" (level %d) RAP right: pre-multiply communication time = %3.2e\n", level, tpre); printf(" (level %d) RAP right: multiply time = %3.2e\n", level, tmult); printf(" (level %d) RAP right: post-multiply communication time = %3.2e\n", level, tpost); } # endif #ifdef DEBUG if ( Pmat->comm->ML_mypid == 0 ) printf("ML_rap : R * AP begins...\n"); #endif # ifdef ML_TIMING tmult = GetClock(); # endif ML_matmat_mult(Rmat,AP2comm, &RAPmat); #ifdef DEBUG if ( Pmat->comm->ML_mypid == 0 ) printf("ML_rap : R * AP ends.\n"); #endif ML_RECUR_CSR_MSRdata_Destroy(AP2comm); ML_Operator_Destroy(&AP2comm); # ifdef ML_TIMING tmult = GetClock()-tmult; tpost = GetClock(); # endif if (Rmat->getrow->post_comm != NULL) ML_exchange_rows( RAPmat, &RAPcomm, Rmat->getrow->post_comm); else RAPcomm = RAPmat; scales = NULL; if (Rmat->to != NULL) ML_DVector_GetDataPtr(Rmat->to->Amat_Normalization,&scales); if (scales != NULL) ML_Scale_CSR(RAPcomm, scales, 1); RAPcomm->num_PDEs = Amat->num_PDEs; RAPcomm->num_rigid = Amat->num_rigid; if (matrix_type == ML_MSR_MATRIX) ML_back_to_local(RAPcomm, Result, max_per_proc); else if (matrix_type == 
ML_CSR_MATRIX) ML_back_to_csrlocal(RAPcomm, Result, max_per_proc); else if (matrix_type == ML_EpetraCRS_MATRIX) #ifdef ML_WITH_EPETRA ML_back_to_epetraCrs(RAPcomm, Result, Rmat, Pmat); #else pr_error("ML_RAP: ML_EpetraCRS_MATRIX requires epetra to be compiled in.\n"); #endif else pr_error("ML_RAP: Unknown matrix type\n");
/*
 * Collect and (optionally) print memory statistics, labelled with a
 * printf-style message.
 *
 * When ML_MEMORY_CHK is not defined this routine does nothing and
 * returns NULL.  Otherwise:
 *   - fmt == NULL : returns the label stored by the previous call
 *                   without collecting anything.
 *   - fmt == ""   : statistics are gathered (which initializes the
 *                   cached total-memory value) but nothing is printed.
 *   - otherwise   : fmt and the variable arguments are formatted into
 *                   the label (truncated to the label buffer size), the
 *                   statistics are gathered, reduced over MPI_COMM_WORLD
 *                   when ML_MPI is defined, and a summary table is
 *                   printed by process 0 if ML_Get_PrintLevel() > 0.
 *
 * Statistics come from heap_info() under ML_TFLOP, else from
 * /proc/meminfo when it can be opened, else from mallinfo().
 *
 * Returns a pointer to the internal, statically held label string
 * (callers must not free it), or NULL if the label buffer could not be
 * allocated.
 */
char * ML_memory_check(char *fmt, ... )
{
#ifdef ML_MEMORY_CHK
   enum { ml_memory_label_size = 200 };
   size_t fragments=0;
   int total_free=0, largest_free=0, total_used=0;
   int total_swap=0, total_swap_free=0, total_swap_used=0;
   static double start_time = -1.;
   double elapsed_time;
   int id, nnodes, i;
   ml_IntLoc isrcvec[ML_NIntStats],imaxvec[ML_NIntStats],
             iminvec[ML_NIntStats];
   int isrcvec_copy[ML_NIntStats];
   int iavgvec[ML_NIntStats];
   ml_DblLoc dsrcvec[ML_NDblStats],dmaxvec[ML_NDblStats],
             dminvec[ML_NDblStats];
   double dsrcvec_copy[ML_NDblStats];
   double davgvec[ML_NDblStats];
   static char *ml_memory_label = NULL;
   va_list ap;
#ifdef ML_TFLOP
   unsigned long ultotal_free=0, ullargest_free=0, ultotal_used=0;
#else
   struct mallinfo M;
   static int ml_total_mem = 0;
#endif
   FILE *fid;
#  define ml_meminfo_size 23
   int haveMemInfo=0, overflowDetected = 0;
   /* Initialized so the summary never prints an indeterminate string  */
   /* (no branch sets it under ML_TFLOP with NO_HEAPINFO).             */
   char method[80] = "";
   int mypid=0;

   /* allocate space for string that is printed with memory information */
   if (ml_memory_label == NULL) {
     /* THIS MALLOC NEEDS TO STAY A MALLOC AND NOT AN ML_ALLOCATE */
     ml_memory_label = (char *) malloc(sizeof(char)*ml_memory_label_size);
     if (ml_memory_label == NULL) return(NULL);
     ml_memory_label[0] = '\0';
   }

   /* if fmt is NULL just return the current string associated with */
   /* the memory printing. The idea is that a low level function    */
   /* can use this to get the string, append any additional info    */
   /* and use this when it invokes this routine a second time.      */
   if (fmt == NULL) return(ml_memory_label);

   /* Take variable argument and transform it to the string that is */
   /* printed with memory statistics.  The write is bounded so an   */
   /* over-long label is truncated instead of overrunning the heap. */
   va_start(ap, fmt);
   vsnprintf(ml_memory_label, ml_memory_label_size, fmt, ap);
   va_end(ap);

   elapsed_time = GetClock();
   if (start_time == -1.) start_time = elapsed_time;
   elapsed_time = elapsed_time - start_time;

#ifdef ML_TFLOP
   /* Memory statistics for Red Storm. FYI, heap_info returns bytes. */
#ifndef NO_HEAPINFO
   heap_info(&fragments, &ultotal_free, &ullargest_free, &ultotal_used);
#ifdef ML_MPI
   MPI_Comm_rank(MPI_COMM_WORLD,&mypid);
#endif
   total_free=(int) (ultotal_free / (1024*1024));
   largest_free= (int) (ullargest_free / (1024*1024));
   total_used = (int) (ultotal_used / (1024*1024));
   sprintf(method,"Using heap_info()");
#else
   total_free=0;
   largest_free=0;
   total_used=0;
#endif

#else
   /* Memory statistics for all other platforms, via the system call
      mallinfo() and reading file /proc/meminfo, which is available
      under most Linux OS's. */

   M = mallinfo();

   fid = fopen("/proc/meminfo","r");
   if (fid != NULL) {
     char str[80], units[10];
     int k;
     for (i=0; i< ml_meminfo_size; i++) {
       /* Field widths keep fscanf inside str[80] and units[10]. */
       if (fscanf(fid,"%79s%d%9s", str, &k,units) == 3) {
         if (strcmp(str,"MemTotal:") == 0 && (ml_total_mem==0))
           ml_total_mem = k/1024;
         if (strcmp(str,"MemFree:") == 0)   {total_free = k/1024; }
         if (strcmp(str,"SwapTotal:") == 0) {total_swap = k/1024; }
         if (strcmp(str,"SwapFree:") == 0)  {total_swap_free = k/1024; }
       }
     }
     fclose(fid);
     total_used = ml_total_mem - total_free;
     total_swap_used = total_swap - total_swap_free;
     sprintf(method,"Using /proc/meminfo");
     haveMemInfo = 1;
   }

   /* If /proc/meminfo doesn't exist, use mallinfo() instead. */
   if ( !haveMemInfo ) {
     if (ml_total_mem == 0) ml_total_mem = ML_MaxAllocatableSize();
     if (M.hblkhd < 0) { /* try to fix overflow */
       double delta = fabs(((double) INT_MIN) - ((double) M.hblkhd)) + 1;
       total_used = (int) ( (((double) INT_MAX) + delta) / (1024*1024) );
       overflowDetected = 1;
     }
     /* A non-positive uordblks is ignored: it indicates an overflow  */
     /* that this routine does not know how to correct.               */
     if (M.uordblks > 0) total_used += M.uordblks / (1024*1024);

     total_free = ml_total_mem - total_used;
     sprintf(method,"Using mallinfo()");
   }
   fragments = M.ordblks + M.hblks;
   largest_free = -1;
#endif /*ifdef ML_TFLOP*/

   /* Only print if fmt string is not empty */
   /* This allows for an initialization of  */
   /* ml_total_mem without any printing     */
   if (strlen(fmt) == 0) return(ml_memory_label);

   /*isrcvec[0].value = fragments; */
   isrcvec[0].value = 0;
   isrcvec[1].value = total_free;
   isrcvec[2].value = largest_free;
   isrcvec[3].value = total_used;
   isrcvec[4].value = total_free + total_used;
   /* Percentages are stored as tenths of a percent.  Guard the       */
   /* denominators: a zero total (e.g. no swap configured) would      */
   /* otherwise convert inf/NaN to int.                               */
   if ((total_free + total_used) != 0)
     isrcvec[5].value = (int) ( ((double)total_used*1000) / ((double)(total_free+total_used)) );
   else
     isrcvec[5].value = 0;
   isrcvec[6].value = total_swap_free;
   isrcvec[7].value = total_swap_used;
   isrcvec[8].value = total_swap;
   if (total_swap != 0)
     isrcvec[9].value = (int) ( ((double)total_swap_used*1000) / ((double)(total_swap)) );
   else
     isrcvec[9].value = 0;
   dsrcvec[0].value = elapsed_time;
   dsrcvec[1].value = fragments;

#ifdef ML_MPI
   for (i =0; i < ML_NIntStats; i++)
     MPI_Comm_rank(MPI_COMM_WORLD,&(isrcvec[i].rank));
   for (i =0; i < ML_NDblStats; i++)
     MPI_Comm_rank(MPI_COMM_WORLD,&(dsrcvec[i].rank));
#endif

   for (i =0; i < ML_NIntStats; i++) isrcvec_copy[i] = isrcvec[i].value;
   for (i =0; i < ML_NDblStats; i++) dsrcvec_copy[i] = dsrcvec[i].value;

   nnodes = 1;
   id = 0;
#ifdef ML_MPI
   MPI_Comm_rank(MPI_COMM_WORLD,&id);
   MPI_Comm_size(MPI_COMM_WORLD,&nnodes);
   MPI_Reduce(isrcvec,imaxvec,ML_NIntStats,MPI_2INT,MPI_MAXLOC,0,MPI_COMM_WORLD);
   MPI_Reduce(isrcvec,iminvec,ML_NIntStats,MPI_2INT,MPI_MINLOC,0,MPI_COMM_WORLD);
   MPI_Reduce(isrcvec_copy,iavgvec,ML_NIntStats,MPI_INT,MPI_SUM,0,MPI_COMM_WORLD);
   MPI_Reduce(dsrcvec,dmaxvec,ML_NDblStats,MPI_DOUBLE_INT,MPI_MAXLOC,0,MPI_COMM_WORLD);
   MPI_Reduce(dsrcvec,dminvec,ML_NDblStats,MPI_DOUBLE_INT,MPI_MINLOC,0,MPI_COMM_WORLD);
   MPI_Reduce(dsrcvec_copy,davgvec,ML_NDblStats,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
   MPI_Reduce(&overflowDetected,&i,1,MPI_INT,MPI_MAX,0,MPI_COMM_WORLD);
   overflowDetected = i;
#else
   /* Serial build: min, max and average are all the local value, and */
   /* the owning rank is 0 (the rank fields are printed below).       */
   for (i =0; i < ML_NIntStats; i++) {
     imaxvec[i].value = isrcvec[i].value;
     imaxvec[i].rank  = 0;
     iminvec[i].value = isrcvec[i].value;
     iminvec[i].rank  = 0;
     iavgvec[i] = isrcvec[i].value;
   }
   for (i =0; i < ML_NDblStats; i++) {
     dmaxvec[i].value = dsrcvec[i].value;
     dmaxvec[i].rank  = 0;
     dminvec[i].value = dsrcvec[i].value;
     dminvec[i].rank  = 0;
     davgvec[i] = dsrcvec[i].value;
   }
#endif

   /* uncomment lines below if you want individual processor information */
   /*
   printf("%s(%d): blks = %ld, free = %ld, max free = %ld, used = %ld, total = %ld, %% used = %e, time = %e\n",
          ml_memory_label,id,fragments, total_free, largest_free, total_used,
          total_free+total_used,
          ((double)total_used)/((double)(total_free+total_used)),elapsed_time);
   */

   if (id == 0 && ML_Get_PrintLevel() > 0) {
     /* The reductions above produced sums; turn them into averages. */
     for (i =0; i < ML_NIntStats; i++)
       iavgvec[i] = (int) (iavgvec[i]/((double) nnodes));
     for (i =0; i < ML_NDblStats; i++)
       davgvec[i] = davgvec[i] / nnodes;

     printf("-------------------------------------------------------------\n");
     printf("Summary Heap data (Mbytes) at %s\n",ml_memory_label);
     printf("%s\n",method);
     if (overflowDetected)
       printf("*WARNING* mallinfo() counter overflow detected\n");
     printf(" avg min max\n");
     printf("-------------------------------------------------------------\n");
     printf(" blks %11d %11d (%5d) %11d (%5d) %s\n",
            (int) davgvec[1], (int) dminvec[1].value, dminvec[1].rank,
            (int) dmaxvec[1].value, dmaxvec[1].rank, ml_memory_label);
     printf(" free %11d %11d (%5d) %11d (%5d) %s\n",
            iavgvec[1], iminvec[1].value, iminvec[1].rank,
            imaxvec[1].value, imaxvec[1].rank, ml_memory_label);
     /* largest_free is -1 on platforms that cannot report it. */
     if (iavgvec[2] != -1)
       printf(" max free %11d %11d (%5d) %11d (%5d) %s\n",
              iavgvec[2], iminvec[2].value, iminvec[2].rank,
              imaxvec[2].value, imaxvec[2].rank, ml_memory_label);
     printf(" used %11d %11d (%5d) %11d (%5d) %s\n",
            iavgvec[3], iminvec[3].value, iminvec[3].rank,
            imaxvec[3].value, imaxvec[3].rank, ml_memory_label);
     printf(" total %11d %11d (%5d) %11d (%5d) %s\n",
            iavgvec[4], iminvec[4].value, iminvec[4].rank,
            imaxvec[4].value, imaxvec[4].rank, ml_memory_label);
     printf(" %% used %9.1f %9.1f (%5d) %9.1f (%5d) %s\n",
            ((double)iavgvec[5])/10., ((double)iminvec[5].value)/10.,
            iminvec[5].rank,
            ((double)imaxvec[5].value)/10., imaxvec[5].rank, ml_memory_label);
     printf(" time %9.1f %9.1f (%5d) %9.1f (%5d) %s\n",
            davgvec[0],dminvec[0].value,dminvec[0].rank,
            dmaxvec[0].value, dmaxvec[0].rank, ml_memory_label);
     if (haveMemInfo) {
       /* The max column reports the rank that owns the max value. */
       printf(" swap free %11d %11d (%5d) %11d (%5d) %s\n",
              iavgvec[6], iminvec[6].value,iminvec[6].rank,
              imaxvec[6].value, imaxvec[6].rank, ml_memory_label);
       printf(" swap used %11d %11d (%5d) %11d (%5d) %s\n",
              iavgvec[7], iminvec[7].value, iminvec[7].rank,
              imaxvec[7].value, imaxvec[7].rank, ml_memory_label);
       printf(" total swap %11d %11d (%5d) %11d (%5d) %s\n",
              iavgvec[8], iminvec[8].value, iminvec[8].rank,
              imaxvec[8].value, imaxvec[8].rank, ml_memory_label);
       printf(" %% swap used %9.1f %9.1f (%5d) %9.1f (%5d) %s\n",
              ((double)iavgvec[9])/10., ((double)iminvec[9].value)/10.,
              iminvec[9].rank,
              ((double)imaxvec[9].value)/10., imaxvec[9].rank, ml_memory_label);
     }
   } /*if (id == 0 ... */

   return(ml_memory_label);
#else
   return(NULL);
#endif
} /*ML_memory_check*/
/*
 * Build a tentative prolongator from a user-supplied partition.
 *
 * The matrix is amalgamated into blocks of num_PDE_eqns rows,
 * ML_GetUserPartitions() assigns each block row to an aggregate, and
 * the result is expanded back to point rows.  For every aggregate the
 * fine null space restricted to its rows (or the default null space
 * when ml_ag->nullspace_vect is NULL) is QR-factored with LAPACK:
 * Q becomes the aggregate's block of the prolongator (stored in CSR
 * form in *Pmatrix) and R becomes its block of the coarse null space,
 * which is installed through ML_Aggregate_Set_NullSpace().
 *
 * Parameters:
 *   ml_ag    - aggregation object; read for settings and updated with
 *              the threshold, complexity counters, aggr_info[level],
 *              aggr_count[level] and the new null space.
 *   Amatrix  - fine-level operator to coarsen.
 *   Pmatrix  - *Pmatrix must already be a created operator; it is
 *              filled with the prolongator.
 *   comm     - communicator used for global sums and the rank.
 *
 * Returns the local number of coarse unknowns,
 * aggr_count * nullspace_dim.  Allocation failures and an Nrows that
 * is not a multiple of num_PDE_eqns terminate the program via exit().
 */
int ML_Aggregate_CoarsenUser(ML_Aggregate *ml_ag, ML_Operator *Amatrix,
                             ML_Operator **Pmatrix, ML_Comm *comm)
{
   unsigned int nbytes, length;
   int     i, j, k, Nrows, exp_Nrows;
   int     diff_level;
   int     aggr_count, index, mypid, num_PDE_eqns;
   int     *aggr_index = NULL, nullspace_dim;
   int     Ncoarse, count;
   int     *new_ia = NULL, *new_ja = NULL, new_Nrows;
   int     exp_Ncoarse;
   int     *aggr_cnt_array = NULL;
   int     level, index3, max_agg_size;
   int     **rows_in_aggs = NULL, lwork, info;
   double  *new_val = NULL, epsilon;
   double  *nullspace_vect = NULL, *qr_tmp = NULL;
   double  *tmp_vect = NULL, *work = NULL, *new_null = NULL;
   struct ML_CSR_MSRdata *csr_data;
   int     total_nz = 0;
   char    str[80];

   int * graph_decomposition = NULL;
   ML_Aggregate_Viz_Stats * aggr_viz_and_stats;
   ML_Aggregate_Viz_Stats * grid_info;
   char * unamalg_bdry = NULL;
   char* label;
   int N_dimensions;
   double* x_coord = NULL;
   double* y_coord = NULL;
   double* z_coord = NULL;

   /* ------------------- execution begins --------------------------------- */

   /* Prefix for all informational output; bounded so that a long    */
   /* user label cannot overrun str.                                 */
   label = ML_GetUserLabel();
   snprintf(str, sizeof(str), "%s (level %d) :", label, ml_ag->cur_level);

   /* ============================================================= */
   /* get the machine information and matrix references             */
   /* ============================================================= */

   mypid          = comm->ML_mypid;
   epsilon        = ml_ag->threshold;
   num_PDE_eqns   = ml_ag->num_PDE_eqns;
   nullspace_dim  = ml_ag->nullspace_dim;
   nullspace_vect = ml_ag->nullspace_vect;
   Nrows          = Amatrix->outvec_leng;

   if (mypid == 0 && 5 < ML_Get_PrintLevel()) {
     printf("%s num PDE eqns = %d\n", str, num_PDE_eqns);
   }

   /* ============================================================= */
   /* check the system size versus null dimension size              */
   /* ============================================================= */

   if ( Nrows % num_PDE_eqns != 0 ) {
     printf("ML_Aggregate_CoarsenUser ERROR : Nrows must be multiples");
     printf(" of num_PDE_eqns.\n");
     exit(EXIT_FAILURE);
   }
   diff_level = ml_ag->max_levels - ml_ag->cur_level - 1;
   if ( diff_level > 0 ) num_PDE_eqns = nullspace_dim; /* ## 12/20/99 */

   /* ============================================================= */
   /* set up the threshold for weight-based coarsening              */
   /* ============================================================= */

   diff_level = ml_ag->begin_level - ml_ag->cur_level;
   if (diff_level == 0) ml_ag->curr_threshold = ml_ag->threshold;
   epsilon = ml_ag->curr_threshold;
   ml_ag->curr_threshold *= 0.5;   /* halved for the next call */

   if (mypid == 0 && 7 < ML_Get_PrintLevel())
     printf("%s current eps = %e\n", str, epsilon);

   epsilon = epsilon * epsilon;

   ML_Operator_AmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);
   Nrows /= num_PDE_eqns;      /* from here on Nrows counts block rows */

   exp_Nrows = Nrows;

   /* ********************************************************************** */
   /* allocate memory for aggr_index, which will contain the decomposition   */
   /* ********************************************************************** */

   nbytes = (Nrows*num_PDE_eqns) * sizeof(int);

   if ( nbytes > 0 ) {
     ML_memory_alloc((void**) &aggr_index, nbytes, "ACJ");
     if( aggr_index == NULL ) {
       fprintf( stderr,
                "*ML*ERR* not enough memory for %u bytes\n"
                "*ML*ERR* (file %s, line %d)\n",
                nbytes,
                __FILE__,
                __LINE__ );
       exit( EXIT_FAILURE );
     }
   }
   else aggr_index = NULL;

   for( i=0 ; i<Nrows*num_PDE_eqns ; i++ ) aggr_index[i] = -1;

   unamalg_bdry = (char *) ML_allocate( sizeof(char) * (Nrows+1) );

   if( unamalg_bdry == NULL ) {
     fprintf( stderr,
              "*ML*ERR* on proc %d, not enough space for %d bytes\n"
              "*ML*ERR* (file %s, line %d)\n",
              mypid,
              (int)sizeof(char) * Nrows,
              __FILE__,
              __LINE__ );
     exit( EXIT_FAILURE );
   }

   /* Coordinates handed to the user partitioner.  The grid structure */
   /* may be absent (it is tested against NULL further down), so it   */
   /* must not be dereferenced unconditionally here.                  */
   N_dimensions = ml_ag->N_dimensions;
   grid_info = (ML_Aggregate_Viz_Stats*) Amatrix->to->Grid->Grid;
   x_coord = (grid_info != NULL) ? grid_info->x : NULL;

   if (N_dimensions > 1 && x_coord)
     y_coord = grid_info->y;
   else
     y_coord = 0;
   if (N_dimensions > 2 && x_coord)
     z_coord = grid_info->z;
   else
     z_coord = 0;

   aggr_count = ML_GetUserPartitions(Amatrix,unamalg_bdry,
                                     epsilon,
                                     x_coord,y_coord,z_coord,
                                     aggr_index,&total_nz);

#ifdef ML_MPI
   MPI_Allreduce( &Nrows, &i, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
   MPI_Allreduce( &aggr_count, &j, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
#else
   i = Nrows;
   j = aggr_count;
#endif

   if( mypid == 0 && 7 < ML_Get_PrintLevel() ) {
     printf("%s Using %d (block) aggregates (globally)\n",
            str,
            j );
     printf("%s # (block) aggre/ # (block) rows = %8.5f %% ( = %d / %d)\n",
            str,
            100.0*j/i,
            j, i);
   }

   j = ML_gsum_int( aggr_count, comm );
   if (mypid == 0 && 7 < ML_Get_PrintLevel()) {
     printf("%s %d (block) aggregates (globally)\n",
            str, j );
   }

   /* ********************************************************************** */
   /* Keep a copy of aggr_index for the user, who can analyze and visualize  */
   /* it after the levels have been built.  The copy is attached to the      */
   /* ML_Aggregate_Viz_Stats structure found in Amatrix->to->Grid->Grid,     */
   /* when that structure exists.                                            */
   /* ********************************************************************** */

   if (Amatrix->to->Grid->Grid != NULL) {

     graph_decomposition = (int *)ML_allocate(sizeof(int)*(Nrows+1));
     if( graph_decomposition == NULL ) {
       fprintf( stderr,
                "*ML*ERR* Not enough memory for %d bytes\n"
                "*ML*ERR* (file %s, line %d)\n",
                (int)sizeof(int)*Nrows,
                __FILE__,
                __LINE__ );
       exit( EXIT_FAILURE );
     }

     for( i=0 ; i<Nrows ; i++ ) graph_decomposition[i] = aggr_index[i];

     aggr_viz_and_stats = (ML_Aggregate_Viz_Stats *) (Amatrix->to->Grid->Grid);
     aggr_viz_and_stats->graph_decomposition = graph_decomposition;
     aggr_viz_and_stats->Nlocal = Nrows;
     aggr_viz_and_stats->Naggregates = aggr_count;
     aggr_viz_and_stats->local_or_global = ML_LOCAL_INDICES;
     aggr_viz_and_stats->is_filled = ML_YES;
     aggr_viz_and_stats->Amatrix = Amatrix;
   }

   /* ********************************************************************** */
   /* take the decomposition returned by the partitioner and form the        */
   /* aggregates                                                             */
   /* ********************************************************************** */

   total_nz = ML_Comm_GsumInt( comm, total_nz);
   i = ML_Comm_GsumInt( comm, Nrows);

   if ( mypid == 0 && 7 < ML_Get_PrintLevel())
     printf("%s Total (block) nnz = %d ( = %5.2f/(block)row)\n",
            str,
            total_nz,1.0*total_nz/i);

   if ( ml_ag->operator_complexity == 0.0 ) {
     ml_ag->fine_complexity = total_nz;
     ml_ag->operator_complexity = total_nz;
   }
   else ml_ag->operator_complexity += total_nz;

   /* fix aggr_index for num_PDE_eqns > 1 (expand block rows to point  */
   /* rows; iterating backwards so entries are read before overwrite)  */

   for (i = Nrows - 1; i >= 0; i-- ) {
     for (j = num_PDE_eqns-1; j >= 0; j--) {
       aggr_index[i*num_PDE_eqns+j] = aggr_index[i];
     }
   }

   if ( mypid == 0 && 8 < ML_Get_PrintLevel()) {
     printf("Calling ML_Operator_UnAmalgamateAndDropWeak\n");
     fflush(stdout);
   }

   ML_Operator_UnAmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);

   Nrows     *= num_PDE_eqns;   /* back to point rows */
   exp_Nrows *= num_PDE_eqns;

   /* count the size of each aggregate */

   aggr_cnt_array = (int *) ML_allocate(sizeof(int)*(aggr_count+1));
   for (i = 0; i < aggr_count ; i++) aggr_cnt_array[i] = 0;
   for (i = 0; i < exp_Nrows; i++) {
     if (aggr_index[i] >= 0) {
       if( aggr_index[i] >= aggr_count ) {
         fprintf( stderr,
                  "*ML*WRN* on process %d, something weird happened...\n"
                  "*ML*WRN* node %d belong to aggregate %d (#aggr = %d)\n"
                  "*ML*WRN* (file %s, line %d)\n",
                  comm->ML_mypid,
                  i,
                  aggr_index[i],
                  aggr_count,
                  __FILE__,
                  __LINE__ );
       } else {
         aggr_cnt_array[aggr_index[i]]++;
       }
     }
   }

   /* ============================================================= */
   /* Form tentative prolongator                                    */
   /* ============================================================= */

   Ncoarse = aggr_count;

   /* ============================================================= */
   /* check and copy aggr_index                                     */
   /* ------------------------------------------------------------- */

   level = ml_ag->cur_level;
   nbytes = (Nrows+1) * sizeof( int );
   ML_memory_alloc((void**) &(ml_ag->aggr_info[level]), nbytes, "AGl");
   count = aggr_count;
   for ( i = 0; i < Nrows; i+=num_PDE_eqns ) {
     if ( aggr_index[i] >= 0 ) {
       for ( j = 0; j < num_PDE_eqns; j++ )
         ml_ag->aggr_info[level][i+j] = aggr_index[i];
       if (aggr_index[i] >= count) count = aggr_index[i] + 1;
     }
     /* rows with aggr_index[i] < 0 are left unassigned on purpose */
   }
   ml_ag->aggr_count[level] = count; /* for relaxing boundary points */

   /* ============================================================= */
   /* set up the new operator                                       */
   /* ------------------------------------------------------------- */

   new_Nrows = Nrows;
   exp_Ncoarse = Nrows;

   for ( i = 0; i < new_Nrows; i++ ) {
     if ( aggr_index[i] >= exp_Ncoarse ) {
       printf("*ML*WRN* index out of bound %d = %d(%d)\n",
              i, aggr_index[i],
              exp_Ncoarse);
     }
   }
   nbytes = ( new_Nrows+1 ) * sizeof(int);
   ML_memory_alloc((void**)&(new_ia), nbytes, "AIA");
   nbytes = ( new_Nrows+1) * nullspace_dim * sizeof(int);
   ML_memory_alloc((void**)&(new_ja), nbytes, "AJA");
   nbytes = ( new_Nrows+1) * nullspace_dim * sizeof(double);
   ML_memory_alloc((void**)&(new_val), nbytes, "AVA");
   for ( i = 0; i < new_Nrows*nullspace_dim; i++ ) new_val[i] = 0.0;

   /* ------------------------------------------------------------- */
   /* set up the space for storing the new null space               */
   /* ------------------------------------------------------------- */

   nbytes = (Ncoarse+1) * nullspace_dim * nullspace_dim * sizeof(double);
   ML_memory_alloc((void**)&(new_null),nbytes,"AGr");
   if( new_null == NULL ) {
     fprintf( stderr,
              "*ML*ERR* on process %d, not enough memory for %u bytes\n"
              "*ML*ERR* (file %s, line %d)\n",
              mypid,
              nbytes,
              __FILE__,
              __LINE__ );
     exit( EXIT_FAILURE );
   }

   for (i = 0; i < Ncoarse*nullspace_dim*nullspace_dim; i++)
     new_null[i] = 0.0;

   /* ------------------------------------------------------------- */
   /* initialize the row pointer for the CSR prolongation operator  */
   /* (each row will have at most nullspace_dim nonzero entries)    */
   /* ------------------------------------------------------------- */

   for (i = 0; i <= Nrows; i++) new_ia[i] = i * nullspace_dim;

   /* trying this when a Dirichlet row is taken out: rows that belong */
   /* to no aggregate (aggr_index == -1) get an empty CSR row         */
   j = 0;
   new_ia[0] = 0;
   for (i = 0; i < Nrows; i++) {
     if (aggr_index[i] != -1) j += nullspace_dim;
     new_ia[i+1] = j;
   }

   /* ------------------------------------------------------------- */
   /* generate an array to store which aggregate has which rows.Then*/
   /* loop through the rows of A checking which aggregate each row  */
   /* is in, and adding it to the appropriate spot in rows_in_aggs  */
   /* ------------------------------------------------------------- */

   ML_memory_alloc((void**)&rows_in_aggs,aggr_count*sizeof(int*),"MLs");
   for (i = 0; i < aggr_count; i++) {
     nbytes = aggr_cnt_array[i]+1;
     rows_in_aggs[i] = (int *) ML_allocate(nbytes*sizeof(int));
     aggr_cnt_array[i] = 0;   /* reused below as a fill cursor */
     if (rows_in_aggs[i] == NULL) {
       printf("*ML*ERR* couldn't allocate memory in CoarsenMETIS\n");
       exit(1);
     }
   }
   for (i = 0; i < exp_Nrows; i+=num_PDE_eqns) {
     if ( aggr_index[i] >= 0 && aggr_index[i] < aggr_count) {
       for (j = 0; j < num_PDE_eqns; j++) {
         index = aggr_cnt_array[aggr_index[i]]++;
         rows_in_aggs[aggr_index[i]][index] = i + j;
       }
     }
   }

   /* ------------------------------------------------------------- */
   /* allocate work arrays for QR factorization                     */
   /* work and lwork are needed for lapack's QR routine.  The       */
   /* workspace starts at nullspace_dim doubles and is grown below  */
   /* whenever LAPACK reports (in work[0]) that it wants more.      */
   /* ------------------------------------------------------------- */

   max_agg_size = 0;
   for (i = 0; i < aggr_count; i++) {
     if (aggr_cnt_array[i] > max_agg_size) max_agg_size = aggr_cnt_array[i];
   }
   nbytes = max_agg_size * nullspace_dim * sizeof(double);
   ML_memory_alloc((void**)&qr_tmp, nbytes, "AGu");
   nbytes = nullspace_dim * sizeof(double);
   ML_memory_alloc((void**)&tmp_vect, nbytes, "AGv");

   lwork  = nullspace_dim;
   nbytes = nullspace_dim * sizeof(double);
   ML_memory_alloc((void**)&work, nbytes, "AGw");

   /* ------------------------------------------------------------- */
   /* perform block QR decomposition                                */
   /* ------------------------------------------------------------- */

   for (i = 0; i < aggr_count; i++) {

     /* ---------------------------------------------------------- */
     /* set up the matrix we want to decompose into Q and R:       */
     /* (column-major, length rows by nullspace_dim columns)       */
     /* ---------------------------------------------------------- */

     length = aggr_cnt_array[i];
     if (nullspace_vect == NULL) {
       for (j = 0; j < (int) length; j++) {
         index = rows_in_aggs[i][j];
         for (k = 0; k < nullspace_dim; k++) {
           if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
             qr_tmp[k*length+j] = 0.;
           else {
             if (index % num_PDE_eqns == k) qr_tmp[k*length+j] = 1.0;
             else                           qr_tmp[k*length+j] = 0.0;
           }
         }
       }
     }
     else {
       for (k = 0; k < nullspace_dim; k++) {
         for (j = 0; j < (int) length; j++) {
           index = rows_in_aggs[i][j];
           if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
             qr_tmp[k*length+j] = 0.;
           else {
             if (index < Nrows) {
               qr_tmp[k*length+j] = nullspace_vect[k*Nrows+index];
             }
             else {
               fprintf( stderr,
                        "*ML*ERR* in QR\n"
                        "*ML*ERR* (file %s, line %d)\n",
                        __FILE__,
                        __LINE__ );
               exit( EXIT_FAILURE );
             }
           }
         }
       }
     }

     /* ---------------------------------------------------------- */
     /* now calculate QR using an LAPACK routine                   */
     /* ---------------------------------------------------------- */

     if (aggr_cnt_array[i] >= nullspace_dim) {

       DGEQRF_F77(&(aggr_cnt_array[i]), &nullspace_dim, qr_tmp,
                  &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
       if (info != 0)
         pr_error("ErrOr in CoarsenMIS : dgeqrf returned a non-zero %d %d\n",
                  aggr_cnt_array[i],i);

       if (work[0] > lwork) {
         lwork=(int) work[0];
         ML_memory_free((void**) &work);
         ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGx");
       }
       else lwork=(int) work[0];

       /* ---------------------------------------------------------- */
       /* the upper triangle of qr_tmp is now R, so copy that into   */
       /* the new nullspace                                          */
       /* ---------------------------------------------------------- */

       for (j = 0; j < nullspace_dim; j++)
         for (k = j; k < nullspace_dim; k++)
           new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] =
             qr_tmp[j+aggr_cnt_array[i]*k];

       /* ---------------------------------------------------------- */
       /* to get this block of P, need to run qr_tmp through another */
       /* LAPACK function:                                           */
       /* ---------------------------------------------------------- */

       if ( aggr_cnt_array[i] < nullspace_dim ){
         printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
                nullspace_dim);
         printf("ERROR : performing QR on a MxN matrix where M<N.\n");
       }
       DORGQR_F77(&(aggr_cnt_array[i]), &nullspace_dim, &nullspace_dim,
                  qr_tmp, &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
       if (info != 0) {
         printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
                nullspace_dim);
         pr_error("Error in CoarsenMIS: dorgqr returned a non-zero\n");
       }

       if (work[0] > lwork) {
         lwork=(int) work[0];
         ML_memory_free((void**) &work);
         ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGy");
       }
       else lwork=(int) work[0];

       /* ---------------------------------------------------------- */
       /* now copy Q over into the appropriate part of P:            */
       /* The rows of P get calculated out of order, so I assume the */
       /* Q is totally dense and use what I know of how big each Q   */
       /* will be to determine where in ia, ja, etc each nonzero in  */
       /* Q belongs.  If I did not assume this, I would have to keep */
       /* all of P in memory in order to determine where each entry  */
       /* should go                                                  */
       /* ---------------------------------------------------------- */

       for (j = 0; j < aggr_cnt_array[i]; j++) {
         index = rows_in_aggs[i][j];

         if ( index < Nrows ) {
           index3 = new_ia[index];
           for (k = 0; k < nullspace_dim; k++) {
             new_ja [index3+k] = i * nullspace_dim + k;
             new_val[index3+k] = qr_tmp[ k*aggr_cnt_array[i]+j];
           }
         }
         else {
           fprintf( stderr,
                    "*ML*ERR* in QR: index out of bounds (%d - %d)\n",
                    index,
                    Nrows );
         }
       }
     }
     else {
       /* We have a small aggregate such that the QR factorization can not */
       /* be performed. Instead let us copy the null space from the fine   */
       /* into the coarse grid nullspace and put the identity for the      */
       /* prolongator????                                                  */
       for (j = 0; j < nullspace_dim; j++)
         for (k = 0; k < nullspace_dim; k++)
           new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] =
             qr_tmp[j+aggr_cnt_array[i]*k];
       for (j = 0; j < aggr_cnt_array[i]; j++) {
         index = rows_in_aggs[i][j];
         index3 = new_ia[index];
         for (k = 0; k < nullspace_dim; k++) {
           new_ja [index3+k] = i * nullspace_dim + k;
           if (k == j) new_val[index3+k] = 1.;
           else new_val[index3+k] = 0.;
         }
       }
     }
   }

   ML_Aggregate_Set_NullSpace(ml_ag, num_PDE_eqns, nullspace_dim,
                              new_null, Ncoarse*nullspace_dim);
   ML_memory_free( (void **) &new_null);

   /* ------------------------------------------------------------- */
   /* set up the csr_data data structure                            */
   /* ------------------------------------------------------------- */

   ML_memory_alloc((void**) &csr_data, sizeof(struct ML_CSR_MSRdata),"CSR");
   csr_data->rowptr  = new_ia;
   csr_data->columns = new_ja;
   csr_data->values  = new_val;

   ML_Operator_Set_ApplyFuncData( *Pmatrix, nullspace_dim*Ncoarse, Nrows,
                                  csr_data, Nrows, NULL, 0);
   (*Pmatrix)->data_destroy = ML_CSR_MSR_ML_memorydata_Destroy;
   (*Pmatrix)->getrow->pre_comm = ML_CommInfoOP_Create();
   (*Pmatrix)->max_nz_per_row = 1;

   ML_Operator_Set_Getrow((*Pmatrix), Nrows, CSR_getrow);
   ML_Operator_Set_ApplyFunc((*Pmatrix), CSR_matvec);
   (*Pmatrix)->max_nz_per_row = 1;
   /* this must be set so that the hierarchy generation does not abort early
      in adaptive SA */
   (*Pmatrix)->num_PDEs = nullspace_dim;

   /* ------------------------------------------------------------- */
   /* clean up                                                      */
   /* ------------------------------------------------------------- */

   ML_free(unamalg_bdry);
   ML_memory_free((void**)&aggr_index);
   ML_free(aggr_cnt_array);
   for (i = 0; i < aggr_count; i++) ML_free(rows_in_aggs[i]);
   ML_memory_free((void**)&rows_in_aggs);
   ML_memory_free((void**)&qr_tmp);
   ML_memory_free((void**)&tmp_vect);
   ML_memory_free((void**)&work);

   return Ncoarse*nullspace_dim;

} /* ML_Aggregate_CoarsenUser */
int ML_Amesos_Gen(ML *ml, int curr_level, int choice, int MaxProcs, double AddToDiag, Amesos_Handle_Type *Amesos_Handle) { # ifdef ML_MPI MPI_Comm amesosComm; # else int amesosComm=1; //TODO are these going to cause a problem w/o MPI? # endif ML_Operator *Ke = &(ml->Amat[curr_level]); /* Sanity Checking - Zero Diagonals */ if (Ke->getrow->func_ptr == MSR_getrows) { struct ML_CSR_MSRdata * input_matrix = (struct ML_CSR_MSRdata *) ML_Get_MyGetrowData(Ke); double *val = input_matrix->values; int N = Ke->outvec_leng; for(int i=0;i<N;i++) if(val[i] == 0.0) val[i]=1.0; } int hasRows=1; if(choice != ML_AMESOS_SUPERLUDIST) { # ifdef ML_MPI hasRows = MPI_UNDEFINED; if (Ke->invec_leng > 0 || Ke->outvec_leng > 0) hasRows = 1; MPI_Comm_split(Ke->comm->USR_comm,hasRows,Ke->comm->ML_mypid,&amesosComm); Amesos_Handle->freeMpiComm = 1; # endif } else { amesosComm=Ke->comm->USR_comm; Amesos_Handle->freeMpiComm = 0; } /* # ifdef ML_MPI hasRows = MPI_UNDEFINED; if (Ke->invec_leng > 0 || Ke->outvec_leng > 0) hasRows = 1; MPI_Comm_split(Ke->comm->USR_comm,hasRows,Ke->comm->ML_mypid,&amesosComm); #endif */ if (hasRows == 1) { ML_Epetra::RowMatrix* Amesos_Matrix = new ML_Epetra::RowMatrix(Ke, 0, false, amesosComm); assert (Amesos_Matrix != 0); int NumGlobalRows = Amesos_Matrix->NumGlobalRows(); int NumGlobalNonzeros = Amesos_Matrix->NumGlobalNonzeros(); // sanity check, coarse matrix should not be empty if( NumGlobalRows == 0 && Amesos_Matrix->Comm().MyPID() == 0 ) { std::cerr << std::endl; std::cerr << "ERROR : Coarse matrix has no rows!" << std::endl; std::cerr << std::endl; } if( NumGlobalNonzeros == 0 && Amesos_Matrix->Comm().MyPID() == 0 ) { std::cerr << std::endl; std::cerr << "ERROR : Coarse matrix has no nonzero elements!" 
<< std::endl; std::cerr << std::endl; } # ifdef TFLOP if( Amesos_Matrix->Comm().MyPID() == 0 && ML_Get_PrintLevel() > 2 ) { printf("Amesos (level %d) : NumGlobalRows = %d\n",curr_level,NumGlobalRows); printf("Amesos (level %d) : NumGlobalNonzeros = %d\n",curr_level,NumGlobalNonzeros); printf("Amesos (level %d) : fill-in = %f %\n",curr_level,100.0*NumGlobalNonzeros/(NumGlobalRows*NumGlobalRows)); } # else if( Amesos_Matrix->Comm().MyPID() == 0 && ML_Get_PrintLevel() > 2 ) { std::cout << "Amesos (level " << curr_level << ") : NumGlobalRows = " << NumGlobalRows << std::endl; std::cout << "Amesos (level " << curr_level << ") : NumGlobalNonzeros = " << NumGlobalNonzeros << std::endl; std::cout << "Amesos (level " << curr_level << ") : Fill-in = " << 100.0*NumGlobalNonzeros/(1.0*NumGlobalRows*NumGlobalRows) << " %" << std::endl; } # endif Epetra_LinearProblem *Amesos_LinearProblem = new Epetra_LinearProblem; Amesos_LinearProblem->SetOperator(Amesos_Matrix); Teuchos::ParameterList AmesosList; AmesosList.set("MaxProcs",MaxProcs); AmesosList.set("AddToDiag", AddToDiag); if( ML_Get_PrintLevel() > 10 ) { AmesosList.set("PrintTiming",true); AmesosList.set("OutputLevel",1); } // don't use iterative refinement for Superludist only Teuchos::ParameterList & SuperludistList = AmesosList.sublist("Superludist"); SuperludistList.set("IterRefine","NO"); Amesos_BaseSolver* A_Base; Amesos A_Factory; const Epetra_Comm& Comm = Amesos_Matrix->Comm(); switch (choice) { case ML_AMESOS_LAPACK: print_out(Comm, curr_level, "LAPACK"); A_Base = A_Factory.Create("Amesos_Lapack", *Amesos_LinearProblem); break; case ML_AMESOS_UMFPACK: print_out(Comm, curr_level, "UMFPACK"); A_Base = A_Factory.Create("Amesos_Klu", *Amesos_LinearProblem); break; case ML_AMESOS_SUPERLUDIST: print_out(Comm, curr_level, "SuperLU_DIST"); A_Base = A_Factory.Create("Amesos_Superludist", *Amesos_LinearProblem); break; case ML_AMESOS_SUPERLU: print_out(Comm, curr_level, "SuperLU"); A_Base = A_Factory.Create("Amesos_Superlu", 
*Amesos_LinearProblem); break; case ML_AMESOS_SCALAPACK: print_out(Comm, curr_level, "ScaLAPACK"); A_Base = A_Factory.Create("Amesos_Scalapack", *Amesos_LinearProblem); break; case ML_AMESOS_MUMPS: print_out(Comm, curr_level, "MUMPS"); A_Base = A_Factory.Create("Amesos_Mumps", *Amesos_LinearProblem); break; case ML_AMESOS_KLU: default: print_out(Comm, curr_level, "KLU"); A_Base = A_Factory.Create("Amesos_Klu", *Amesos_LinearProblem); break; } // may happen the desired solver is not available. KLU is almost // always compiled, so try this first. If not, then LAPACK is // the last choice before quitting if (A_Base == 0) { if (choice != ML_AMESOS_KLU) { if (Amesos_Matrix->Comm().MyPID() == 0 && ML_Get_PrintLevel() > 2) { std::cerr << "Amesos (level " << curr_level << ") : This coarse solver is not available." << std::endl; std::cerr << "Amesos (level " << curr_level << ") : Now re-building with KLU" << std::endl; } A_Base = A_Factory.Create("Amesos_Klu", *Amesos_LinearProblem); } if (A_Base == 0) { if (Amesos_Matrix->Comm().MyPID() == 0) { std::cerr << "Amesos (level " << curr_level << ") : This coarse solver is not available." << std::endl; std::cerr << "Amesos (level " << curr_level << ") : Now re-building with LAPACK" << std::endl; } A_Base = A_Factory.Create("Amesos_Lapack", *Amesos_LinearProblem); if (A_Base == 0) { if (Amesos_Matrix->Comm().MyPID() == 0) { std::cerr << "*ML*ERR* no Amesos solver is available!" << std::endl; } exit( EXIT_FAILURE ); } } } A_Base->SetParameters(AmesosList); Epetra_Time Time(Amesos_Matrix->Comm()); Time.ResetStartTime(); int rv; try{rv=A_Base->NumericFactorization();} catch(...) { if (Amesos_Matrix->Comm().MyPID() == 0) printf("\n*** * ML_Amesos_Gen: exception thrown from Amesos_BaseSolver->NumericFactorization(). * ***\n\n"); exit( EXIT_FAILURE ); } double Time2 = Time.ElapsedTime(); if(rv){ if(!Amesos_Matrix->Comm().MyPID()) printf("ERROR: Amesos NumericFactorization failed... 
dumping relevant matrix for post-mortem\n"); # ifdef HAVE_ML_EPETRAEXT EpetraExt::RowMatrixToMatlabFile("amesos-failure.dat",*Amesos_Matrix); # endif } Level__ = -1; # ifdef TFLOP if( Amesos_Matrix->Comm().MyPID() == 0 && ML_Get_PrintLevel()>2 ) { Level__ = curr_level; printf("Amesos (level %d) : Time for factorization = %f (s)\n",curr_level,Time2); } # else if( Amesos_Matrix->Comm().MyPID() == 0 && ML_Get_PrintLevel()>2 ) { Level__ = curr_level; std::cout << "Amesos (level " << curr_level << ") : Time for factorization = " << Time2 << " (s)" << std::endl; } # endif // those are very simple timing for solution TimeForSolve__ = 0.0; NumSolves__ = 0; Amesos_Handle->A_Base = (void *) A_Base ; } //if (hasRows==1) else Amesos_Handle->A_Base = 0; return 0; } //ML_Amesos_Gen()
// ================================================ ====== ==== ==== == = int ML_Epetra::RefMaxwell_Aggregate_Nodes(const Epetra_CrsMatrix & A, Teuchos::ParameterList & List, ML_Comm * ml_comm, std::string PrintMsg, ML_Aggregate_Struct *& MLAggr,ML_Operator *&P, int &NumAggregates){ /* Output level */ bool verbose, very_verbose; int OutputLevel = List.get("ML output", -47); if(OutputLevel == -47) OutputLevel = List.get("output", 1); if(OutputLevel>=15) very_verbose=verbose=true; if(OutputLevel > 5) {very_verbose=false;verbose=true;} else very_verbose=verbose=false; /* Wrap A in a ML_Operator */ ML_Operator* A_ML = ML_Operator_Create(ml_comm); ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&A),A_ML); /* Pull Teuchos Options */ std::string CoarsenType = List.get("aggregation: type", "Uncoupled"); double Threshold = List.get("aggregation: threshold", 0.0); int NodesPerAggr = List.get("aggregation: nodes per aggregate", ML_Aggregate_Get_OptimalNumberOfNodesPerAggregate()); bool UseAux = List.get("aggregation: aux: enable",false); double AuxThreshold = List.get("aggregation: aux: threshold",0.0); int MaxAuxLevels = List.get("aggregation: aux: max levels",10); ML_Aggregate_Create(&MLAggr); ML_Aggregate_Set_MaxLevels(MLAggr, 2); ML_Aggregate_Set_StartLevel(MLAggr, 0); ML_Aggregate_Set_Threshold(MLAggr, Threshold); ML_Aggregate_Set_MaxCoarseSize(MLAggr,1); MLAggr->cur_level = 0; ML_Aggregate_Set_Reuse(MLAggr); MLAggr->keep_agg_information = 1; P = ML_Operator_Create(ml_comm); /* Process Teuchos Options */ if (CoarsenType == "Uncoupled") ML_Aggregate_Set_CoarsenScheme_Uncoupled(MLAggr); else if (CoarsenType == "Uncoupled-MIS"){ ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr); } else if (CoarsenType == "METIS"){ ML_Aggregate_Set_CoarsenScheme_METIS(MLAggr); ML_Aggregate_Set_NodesPerAggr(0, MLAggr, 0, NodesPerAggr); }/*end if*/ else { if(!A.Comm().MyPID()) printf("%s Unsupported (1,1) block aggregation type(%s), resetting to 
uncoupled-mis\n",PrintMsg.c_str(),CoarsenType.c_str()); ML_Aggregate_Set_CoarsenScheme_UncoupledMIS(MLAggr); } /* Setup Aux Data */ if(UseAux) { A_ML->aux_data->enable=1; A_ML->aux_data->threshold=AuxThreshold; A_ML->aux_data->max_level=MaxAuxLevels; ML_Init_Aux(A_ML,List); if(verbose && !A.Comm().MyPID()) { printf("%s Using auxiliary matrix\n",PrintMsg.c_str()); printf("%s aux threshold = %e\n",PrintMsg.c_str(),A_ML->aux_data->threshold); } } /* Aggregate Nodes */ int printlevel=ML_Get_PrintLevel(); if(verbose) ML_Set_PrintLevel(10); NumAggregates = ML_Aggregate_Coarsen(MLAggr,A_ML, &P, ml_comm); if(verbose) ML_Set_PrintLevel(printlevel); if (NumAggregates == 0){ std::cerr << "Found 0 aggregates, perhaps the problem is too small." << std::endl; ML_CHK_ERR(-2); }/*end if*/ else if(very_verbose) printf("[%d] %s %d aggregates created invec_leng=%d\n",A.Comm().MyPID(),PrintMsg.c_str(),NumAggregates,P->invec_leng); if(verbose){ int globalAggs=0; A.Comm().SumAll(&NumAggregates,&globalAggs,1); if(!A.Comm().MyPID()) { printf("%s Aggregation threshold = %e\n",PrintMsg.c_str(),Threshold); printf("%s Global aggregates = %d\n",PrintMsg.c_str(),globalAggs); } } /* Cleanup */ ML_qr_fix_Destroy(); if(UseAux) ML_Finalize_Aux(A_ML); ML_Operator_Destroy(&A_ML); return 0; }