/*
 * ML_Aggregate_CoarsenUser
 * ------------------------
 * Builds the tentative prolongator P for one multigrid level from a
 * user-supplied aggregation (obtained via ML_GetUserPartitions).
 *
 * Outline:
 *   1. amalgamate the PDE system into block (nodal) form and drop weak
 *      couplings below the current threshold;
 *   2. query the user routine for a node-to-aggregate map (aggr_index);
 *   3. optionally store a copy of the decomposition for later
 *      visualization / statistics (in the ML_Aggregate_Viz_Stats grid);
 *   4. un-amalgamate and expand the aggregate map back to the point
 *      (unknown) level;
 *   5. for each aggregate, QR-factorize the restriction of the fine
 *      null space to that aggregate (LAPACK dgeqrf/dorgqr): the R factor
 *      becomes the coarse-level null space, the Q factor becomes the
 *      corresponding rows of P;
 *   6. assemble P in CSR form and attach it to *Pmatrix.
 *
 * Parameters:
 *   ml_ag    - aggregation object; supplies thresholds, null space,
 *              and receives per-level aggregate info and the new
 *              coarse null space.
 *   Amatrix  - fine-level operator (amalgamated and restored in place).
 *   Pmatrix  - (out) receives the tentative prolongator.
 *   comm     - ML communicator used for global reductions.
 *
 * Returns: the number of coarse-grid unknowns, Ncoarse * nullspace_dim.
 * On unrecoverable errors (bad sizes, failed allocations, QR failure)
 * the routine prints a message and exits the program.
 */
int ML_Aggregate_CoarsenUser(ML_Aggregate *ml_ag, ML_Operator *Amatrix,
                             ML_Operator **Pmatrix, ML_Comm *comm)
{
   unsigned int nbytes, length;      /* nbytes: scratch byte counts; length: current aggregate size */
   int     i, j, k, Nrows, exp_Nrows;
   int     diff_level;
   int     aggr_count, index, mypid, num_PDE_eqns;
   int     *aggr_index = NULL, nullspace_dim;   /* aggr_index[i] = aggregate owning row i (-1 = unassigned) */
   int     Ncoarse, count;
   int     *new_ia = NULL, *new_ja = NULL, new_Nrows;  /* CSR arrays of the new prolongator */
   int     exp_Ncoarse;
   int     *aggr_cnt_array = NULL;   /* number of rows in each aggregate */
   int     level, index3, max_agg_size;
   int     **rows_in_aggs = NULL, lwork, info;  /* rows_in_aggs[a] lists the rows of aggregate a */
   double  *new_val = NULL, epsilon;
   double  *nullspace_vect = NULL, *qr_tmp = NULL;   /* qr_tmp: per-aggregate dense block for LAPACK */
   double  *tmp_vect = NULL, *work = NULL, *new_null = NULL;
   ML_SuperNode          *aggr_head = NULL, *aggr_curr, *supernode;
   struct ML_CSR_MSRdata *csr_data;
   int     total_nz = 0;
   char str[80];                     /* diagnostic-output prefix, "<label> (level N) :" */
   int * graph_decomposition = NULL;
   ML_Aggregate_Viz_Stats * aggr_viz_and_stats;
   ML_Aggregate_Viz_Stats * grid_info;
   int Nprocs;
   char * unamalg_bdry = NULL;       /* unamalg_bdry[node] == 'T' marks a boundary (Dirichlet) node */
   char* label;
   int N_dimensions;
   double* x_coord = NULL;
   double* y_coord = NULL;
   double* z_coord = NULL;

   /* ------------------- execution begins --------------------------------- */

   label = ML_GetUserLabel();
   sprintf(str, "%s (level %d) :", label, ml_ag->cur_level);

   /* ============================================================= */
   /* get the machine information and matrix references             */
   /* ============================================================= */

   mypid          = comm->ML_mypid;
   Nprocs         = comm->ML_nprocs;
   epsilon        = ml_ag->threshold;
   num_PDE_eqns   = ml_ag->num_PDE_eqns;
   nullspace_dim  = ml_ag->nullspace_dim;
   nullspace_vect = ml_ag->nullspace_vect;
   Nrows          = Amatrix->outvec_leng;

   if (mypid == 0 && 5 < ML_Get_PrintLevel()) {
     printf("%s num PDE eqns = %d\n", str, num_PDE_eqns);
   }

   /* ============================================================= */
   /* check the system size versus null dimension size              */
   /* ============================================================= */

   /* Nrows must split evenly into blocks of num_PDE_eqns unknowns. */
   if ( Nrows % num_PDE_eqns != 0 )
   {
      printf("ML_Aggregate_CoarsenUser ERROR : Nrows must be multiples");
      printf(" of num_PDE_eqns.\n");
      exit(EXIT_FAILURE);
   }
   /* On all but the finest level the block size is the null-space
    * dimension (the coarse operator has nullspace_dim dofs per node). */
   diff_level = ml_ag->max_levels - ml_ag->cur_level - 1;
   if ( diff_level > 0 ) num_PDE_eqns = nullspace_dim; /* ## 12/20/99 */

   /* ============================================================= */
   /* set up the threshold for weight-based coarsening              */
   /* ============================================================= */

   /* The drop threshold is halved on each successive level; it is
    * reset to ml_ag->threshold when this is the starting level. */
   diff_level = ml_ag->begin_level - ml_ag->cur_level;
   if (diff_level == 0) ml_ag->curr_threshold = ml_ag->threshold;
   epsilon = ml_ag->curr_threshold;
   ml_ag->curr_threshold *= 0.5;

   if (mypid == 0 && 7 < ML_Get_PrintLevel())
      printf("%s current eps = %e\n", str, epsilon);

   /* squared because the drop test below compares squared magnitudes */
   epsilon = epsilon * epsilon;

   /* Collapse the PDE system to one row/col per node and drop weak
    * connections; Nrows now counts block rows. */
   ML_Operator_AmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);
   Nrows /= num_PDE_eqns;
   exp_Nrows = Nrows;

   /* ********************************************************************** */
   /* allocate memory for aggr_index, which will contain the decomposition  */
   /* ********************************************************************** */

   nbytes = (Nrows*num_PDE_eqns) * sizeof(int);

   if ( nbytes > 0 ) {
      ML_memory_alloc((void**) &aggr_index, nbytes, "ACJ");
      if( aggr_index == NULL ) {
         fprintf( stderr,
                  "*ML*ERR* not enough memory for %d bytes\n"
                  "*ML*ERR* (file %s, line %d)\n",
                  nbytes,
                  __FILE__,
                  __LINE__ );
         exit( EXIT_FAILURE );
      }
   }
   else aggr_index = NULL;

   /* -1 == "not yet assigned to any aggregate" */
   for( i=0 ; i<Nrows*num_PDE_eqns ; i++ ) aggr_index[i] = -1;

   unamalg_bdry = (char *) ML_allocate( sizeof(char) * (Nrows+1) );

   if( unamalg_bdry == NULL ) {
      fprintf( stderr,
               "*ML*ERR* on proc %d, not enough space for %d bytes\n"
               "*ML*ERR* (file %s, line %d)\n",
               mypid,
               (int)sizeof(char) * Nrows,
               __FILE__,
               __LINE__ );
      exit( EXIT_FAILURE );
   }

   /* Nodal coordinates (if present) are passed to the user partitioner;
    * y/z are only used when the problem dimension requires them. */
   N_dimensions = ml_ag->N_dimensions;
   grid_info = (ML_Aggregate_Viz_Stats*) Amatrix->to->Grid->Grid;
   x_coord = grid_info->x;

   if (N_dimensions > 1 && x_coord)
     y_coord = grid_info->y;
   else
     y_coord = 0;

   if (N_dimensions > 2 && x_coord)
     z_coord = grid_info->z;
   else
     z_coord = 0;

   /* User-supplied decomposition: fills aggr_index and unamalg_bdry,
    * returns the local number of aggregates. */
   aggr_count = ML_GetUserPartitions(Amatrix,unamalg_bdry,
                                     epsilon,
                                     x_coord,y_coord,z_coord,
                                     aggr_index,&total_nz);

#ifdef ML_MPI
   MPI_Allreduce( &Nrows, &i, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
   MPI_Allreduce( &aggr_count, &j, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
#else
   i = Nrows;
   j = aggr_count;
#endif

   if( mypid == 0 && 7 < ML_Get_PrintLevel() ) {
     printf("%s Using %d (block) aggregates (globally)\n",
            str,
            j );
     printf("%s # (block) aggre/ # (block) rows = %8.5f %% ( = %d / %d)\n",
            str,
            100.0*j/i,
            j, i);
   }

   j = ML_gsum_int( aggr_count, comm );
   if (mypid == 0 && 7 < ML_Get_PrintLevel())  {
     printf("%s %d (block) aggregates (globally)\n", str, j );
   }

   /* ********************************************************************** */
   /* I allocate room to copy aggr_index and pass this value to the user,   */
   /* who will be able to analyze and visualize this after the construction */
   /* of the levels. This way, the only price we have to pay for stats and  */
   /* viz is essentially a little bit of memory.                            */
   /* this memory will be cleaned with the object ML_Aggregate ml_ag.       */
   /* I set the pointers using the ML_Aggregate_Info structure. This is     */
   /* allocated using ML_Aggregate_Info_Setup(ml,MaxNumLevels)              */
   /* ********************************************************************** */

   if (Amatrix->to->Grid->Grid != NULL) {

     graph_decomposition = (int *)ML_allocate(sizeof(int)*(Nrows+1));
     if( graph_decomposition == NULL ) {
       fprintf( stderr,
                "*ML*ERR* Not enough memory for %d bytes\n"
                "*ML*ERR* (file %s, line %d)\n",
                (int)sizeof(int)*Nrows,
                __FILE__,
                __LINE__ );
       exit( EXIT_FAILURE );
     }

     for( i=0 ; i<Nrows ; i++ ) graph_decomposition[i] = aggr_index[i];

     aggr_viz_and_stats = (ML_Aggregate_Viz_Stats *) (Amatrix->to->Grid->Grid);
     aggr_viz_and_stats->graph_decomposition = graph_decomposition;
     aggr_viz_and_stats->Nlocal = Nrows;
     aggr_viz_and_stats->Naggregates = aggr_count;
     aggr_viz_and_stats->local_or_global = ML_LOCAL_INDICES;
     aggr_viz_and_stats->is_filled = ML_YES;
     aggr_viz_and_stats->Amatrix = Amatrix;
   }

   /* ********************************************************************** */
   /* take the decomposition as created by METIS and form the aggregates    */
   /* ********************************************************************** */

   total_nz = ML_Comm_GsumInt( comm, total_nz);
   i = ML_Comm_GsumInt( comm, Nrows);

   if ( mypid == 0 && 7 < ML_Get_PrintLevel())
     printf("%s Total (block) nnz = %d ( = %5.2f/(block)row)\n",
            str,
            total_nz,1.0*total_nz/i);

   /* Accumulate operator complexity statistics across levels. */
   if ( ml_ag->operator_complexity == 0.0 ) {
      ml_ag->fine_complexity = total_nz;
      ml_ag->operator_complexity = total_nz;
   }
   else ml_ag->operator_complexity += total_nz;

   /* fix aggr_index for num_PDE_eqns > 1: expand the block (nodal)
    * assignment to every unknown of the node.  Done backwards so the
    * nodal entries are read before they are overwritten. */
   for (i = Nrows - 1; i >= 0; i-- ) {
      for (j = num_PDE_eqns-1; j >= 0; j--) {
         aggr_index[i*num_PDE_eqns+j] = aggr_index[i];
      }
   }

   if ( mypid == 0 && 8 < ML_Get_PrintLevel())
   {
      printf("Calling ML_Operator_UnAmalgamateAndDropWeak\n");
      fflush(stdout);
   }

   /* Restore the operator to point form; sizes go back to unknown counts. */
   ML_Operator_UnAmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);

   Nrows      *= num_PDE_eqns;
   exp_Nrows  *= num_PDE_eqns;

   /* count the size of each aggregate */
   /* NOTE(review): this ML_allocate result is used unchecked below --
    * consistent with nearby allocations that are checked; confirm policy. */
   aggr_cnt_array = (int *) ML_allocate(sizeof(int)*(aggr_count+1));
   for (i = 0; i < aggr_count ; i++) aggr_cnt_array[i] = 0;
   for (i = 0; i < exp_Nrows; i++) {
      if (aggr_index[i] >= 0) {
         if( aggr_index[i] >= aggr_count ) {
            fprintf( stderr,
                     "*ML*WRN* on process %d, something weird happened...\n"
                     "*ML*WRN* node %d belong to aggregate %d (#aggr = %d)\n"
                     "*ML*WRN* (file %s, line %d)\n",
                     comm->ML_mypid,
                     i,
                     aggr_index[i],
                     aggr_count,
                     __FILE__,
                     __LINE__ );
         } else {
            aggr_cnt_array[aggr_index[i]]++;
         }
      }
   }

   /* ============================================================= */
   /* Form tentative prolongator                                    */
   /* ============================================================= */

   Ncoarse = aggr_count;

   /* ============================================================= */
   /* check and copy aggr_index                                     */
   /* ------------------------------------------------------------- */

   level = ml_ag->cur_level;
   nbytes = (Nrows+1) * sizeof( int );
   ML_memory_alloc((void**) &(ml_ag->aggr_info[level]), nbytes, "AGl");
   count = aggr_count;
   for ( i = 0; i < Nrows; i+=num_PDE_eqns )
   {
      if ( aggr_index[i] >= 0 )
      {
         for ( j = 0; j < num_PDE_eqns; j++ )
            ml_ag->aggr_info[level][i+j] = aggr_index[i];
         if (aggr_index[i] >= count) count = aggr_index[i] + 1;
      }
      /*else
       *{
       *   printf("%d : CoarsenMIS error : aggr_index[%d] < 0\n",
       *          mypid,i);
       *   exit(1);
       *}*/
   }
   ml_ag->aggr_count[level] = count; /* for relaxing boundary points */

   /* ============================================================= */
   /* set up the new operator                                       */
   /* ------------------------------------------------------------- */

   new_Nrows = Nrows;
   exp_Ncoarse = Nrows;

   for ( i = 0; i < new_Nrows; i++ )
   {
      if ( aggr_index[i] >= exp_Ncoarse )
      {
         printf("*ML*WRN* index out of bound %d = %d(%d)\n",
                i, aggr_index[i],
                exp_Ncoarse);
      }
   }

   /* CSR storage for P: each row holds at most nullspace_dim entries. */
   nbytes = ( new_Nrows+1 ) * sizeof(int);
   ML_memory_alloc((void**)&(new_ia), nbytes, "AIA");
   nbytes = ( new_Nrows+1) * nullspace_dim * sizeof(int);
   ML_memory_alloc((void**)&(new_ja), nbytes, "AJA");
   nbytes = ( new_Nrows+1) * nullspace_dim * sizeof(double);
   ML_memory_alloc((void**)&(new_val), nbytes, "AVA");
   for ( i = 0; i < new_Nrows*nullspace_dim; i++ ) new_val[i] = 0.0;

   /* ------------------------------------------------------------- */
   /* set up the space for storing the new null space               */
   /* ------------------------------------------------------------- */

   nbytes = (Ncoarse+1) * nullspace_dim * nullspace_dim * sizeof(double);
   ML_memory_alloc((void**)&(new_null),nbytes,"AGr");
   if( new_null == NULL ) {
     fprintf( stderr,
              "*ML*ERR* on process %d, not enough memory for %d bytes\n"
              "*ML*ERR* (file %s, line %d)\n",
              mypid,
              nbytes,
              __FILE__,
              __LINE__ );
     exit( EXIT_FAILURE );
   }

   for (i = 0; i < Ncoarse*nullspace_dim*nullspace_dim; i++)
      new_null[i] = 0.0;

   /* ------------------------------------------------------------- */
   /* initialize the row pointer for the CSR prolongation operator  */
   /* (each row will have at most nullspace_dim nonzero entries)    */
   /* ------------------------------------------------------------- */

   for (i = 0; i <= Nrows; i++) new_ia[i] = i * nullspace_dim;

   /* trying this when a Dirichlet row is taken out:
    * rebuild row pointers so rows of unassigned (boundary) points
    * get zero entries instead of nullspace_dim. */
   j = 0;
   new_ia[0] = 0;
   for (i = 0; i < Nrows; i++) {
      if (aggr_index[i] != -1) j += nullspace_dim;
      new_ia[i+1] = j;
   }

   /* ------------------------------------------------------------- */
   /* generate an array to store which aggregate has which rows.Then*/
   /* loop through the rows of A checking which aggregate each row  */
   /* is in, and adding it to the appropriate spot in rows_in_aggs  */
   /* ------------------------------------------------------------- */

   ML_memory_alloc((void**)&rows_in_aggs,aggr_count*sizeof(int*),"MLs");
   for (i = 0; i < aggr_count; i++) {
      nbytes = aggr_cnt_array[i]+1;
      rows_in_aggs[i] = (int *) ML_allocate(nbytes*sizeof(int));
      aggr_cnt_array[i] = 0;   /* reused below as a fill cursor per aggregate */
      if (rows_in_aggs[i] == NULL)  {
         printf("*ML*ERR* couldn't allocate memory in CoarsenMETIS\n");
         exit(1);
      }
   }

   for (i = 0; i < exp_Nrows; i+=num_PDE_eqns) {
      if ( aggr_index[i] >= 0 && aggr_index[i] < aggr_count)
      {
         for (j = 0; j < num_PDE_eqns; j++)
         {
            index = aggr_cnt_array[aggr_index[i]]++;
            rows_in_aggs[aggr_index[i]][index] = i + j;
         }
      }
   }

   /* ------------------------------------------------------------- */
   /* allocate work arrays for QR factorization                     */
   /* work and lwork are needed for lapack's QR routine.  These     */
   /* settings seemed easiest since I don't quite understand        */
   /* what they do, but may want to do something better here later  */
   /* ------------------------------------------------------------- */

   max_agg_size = 0;
   for (i = 0; i < aggr_count; i++)
   {
      if (aggr_cnt_array[i] > max_agg_size) max_agg_size = aggr_cnt_array[i];
   }
   nbytes = max_agg_size * nullspace_dim * sizeof(double);
   ML_memory_alloc((void**)&qr_tmp, nbytes, "AGu");
   nbytes = nullspace_dim * sizeof(double);
   ML_memory_alloc((void**)&tmp_vect, nbytes, "AGv");

   lwork  = nullspace_dim;
   nbytes = nullspace_dim * sizeof(double);
   ML_memory_alloc((void**)&work, nbytes, "AGw");

   /* ------------------------------------------------------------- */
   /* perform block QR decomposition                                */
   /* ------------------------------------------------------------- */

   for (i = 0; i < aggr_count; i++)
   {
      /* ---------------------------------------------------------- */
      /* set up the matrix we want to decompose into Q and R:       */
      /* qr_tmp is (length x nullspace_dim), stored column-major    */
      /* with leading dimension 'length', as LAPACK requires.       */
      /* ---------------------------------------------------------- */

      length = aggr_cnt_array[i];
      if (nullspace_vect == NULL)
      {
         /* no user null space: use the canonical one (constant per
          * PDE equation); boundary rows are zeroed. */
         for (j = 0; j < (int) length; j++)
         {
            index = rows_in_aggs[i][j];

            for (k = 0; k < nullspace_dim; k++)
            {
              if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
                qr_tmp[k*length+j] = 0.;
               else
               {
                  if (index % num_PDE_eqns == k) qr_tmp[k*length+j] = 1.0;
                  else                           qr_tmp[k*length+j] = 0.0;
               }
            }
         }
      }
      else
      {
         /* copy the user-supplied null-space vectors restricted to
          * this aggregate; boundary rows are zeroed. */
         for (k = 0; k < nullspace_dim; k++)
         {
            for (j = 0; j < (int) length; j++)
            {
               index = rows_in_aggs[i][j];

               if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
                 qr_tmp[k*length+j] = 0.;
               else {
                  if (index < Nrows) {
                     qr_tmp[k*length+j] = nullspace_vect[k*Nrows+index];
                  }
                  else {
                     fprintf( stderr,
                              "*ML*ERR* in QR\n"
                              "*ML*ERR* (file %s, line %d)\n",
                              __FILE__,
                              __LINE__ );
                     exit( EXIT_FAILURE );
                  }
               }
            }
         }
      }

      /* ---------------------------------------------------------- */
      /* now calculate QR using an LAPACK routine                   */
      /* ---------------------------------------------------------- */

      if (aggr_cnt_array[i] >= nullspace_dim) {

         DGEQRF_F77(&(aggr_cnt_array[i]), &nullspace_dim, qr_tmp,
                    &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
         if (info != 0)
            pr_error("ErrOr in CoarsenMIS : dgeqrf returned a non-zero %d %d\n",
                     aggr_cnt_array[i],i);

         /* work[0] returns the optimal workspace size; grow 'work'
          * for the next call if the current buffer was too small. */
         if (work[0] > lwork)
         {
            lwork=(int) work[0];
            ML_memory_free((void**) &work);
            ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGx");
         }
         else lwork=(int) work[0];

         /* ---------------------------------------------------------- */
         /* the upper triangle of qr_tmp is now R, so copy that into   */
         /* the new nullspace                                          */
         /* ---------------------------------------------------------- */

         for (j = 0; j < nullspace_dim; j++)
            for (k = j; k < nullspace_dim; k++)
               new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] =
                  qr_tmp[j+aggr_cnt_array[i]*k];

         /* ---------------------------------------------------------- */
         /* to get this block of P, need to run qr_tmp through another */
         /* LAPACK function:                                           */
         /* ---------------------------------------------------------- */

         /* NOTE(review): this branch cannot fire here -- the enclosing
          * 'if' guarantees aggr_cnt_array[i] >= nullspace_dim. */
         if ( aggr_cnt_array[i] < nullspace_dim ){
            printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
                   nullspace_dim);
            printf("ERROR : performing QR on a MxN matrix where M<N.\n");
         }
         DORGQR_F77(&(aggr_cnt_array[i]), &nullspace_dim, &nullspace_dim,
                    qr_tmp, &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
         if (info != 0) {
            printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
                   nullspace_dim);
            pr_error("Error in CoarsenMIS: dorgqr returned a non-zero\n");
         }

         if (work[0] > lwork)
         {
            lwork=(int) work[0];
            ML_memory_free((void**) &work);
            ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGy");
         }
         else lwork=(int) work[0];

         /* ---------------------------------------------------------- */
         /* now copy Q over into the appropriate part of P:            */
         /* The rows of P get calculated out of order, so I assume the */
         /* Q is totally dense and use what I know of how big each Q   */
         /* will be to determine where in ia, ja, etc each nonzero in  */
         /* Q belongs.  If I did not assume this, I would have to keep */
         /* all of P in memory in order to determine where each entry  */
         /* should go                                                  */
         /* ---------------------------------------------------------- */

         for (j = 0; j < aggr_cnt_array[i]; j++)
         {
            index = rows_in_aggs[i][j];

            if ( index < Nrows )
            {
               index3 = new_ia[index];
               for (k = 0; k < nullspace_dim; k++)
               {
                  new_ja [index3+k] = i * nullspace_dim + k;
                  new_val[index3+k] = qr_tmp[ k*aggr_cnt_array[i]+j];
               }
            }
            else
            {
               fprintf( stderr,
                        "*ML*ERR* in QR: index out of bounds (%d - %d)\n",
                        index,
                        Nrows );
            }
         }
      }
      else {
         /* We have a small aggregate such that the QR factorization can not */
         /* be performed. Instead let us copy the null space from the fine   */
         /* into the coarse grid nullspace and put the identity for the      */
         /* prolongator????                                                  */

         for (j = 0; j < nullspace_dim; j++)
            for (k = 0; k < nullspace_dim; k++)
               new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] =
                  qr_tmp[j+aggr_cnt_array[i]*k];
         for (j = 0; j < aggr_cnt_array[i]; j++) {
            index = rows_in_aggs[i][j];
            index3 = new_ia[index];
            for (k = 0; k < nullspace_dim; k++) {
               new_ja [index3+k] = i * nullspace_dim + k;
               if (k == j) new_val[index3+k] = 1.;
               else new_val[index3+k] = 0.;
            }
         }
      }
   }

   /* Install the coarse null space computed from the R factors. */
   ML_Aggregate_Set_NullSpace(ml_ag, num_PDE_eqns, nullspace_dim,
                              new_null, Ncoarse*nullspace_dim);
   ML_memory_free( (void **) &new_null);

   /* ------------------------------------------------------------- */
   /* set up the csr_data data structure                            */
   /* ------------------------------------------------------------- */

   ML_memory_alloc((void**) &csr_data, sizeof(struct ML_CSR_MSRdata),"CSR");
   csr_data->rowptr  = new_ia;
   csr_data->columns = new_ja;
   csr_data->values  = new_val;

   /* P owns csr_data; it is released through data_destroy. */
   ML_Operator_Set_ApplyFuncData( *Pmatrix, nullspace_dim*Ncoarse, Nrows,
                                  csr_data, Nrows, NULL, 0);
   (*Pmatrix)->data_destroy = ML_CSR_MSR_ML_memorydata_Destroy;
   (*Pmatrix)->getrow->pre_comm = ML_CommInfoOP_Create();
   (*Pmatrix)->max_nz_per_row = 1;

   ML_Operator_Set_Getrow((*Pmatrix), Nrows, CSR_getrow);
   ML_Operator_Set_ApplyFunc((*Pmatrix), CSR_matvec);
   (*Pmatrix)->max_nz_per_row = 1;
   /* this must be set so that the hierarchy generation does not abort early
      in adaptive SA */
   (*Pmatrix)->num_PDEs = nullspace_dim;

   /* ------------------------------------------------------------- */
   /* clean up                                                      */
   /* ------------------------------------------------------------- */

   ML_free(unamalg_bdry);
   ML_memory_free((void**)&aggr_index);
   ML_free(aggr_cnt_array);
   for (i = 0; i < aggr_count; i++) ML_free(rows_in_aggs[i]);
   ML_memory_free((void**)&rows_in_aggs);
   ML_memory_free((void**)&qr_tmp);
   ML_memory_free((void**)&tmp_vect);
   ML_memory_free((void**)&work);

   /* NOTE(review): aggr_head is never assigned in this routine, so this
    * loop is a no-op here; presumably kept for symmetry with the other
    * coarsening routines -- confirm before removing. */
   aggr_curr = aggr_head;
   while ( aggr_curr != NULL )
   {
      supernode = aggr_curr;
      aggr_curr = aggr_curr->next;
      if ( supernode->length > 0 ) ML_free( supernode->list );
      ML_free( supernode );
   }

   return Ncoarse*nullspace_dim;

} /* ML_Aggregate_CoarsenUser */
/*
 * main -- driver that reads an MSR matrix from file ".data", builds an
 * ML smoothed-aggregation multigrid hierarchy (optionally seeded with
 * rigid-body-mode null-space vectors read from "rigid_body_mode*" files),
 * and solves the system with Aztec's CG using the ML preconditioner.
 *
 * NOTE(review): this function references several identifiers that are
 * not declared in this excerpt (N_update, update, bindx, external,
 * extern_index, update_index, data_org, cpntr, scaling_vect, iii,
 * which_filename) -- presumably file-scope globals declared earlier in
 * the file; confirm against the full source.
 */
int main(int argc, char *argv[])
{
	int num_PDE_eqns=6, N_levels=4, nsmooth=2;

	int    leng, level, N_grid_pts, coarsest_level;

	/* See Aztec User's Guide for more information on the */
	/* variables that follow.                             */

	int    proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
	double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];

	/* data structure for matrix corresponding to the fine grid */

	double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time;
	AZ_MATRIX *Amat;
	AZ_PRECOND *Pmat = NULL;
	ML *ml;
	FILE *fp;
	int i, j, Nrigid, *garbage = NULL;
#ifdef ML_partition
	int nblocks;
	int *block_list = NULL;
	int k;
#endif
	struct AZ_SCALING *scaling;
	ML_Aggregate *ag;
	double *mode, *rigid;   /* current rigid-body mode / accumulated modes */
	char filename[80];
	double alpha;
	int allocated = 0;
	int old_prec, old_sol;
	double old_tol;
	/* double *Amode, beta, biggest; int big_ind = -1, ii; */
	ML_Operator *Amatrix;
	int *rowi_col = NULL, rowi_N, count2, ccc;
	double *rowi_val = NULL;
	double max_diag, min_diag, max_sum, sum;
	int nBlocks, *blockIndices, Ndof;
#ifdef ML_partition
	FILE *fp2;
	int count;

	if (argc != 2) {
	  printf("Usage: ml_read_elas num_processors\n");
	  exit(1);
	}
	else sscanf(argv[1],"%d",&nblocks);
#endif

#ifdef HAVE_MPI
	MPI_Init(&argc,&argv);
	/* get number of processors and the name of this processor */
	AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
#else
	AZ_set_proc_config(proc_config, AZ_NOT_MPI);
#endif

	/* read in the number of matrix equations (rank 0 reads the header
	 * of ".data"; the value is then broadcast via the global sum). */
	leng = 0;
	if (proc_config[AZ_node] == 0) {
#ifdef binary
	  fp=fopen(".data","rb");
#else
	  fp=fopen(".data","r");
#endif
	  if (fp==NULL) {
	    printf("couldn't open file .data\n");
	    exit(1);
	  }
#ifdef binary
	  fread(&leng, sizeof(int), 1, fp);
#else
	  fscanf(fp,"%d",&leng);
#endif
	  fclose(fp);
	}
	leng = AZ_gsum_int(leng, proc_config);

	N_grid_pts=leng/num_PDE_eqns;

	/* initialize the list of global indices. NOTE: the list of global */
	/* indices must be in ascending order so that subsequent calls to  */
	/* AZ_find_index() will function properly.                         */

	if (proc_config[AZ_N_procs] == 1) i = AZ_linear;
	else i = AZ_file;
	AZ_read_update(&N_update, &update, proc_config, N_grid_pts, num_PDE_eqns,i);

	AZ_read_msr_matrix(update, &val, &bindx, N_update, proc_config);

	/* This code is to fix things up so that we are sure we have   */
	/* all block (including the ghost nodes the same size.         */

	AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update);

	AZ_transform_norowreordering(proc_config, &external, bindx, val, update,
				     &update_index, &extern_index, &data_org,
				     N_update, 0, 0, 0, &cpntr, AZ_MSR_MATRIX);

	Amat = AZ_matrix_create( leng );
	AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL);

	Amat->matrix_type  = data_org[AZ_matrix_type];

	data_org[AZ_N_rows]  = data_org[AZ_N_internal] + data_org[AZ_N_border];

#ifdef SCALE_ME
	ML_MSR_sym_diagonal_scaling(Amat, proc_config, &scaling_vect);
#endif

	start_time = AZ_second();

	options[AZ_scaling] = AZ_none;
	ML_Create(&ml, N_levels);
	ML_Set_PrintLevel(10);

	/* set up discretization matrix and matrix vector function */

	AZ_ML_Set_Amat(ml, N_levels-1, N_update, N_update, Amat, proc_config);

#ifdef ML_partition

	/* this code is meant to partition the matrices so that things can be */
	/* run in parallel later.                                             */
	/* It is meant to be run on only one processor.                       */
#ifdef	MB_MODIF
	fp2 = fopen(".update","w");
#else
	fp2 = fopen("partition_file","w");
#endif

	ML_Operator_AmalgamateAndDropWeak(&(ml->Amat[N_levels-1]), num_PDE_eqns, 0.0);
	ML_Gen_Blocks_Metis(ml, N_levels-1, &nblocks, &block_list);

	/* dump, per block: its size, then the point indices it owns */
	for (i = 0; i < nblocks; i++) {
	  count = 0;
	  for (j = 0; j < ml->Amat[N_levels-1].outvec_leng; j++) {
	    if (block_list[j] == i) count++;
	  }
	  fprintf(fp2,"   %d\n",count*num_PDE_eqns);
	  for (j = 0; j < ml->Amat[N_levels-1].outvec_leng; j++) {
	    if (block_list[j] == i) {
	      for (k = 0; k < num_PDE_eqns; k++) fprintf(fp2,"%d\n",j*num_PDE_eqns+k);
	    }
	  }
	}
	fclose(fp2);
	ML_Operator_UnAmalgamateAndDropWeak(&(ml->Amat[N_levels-1]),num_PDE_eqns,0.0);
#ifdef	MB_MODIF
	printf(" partition file dumped in .update\n");
#endif
	exit(1);
#endif

	ML_Aggregate_Create( &ag );
/*	ML_Aggregate_Set_CoarsenScheme_MIS(ag); */
#ifdef  MB_MODIF
	ML_Aggregate_Set_DampingFactor(ag,1.50);
#else
	ML_Aggregate_Set_DampingFactor(ag,1.5);
#endif
	ML_Aggregate_Set_CoarsenScheme_METIS(ag);
	ML_Aggregate_Set_NodesPerAggr( ml, ag, -1, 35);
	/*
	ML_Aggregate_Set_Phase3AggregateCreationAggressiveness(ag, 10.001);
	*/

	ML_Aggregate_Set_Threshold(ag, 0.0);
	ML_Aggregate_Set_MaxCoarseSize( ag, 300);

	/* read in the rigid body modes */

	Nrigid = 0;

	/* to ensure compatibility with RBM dumping software: try the
	 * zero-padded names ("rigid_body_mode01") first, then the
	 * unpadded ones ("rigid_body_mode1"). Only rank 0 probes. */
	if (proc_config[AZ_node] == 0) {
	  sprintf(filename,"rigid_body_mode%02d",Nrigid+1);
	  while( (fp = fopen(filename,"r")) != NULL) {
	    which_filename = 1;
	    fclose(fp);
	    Nrigid++;
	    sprintf(filename,"rigid_body_mode%02d",Nrigid+1);
	  }
	  sprintf(filename,"rigid_body_mode%d",Nrigid+1);
	  while( (fp = fopen(filename,"r")) != NULL) {
	    fclose(fp);
	    Nrigid++;
	    sprintf(filename,"rigid_body_mode%d",Nrigid+1);
	  }
	}
	Nrigid = AZ_gsum_int(Nrigid,proc_config);

	if (Nrigid != 0) {
	  rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) );
	  if (rigid == NULL) {
	    printf("Error: Not enough space for rigid body modes\n");
	  }
	}

	rhs = (double *) malloc(leng*sizeof(double));
	xxx = (double *) malloc(leng*sizeof(double));

	for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0;

	for (i = 0; i < Nrigid; i++) {
	  if (which_filename == 1) sprintf(filename,"rigid_body_mode%02d",i+1);
	  else sprintf(filename,"rigid_body_mode%d",i+1);
	  AZ_input_msr_matrix(filename,update,&mode,&garbage,N_update,proc_config);
	  AZ_reorder_vec(mode, data_org, update_index, NULL);

	  /* here is something to stick a rigid body mode as the initial */
	  /* The idea is to solve A x = 0 without smoothing with a two   */
	  /* level method. If everything is done properly, we should     */
	  /* converge in 2 iterations.                                   */
	  /* Note: we must also zero out components of the rigid body    */
	  /* mode that correspond to Dirichlet bcs.                      */

	  /* NOTE(review): i == -4 is never true -- this is a disabled
	   * debugging path, enabled by editing the constant. */
	  if (i == -4) {
	    for (iii = 0; iii < leng; iii++) xxx[iii] = mode[iii];

	    ccc = 0;
	    Amatrix = &(ml->Amat[N_levels-1]);
	    for (iii = 0; iii < Amatrix->outvec_leng; iii++) {
	      ML_get_matrix_row(Amatrix,1,&iii,&allocated,&rowi_col,&rowi_val,
				&rowi_N, 0);
	      count2 = 0;
	      for (j = 0; j < rowi_N; j++) if (rowi_val[j] != 0.) count2++;
	      if (count2 <= 1) { xxx[iii] = 0.; ccc++; }
	    }
	    free(rowi_col); free(rowi_val);
	    allocated = 0; rowi_col = NULL; rowi_val = NULL;
	  }

	  /*
	   *  Rescale matrix/rigid body modes and checking
	   *
	   AZ_sym_rescale_sl(mode, Amat->data_org, options, proc_config, scaling);
	   Amat->matvec(mode, rigid, Amat, proc_config);
	   for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]);
	   */

	  /* Here is some code to check that the rigid body modes are  */
	  /* really rigid body modes. The idea is to multiply by A and */
	  /* then to zero out things that we "think" are boundaries.   */
	  /* In this hardwired example, things near boundaries         */
	  /* correspond to matrix rows that do not have 81 nonzeros.   */

	  /*
	  Amode = (double *) malloc(leng*sizeof(double));
	  Amat->matvec(mode, Amode, Amat, proc_config);
	  j = 0;
	  biggest = 0.0;
	  for (ii = 0; ii < N_update; ii++) {
	    if ( Amat->bindx[ii+1] - Amat->bindx[ii] != 80) {
	      Amode[ii] = 0.; j++;
	    }
	    else {
	      if ( fabs(Amode[ii]) > biggest) {
		biggest=fabs(Amode[ii]); big_ind = ii;
	      }
	    }
	  }
	  printf("%d entries zeroed out of %d elements\n",j,N_update);
	  alpha = AZ_gdot(N_update, Amode, Amode, proc_config);
	  beta  = AZ_gdot(N_update,  mode,  mode, proc_config);
	  printf("||A r||^2 =%e, ||r||^2 = %e, ratio = %e\n",
		 alpha,beta,alpha/beta);
	  printf("the biggest is %e at row %d\n",biggest,big_ind);
	  free(Amode);
	  */

	  /* orthogonalize mode with respect to previous modes. */
	  /* NOTE(review): alpha is computed but the daxpy_ applying it is
	   * commented out, so no orthogonalization actually happens here. */

	  for (j = 0; j < i; j++) {
	    alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/
	      AZ_gdot(N_update, &(rigid[j*N_update]),
		      &(rigid[j*N_update]), proc_config);
	    /*  daxpy_(&N_update,&alpha,&(rigid[j*N_update]),  &one, mode, &one); */
	  }
#ifndef	MB_MODIF
	  printf(" after mb %e %e %e\n",mode[0],mode[1],mode[2]);
#endif

	  for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j];
	  free(mode);
	  free(garbage); garbage = NULL;
	}

	if (Nrigid != 0) {
	  ML_Aggregate_Set_BlockDiagScaling(ag);
	  ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update);
	  free(rigid);
	}
#ifdef SCALE_ME
	ML_Aggregate_Scale_NullSpace(ag, scaling_vect, N_update);
#endif

	coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, N_levels-1,
							     ML_DECREASING, ag);
	AZ_defaults(options, params);
	coarsest_level = N_levels - coarsest_level;
	if ( proc_config[AZ_node] == 0 )
	  printf("Coarse level = %d \n", coarsest_level);

	/* set up smoothers */

	for (level = N_levels-1; level > coarsest_level; level--) {

	  /*  ML_Gen_Smoother_BlockGaussSeidel(ml, level,ML_BOTH, 1, 1., num_PDE_eqns); */

	  /*  Sparse approximate inverse smoother that acutally does both */
	  /*  pre and post smoothing.                                     */
	  /*
	  ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth,
				    parasails_sym, parasails_thresh,
				    parasails_nlevels, parasails_filter,
				    parasails_loadbal, parasails_factorized);
	  */

	  /* This is the symmetric Gauss-Seidel smoothing that we usually use. */
	  /* In parallel, it is not a true Gauss-Seidel in that each processor */
	  /* does a Gauss-Seidel on its local submatrix independent of the     */
	  /* other processors.                                                 */

	  /* ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth); */
	  Ndof = ml->Amat[level].invec_leng;

	  /* active smoother: block-diagonal-scaled Chebyshev with blocks
	   * taken from the aggregates */
	  ML_Gen_Blocks_Aggregates(ag, level, &nBlocks, &blockIndices);

	  ML_Gen_Smoother_BlockDiagScaledCheby(ml, level, ML_BOTH, 30.,nsmooth,
					       nBlocks, blockIndices);

	  /*
	  ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.);
	  */

	  /* This is a true Gauss Seidel in parallel. This seems to work for  */
	  /* elasticity problems.  However, I don't believe that this is very */
	  /* efficient in parallel.                                           */
	  /*
	  nblocks = ml->Amat[level].invec_leng/num_PDE_eqns;
	  blocks = (int *) ML_allocate(sizeof(int)*N_update);
	  for (i =0; i < ml->Amat[level].invec_leng; i++) blocks[i] = i/num_PDE_eqns;
	  ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER,
							 nsmooth, 1., nblocks, blocks);
	  ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER,
							 nsmooth, 1., nblocks, blocks);
	  free(blocks);
	  */

	  /* Block Jacobi Smoothing */
	  /*
	  nblocks = ml->Amat[level].invec_leng/num_PDE_eqns;
	  blocks = (int *) ML_allocate(sizeof(int)*N_update);
	  for (i =0; i < ml->Amat[level].invec_leng; i++) blocks[i] = i/num_PDE_eqns;
	  ML_Gen_Smoother_VBlockJacobi(ml , level, ML_BOTH, nsmooth,
				       ML_ONE_STEP_CG, nblocks, blocks);
	  free(blocks);
	  */

	  /* Jacobi Smoothing */
	  /*
	  ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth, ML_ONE_STEP_CG);
	  ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,ML_ONE_STEP_CG);
	  */

	  /*  This does a block Gauss-Seidel (not true GS in parallel)        */
	  /*  where each processor has 'nblocks' blocks.                      */
	  /*
	  nblocks = 250;
	  ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks);
	  ML_Gen_Smoother_VBlockJacobi(ml , level, ML_BOTH, nsmooth,ML_ONE_STEP_CG,
				       nblocks, blocks);
	  free(blocks);
	  */
	  num_PDE_eqns = 6;
	}

	/* Choose coarse grid solver: mls, superlu, symGS, or Aztec */

	/*
	ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth);
	ML_Gen_CoarseSolverSuperLU( ml, coarsest_level);
	*/
	/*
	ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.);
	*/

	/* Coarse solve: Aztec CG with Jacobi, keeping Krylov vectors;
	 * the previous Aztec settings are saved and restored afterwards. */
	old_prec = options[AZ_precond];
	old_sol  = options[AZ_solver];
	old_tol  = params[AZ_tol];
	params[AZ_tol] = 1.0e-9;
	params[AZ_tol] = 1.0e-5;   /* NOTE(review): overrides the 1.0e-9 just above */
	options[AZ_precond] = AZ_Jacobi;
	options[AZ_solver]  = AZ_cg;
	options[AZ_poly_ord] = 1;
	options[AZ_conv] = AZ_r0;
	options[AZ_orth_kvecs] = AZ_TRUE;

	j = AZ_gsum_int(ml->Amat[coarsest_level].outvec_leng, proc_config);

	options[AZ_keep_kvecs] = j - 6;
	options[AZ_max_iter] =  options[AZ_keep_kvecs];

	ML_Gen_SmootherAztec(ml, coarsest_level, options, params,
			     proc_config, status, options[AZ_keep_kvecs],
			     ML_PRESMOOTHER, NULL);

	options[AZ_conv] = AZ_noscaled;
	options[AZ_keep_kvecs] = 0;
	options[AZ_orth_kvecs] = 0;
	options[AZ_precond] = old_prec;
	options[AZ_solver] = old_sol;
	params[AZ_tol] = old_tol;

	/* */

#ifdef RST_MODIF
	ML_Gen_Solver(ml, ML_MGV, N_levels-1, coarsest_level);
#else
#ifdef	MB_MODIF
	ML_Gen_Solver(ml, ML_SAAMG,   N_levels-1, coarsest_level);
#else
	ML_Gen_Solver(ml, ML_MGFULLV, N_levels-1, coarsest_level);
#endif
#endif

	/* outer Krylov settings (later assignments win: CG, unscaled conv) */
	options[AZ_solver]   = AZ_GMRESR;
	options[AZ_solver]   = AZ_cg;
	options[AZ_scaling]  = AZ_none;
	options[AZ_precond]  = AZ_user_precond;
	options[AZ_conv]     = AZ_r0;
	options[AZ_conv] = AZ_noscaled;
	options[AZ_output]   = 1;
	options[AZ_max_iter] = 500;
	options[AZ_poly_ord] = 5;
	options[AZ_kspace]   = 40;
	params[AZ_tol]       = 4.8e-6;

	AZ_set_ML_preconditioner(&Pmat, Amat, ml, options);
	setup_time = AZ_second() - start_time;

	/* Set rhs */

	fp = fopen("AZ_capture_rhs.dat","r");
	if (fp == NULL) {
	  AZ_random_vector(rhs, data_org, proc_config);
	  if (proc_config[AZ_node] == 0) printf("taking random vector for rhs\n");
	  /* NOTE(review): loop bound is -N_update, so this never executes --
	   * disabled debugging initialization. */
	  for (i = 0; i < -N_update; i++) {
	    rhs[i] = (double) update[i]; rhs[i] = 7.;
	  }
	}
	else {
	  if (proc_config[AZ_node]== 0) printf("reading rhs guess from file\n");
	  AZ_input_msr_matrix("AZ_capture_rhs.dat", update, &rhs, &garbage,
			      N_update, proc_config);
	  free(garbage);
	}
	AZ_reorder_vec(rhs, data_org, update_index, NULL);

	/* make the rhs consistent: replace it by A*rhs so the exact
	 * solution of the test problem is known to lie in range(A) */
	printf("changing rhs by multiplying with A\n");
	Amat->matvec(rhs, xxx, Amat, proc_config);
	for (i = 0; i < N_update; i++) rhs[i] = xxx[i];

	fp = fopen("AZ_capture_init_guess.dat","r");
	if (fp != NULL) {
	  fclose(fp);
	  if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n");
	  AZ_input_msr_matrix("AZ_capture_init_guess.dat", update, &xxx, &garbage,
			      N_update, proc_config);
	  free(garbage);

	  /* grow xxx to also hold the external (ghost) entries */
	  xxx = (double *) realloc(xxx, sizeof(double)*(
				   Amat->data_org[AZ_N_internal]+
				   Amat->data_org[AZ_N_border] +
				   Amat->data_org[AZ_N_external]));
	}
	AZ_reorder_vec(xxx, data_org, update_index, NULL);

	/* if Dirichlet BC ... put the answer in */

	/*
	for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) {
	  if ( (val[i] > .99999999) && (val[i] < 1.0000001))
	    xxx[i] = rhs[i];
	}
	*/

	fp = fopen("AZ_no_multilevel.dat","r");
	scaling = AZ_scaling_create();
	start_time = AZ_second();

	/* presence of AZ_no_multilevel.dat selects a plain (non-ML) solve */
	if (fp != NULL) {
	  fclose(fp);
	  options[AZ_precond] = AZ_none;
	  options[AZ_scaling] = AZ_sym_diag;
	  options[AZ_ignore_scaling] = AZ_TRUE;
	  options[AZ_keep_info] = 1;

	  AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);

	  /*
	  options[AZ_pre_calc] = AZ_reuse;
	  options[AZ_conv] = AZ_expected_values;
	  if (proc_config[AZ_node] == 0)
	    printf("\n-------- Second solve with improved convergence test -----\n");
	  AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);
	  if (proc_config[AZ_node] == 0)
	    printf("\n-------- Third solve with improved convergence test -----\n");
	  AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);
	  */
	}
	else {
	  options[AZ_keep_info] = 1;
	  options[AZ_conv] = AZ_noscaled;
	  options[AZ_conv] = AZ_r0;
	  params[AZ_tol] = 1.0e-7;

	  /* ML_Iterate(ml, xxx, rhs); */
	  alpha = sqrt(AZ_gdot(N_update, xxx, xxx, proc_config));
	  printf("init guess = %e\n",alpha);
	  alpha = sqrt(AZ_gdot(N_update, rhs, rhs, proc_config));
	  printf("rhs = %e\n",alpha);
#ifdef SCALE_ME
	  ML_MSR_scalerhs(rhs, scaling_vect, data_org[AZ_N_internal] +
			  data_org[AZ_N_border]);
	  ML_MSR_scalesol(xxx, scaling_vect, data_org[AZ_N_internal] +
			  data_org[AZ_N_border]);
#endif

	  /* diagnostic scan of the diagonal / row sums; in MSR format
	   * val[0..N-1] holds the diagonal (see Aztec User's Guide) */
	  max_diag = 0.;
	  min_diag = 1.e30;
	  max_sum  = 0.;
	  for (i = 0; i < N_update; i++) {
	    if (Amat->val[i] < 0.)
	      printf("woops negative diagonal A(%d,%d) = %e\n",
		     i,i,Amat->val[i]);

	    if (Amat->val[i] > max_diag) max_diag = Amat->val[i];
	    if (Amat->val[i] < min_diag) min_diag = Amat->val[i];
	    sum = fabs(Amat->val[i]);
	    for (j = Amat->bindx[i]; j < Amat->bindx[i+1]; j++) {
	      sum += fabs(Amat->val[j]);
	    }
	    if (sum > max_sum) max_sum = sum;
	  }
	  printf("Largest diagonal = %e, min diag = %e large abs row sum = %e\n",
		 max_diag, min_diag, max_sum);

	  AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);

	  options[AZ_pre_calc] = AZ_reuse;
	  options[AZ_conv] = AZ_expected_values;
	  /*
	  if (proc_config[AZ_node] == 0)
	    printf("\n-------- Second solve with improved convergence test -----\n");
	  AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);
	  if (proc_config[AZ_node] == 0)
	    printf("\n-------- Third solve with improved convergence test -----\n");
	  AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);
	  */
	}

	solve_time = AZ_second() - start_time;

	if (proc_config[AZ_node] == 0)
	  printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time);

	/* print a handful of solution entries at fixed global ids,
	 * with a global sum between each as a barrier substitute */
	if (proc_config[AZ_node] == 0)
	  printf("Printing out a few entries of the solution ...\n");

	for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
	  if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n",
				      update[j],xxx[update_index[j]]); fflush(stdout);}
	j = AZ_gsum_int(7, proc_config); /* sync processors */
	for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
	  if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n",
				       update[j],xxx[update_index[j]]); fflush(stdout);}
	j = AZ_gsum_int(7, proc_config); /* sync processors */
	for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
	  if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n",
				       update[j],xxx[update_index[j]]); fflush(stdout);}
	j = AZ_gsum_int(7, proc_config); /* sync processors */
	for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
	  if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n",
					update[j],xxx[update_index[j]]); fflush(stdout);}
	j = AZ_gsum_int(7, proc_config); /* sync processors */
	for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
	  if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n",
					update[j],xxx[update_index[j]]); fflush(stdout);}

	/* tear everything down in reverse order of construction */
	ML_Aggregate_Destroy(&ag);
	ML_Destroy(&ml);
	AZ_free((void *) Amat->data_org);
	AZ_free((void *) Amat->val);
	AZ_free((void *) Amat->bindx);
	AZ_free((void *) update);
	AZ_free((void *) external);
	AZ_free((void *) extern_index);
	AZ_free((void *) update_index);
	AZ_scaling_destroy(&scaling);
	if (Amat  != NULL) AZ_matrix_destroy(&Amat);
	if (Pmat  != NULL) AZ_precond_destroy(&Pmat);
	free(xxx);
	free(rhs);

#ifdef HAVE_MPI
	MPI_Finalize();
#endif

	return 0;
}
// ================================================ ====== ==== ==== == = // the tentative null space is in input because the user // has to remember to allocate and fill it, and then to delete // it after calling this method. int ML_Epetra::MultiLevelPreconditioner:: ComputeAdaptivePreconditioner(int TentativeNullSpaceSize, double* TentativeNullSpace) { if ((TentativeNullSpaceSize == 0) || (TentativeNullSpace == 0)) ML_CHK_ERR(-1); // ================================== // // get parameters from the input list // // ================================== // // maximum number of relaxation sweeps int MaxSweeps = List_.get("adaptive: max sweeps", 10); // number of std::vector to be added to the tentative null space int NumAdaptiveVectors = List_.get("adaptive: num vectors", 1); if (verbose_) { std::cout << PrintMsg_ << "*** Adaptive Smoother Aggregation setup ***" << std::endl; std::cout << PrintMsg_ << " Maximum relaxation sweeps = " << MaxSweeps << std::endl; std::cout << PrintMsg_ << " Additional vectors to compute = " << NumAdaptiveVectors << std::endl; } // ==================================================== // // compute the preconditioner, set null space from user // // (who will have to delete std::vector TentativeNullSpace) // // ==================================================== // double* NewNullSpace = 0; double* OldNullSpace = TentativeNullSpace; int OldNullSpaceSize = TentativeNullSpaceSize; // need some work otherwise matvec() with Epetra_Vbr fails. 
// Also, don't differentiate between range and domain here // as ML will not work if range != domain const Epetra_VbrMatrix* VbrA = NULL; VbrA = dynamic_cast<const Epetra_VbrMatrix*>(RowMatrix_); Epetra_Vector* LHS = 0; Epetra_Vector* RHS = 0; if (VbrA != 0) { LHS = new Epetra_Vector(VbrA->DomainMap()); RHS = new Epetra_Vector(VbrA->DomainMap()); } else { LHS = new Epetra_Vector(RowMatrix_->OperatorDomainMap()); RHS = new Epetra_Vector(RowMatrix_->OperatorDomainMap()); } // destroy what we may already have if (IsComputePreconditionerOK_ == true) { DestroyPreconditioner(); } // build the preconditioner for the first time List_.set("null space: type", "pre-computed"); List_.set("null space: dimension", OldNullSpaceSize); List_.set("null space: vectors", OldNullSpace); ComputePreconditioner(); // ====================== // // add one std::vector at time // // ====================== // for (int istep = 0 ; istep < NumAdaptiveVectors ; ++istep) { if (verbose_) { std::cout << PrintMsg_ << "\tAdaptation step " << istep << std::endl; std::cout << PrintMsg_ << "\t---------------" << std::endl; } // ==================== // // look for "bad" modes // // ==================== // // note: should an error occur, ML_CHK_ERR will return, // and LHS and RHS will *not* be delete'd (--> memory leak). // Anyway, this means that something wrong happened in the code // and should be fixed by the user. 
LHS->Random(); double Norm2; for (int i = 0 ; i < MaxSweeps ; ++i) { // RHS = (I - ML^{-1} A) LHS ML_CHK_ERR(RowMatrix_->Multiply(false,*LHS,*RHS)); // FIXME: can do something slightly better here ML_CHK_ERR(ApplyInverse(*RHS,*RHS)); ML_CHK_ERR(LHS->Update(-1.0,*RHS,1.0)); LHS->Norm2(&Norm2); if (verbose_) { std::cout << PrintMsg_ << "\titer " << i << ", ||x||_2 = "; std::cout << Norm2 << std::endl; } } // scaling vectors double NormInf; LHS->NormInf(&NormInf); LHS->Scale(1.0 / NormInf); // ========================================================= // // copy tentative and computed null space into NewNullSpace, // // which now becomes the standard null space // // ========================================================= // int NewNullSpaceSize = OldNullSpaceSize + 1; NewNullSpace = new double[NumMyRows() * NewNullSpaceSize]; assert (NewNullSpace != 0); int itmp = OldNullSpaceSize * NumMyRows(); for (int i = 0 ; i < itmp ; ++i) { NewNullSpace[i] = OldNullSpace[i]; } for (int j = 0 ; j < NumMyRows() ; ++j) { NewNullSpace[itmp + j] = (*LHS)[j]; } // =============== // // visualize modes // // =============== // if (List_.get("adaptive: visualize", false)) { double* x_coord = List_.get("viz: x-coordinates", (double*)0); double* y_coord = List_.get("viz: y-coordinates", (double*)0); double* z_coord = List_.get("viz: z-coordinates", (double*)0); assert (x_coord != 0); std::vector<double> plot_me(NumMyRows()/NumPDEEqns_); ML_Aggregate_Viz_Stats info; info.Amatrix = &(ml_->Amat[LevelID_[0]]); info.x = x_coord; info.y = y_coord; info.z = z_coord; info.Nlocal = NumMyRows() / NumPDEEqns_; info.Naggregates = 1; ML_Operator_AmalgamateAndDropWeak(&(ml_->Amat[LevelID_[0]]), NumPDEEqns_, 0.0); for (int ieqn = 0 ; ieqn < NumPDEEqns_ ; ++ieqn) { for (int j = 0 ; j < NumMyRows() ; j+=NumPDEEqns_) { plot_me[j / NumPDEEqns_] = (*LHS)[j + ieqn]; } char FileName[80]; sprintf(FileName,"nullspace-mode%d-eq%d.xyz", istep, ieqn); if (verbose_) std::cout << PrintMsg_ << "writing file " << 
FileName << "..." << std::endl; ML_Aggregate_VisualizeXYZ(info,FileName, ml_->comm,&plot_me[0]); } ML_Operator_UnAmalgamateAndDropWeak(&(ml_->Amat[LevelID_[0]]), NumPDEEqns_, 0.0); } // Destroy the old preconditioner DestroyPreconditioner(); // ==================================================== // // build the new preconditioner with the new null space // // ==================================================== // List_.set("null space: type", "pre-computed"); List_.set("null space: dimension", NewNullSpaceSize); List_.set("null space: vectors", NewNullSpace); ML_CHK_ERR(ComputePreconditioner()); if (istep && (istep != NumAdaptiveVectors)) delete OldNullSpace; OldNullSpace = NewNullSpace; OldNullSpaceSize = NewNullSpaceSize; } // keep trace of this pointer, it will be delete'd later NullSpaceToFree_ = NewNullSpace; delete LHS; delete RHS; return(0); }
// ================================================ ====== ==== ==== == = // Copied from ml_agg_genP.c static void ML_Init_Aux(ML_Operator* A, Teuchos::ParameterList &List) { int i, j, n, count, num_PDEs, BlockRow, BlockCol; double threshold; int* columns; double* values; int allocated, entries = 0; int N_dimensions; int DiagID; double DiagValue; int** filter; double dist; double *LaplacianDiag; int Nghost; // Boundary exchange the coords double *x_coord=0, *y_coord=0, *z_coord=0; RefMaxwell_SetupCoordinates(A,List,x_coord,y_coord,z_coord); int dim=(x_coord!=0) + (y_coord!=0) + (z_coord!=0); /* Sanity Checks */ if(dim == 0 || ((!x_coord && (y_coord || z_coord)) || (x_coord && !y_coord && z_coord))){ std::cerr<<"Error: Coordinates not defined. This is necessary for aux aggregation (found "<<dim<<" coordinates).\n"; exit(-1); } num_PDEs = A->num_PDEs; N_dimensions = dim; threshold = A->aux_data->threshold; ML_Operator_AmalgamateAndDropWeak(A, num_PDEs, 0.0); n = A->invec_leng; Nghost = ML_CommInfoOP_Compute_TotalRcvLength(A->getrow->pre_comm); LaplacianDiag = (double *) ML_allocate((A->getrow->Nrows+Nghost+1)* sizeof(double)); filter = (int**) ML_allocate(sizeof(int*) * n); allocated = 128; columns = (int *) ML_allocate(allocated * sizeof(int)); values = (double *) ML_allocate(allocated * sizeof(double)); for (i = 0 ; i < n ; ++i) { BlockRow = i; DiagID = -1; DiagValue = 0.0; ML_get_matrix_row(A,1,&i,&allocated,&columns,&values, &entries,0); for (j = 0; j < entries; j++) { BlockCol = columns[j]; if (BlockRow != BlockCol) { dist = 0.0; switch (N_dimensions) { case 3: dist += (z_coord[BlockRow] - z_coord[BlockCol]) * (z_coord[BlockRow] - z_coord[BlockCol]); case 2: dist += (y_coord[BlockRow] - y_coord[BlockCol]) * (y_coord[BlockRow] - y_coord[BlockCol]); case 1: dist += (x_coord[BlockRow] - x_coord[BlockCol]) * (x_coord[BlockRow] - x_coord[BlockCol]); } if (dist == 0.0) { printf("node %d = %e ", i, x_coord[BlockRow]); if (N_dimensions > 1) printf(" %e ", 
y_coord[BlockRow]); if (N_dimensions > 2) printf(" %e ", z_coord[BlockRow]); printf("\n"); printf("node %d = %e ", j, x_coord[BlockCol]); if (N_dimensions > 1) printf(" %e ", y_coord[BlockCol]); if (N_dimensions > 2) printf(" %e ", z_coord[BlockCol]); printf("\n"); printf("Operator has inlen = %d and outlen = %d\n", A->invec_leng, A->outvec_leng); } dist = 1.0 / dist; DiagValue += dist; } else if (columns[j] == i) { DiagID = j; } } if (DiagID == -1) { fprintf(stderr, "ERROR: matrix has no diagonal!\n" "ERROR: (file %s, line %d)\n", __FILE__, __LINE__); exit(EXIT_FAILURE); } LaplacianDiag[BlockRow] = DiagValue; } if ( A->getrow->pre_comm != NULL ) ML_exchange_bdry(LaplacianDiag,A->getrow->pre_comm,A->getrow->Nrows, A->comm, ML_OVERWRITE,NULL); for (i = 0 ; i < n ; ++i) { BlockRow = i; ML_get_matrix_row(A,1,&i,&allocated,&columns,&values, &entries,0); for (j = 0; j < entries; j++) { BlockCol = columns[j]; if (BlockRow != BlockCol) { dist = 0.0; switch (N_dimensions) { case 3: dist += (z_coord[BlockRow] - z_coord[BlockCol]) * (z_coord[BlockRow] - z_coord[BlockCol]); case 2: dist += (y_coord[BlockRow] - y_coord[BlockCol]) * (y_coord[BlockRow] - y_coord[BlockCol]); case 1: dist += (x_coord[BlockRow] - x_coord[BlockCol]) * (x_coord[BlockRow] - x_coord[BlockCol]); } dist = 1.0 / dist; values[j] = dist; } } count = 0; for (j = 0 ; j < entries ; ++j) { if ( (i != columns[j]) && (values[j]*values[j] < LaplacianDiag[BlockRow]*LaplacianDiag[columns[j]]*threshold*threshold)){ columns[count++] = columns[j]; } } /* insert the rows */ filter[BlockRow] = (int*) ML_allocate(sizeof(int) * (count + 1)); filter[BlockRow][0] = count; for (j = 0 ; j < count ; ++j) filter[BlockRow][j + 1] = columns[j]; } ML_free(columns); ML_free(values); ML_free(LaplacianDiag); ML_Operator_UnAmalgamateAndDropWeak(A, num_PDEs, 0.0); A->aux_data->aux_func_ptr = A->getrow->func_ptr; A->getrow->func_ptr = ML_Aux_Getrow; A->aux_data->filter = filter; A->aux_data->filter_size = n; // Cleanup ML_free(x_coord); 
ML_free(y_coord); ML_free(z_coord); }