void ddd_set_commit2(void)

/******************************************************************
 *
 * ddd_set_commit2:
 *
 *   Flush the buffered broadcast built up by ddd_add_member2().
 *   If any bytes are still pending in the pack buffer, issue the
 *   final AZ_SEND broadcast and reset the internal byte counter.
 *   (Per the ddd_add_member2() protocol, Proc 0 performs the send
 *   here while the other processors complete their reads.)
 ******************************************************************/
{
#ifdef PARALLEL
  /* Only flush when something was actually buffered since the last send. */
  if (ddd_internal_count > 0) {
    AZ_broadcast(NULL, 0, Proc_Config, AZ_SEND);
    ddd_internal_count = 0;
  }
#endif
}
void init_matrix_vector_structures(int proc_config[], int *update_index[],
                                   int *update[], int *data_org[],
                                   int *external[], int *extern_index[],
                                   int input_option, double *val[],
                                   int *bindx[], int *indx[], int *bpntr[],
                                   int *rpntr[], int *cpntr[])

/*
 * Read in the points to be updated on this processor, create the global
 * distributed form of the application matrix, and then convert it to a
 * local distributed form for AZTEC kernels.  Along the way, initialize the
 * following quantities:
 *      update_index[], update[], data_org[], val[], bindx[], bpntr[],
 *      cpntr[], rpntr[], indx[], external[], extern_index[].
 *
 * Author: Ray Tuminaro, Div 1422, SNL
 * Date:   3/15/95
 *
 * Parameters
 *
 *    proc_config  ==      On input, processor information:
 *                            proc_config[AZ_node]    = name of this processor
 *                            proc_config[AZ_N_procs] = # of processors used
 *    update       ==      On output, list of pts to be updated on this node
 *    val,bindx    ==      On output, local distributed form of arrays
 *                         holding matrix values
 *    external     ==      On output, list of external vector elements
 *    update_index ==      On output, ordering of update and external
 *    extern_index ==      locally on this processor.  For example
 *                         'update_index[i]' gives the index location of the
 *                         block which has the global index 'update[i]'.
 *    data_org     ==      On output, indicates how the data is set out on
 *                         this node (counts of internal/border/external
 *                         unknowns, communication info).  See the Aztec
 *                         User's Guide for details.
 *    input_option ==      Indicates how update[] will be initialized.
 *                         = 0, linear decomposition
 *                         = 1, points read from file 'update'
 *                         = 2, box decomposition
 *                         See AZ_read_update() comments for more details.
 *
 * The default finite difference MSR problem corresponds to setting up a
 * series of uncoupled 3D Poisson equations on a cube.  To solve other
 * problems, the call 'add_row_3D(...)' in 'create_msr_matrix()' can be
 * changed to 'add_row_5pt()' or 'add_row_9pt()'.
 *
 * NOTE(review): 'application', 'num_PDE_eqns' and 'N_grid_pts' are
 * file-scope globals set elsewhere (presumably in main()) — confirm.
 */

{
  int N_update;             /* Number of pts updated on this processor */
  int MSRorVBR;             /* AZ_MSR_MATRIX or AZ_VBR_MATRIX */
  int chunks;               /* block size handed to AZ_read_update() */
  int blk_size, num_blk_cols, num_blk_rows, size, kk, convert_to_vbr = 0;
  double *val2;             /* saved MSR values during VBR conversion */
  int *bindx2;              /* saved MSR bindx during VBR conversion */

  MSRorVBR = AZ_MSR_MATRIX;
  if (application == 1) MSRorVBR = AZ_VBR_MATRIX;

  /* VBR already groups unknowns into blocks, so read single chunks. */
  chunks = num_PDE_eqns;
  if (MSRorVBR == AZ_VBR_MATRIX) chunks = 1;

  /* initialize the list of global indices.  NOTE: the list of global  */
  /* indices must be in ascending order so that subsequent calls to    */
  /* AZ_find_index() will function properly.                           */
  AZ_read_update(&N_update, update, proc_config, N_grid_pts, chunks,
                 input_option);

  /* create the matrix: each processor creates only the rows appearing  */
  /* in update[] ... however each row is created as if it were on a     */
  /* serial machine (i.e. using the global column numbers).             */
  if (application == 1)
    create_vbr_matrix(*update, val, indx, N_update, rpntr, bpntr, bindx);
  else {
    *indx = NULL; *bpntr = NULL; *rpntr = NULL; *cpntr = NULL;

    if (application == 0)
      create_msr_matrix(*update, val, bindx, N_update);
    if (application == 2)
      create_fe_matrix(*update, proc_config[AZ_node], bindx, val, N_update);
    if (application == 3) {
      AZ_read_msr_matrix(*update, val, bindx, N_update, proc_config);
    }
  }

  /* convert matrix to a distributed parallel matrix */
  AZ_transform(proc_config, external, *bindx, *val, *update, update_index,
               extern_index, data_org, N_update, *indx, *bpntr, *rpntr,
               cpntr, MSRorVBR);

  /* Optional MSR -> VBR conversion for the file-input case.  Dead by   */
  /* default: convert_to_vbr is hard-wired to 0 above.                  */
  if ( (convert_to_vbr == 1) && (application == 3) ) {
    if (proc_config[AZ_node] == 0 ) {
      printf("enter the block size\n");
      scanf("%d",&blk_size);   /* NOTE(review): scanf result unchecked */
    }
    /* Ship the chosen block size to the other processors. */
    AZ_broadcast((char *) &blk_size, sizeof(int), proc_config, AZ_PACK);
    AZ_broadcast((char *) NULL , 0 , proc_config, AZ_SEND);

    if ( N_update%blk_size != 0 ) {
      (void) fprintf(stderr," The block size must be a multiple of the number of rows per processor.\n");
      exit(-1);
    }

    num_blk_rows = N_update/blk_size;
    num_blk_cols = ( (*data_org)[AZ_N_external] + N_update)/blk_size;

    /* NOTE(review): rpntr/bpntr are sized by num_blk_cols rather than  */
    /* num_blk_rows — over-allocation, presumably harmless; confirm.    */
    *cpntr = (int *) AZ_allocate( (num_blk_cols+2)*sizeof(int));
    *rpntr = (int *) AZ_allocate( (num_blk_cols+2)*sizeof(int));
    *bpntr = (int *) AZ_allocate( (num_blk_cols+2)*sizeof(int));
    size   = 20*(num_blk_cols+2);
    *indx  = (int *) AZ_allocate(size*sizeof(int));

    /* Keep the old MSR arrays; AZ_msr2vbr reads them while filling the */
    /* freshly-allocated VBR arrays.                                    */
    bindx2 = *bindx;
    val2   = *val;
    *bindx = (int *) AZ_allocate(size*sizeof(int));
    *val   = (double *) AZ_allocate(size*blk_size*blk_size*sizeof(double));

    /* cpntr is passed in as a list of block widths (all blk_size). */
    for (kk = 0 ; kk < num_blk_cols ; kk++ ) (*cpntr)[kk] = blk_size;

    AZ_msr2vbr(*val,*indx,*rpntr,*cpntr,*bpntr,*bindx,bindx2,val2,
               num_blk_rows,num_blk_cols,size,size*blk_size*blk_size,
               blk_size);

    /* Rescale all bookkeeping from point indices to block indices. */
    MSRorVBR = AZ_VBR_MATRIX;
    N_update /= blk_size;
    num_PDE_eqns = blk_size;
    for (kk = 0 ; kk < N_update ; kk++ )
      (*update)[kk] = (*update)[blk_size*kk]/blk_size;
    for (kk = 0 ; kk < (*data_org)[AZ_N_external] ; kk++ )
      (*external)[kk] = (*external)[blk_size*kk]/blk_size;
    (*data_org)[AZ_matrix_type] = AZ_VBR_MATRIX;
    (*data_org)[AZ_N_int_blk ] /= blk_size;
    (*data_org)[AZ_N_bord_blk] /= blk_size;
    (*data_org)[AZ_N_ext_blk ] /= blk_size;
    AZ_free(bindx2); AZ_free(val2);
  }

} /* init_matrix_vector_structures */
/*#define DEBUG */ void distrib_msr_matrix(int *proc_config, int N_global, int *n_nonzeros, int *N_update, int **update, double **val, int **bindx, double **x, double **b, double **xexact) { int i, n_entries, N_columns, n_global_nonzeros; int ii, j, row, have_xexact = 0 ; int kk = 0; int max_ii = 0, max_jj = 0; int ione = 1; double value; double *cnt; int *pntr, *bindx1, *pntr1; double *val1, *b1, *x1, *xexact1; #ifdef DEBUG printf("Processor %d of %d entering distrib_matrix.\n", proc_config[AZ_node],proc_config[AZ_N_procs]) ; #endif /*************** Distribute global matrix to all processors ************/ if(proc_config[AZ_node] == 0) { if ((*xexact) != NULL) have_xexact = 1; #ifdef DEBUG printf("Broadcasting exact solution\n"); #endif } if(proc_config[AZ_N_procs] > 1) { AZ_broadcast((char *) &N_global, sizeof(int), proc_config, AZ_PACK); AZ_broadcast((char *) n_nonzeros, sizeof(int), proc_config, AZ_PACK); AZ_broadcast((char *) &have_xexact, sizeof(int), proc_config, AZ_PACK); AZ_broadcast(NULL, 0, proc_config, AZ_SEND); if(proc_config[AZ_node] != 0) { (*bindx) = (int *) calloc(*n_nonzeros+1,sizeof(int)) ; (*val) = (double *) calloc(*n_nonzeros+1,sizeof(double)) ; } AZ_broadcast((char *) (*bindx), sizeof(int) *(*n_nonzeros+1), proc_config, AZ_PACK); AZ_broadcast(NULL, 0, proc_config, AZ_SEND); AZ_broadcast((char *) (*val), sizeof(double)*(*n_nonzeros+1), proc_config, AZ_PACK); AZ_broadcast(NULL, 0, proc_config, AZ_SEND); #ifdef DEBUG printf("Processor %d of %d done with matrix broadcast.\n", proc_config[AZ_node],proc_config[AZ_N_procs]) ; #endif /* Set rhs and initialize guess */ if(proc_config[AZ_node] != 0) { (*b) = (double *) calloc(N_global,sizeof(double)) ; (*x) = (double *) calloc(N_global,sizeof(double)) ; if (have_xexact) (*xexact) = (double *) calloc(N_global,sizeof(double)) ; } AZ_broadcast((char *) (*x), sizeof(double)*(N_global), proc_config, AZ_PACK); AZ_broadcast((char *) (*b), sizeof(double)*(N_global), proc_config, AZ_PACK); if (have_xexact) 
AZ_broadcast((char *) (*xexact), sizeof(double)*(N_global), proc_config, AZ_PACK); AZ_broadcast(NULL, 0, proc_config, AZ_SEND); #ifdef DEBUG printf("Processor %d of %d done with rhs/guess broadcast.\n", proc_config[AZ_node],proc_config[AZ_N_procs]) ; #endif } /********************** Generate update map *************************/ AZ_read_update(N_update, update, proc_config, N_global, 1, AZ_linear) ; printf("Processor %d of %d has %d rows of %d total rows.\n", proc_config[AZ_node],proc_config[AZ_N_procs],*N_update,N_global) ; /*************** Construct local matrix from global matrix ************/ /* The local matrix is a copy of the rows assigned to this processor. It is stored in MSR format and still has global indices (AZ_transform will complete conversion to local indices. */ if(proc_config[AZ_N_procs] > 1) { n_global_nonzeros = *n_nonzeros; *n_nonzeros = *N_update; for (i=0; i<*N_update; i++) *n_nonzeros += (*bindx)[(*update)[i]+1] - (*bindx)[(*update)[i]]; printf("Processor %d of %d has %d nonzeros of %d total nonzeros.\n", proc_config[AZ_node],proc_config[AZ_N_procs], *n_nonzeros,n_global_nonzeros) ; #ifdef DEBUG { double sum1 = 0.0; for (i=0;i<N_global; i++) sum1 += (*b)[i]; printf("Processor %d of %d has sum of b = %12.4g.\n", proc_config[AZ_node],proc_config[AZ_N_procs],sum1) ; } #endif /* DEBUG */ /* Allocate memory for local matrix */ bindx1 = (int *) calloc(*n_nonzeros+1,sizeof(int)) ; val1 = (double *) calloc(*n_nonzeros+1,sizeof(double)) ; b1 = (double *) calloc(*N_update,sizeof(double)) ; x1 = (double *) calloc(*N_update,sizeof(double)) ; if (have_xexact) xexact1 = (double *) calloc(*N_update,sizeof(double)) ; bindx1[0] = *N_update+1; for (i=0; i<*N_update; i++) { row = (*update)[i]; b1[i] = (*b)[row]; x1[i] = (*x)[row]; if (have_xexact) xexact1[i] = (*xexact)[row]; val1[i] = (*val)[row]; bindx1[i+1] = bindx1[i]; #ifdef DEBUG printf("Proc %d of %d: Global row = %d: Local row = %d: b = %12.4g: x = %12.4g: bindx = %d: val = %12.4g \n", 
proc_config[AZ_node],proc_config[AZ_N_procs], row, i, b1[i], x1[i], bindx1[i], val1[i]) ; #endif for (j = (*bindx)[row]; j < (*bindx)[row+1]; j++) { val1[ bindx1 [i+1] ] = (*val)[j]; bindx1[bindx1 [i+1] ] = (*bindx)[j]; bindx1[i+1] ++; } } #ifdef DEBUG printf("Processor %d of %d done with extracting local operators.\n", proc_config[AZ_node],proc_config[AZ_N_procs]) ; if (have_xexact) { printf( "The residual using MSR format and exact solution on processor %d is %12.4g\n", proc_config[AZ_node], smsrres (*N_update, N_global, val1, bindx1, xexact1, (*xexact), b1)); } #endif /* Release memory for global matrix, rhs and solution */ free ((void *) (*val)); free ((void *) (*bindx)); free ((void *) (*b)); free ((void *) (*x)); if (have_xexact) free((void *) *xexact); /* Return local matrix through same pointers. */ *val = val1; *bindx = bindx1; *b = b1; *x = x1; if (have_xexact) *xexact = xexact1; }
int main(int argc, char *argv[]) { int num_PDE_eqns=1, N_levels=3, nsmooth=2; int leng, level, N_grid_pts, coarsest_level; int leng1,leng2; /* See Aztec User's Guide for more information on the */ /* variables that follow. */ int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE]; double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE]; /* data structure for matrix corresponding to the fine grid */ double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time; AZ_MATRIX *Amat; AZ_PRECOND *Pmat = NULL; ML *ml; FILE *fp; int i, j, Nrigid, *garbage, nblocks=0, *blocks = NULL, *block_pde=NULL; struct AZ_SCALING *scaling; ML_Aggregate *ag; double *mode, *rigid=NULL, alpha; char filename[80]; int one = 1; int proc,nprocs; char pathfilename[100]; #ifdef ML_MPI MPI_Init(&argc,&argv); /* get number of processors and the name of this processor */ AZ_set_proc_config(proc_config, MPI_COMM_WORLD); proc = proc_config[AZ_node]; nprocs = proc_config[AZ_N_procs]; #else AZ_set_proc_config(proc_config, AZ_NOT_MPI); proc = 0; nprocs = 1; #endif if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/inputfile",argv[1]); ML_Reader_ReadInput(pathfilename, &context); } else context = (struct reader_context *) ML_allocate(sizeof(struct reader_context)); AZ_broadcast((char *) context, sizeof(struct reader_context), proc_config, AZ_PACK); AZ_broadcast((char *) NULL , 0 , proc_config, AZ_SEND); N_levels = context->N_levels; printf("N_levels %d\n",N_levels); nsmooth = context->nsmooth; num_PDE_eqns = context->N_dofPerNode; printf("num_PDE_eqns %d\n",num_PDE_eqns); ML_Set_PrintLevel(context->output_level); /* read in the number of matrix equations */ leng = 0; if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); fp=fopen(pathfilename,"r"); if (fp==NULL) { printf("**ERR** couldn't open file data_matrix.txt\n"); exit(1); } fscanf(fp,"%d",&leng); fclose(fp); } leng = AZ_gsum_int(leng, proc_config); N_grid_pts=leng/num_PDE_eqns; /* initialize the list of 
global indices. NOTE: the list of global */ /* indices must be in ascending order so that subsequent calls to */ /* AZ_find_index() will function properly. */ #if 0 if (proc_config[AZ_N_procs] == 1) i = AZ_linear; else i = AZ_file; #endif i = AZ_linear; /* cannot use AZ_input_update for variable blocks (forgot why, but debugged through it)*/ /* make a linear distribution of the matrix */ /* if the linear distribution does not align with the blocks, */ /* this is corrected in ML_AZ_Reader_ReadVariableBlocks */ leng1 = leng/nprocs; leng2 = leng-leng1*nprocs; if (proc >= leng2) { leng2 += (proc*leng1); } else { leng1++; leng2 = proc*leng1; } N_update = leng1; update = (int*)AZ_allocate((N_update+1)*sizeof(int)); if (update==NULL) { (void) fprintf (stderr, "Not enough space to allocate 'update'\n"); fflush(stderr); exit(EXIT_FAILURE); } for (i=0; i<N_update; i++) update[i] = i+leng2; #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_vblocks.txt",argv[1]); ML_AZ_Reader_ReadVariableBlocks(pathfilename,&nblocks,&blocks,&block_pde, &N_update,&update,proc_config); #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); AZ_input_msr_matrix(pathfilename,update, &val, &bindx, N_update, proc_config); /* This code is to fix things up so that we are sure we have */ /* all blocks (including the ghost nodes) the same size. 
*/ /* not sure, whether this is a good idea with variable blocks */ /* the examples inpufiles (see top of this file) don't need it */ /* anyway */ /* AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update); */ AZ_transform_norowreordering(proc_config, &external, bindx, val, update, &update_index, &extern_index, &data_org, N_update, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); Amat = AZ_matrix_create( leng ); AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL); Amat->matrix_type = data_org[AZ_matrix_type]; data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border]; start_time = AZ_second(); options[AZ_scaling] = AZ_none; ML_Create(&ml, N_levels); /* set up discretization matrix and matrix vector function */ AZ_ML_Set_Amat(ml, 0, N_update, N_update, Amat, proc_config); ML_Set_ResidualOutputFrequency(ml, context->output); ML_Set_Tolerance(ml, context->tol); ML_Aggregate_Create( &ag ); if (ML_strcmp(context->agg_coarsen_scheme,"Mis") == 0) { ML_Aggregate_Set_CoarsenScheme_MIS(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Uncoupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Coupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Coupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Metis") == 0) { ML_Aggregate_Set_CoarsenScheme_METIS(ag); for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else if (ML_strcmp(context->agg_coarsen_scheme,"VBMetis") == 0) { /* when no blocks read, use standard metis assuming constant block sizes */ if (!blocks) ML_Aggregate_Set_CoarsenScheme_METIS(ag); else { ML_Aggregate_Set_CoarsenScheme_VBMETIS(ag); ML_Aggregate_Set_Vblocks_CoarsenScheme_VBMETIS(ag,0,N_levels,nblocks, blocks,block_pde,N_update); } for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else { printf("**ERR** ML: Unknown aggregation scheme %s\n",context->agg_coarsen_scheme); exit(-1); } ML_Aggregate_Set_DampingFactor(ag, context->agg_damping); 
ML_Aggregate_Set_MaxCoarseSize( ag, context->maxcoarsesize); ML_Aggregate_Set_Threshold(ag, context->agg_thresh); if (ML_strcmp(context->agg_spectral_norm,"Calc") == 0) { ML_Set_SpectralNormScheme_Calc(ml); } else if (ML_strcmp(context->agg_spectral_norm,"Anorm") == 0) { ML_Set_SpectralNormScheme_Anorm(ml); } else { printf("**WRN** ML: Unknown spectral norm scheme %s\n",context->agg_spectral_norm); } /* read in the rigid body modes */ Nrigid = 0; if (proc_config[AZ_node] == 0) { sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); while( (fp = fopen(pathfilename,"r")) != NULL) { fclose(fp); Nrigid++; sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); } } Nrigid = AZ_gsum_int(Nrigid,proc_config); if (Nrigid != 0) { rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) ); if (rigid == NULL) { printf("Error: Not enough space for rigid body modes\n"); } } /* Set rhs */ sprintf(pathfilename,"%s/data_rhs.txt",argv[1]); fp = fopen(pathfilename,"r"); if (fp == NULL) { rhs=(double *)ML_allocate(leng*sizeof(double)); if (proc_config[AZ_node] == 0) printf("taking linear vector for rhs\n"); for (i = 0; i < N_update; i++) rhs[i] = (double) update[i]; } else { fclose(fp); if (proc_config[AZ_node] == 0) printf("reading rhs from a file\n"); AZ_input_msr_matrix(pathfilename, update, &rhs, &garbage, N_update, proc_config); } AZ_reorder_vec(rhs, data_org, update_index, NULL); for (i = 0; i < Nrigid; i++) { sprintf(filename,"data_nullsp%d.txt",i); sprintf(pathfilename,"%s/%s",argv[1],filename); AZ_input_msr_matrix(pathfilename, update, &mode, &garbage, N_update, proc_config); AZ_reorder_vec(mode, data_org, update_index, NULL); #if 0 /* test the given rigid body mode, output-vector should be ~0 */ Amat->matvec(mode, rigid, Amat, proc_config); for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]); #endif for (j = 0; j < i; j++) { alpha = -AZ_gdot(N_update, mode, 
&(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, mode, &one); } /* rhs orthogonalization */ alpha = -AZ_gdot(N_update, mode, rhs, proc_config)/ AZ_gdot(N_update, mode, mode, proc_config); DAXPY_F77(&N_update, &alpha, mode, &one, rhs, &one); for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j]; free(mode); free(garbage); } for (j = 0; j < Nrigid; j++) { alpha = -AZ_gdot(N_update, rhs, &(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, rhs, &one); } #if 0 /* for testing the default nullsp */ ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, 6, NULL, N_update); #else if (Nrigid != 0) { ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update); } #endif if (rigid) ML_free(rigid); ag->keep_agg_information = 1; coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, 0, ML_INCREASING, ag); coarsest_level--; if ( proc_config[AZ_node] == 0 ) printf("Coarse level = %d \n", coarsest_level); #if 0 /* set up smoothers */ if (!blocks) blocks = (int *) ML_allocate(sizeof(int)*N_update); #endif for (level = 0; level < coarsest_level; level++) { num_PDE_eqns = ml->Amat[level].num_PDEs; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->smoother,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } /* This is the symmetric Gauss-Seidel smoothing that we usually use. */ /* In parallel, it is not a true Gauss-Seidel in that each processor */ /* does a Gauss-Seidel on its local submatrix independent of the */ /* other processors. 
*/ else if (ML_strcmp(context->smoother,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->smoother,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->smoother,"VBSymGaussSeidel") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); blocks = NULL; block_pde = NULL; nblocks = 0; ML_Aggregate_Get_Vblocks_CoarsenScheme_VBMETIS(ag,level,N_levels,&nblocks, &blocks,&block_pde); if (blocks==NULL) ML_Gen_Blocks_Aggregates(ag, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } /* This is a true Gauss Seidel in parallel. This seems to work for */ /* elasticity problems. However, I don't believe that this is very */ /* efficient in parallel. */ /* nblocks = ml->Amat[level].invec_leng; for (i =0; i < nblocks; i++) blocks[i] = i; ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER, nsmooth, 1., nblocks, blocks); ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER, nsmooth, 1., nblocks, blocks); */ /* Jacobi Smoothing */ else if (ML_strcmp(context->smoother,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth,.4); ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,.4); } /* This does a block Gauss-Seidel (not true GS in parallel) */ /* where each processor has 'nblocks' blocks. 
*/ /* */ else if (ML_strcmp(context->smoother,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } else { printf("unknown smoother %s\n",context->smoother); exit(1); } } /* set coarse level solver */ nsmooth = context->coarse_its; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->coarse_solve,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , coarsest_level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } else if (ML_strcmp(context->coarse_solve,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->coarse_solve,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml, coarsest_level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->coarse_solve,"Aggregate") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); ML_Gen_Blocks_Aggregates(ag, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , coarsest_level, ML_BOTH, nsmooth,.5); } else if (ML_strcmp(context->coarse_solve,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, 
ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"SuperLU") == 0) { ML_Gen_CoarseSolverSuperLU( ml, coarsest_level); } else if (ML_strcmp(context->coarse_solve,"Amesos") == 0) { ML_Gen_Smoother_Amesos(ml,coarsest_level,ML_AMESOS_KLU,-1, 0.0); } else { printf("unknown coarse grid solver %s\n",context->coarse_solve); exit(1); } ML_Gen_Solver(ml, ML_MGV, 0, coarsest_level); AZ_defaults(options, params); if (ML_strcmp(context->krylov,"Cg") == 0) { options[AZ_solver] = AZ_cg; } else if (ML_strcmp(context->krylov,"Bicgstab") == 0) { options[AZ_solver] = AZ_bicgstab; } else if (ML_strcmp(context->krylov,"Tfqmr") == 0) { options[AZ_solver] = AZ_tfqmr; } else if (ML_strcmp(context->krylov,"Gmres") == 0) { options[AZ_solver] = AZ_gmres; } else { printf("unknown krylov method %s\n",context->krylov); } if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); options[AZ_scaling] = AZ_none; options[AZ_precond] = AZ_user_precond; options[AZ_conv] = AZ_r0; options[AZ_output] = 1; options[AZ_max_iter] = context->max_outer_its; options[AZ_poly_ord] = 5; options[AZ_kspace] = 130; params[AZ_tol] = context->tol; options[AZ_output] = context->output; ML_free(context); AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); setup_time = AZ_second() - start_time; xxx = (double *) malloc( leng*sizeof(double)); for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0; /* Set x */ /* there is no initguess supplied with these examples for the moment.... */ fp = fopen("initguessfile","r"); if (fp != NULL) { fclose(fp); if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n"); AZ_input_msr_matrix("data_initguess.txt", update, &xxx, &garbage, N_update, proc_config); options[AZ_conv] = AZ_expected_values; } else if (proc_config[AZ_node]== 0) printf("taking 0 initial guess \n"); AZ_reorder_vec(xxx, data_org, update_index, NULL); /* if Dirichlet BC ... 
put the answer in */ for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) { if ( (val[i] > .99999999) && (val[i] < 1.0000001)) xxx[i] = rhs[i]; } fp = fopen("AZ_no_multilevel.dat","r"); scaling = AZ_scaling_create(); start_time = AZ_second(); if (fp != NULL) { fclose(fp); options[AZ_precond] = AZ_none; options[AZ_scaling] = AZ_sym_diag; options[AZ_ignore_scaling] = AZ_TRUE; options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); /* options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); */ } else { options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; /* if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); */ } solve_time = AZ_second() - start_time; if (proc_config[AZ_node] == 0) printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time); if (proc_config[AZ_node] == 0) printf("Printing out a few entries of the solution ...\n"); for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for 
(j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} ML_Aggregate_Destroy(&ag); ML_Destroy(&ml); AZ_free((void *) Amat->data_org); AZ_free((void *) Amat->val); AZ_free((void *) Amat->bindx); AZ_free((void *) update); AZ_free((void *) external); AZ_free((void *) extern_index); AZ_free((void *) update_index); AZ_scaling_destroy(&scaling); if (Amat != NULL) AZ_matrix_destroy(&Amat); if (Pmat != NULL) AZ_precond_destroy(&Pmat); free(xxx); free(rhs); #ifdef ML_MPI MPI_Finalize(); #endif return 0; }
void ddd_add_member2(void *address, int blockcount, size_t byte_size)

/*****************************************************************
 *
 * ddd_add_member2:
 *
 *      Buffered broadcast routine.  This routine will force an
 *  immediate exchange when the buffer gets larger than a fixed
 *  size (currently 8000 bytes).  At this point, it's worthwhile
 *  to exchange immediately, because further buffering won't yield
 *  speed improvements.  Also, memory requirements for reallocs()
 *  in AZ_broadcast() are kept to a minimum.
 *
 *      This routine uses two static variables (listed below)
 *  and must be used in combination with the routine
 *  ddd_set_commit2() listed above.  ddd_set_commit2() does
 *  the final broadcast, if needed.
 *
 *      Inbetween the calls to ddd_add_member2() and
 *  ddd_set_commit2(), no other mp communication should be
 *  carried out that involves syncing processors.
 *  This is due to the fact that the send is carried out on
 *  Proc 0 in the ddd_set_commit2() routine, while the
 *  read is carried out on all other processors in the
 *  ddd_add_member2() routine.
 *
 *  Note: Errors in the broadcast lead to immediate error exits.
 *
 *  Input
 * -------
 *  address    : base address of the item to be broadcast
 *  blockcount : Number of items in the object to be sent
 *  byte_size  : Number of bytes per item in the object to be
 *               sent.
 *
 *  Static Variables
 * ------------------
 *  ddd_internal_count : Internal count of the buffer length
 *  Proc_Config[3]     : MP configuration information, in a form
 *                       needed by Aztec.
 *****************************************************************/
{
#ifdef PARALLEL
  int length;

  /* byte_size is unsigned (size_t), so only zero is invalid.
   * (The original tested 'byte_size <= 0'; the negative half of
   * that comparison can never fire on an unsigned type.)          */
  if (byte_size == 0) {
    fprintf(stderr," ddd_add_member2 ERROR: byte_size = %ld\n",
            (long int)byte_size);
    EH(-1,"ddd_add_member2 parameter error");
  }

  length = blockcount * byte_size;
  if (length > 0) {
    /* Pack this item into Aztec's broadcast buffer. */
    AZ_broadcast(address, length, Proc_Config, AZ_PACK);
    ddd_internal_count += length;

    /* Buffer full enough -- flush now rather than keep growing it. */
    if (ddd_internal_count > 8000) {
      AZ_broadcast(NULL, 0, Proc_Config, AZ_SEND);
      ddd_internal_count = 0;
    }
  }
#endif
}
int main(int argc, char *argv[]) /* Set up and solve a test problem defined in the subroutine init_matrix_vector_structures(). Author: Ray Tuminaro, Div 1422, Sandia National Labs date: 11/10/94 ******************************************************************************/ { double *ax,*x; /* ax is the right hand side for the test problem. x is the approximate solution obtained using AZTEC. */ int i,input_option; /* See Aztec User's Guide for more information */ /* on the variables that follow. */ int proc_config[AZ_PROC_SIZE];/* Processor information: */ /* proc_config[AZ_node] = node name */ /* proc_config[AZ_N_procs] = # of nodes */ int options[AZ_OPTIONS_SIZE]; /* Array used to select solver options. */ double params[AZ_PARAMS_SIZE]; /* User selected solver paramters. */ int *data_org; /* Array to specify data layout */ double status[AZ_STATUS_SIZE]; /* Information returned from AZ_solve() indicating success or failure. */ int *update, /* vector elements (global index) updated on this processor. */ *external; /* vector elements needed by this node. */ int *update_index; /* ordering of update[] and external[] */ int *extern_index; /* locally on this processor. For example update_index[i] gives the index location of the vector element which has the global index 'update[i]'. */ /* Sparse matrix to be solved is stored in these arrays. */ int *rpntr,*cpntr,*indx, *bpntr, *bindx; double *val; /* ----------------------- execution begins --------------------------------*/ /* Put the # of processors, the node id, */ /* and an MPI communicator into proc_config */ #ifdef AZTEC_MPI MPI_Init(&argc,&argv); AZ_set_proc_config(proc_config, MPI_COMM_WORLD); #else AZ_set_proc_config(proc_config, AZ_NOT_MPI ); #endif /* * Read and broadcast: problem choice, problem size, equations per grid point * and how we wish to initialize 'update'. 
*/ if (proc_config[AZ_node] == 0) { (void) printf("enter the application problem number\n"); (void) printf(" = 0: Finite Difference MSR Poisson on n x n x n grid.\n"); (void) printf(" = 1: Finite Difference VBR Poisson on n x n x n grid.\n"); (void) printf(" = 2: Finite Element MSR Poisson\n"); (void) printf(" = 3: Use AZ_read_msr_matrix() to read file '.data'\n"); scanf("%d",&application); if ((application < 0) || (application > 3)){ (void) fprintf(stderr, "Error: Invalid application (%d) selected\n", application); exit(1); } if (application == 0) { (void) printf("\nNote: To try other problems, change add_row_3D()"); (void) printf("\n in create_msr_matrix() to add_row_5pt() or"); (void) printf("\n add_row_9pt().\n\n"); } if (application == 2) { (void) printf("\nNote: Input files are provided for 1 finite element "); (void) printf("\n problem. This problem can be run on either 1 "); (void) printf("\n or 4 processors. To run on 1 processor, copy "); (void) printf("\n the file fe_1proc_grid_0 to fe_grid_0. To run on"); (void) printf("\n 4 processors, copy the files fe_4proc_grid_k to "); (void) printf("\n fe_grid_k (k = 0,1,2,3). 
In both cases enter 197"); (void) printf("\n when prompted for the number of grid points and "); (void) printf("\n linear when prompted for the partitioning!!!\n\n"); } if (application == 3) (void) printf("enter the total number of matrix rows\n"); else (void) printf("enter the total number of grid points\n"); scanf("%d", &N_grid_pts); num_PDE_eqns = 1; if (application < 2) { (void) printf("enter the number of equations per grid point\n"); scanf("%d", &num_PDE_eqns); } (void) printf("partition option \n"); (void) printf(" = %d: linear\n", AZ_linear); (void) printf(" = %d: update pts from file '.update'\n", AZ_file); if (application < 2) (void) printf(" = %d: box decomposition\n", AZ_box); scanf("%d", &input_option); } AZ_broadcast((char *) &N_grid_pts , sizeof(int), proc_config, AZ_PACK); AZ_broadcast((char *) &num_PDE_eqns, sizeof(int), proc_config, AZ_PACK); AZ_broadcast((char *) &input_option, sizeof(int), proc_config, AZ_PACK); AZ_broadcast((char *) &application , sizeof(int), proc_config, AZ_PACK); AZ_broadcast((char *) NULL , 0 , proc_config, AZ_SEND); /* create an application matrix for AZTEC */ init_matrix_vector_structures(proc_config, &update_index, &update, &data_org, &external, &extern_index, input_option, &val, &bindx, &indx, &bpntr, &rpntr, &cpntr); /* initialize AZTEC options */ init_options(options,params); if ( (i = AZ_check_input(data_org, options, params, proc_config) ) < 0) { AZ_print_error(i); exit(-1); } /* Matrix fill for finite element example (see Aztec User's Guide). */ if (application == 2) fill_fe_matrix(val, bindx, update, update_index, external, extern_index, data_org); /* Initialize right hand side and initial guess */ /* NOTE: STORAGE ALLOCATED FOR 'x' IS GREATER THAN THE NUMBER */ /* OF MATRIX ROWS PER PROCESSOR. 'x' INCLUDES SPACE FOR */ /* EXTERNAL (GHOST) ELEMENTS. THUS, THE SIZE OF 'x' IS */ /* 'data_org[AZ_N_internal] + data_org[AZ_N_border] + */ /* data_org[AZ_N_external]'. 
*/ init_guess_and_rhs(update_index, update, &x, &ax, data_org, val, indx, bindx, rpntr, cpntr, bpntr, proc_config); /* update[], update_index[], external[], extern_index[] are used to map * between Aztec's ordering of the equations and the user's ordering * (see the User's guide for more details). If these mapping arrays * are not needed by the user, they can be deallocated as they are not * used by AZ_solve(). */ free((void *) update); free((void *) update_index); free((void *) external); free((void *) extern_index); /* solve the system of equations using ax as the right hand side */ AZ_solve(x,ax, options, params, indx, bindx, rpntr, cpntr, bpntr, val, data_org, status, proc_config); /* Free allocated memory */ free((void *) x); free((void *) ax); free((void *) indx); free((void *) bindx); free((void *) rpntr); free((void *) cpntr); free((void *) bpntr); free((void *) val); free((void *) data_org); #ifdef AZTEC_MPI MPI_Finalize(); #endif return(1); }