int main(int argc, char *argv[]) { int proc_config[AZ_PROC_SIZE];// Processor information. #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm comm(MPI_COMM_WORLD); AZ_set_proc_config(proc_config,MPI_COMM_WORLD); #else Epetra_SerialComm comm; AZ_set_proc_config(proc_config,0); #endif int temp; if (comm.MyPID()==0) {cout << "Type 1 and enter to continue" <<endl; cin >> temp; } comm.Barrier();
AZ_MATRIX *user_Kn_build(struct user_partition *Node_Partition) { int *Kn_bindx; double *Kn_val; int proc_config[AZ_PROC_SIZE]; AZ_MATRIX *Kn_mat; int *reordered_glob_nodes = NULL, *cpntr = NULL, *Kn_data_org = NULL; int i, ii, jj, nx, gid, Nlocal_nodes, nz_ptr; Nlocal_nodes = Node_Partition->Nlocal; Kn_bindx = (int *) malloc((27*Nlocal_nodes+5)*sizeof(int)); Kn_val = (double *) malloc((27*Nlocal_nodes+5)*sizeof(double)); Kn_bindx[0] = Nlocal_nodes+1; nx = (int) sqrt( ((double) Node_Partition->Nglobal) + .00001); for (i = 0; i < Nlocal_nodes; i++) { gid = (Node_Partition->my_global_ids)[i]; nz_ptr = Kn_bindx[i]; ii = gid%nx; jj = (gid - ii)/nx; if (ii != nx-1) { Kn_bindx[nz_ptr] = gid+ 1; Kn_val[nz_ptr++] = -1.;} if (jj != nx-1) { Kn_bindx[nz_ptr] = gid+nx; Kn_val[nz_ptr++] = -1.;} if (jj != 0) { Kn_bindx[nz_ptr] = gid-nx; Kn_val[nz_ptr++] = -1.;} if (ii != 0) { Kn_bindx[nz_ptr] = gid- 1; Kn_val[nz_ptr++] = -1.;} if ((ii != nx-1) && (jj != 0)) {Kn_bindx[nz_ptr] = gid-nx+1; Kn_val[nz_ptr++] = -1.;} if ((ii != nx-1) && (jj != nx-1)) {Kn_bindx[nz_ptr] = gid+nx+1; Kn_val[nz_ptr++] = -1.;} if ((ii != 0) && (jj != nx-1)) {Kn_bindx[nz_ptr] = gid+nx-1; Kn_val[nz_ptr++] = -1.;} if ((ii != 0) && (jj != 0)) {Kn_bindx[nz_ptr] = gid-nx-1; Kn_val[nz_ptr++] = -1.;} Kn_val[i] = (double) (nz_ptr - Kn_bindx[i]); Kn_bindx[i+1] = nz_ptr; } AZ_set_proc_config(proc_config, COMMUNICATOR); AZ_transform_norowreordering(proc_config,&(Node_Partition->needed_external_ids), Kn_bindx, Kn_val, Node_Partition->my_global_ids, &reordered_glob_nodes, &reordered_node_externs, &Kn_data_org, Nlocal_nodes, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); Node_Partition->Nghost = Kn_data_org[AZ_N_external]; AZ_free(reordered_glob_nodes); /* Convert old style Aztec matrix to newer style Aztec matrix */ Kn_mat = AZ_matrix_create( Nlocal_nodes ); AZ_set_MSR(Kn_mat, Kn_bindx, Kn_val, Kn_data_org, 0, NULL, AZ_LOCAL); return(Kn_mat); }
/*
 * user_partition_nodes -- give this processor a linear slice of the
 * global node set.
 *
 * On return, Partition->Nlocal and Partition->my_global_ids describe the
 * locally owned nodes (assigned by AZ_input_update with an AZ_linear
 * decomposition); no ghost nodes are recorded at this stage, so
 * Partition->Nghost is reset to zero.
 */
void user_partition_nodes(struct user_partition *Partition)
{
  int az_proc_info[AZ_PROC_SIZE];

  AZ_set_proc_config(az_proc_info, COMMUNICATOR);
  AZ_input_update(NULL, &(Partition->Nlocal), &(Partition->my_global_ids),
                  az_proc_info, Partition->Nglobal, 1, AZ_linear);
  Partition->Nghost = 0;
}
/*
 * hunt_problem -- driver for zeroth/first order "hunting" continuation.
 *
 * Allocates the solution, residual, and sensitivity work vectors plus the
 * per-hunting-condition step-size arrays (nHC conditions from the global
 * hunt[] table), sets up the Aztec linear solver system(s) and the chosen
 * sparse-matrix format (MSR, VBR, or frontal), then marches the
 * continuation parameter(s) from BegParameterValue to EndParameterValue
 * in at most cont->MaxPathSteps steps.  At each step it calls
 * solve_nonlinear_problem(); on convergence it writes results (ASCII and
 * EXODUS II), evaluates post-processing fluxes/sensitivities/volume
 * integrals, and grows or keeps the step size via path_step_control();
 * on failure it halves the step (or, on a first-step failure, perturbs
 * the damping/tolerance controls) and retries, giving up after 10 Newton
 * attempts.  All work storage is released at the free_and_clear label.
 *
 * Parameters:
 *   cx  -- array of communications structures (one per neighbor proc)
 *   exo -- pointer to the finite element mesh database (EXODUS II)
 *   dpi -- distributed processing information
 *
 * Returns nothing; calls exit(0) on non-convergence or step-size
 * underflow so driving scripts see a "success" status (see the comment
 * at the minimum-step check below).
 *
 * NOTE(review): relies heavily on file/global state (hunt[], augc[],
 * cont, tran, Num_Proc, NumUnknowns, custom_tol*, damp_factor*, ...);
 * the exact statement order matters, so the body below is intentionally
 * left untouched.
 */
void hunt_problem(Comm_Ex *cx, /* array of communications structures */ Exo_DB *exo, /* ptr to the finite element mesh database */ Dpi *dpi) /* distributed processing information */ { int *ija=NULL; /* column pointer array */ double *a=NULL; /* nonzero array */ double *a_old=NULL; /* nonzero array */ double *x=NULL; /* solution vector */ int iAC; /* COUNTER */ double *x_AC = NULL; /* SOLUTION VECTOR OF EXTRA UNKNOWNS */ double *x_AC_old=NULL; /* old SOLUTION VECTOR OF EXTRA UNKNOWNS */ double *x_AC_dot = NULL; int iHC; /* COUNTER */ int *ija_attic=NULL; /* storage for external dofs */ int eb_indx, ev_indx; /* * variables for path traversal */ double *x_old=NULL; /* old solution vector */ double *x_older=NULL; /* older solution vector */ double *x_oldest=NULL; /* oldest solution vector saved */ double *xdot=NULL; /* current path derivative of soln */ double *xdot_old=NULL; double *x_update=NULL; double *x_sens=NULL; /* solution sensitivity */ double **x_sens_p=NULL; /* solution sensitivity for parameters */ int num_pvector=0; /* number of solution sensitivity vectors */ #ifdef COUPLED_FILL struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL}; #else /* COUPLED_FILL */ struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL, NULL}; #endif /* COUPLED_FILL */ /* sl_util_structs.h */ double *resid_vector=NULL; /* residual */ double *resid_vector_sens=NULL; /* residual sensitivity */ double *scale=NULL; /* scale vector for modified newton */ int *node_to_fill = NULL; int n; /* total number of path steps attempted */ int ni; /* total number of nonlinear solves */ int nt; /* total number of successful path steps */ int path_step_reform; /* counter for jacobian reformation stride */ int converged; /* success or failure of Newton iteration */ int success_ds; /* success or failure of path step */ int i; int nprint=0, num_total_nodes; int numProcUnknowns; int *const_delta_s=NULL; int step_print; double i_print; int good_mesh = TRUE; double *path=NULL, *path1=NULL; double 
*delta_s=NULL, *delta_s_new=NULL, *delta_s_old=NULL; double *delta_s_older=NULL, *delta_s_oldest=NULL; double *hDelta_s0=NULL, *hDelta_s_min=NULL, *hDelta_s_max=NULL; double delta_t; double theta=0.0; double damp; double eps; double *lambda=NULL, *lambdaEnd=NULL; double hunt_par, dhunt_par, hunt_par_old; /* hunting continuation parameter */ double timeValueRead = 0.0; /* * ALC management variables */ int alqALC; int *aldALC=NULL; /* * Other local variables */ int error, err, is_steady_state, inewton; int *gindex = NULL, gsize; int *p_gsize=NULL; double *gvec=NULL; double ***gvec_elem; double err_dbl; FILE *file=NULL; double toler_org[3],damp_org; struct Results_Description *rd=NULL; int tnv; /* total number of nodal variables and kinds */ int tev; /* total number of elem variables and kinds */ int tnv_post; /* total number of nodal variables and kinds for post processing */ int tev_post; /* total number of elem variables and kinds for post processing */ int max_unk_elem, one, three; /* variables used as mf_setup arguments*/ unsigned int matrix_systems_mask; double evol_local=0.0; #ifdef PARALLEL double evol_global=0.0; #endif static char yo[]="hunt_problem"; /* * BEGIN EXECUTION */ #ifdef DEBUG fprintf(stderr, "hunt_problem() begins...\n"); #endif toler_org[0] = custom_tol1; toler_org[1] = custom_tol2; toler_org[2] = custom_tol3; damp_org = damp_factor1; is_steady_state = TRUE; p_gsize = &gsize; /* * set aside space for gather global vectors to print to exoII file * note: this is temporary * * For 2D prototype problem: allocate space for T, dx, dy arrays */ if( strlen( Soln_OutFile) ) { #ifdef DEBUG printf("Trying to open \"%s\" for writing.\n", Soln_OutFile); #endif file = fopen(Soln_OutFile, "w"); if (file == NULL) { DPRINTF(stderr, "%s: opening soln file for writing\n", yo); EH(-1, "\t"); } } #ifdef PARALLEL check_parallel_error("Soln output file error"); #endif /* * Some preliminaries to help setup EXODUS II database output. 
*/ #ifdef DEBUG fprintf(stderr, "cnt_nodal_vars() begins...\n"); #endif tnv = cnt_nodal_vars(); /* tnv_post is calculated in load_nodal_tkn*/ tev = cnt_elem_vars(); /* tev_post is calculated in load_elem_tkn*/ #ifdef DEBUG fprintf(stderr, "Found %d total primitive nodal variables to output.\n", tnv); fprintf(stderr, "Found %d total primitive elem variables to output.\n", tev); #endif if ( tnv < 0 ) { DPRINTF(stderr, "%s:\tbad tnv.\n", yo); EH(-1, "\t"); } if ( tev < 0 ) { DPRINTF(stderr, "%s:\tMaybe bad tev? See goma design committee ;) \n", yo); /* exit(-1); */ } rd = (struct Results_Description *) smalloc(sizeof(struct Results_Description)); if (rd == NULL) { EH(-1, "Could not grab Results Description."); } (void) memset((void *) rd, 0, sizeof(struct Results_Description)); rd->nev = 0; /* number element variables in results */ rd->ngv = 0; /* number global variables in results */ rd->nhv = 0; /* number history variables in results */ if ( is_steady_state == TRUE ) { rd->ngv = 5; /* number global variables in results see load_global_var_info for names*/ error = load_global_var_info(rd, 0, "CONV"); error = load_global_var_info(rd, 1, "NEWT_IT"); error = load_global_var_info(rd, 2, "MAX_IT"); error = load_global_var_info(rd, 3, "CONVRATE"); error = load_global_var_info(rd, 4, "MESH_VOLUME"); } /* load nodal types, kinds, names */ error = load_nodal_tkn( rd, &tnv, &tnv_post); /* load nodal types, kinds, names */ if (error !=0) { DPRINTF(stderr, "%s: problem with load_nodal_tkn()\n", yo); EH(-1,"\t"); } /* load elem types, names */ error = load_elem_tkn( rd, exo, tev, &tev_post); /* load elem types, names */ if ( error !=0 ) { DPRINTF(stderr, "%s: problem with load_elem_tkn()\n", yo); EH(-1,"\t"); } /* * Write out the names of the nodal variables that we will be sending to * the EXODUS II output file later. 
*/ #ifdef DEBUG fprintf(stderr, "wr_result_prelim() starts...\n", tnv); #endif gvec_elem = (double ***) smalloc ( (exo->num_elem_blocks)*sizeof(double **)); for (i = 0; i < exo->num_elem_blocks; i++) { gvec_elem[i] = (double **) smalloc ( (tev + tev_post)*sizeof(double *)); } wr_result_prelim_exo( rd, exo, ExoFileOut, gvec_elem ); #ifdef DEBUG fprintf(stderr, "P_%d: wr_result_prelim_exo() ends...\n", ProcID, tnv); #endif /* * This gvec workhorse transports output variables as nodal based vectors * that are gather from the solution vector. Note: it is NOT a global * vector at all and only carries this processor's nodal variables to * the exodus database. */ asdv(&gvec, Num_Node); /* * Allocate space and manipulate for all the nodes that this processor * is aware of... */ num_total_nodes = dpi->num_universe_nodes; numProcUnknowns = NumUnknowns + NumExtUnknowns; /* allocate memory for Volume Constraint Jacobian. ACS 2/99 */ if ( nAC > 0) { for(iAC=0;iAC<nAC;iAC++) { augc[iAC].d_evol_dx = (double*) malloc(numProcUnknowns*sizeof(double)); } } asdv(&resid_vector, numProcUnknowns); asdv(&resid_vector_sens, numProcUnknowns); asdv(&scale, numProcUnknowns); for (i=0;i<NUM_ALSS;i++) { ams[i] = (struct Aztec_Linear_Solver_System *) array_alloc(1, 1, sizeof(struct Aztec_Linear_Solver_System )); } #ifdef MPI AZ_set_proc_config( ams[0]->proc_config, MPI_COMM_WORLD ); #ifndef COUPLED_FILL if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, MPI_COMM_WORLD ); #endif /* not COUPLED_FILL */ #else /* MPI */ AZ_set_proc_config( ams[0]->proc_config, 0 ); #ifndef COUPLED_FILL if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, 0 ); #endif /* not COUPLED_FILL */ #endif /* MPI */ /* * allocate space for and initialize solution arrays */ asdv(&x, numProcUnknowns); asdv(&x_old, numProcUnknowns); asdv(&x_older, numProcUnknowns); asdv(&x_oldest, numProcUnknowns); asdv(&xdot, numProcUnknowns); asdv(&xdot_old, numProcUnknowns); asdv(&x_update, numProcUnknowns); asdv(&x_sens, 
numProcUnknowns); /* * Initialize solid inertia flag */ set_solid_inertia(); /* * ALLOCATE ALL THOSE WORK VECTORS FOR HUNTING */ asdv(&lambda, nHC); asdv(&lambdaEnd, nHC); asdv(&path, nHC); asdv(&path1, nHC); asdv(&hDelta_s0, nHC); asdv(&hDelta_s_min, nHC); asdv(&hDelta_s_max, nHC); asdv(&delta_s, nHC); asdv(&delta_s_new, nHC); asdv(&delta_s_old, nHC); asdv(&delta_s_older, nHC); asdv(&delta_s_oldest, nHC); aldALC = Ivector_birth(nHC); const_delta_s = Ivector_birth(nHC); /* HUNTING BY ZERO AND FIRST ORDER CONTINUATION */ alqALC = 1; damp = 1.0; delta_t = 0.0; tran->delta_t = 0.0; /*for Newmark-Beta terms in Lagrangian Solid*/ nprint = 0; MaxPathSteps = cont->MaxPathSteps; eps = cont->eps; for (iHC=0;iHC<nHC;iHC++) { const_delta_s[iHC] = 0; lambda[iHC] = hunt[iHC].BegParameterValue; lambdaEnd[iHC] = hunt[iHC].EndParameterValue; if ((lambdaEnd[iHC]-lambda[iHC]) > 0.0) { aldALC[iHC] = +1; } else { aldALC[iHC] = -1; } if (hunt[iHC].ramp == 1) { hunt[iHC].Delta_s0 = fabs(lambdaEnd[iHC]-lambda[iHC])/((double)(MaxPathSteps-1)); const_delta_s[iHC] = 1; } hDelta_s0[iHC] = hunt[iHC].Delta_s0; hDelta_s_min[iHC] = hunt[iHC].Delta_s_min; hDelta_s_max[iHC] = hunt[iHC].Delta_s_max; path[iHC] = path1[iHC] = lambda[iHC]; if (Debug_Flag && ProcID == 0) { fprintf(stderr,"MaxPathSteps: %d \tlambdaEnd: %f\n", MaxPathSteps, lambdaEnd[iHC]); fprintf(stderr,"continuation in progress\n"); } if (hDelta_s0[iHC] > hDelta_s_max[iHC]) { hDelta_s0[iHC] = hDelta_s_max[iHC]; } delta_s[iHC] = delta_s_old[iHC] = delta_s_older[iHC] = hDelta_s0[iHC]; /* * ADJUST NATURAL PARAMETER */ update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); } /* define continuation parameter */ if(hunt[0].EndParameterValue == hunt[0].BegParameterValue) { hunt_par = 1.0; } else { hunt_par = (path1[0]-hunt[0].BegParameterValue) /(hunt[0].EndParameterValue - hunt[0].BegParameterValue) ; hunt_par=fabs(hunt_par); } hunt_par_old = hunt_par; /* Call prefront (or mf_setup) if necessary */ if (Linear_Solver 
== FRONT) { /* Also got to define these because it wants pointers to these numbers */ max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE; one = 1; three = 3; /* NOTE: We need a overall flag in the vn_glob struct that tells whether FULL_DG is on anywhere in domain. This assumes only one material. See sl_front_setup for test. that test needs to be in the input parser. */ if(vn_glob[0]->dg_J_model == FULL_DG) { max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE + 4*vn_glob[0]->modes*4*MDE; } if (Num_Proc > 1) EH(-1, "Whoa. No front allowed with nproc>1"); #ifdef HAVE_FRONT err = mf_setup(&exo->num_elems, &NumUnknowns, &max_unk_elem, &three, &one, exo->elem_order_map, fss->el_proc_assign, fss->level, fss->nopdof, fss->ncn, fss->constraint, front_scratch_directory, &fss->ntra); EH(err,"problems in frontal setup "); #else EH(-1,"Don't have frontal solver compiled and linked in"); #endif } /* * if compute parameter sensitivities, allocate space for solution * sensitivity vectors */ for(i=0;i<nn_post_fluxes_sens;i++) { num_pvector=MAX(num_pvector,pp_fluxes_sens[i]->vector_id);} for(i=0;i<nn_post_data_sens;i++) { num_pvector=MAX(num_pvector,pp_data_sens[i]->vector_id);} if((nn_post_fluxes_sens + nn_post_data_sens) > 0) { num_pvector++; num_pvector = MAX(num_pvector,2); x_sens_p = Dmatrix_birth(num_pvector,numProcUnknowns); } else { x_sens_p = NULL; } if (nAC > 0) { asdv(&x_AC, nAC); asdv(&x_AC_old, nAC); asdv(&x_AC_dot, nAC); } /* Allocate sparse matrix */ if( strcmp( Matrix_Format, "msr" ) == 0) { log_msg("alloc_MSR_sparse_arrays..."); alloc_MSR_sparse_arrays(&ija, &a, &a_old, 0, node_to_fill, exo, dpi); /* * An attic to store external dofs column names is needed when * running in parallel. */ alloc_extern_ija_buffer(num_universe_dofs, num_internal_dofs+num_boundary_dofs, ija, &ija_attic); /* * Any necessary one time initialization of the linear * solver package (Aztec). 
*/ ams[JAC]->bindx = ija; ams[JAC]->val = a; ams[JAC]->belfry = ija_attic; ams[JAC]->val_old = a_old; /* * These point to nowhere since we're using MSR instead of VBR * format. */ ams[JAC]->indx = NULL; ams[JAC]->bpntr = NULL; ams[JAC]->rpntr = NULL; ams[JAC]->cpntr = NULL; ams[JAC]->npn = dpi->num_internal_nodes + dpi->num_boundary_nodes; ams[JAC]->npn_plus = dpi->num_internal_nodes + dpi->num_boundary_nodes + dpi->num_external_nodes; ams[JAC]->npu = num_internal_dofs+num_boundary_dofs; ams[JAC]->npu_plus = num_universe_dofs; ams[JAC]->nnz = ija[num_internal_dofs+num_boundary_dofs] - 1; ams[JAC]->nnz_plus = ija[num_universe_dofs]; } else if( strcmp( Matrix_Format, "vbr" ) == 0) { log_msg("alloc_VBR_sparse_arrays..."); alloc_VBR_sparse_arrays ( ams[JAC], exo, dpi); ija_attic = NULL; ams[JAC]->belfry = ija_attic; a = ams[JAC]->val; if( !save_old_A ) a_old = ams[JAC]->val_old; } else if ( strcmp( Matrix_Format, "front") == 0 ) { /* Don't allocate any sparse matrix space when using front */ ams[JAC]->bindx = NULL; ams[JAC]->val = NULL; ams[JAC]->belfry = NULL; ams[JAC]->val_old = NULL; ams[JAC]->indx = NULL; ams[JAC]->bpntr = NULL; ams[JAC]->rpntr = NULL; ams[JAC]->cpntr = NULL; } else { EH(-1,"Attempted to allocate unknown sparse matrix format"); } init_vec(x, cx, exo, dpi, x_AC, nAC, &timeValueRead); /* if read ACs, update data floats */ if (nAC > 0) { if(augc[0].iread == 1) { for(iAC=0 ; iAC<nAC ; iAC++) { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); } } } /* * set boundary conditions on the initial conditions */ find_and_set_Dirichlet(x, xdot, exo, dpi); exchange_dof(cx, dpi, x); dcopy1(numProcUnknowns,x,x_old); dcopy1(numProcUnknowns,x_old,x_older); dcopy1(numProcUnknowns,x_older,x_oldest); if( nAC > 0) { dcopy1(nAC,x_AC, x_AC_old);} /* * initialize the counters for when to print out data */ step_print = 1; matrix_systems_mask = 1; log_msg("sl_init()..."); sl_init(matrix_systems_mask, ams, exo, dpi, cx); #ifdef PARALLEL /* * Make sure the solver was 
properly initialized on all processors. */ check_parallel_error("Solver initialization problems"); #endif ams[JAC]->options[AZ_keep_info] = 1; DPRINTF(stderr, "\nINITIAL ELEMENT QUALITY CHECK---\n"); good_mesh = element_quality(exo, x, ams[0]->proc_config); /* * set the number of successful path steps to zero */ nt = 0; /* * LOOP THROUGH PARAMETER UNTIL MAX NUMBER * OF STEPS SURPASSED */ for (n=0;n<MaxPathSteps;n++) { alqALC = 1; for (iHC=0;iHC<nHC;iHC++) { switch (aldALC[iHC]) { case -1: /* REDUCING PARAMETER DIRECTION */ if (path1[iHC] <= lambdaEnd[iHC]) { alqALC = -1; path1[iHC] = lambdaEnd[iHC]; delta_s[iHC] = path[iHC]-path1[iHC]; } break; case +1: /* RISING PARAMETER DIRECTION */ if (path1[iHC] >= lambdaEnd[iHC]) { alqALC = -1; path1[iHC] = lambdaEnd[iHC]; delta_s[iHC] = path1[iHC]-path[iHC]; } break; } /* * ADJUST NATURAL PARAMETER */ update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); } /* end of iHC loop */ if(hunt[0].EndParameterValue == hunt[0].BegParameterValue) { hunt_par = 1.0; } else { hunt_par = (path1[0]-hunt[0].BegParameterValue) /(hunt[0].EndParameterValue - hunt[0].BegParameterValue) ; hunt_par=fabs(hunt_par); } /* * IF STEP CHANGED, REDO FIRST ORDER PREDICTION */ if(alqALC == -1) { DPRINTF(stderr,"\n\t ******** LAST PATH STEP!\n"); dcopy1(numProcUnknowns,x_old,x); dhunt_par = hunt_par-hunt_par_old; switch (Continuation) { case HUN_ZEROTH: break; case HUN_FIRST: v1add(numProcUnknowns, &x[0], dhunt_par, &x_sens[0]); break; } } /* * reset Dirichlet condition Mask, node->DBC to -1 where it * is set in order for Dirichlet conditions to be * set appropriately for each path step */ nullify_dirichlet_bcs(); find_and_set_Dirichlet (x, xdot, exo, dpi); exchange_dof(cx, dpi, x); if(ProcID ==0) { DPRINTF(stderr, "\n\t----------------------------------"); switch (Continuation) { case HUN_ZEROTH: DPRINTF(stderr, "\n\tZero Order Hunting:"); break; case HUN_FIRST: DPRINTF(stderr, "\n\tFirst Order Hunting:"); break; } DPRINTF(stderr, 
"\n\tStep number: %4d of %4d (max)", n+1, MaxPathSteps); DPRINTF(stderr, "\n\tAttempting solution at: theta = %g",hunt_par); for (iHC=0;iHC<nHC;iHC++) { switch (hunt[iHC].Type) { case 1: /* BC */ DPRINTF(stderr, "\n\tBCID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID); break; case 2: /* MT */ DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d", hunt[iHC].MTID, hunt[iHC].MPID); break; case 3: /* AC */ DPRINTF(stderr, "\n\tACID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID); break; } DPRINTF(stderr, " Parameter= % 10.6e delta_s= %10.6e", path1[iHC], delta_s[iHC]); } } ni = 0; do { #ifdef DEBUG fprintf(stderr, "%s: starting solve_nonlinear_problem\n", yo); #endif err = solve_nonlinear_problem(ams[JAC], x, delta_t, theta, x_old, x_older, xdot, xdot_old, resid_vector, x_update, scale, &converged, &nprint, tev, tev_post, NULL, rd, gindex, p_gsize, gvec, gvec_elem, path1[0], exo, dpi, cx, 0, &path_step_reform, is_steady_state, x_AC, x_AC_dot, hunt_par, resid_vector_sens, x_sens, x_sens_p, NULL); #ifdef DEBUG fprintf(stderr, "%s: returned from solve_nonlinear_problem\n", yo); #endif if (err == -1) converged = 0; inewton = err; if (converged) { EH(error, "error writing ASCII soln file."); /* srs need to check */ if (Write_Intermediate_Solutions == 0) { #ifdef DEBUG fprintf(stderr, "%s: write_solution call WIS\n", yo); #endif write_solution(ExoFileOut, resid_vector, x, x_sens_p, x_old, xdot, xdot_old, tev, tev_post,NULL, rd, gindex, p_gsize, gvec, gvec_elem, &nprint, delta_s[0], theta, path1[0], NULL, exo, dpi); #ifdef DEBUG fprintf(stderr, "%s: write_solution end call WIS\n", yo); #endif } /* * PRINT OUT VALUES OF EXTRA UNKNOWNS * FROM AUGMENTING CONDITIONS */ if (nAC > 0) { DPRINTF(stderr, "\n------------------------------\n"); DPRINTF(stderr, "Augmenting Conditions: %4d\n", nAC); DPRINTF(stderr, "Number of extra unknowns: %4d\n\n", nAC); for (iAC = 0; iAC < nAC; iAC++) { if (augc[iAC].Type == AC_USERBC) { DPRINTF(stderr, "\tAC[%4d] DF[%4d] = %10.6e\n", augc[iAC].BCID, augc[iAC].DFID, 
x_AC[iAC]); } else if (augc[iAC].Type == AC_USERMAT || augc[iAC].Type == AC_FLUX_MAT ) { DPRINTF(stderr, "\n MT[%4d] MP[%4d] = %10.6e\n", augc[iAC].MTID, augc[iAC].MPID, x_AC[iAC]); } else if(augc[iAC].Type == AC_VOLUME) { evol_local = augc[iAC].evol; #ifdef PARALLEL if( Num_Proc > 1 ) { MPI_Allreduce( &evol_local, &evol_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } evol_local = evol_global; #endif DPRINTF(stderr, "\tMT[%4d] VC[%4d]=%10.6e Param=%10.6e\n", augc[iAC].MTID, augc[iAC].VOLID, evol_local, x_AC[iAC]); } else if(augc[iAC].Type == AC_POSITION) { evol_local = augc[iAC].evol; #ifdef PARALLEL if( Num_Proc > 1 ) { MPI_Allreduce( &evol_local, &evol_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } evol_local = evol_global; #endif DPRINTF(stderr, "\tMT[%4d] XY[%4d]=%10.6e Param=%10.6e\n", augc[iAC].MTID, augc[iAC].VOLID, evol_local, x_AC[iAC]); } else if(augc[iAC].Type == AC_FLUX) { DPRINTF(stderr, "\tBC[%4d] DF[%4d]=%10.6e\n", augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]); } } } /* Check element quality */ good_mesh = element_quality(exo, x, ams[0]->proc_config); /* INTEGRATE FLUXES, FORCES */ for (i = 0; i < nn_post_fluxes; i++) { err_dbl = evaluate_flux ( exo, dpi, pp_fluxes[i]->ss_id, pp_fluxes[i]->flux_type , pp_fluxes[i]->flux_type_name , pp_fluxes[i]->blk_id , pp_fluxes[i]->species_number, pp_fluxes[i]->flux_filenm, pp_fluxes[i]->profile_flag, x,xdot,NULL,delta_s[0],path1[0],1); } /* COMPUTE FLUX, FORCE SENSITIVITIES */ for (i = 0; i < nn_post_fluxes_sens; i++) { err_dbl = evaluate_flux_sens ( exo, dpi, pp_fluxes_sens[i]->ss_id, pp_fluxes_sens[i]->flux_type , pp_fluxes_sens[i]->flux_type_name , pp_fluxes_sens[i]->blk_id , pp_fluxes_sens[i]->species_number, pp_fluxes_sens[i]->sens_type, pp_fluxes_sens[i]->sens_id, pp_fluxes_sens[i]->sens_flt, pp_fluxes_sens[i]->sens_flt2, pp_fluxes_sens[i]->vector_id, pp_fluxes_sens[i]->flux_filenm, pp_fluxes_sens[i]->profile_flag, x,xdot,x_sens_p,delta_s[0],path1[0],1); } /* * Compute global volumetric quantities */ 
for (i = 0; i < nn_volume; i++ ) { evaluate_volume_integral(exo, dpi, pp_volume[i]->volume_type, pp_volume[i]->volume_name, pp_volume[i]->blk_id, pp_volume[i]->species_no, pp_volume[i]->volume_fname, pp_volume[i]->params, NULL, x, xdot, delta_s[0], path1[0], 1); } } /* end of if converged block */ /* * INCREMENT COUNTER */ ni++; /* * * DID IT CONVERGE ? * IF NOT, REDUCE STEP SIZE AND TRY AGAIN * */ if (!converged) { if (ni > 10) { DPRINTF(stderr,"\n ************************************\n"); DPRINTF(stderr," W: Did not converge in Newton steps.\n"); DPRINTF(stderr," Find better initial guess. \n"); DPRINTF(stderr," ************************************\n"); exit(0); } /* * ADJUST STEP SIZE - unless failed on first step */ if ( nt != 0 ) { DPRINTF(stderr, "\n\tFailed to converge:\n"); for (iHC=0;iHC<nHC;iHC++) { delta_s[iHC] *= 0.5; switch (aldALC[iHC]) { case -1: path1[iHC] = path[iHC] - delta_s[iHC]; break; case +1: path1[iHC] = path[iHC] + delta_s[iHC]; break; } /* * RESET */ alqALC = 1; DPRINTF(stderr, "Decreasing step-length to %10.6e.\n", delta_s[iHC]); if (delta_s[iHC] < hDelta_s_min[iHC]) { DPRINTF(stderr,"\n X: C step-length reduced below minimum."); DPRINTF(stderr,"\n Program terminated.\n"); /* This needs to have a return value of 0, indicating * success, for the continuation script to not treat this * as a failed command. 
*/ exit(0); } #ifdef PARALLEL check_parallel_error("\t"); #endif /* * ADJUST NATURAL PARAMETER */ update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); } /* end of iHC loop */ if(hunt[0].EndParameterValue == hunt[0].BegParameterValue) { hunt_par = 1.0; } else { hunt_par = (path1[0]-hunt[0].BegParameterValue) /(hunt[0].EndParameterValue - hunt[0].BegParameterValue) ; hunt_par=fabs(hunt_par); } /* * GET ZERO OR FIRST ORDER PREDICTION */ dhunt_par = hunt_par-hunt_par_old; switch (Continuation) { case HUN_ZEROTH: vcopy(numProcUnknowns, &x[0], 1.0, &x_old[0]); break; case HUN_FIRST: v2sum(numProcUnknowns, &x[0], 1.0, &x_old[0], dhunt_par, &x_sens[0]); break; } /* MMH: Needed to put this in, o/w it may find that the * solution and residual HAPPEN to satisfy the convergence * criterion for the next newton solve... */ find_and_set_Dirichlet(x, xdot, exo, dpi); exchange_dof(cx, dpi, x); if (nAC > 0) { dcopy1(nAC, x_AC_old, x_AC); for(iAC=0 ; iAC<nAC ; iAC++) { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); } } if(hunt[0].EndParameterValue == hunt[0].BegParameterValue) { hunt_par = 1.0; } else { hunt_par = (path1[0]-hunt[0].BegParameterValue) /(hunt[0].EndParameterValue - hunt[0].BegParameterValue) ; hunt_par=fabs(hunt_par); } } else if (inewton == -1) { DPRINTF(stderr,"\nHmm... trouble on first step \n Let's try some more relaxation \n"); if((damp_factor1 <= 1. && damp_factor1 >= 0.) && (damp_factor2 <= 1. && damp_factor2 >= 0.) && (damp_factor3 <= 1. && damp_factor3 >= 0.)) { custom_tol1 *= 0.01; custom_tol2 *= 0.01; custom_tol3 *= 0.01; DPRINTF(stderr," custom tolerances %g %g %g \n",custom_tol1,custom_tol2,custom_tol3); } else { damp_factor1 *= 0.5; DPRINTF(stderr," damping factor %g \n",damp_factor1); } vcopy(numProcUnknowns, &x[0], 1.0, &x_old[0]); /* MMH: Needed to put this in, o/w it may find that the * solution and residual HAPPEN to satisfy the convergence * criterion for the next newton solve... 
*/ find_and_set_Dirichlet(x, xdot, exo, dpi); exchange_dof(cx, dpi, x); if (nAC > 0) { dcopy1(nAC, x_AC_old, x_AC); for(iAC=0 ; iAC<nAC ; iAC++) { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); } } } else { DPRINTF(stderr,"\nHmm... could not converge on first step\n Let's try some more iterations\n"); if((damp_factor1 <= 1. && damp_factor1 >= 0.) && (damp_factor2 <= 1. && damp_factor2 >= 0.) && (damp_factor3 <= 1. && damp_factor3 >= 0.)) { custom_tol1 *= 100.; custom_tol2 *= 100.; custom_tol3 *= 100.; DPRINTF(stderr," custom tolerances %g %g %g \n",custom_tol1,custom_tol2,custom_tol3); } else { damp_factor1 *= 2.0; damp_factor1 = MIN(damp_factor1,1.0); DPRINTF(stderr," damping factor %g \n",damp_factor1); } } } /* end of !converged */ } while (converged == 0); /* * CONVERGED */ nt++; custom_tol1 = toler_org[0]; custom_tol2 = toler_org[1]; custom_tol3 = toler_org[2]; damp_factor1 = damp_org; DPRINTF(stderr, "\n\tStep accepted, theta (proportion complete) = %10.6e\n", hunt_par); for (iHC=0;iHC<nHC;iHC++) { switch (hunt[iHC].Type) { case 1: /* BC */ DPRINTF(stderr, "\tStep accepted, BCID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID); break; case 2: /* MT */ DPRINTF(stderr, "\tStep accepted, MTID=%3d MPID=%5d", hunt[iHC].MTID, hunt[iHC].MPID); break; case 3: /* AC */ DPRINTF(stderr, "\tStep accepted, ACID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID); break; } DPRINTF(stderr, " Parameter= % 10.6e\n", path1[iHC]); } /* * check path step error, if too large do not enlarge path step */ for (iHC=0;iHC<nHC;iHC++) { if ((ni == 1) && (n != 0) && (!const_delta_s[iHC])) { delta_s_new[iHC] = path_step_control(num_total_nodes, delta_s[iHC], delta_s_old[iHC], x, eps, &success_ds, cont->use_var_norm, inewton); if (delta_s_new[iHC] > hDelta_s_max[iHC]) {delta_s_new[iHC] = hDelta_s_max[iHC];} } else { success_ds = 1; delta_s_new[iHC] = delta_s[iHC]; } } /* * determine whether to print out the data or not */ i_print = 0; if (nt == step_print) { i_print = 1; step_print += 
cont->print_freq; } if (alqALC == -1) { i_print = 1; } if (i_print) { error = write_ascii_soln(x, resid_vector, numProcUnknowns, x_AC, nAC, path1[0], file); if (error) { DPRINTF(stderr, "%s: error writing ASCII soln file\n", yo); } if ( Write_Intermediate_Solutions == 0 ) { write_solution(ExoFileOut, resid_vector, x, x_sens_p, x_old, xdot, xdot_old, tev, tev_post, NULL, rd, gindex, p_gsize, gvec, gvec_elem, &nprint, delta_s[0], theta, path1[0], NULL, exo, dpi); nprint++; } } /* * backup old solutions * can use previous solutions for prediction one day */ dcopy1(numProcUnknowns,x_older,x_oldest); dcopy1(numProcUnknowns,x_old,x_older); dcopy1(numProcUnknowns,x,x_old); dcopy1(nHC,delta_s_older,delta_s_oldest); dcopy1(nHC,delta_s_old ,delta_s_older ); dcopy1(nHC,delta_s ,delta_s_old ); dcopy1(nHC,delta_s_new ,delta_s ); /* delta_s_oldest = delta_s_older; delta_s_older = delta_s_old; delta_s_old = delta_s; delta_s = delta_s_new; */ hunt_par_old=hunt_par; if ( nAC > 0) { dcopy1(nAC, x_AC, x_AC_old); } /* * INCREMENT/DECREMENT PARAMETER */ for (iHC=0;iHC<nHC;iHC++) { path[iHC] = path1[iHC]; switch (aldALC[iHC]) { case -1: path1[iHC] = path[iHC] - delta_s[iHC]; break; case +1: path1[iHC] = path[iHC] + delta_s[iHC]; break; } /* * ADJUST NATURAL PARAMETER */ update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); } /* end of iHC loop */ /* * GET FIRST ORDER PREDICTION */ if(hunt[0].EndParameterValue == hunt[0].BegParameterValue) { hunt_par = 1.0; } else { hunt_par = (path1[0]-hunt[0].BegParameterValue) /(hunt[0].EndParameterValue - hunt[0].BegParameterValue) ; hunt_par=fabs(hunt_par); } dhunt_par = hunt_par-hunt_par_old; switch (Continuation) { case HUN_ZEROTH: break; case HUN_FIRST: v1add(numProcUnknowns, &x[0], dhunt_par, &x_sens[0]); break; } if (!good_mesh) goto free_and_clear; /* * * CHECK END CONTINUATION * */ if (alqALC == -1) { alqALC = 0; } else { alqALC = 1; } if (alqALC == 0) { DPRINTF(stderr,"\n\n\t I will continue no more!\n\t No more 
continuation for you!\n"); goto free_and_clear; } } /* n */ if(n == MaxPathSteps && aldALC[0] * (lambdaEnd[0] - path[0]) > 0) { DPRINTF(stderr,"\n\tFailed to reach end of hunt in maximum number of successful steps (%d).\n\tSorry.\n", MaxPathSteps); exit(0); } #ifdef PARALLEL check_parallel_error("Hunting error"); #endif /* * DONE CONTINUATION */ free_and_clear: /* * Transform the node point coordinates according to the * displacements and write out all the results using the * displaced coordinates. Set the displacement field to * zero, too. */ if (Anneal_Mesh) { #ifdef DEBUG fprintf(stderr, "%s: anneal_mesh()...\n", yo); #endif err = anneal_mesh(x, tev, tev_post, NULL, rd, path1[0], exo, dpi); #ifdef DEBUG DPRINTF(stderr, "%s: anneal_mesh()-done\n", yo); #endif EH(err, "anneal_mesh() bad return."); } /* * Free a bunch of variables that aren't needed anymore */ safer_free((void **) &ROT_Types); safer_free((void **) &node_to_fill); safer_free( (void **) &resid_vector); safer_free( (void **) &resid_vector_sens); safer_free( (void **) &scale); safer_free( (void **) &x); if (nAC > 0) { safer_free( (void **) &x_AC); safer_free( (void **) &x_AC_old); safer_free( (void **) &x_AC_dot); } safer_free( (void **) &x_old); safer_free( (void **) &x_older); safer_free( (void **) &x_oldest); safer_free( (void **) &xdot); safer_free( (void **) &xdot_old); safer_free( (void **) &x_update); safer_free( (void **) &x_sens); if((nn_post_data_sens+nn_post_fluxes_sens) > 0) Dmatrix_death(x_sens_p,num_pvector,numProcUnknowns); for(i = 0; i < MAX_NUMBER_MATLS; i++) { for(n = 0; n < MAX_MODES; n++) { safer_free((void **) &(ve_glob[i][n]->gn)); safer_free((void **) &(ve_glob[i][n])); } safer_free((void **) &(vn_glob[i])); } sl_free(matrix_systems_mask, ams); for (i=0;i<NUM_ALSS;i++) { safer_free( (void**) &(ams[i])); } safer_free( (void **) &gvec); safer_free( (void **) &lambda); safer_free( (void **) &lambdaEnd); safer_free( (void **) &path); safer_free( (void **) &path1); safer_free( (void 
**) &hDelta_s0); safer_free( (void **) &hDelta_s_min); safer_free( (void **) &hDelta_s_max); safer_free( (void **) &delta_s); safer_free( (void **) &delta_s_new); safer_free( (void **) &delta_s_old); safer_free( (void **) &delta_s_older); safer_free( (void **) &delta_s_oldest); Ivector_death(&aldALC[0], nHC); Ivector_death(&const_delta_s[0], nHC); i = 0; for ( eb_indx = 0; eb_indx < exo->num_elem_blocks; eb_indx++ ) { for ( ev_indx = 0; ev_indx < rd->nev; ev_indx++ ) { if ( exo->elem_var_tab[i++] == 1 ) { safer_free ((void **) &(gvec_elem [eb_indx][ev_indx]) ); } } safer_free ((void **) &(gvec_elem [eb_indx])); } safer_free( (void **) &gvec_elem); safer_free( (void **) &rd); safer_free( (void **) &Local_Offset); safer_free( (void **) &Dolphin); if( strlen( Soln_OutFile) ) { fclose(file); } return; } /* END of routine hunt_problem */
/* ML + Aztec smoothed-aggregation example driver.
 *
 * Reads an MSR matrix from the file '.data', builds an ML aggregation
 * hierarchy (energy-minimization variant) on top of it, and solves the
 * system with AZTEC GMRES preconditioned by the ML V/W-cycle.
 *
 * Optional input files:
 *   - "AZ_capture_rhs.mat"        : rhs to use instead of a random vector
 *   - "AZ_capture_init_guess.mat" : initial guess instead of zero
 *   - "AZ_no_multilevel.dat"      : if present, solve WITHOUT the ML
 *                                   preconditioner (diagonal scaling only)
 *
 * Returns 0 on success; calls exit(1) if '.data' cannot be opened.
 */
int main(int argc, char *argv[])
{
  int num_PDE_eqns=5, N_levels=3;
  /* int nsmooth=1; */

  int leng, level, N_grid_pts, coarsest_level;

  /* See Aztec User's Guide for more information on the */
  /* variables that follow.                             */
  int    proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];

  /* data structure for matrix corresponding to the fine grid */
  int    *data_org = NULL, *update = NULL, *external = NULL;
  int    *update_index = NULL, *extern_index = NULL;
  int    *cpntr = NULL;
  int    *bindx = NULL, N_update, iii;
  double *val = NULL;
  double *xxx, *rhs;

  AZ_MATRIX  *Amat;
  AZ_PRECOND *Pmat = NULL;
  ML         *ml;
  FILE       *fp;
  int        ch,i;
  struct AZ_SCALING *scaling;
  double     solve_time, setup_time, start_time;
  ML_Aggregate *ag;
  int        *ivec;
#ifdef VBR_VERSION
  ML_Operator *B, *C, *D;
  int *vbr_cnptr, *vbr_rnptr, *vbr_indx, *vbr_bindx, *vbr_bnptr, total_blk_rows;
  int total_blk_cols, blk_space, nz_space;
  double *vbr_val;
  struct ML_CSR_MSRdata *csr_data;
#endif

#ifdef ML_MPI
  MPI_Init(&argc,&argv);

  /* get number of processors and the name of this processor */
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI);
#endif

  /* The first entry of '.data' is the global number of matrix rows. */
#ifdef binary
  fp=fopen(".data","rb");
#else
  fp=fopen(".data","r");
#endif
  if (fp==NULL) {
    printf("couldn't open file .data\n");
    exit(1);
  }
#ifdef binary
  fread(&leng, sizeof(int), 1, fp);
#else
  fscanf(fp,"%d",&leng);
#endif
  fclose(fp);

  N_grid_pts=leng/num_PDE_eqns;

  /* initialize the list of global indices. NOTE: the list of global */
  /* indices must be in ascending order so that subsequent calls to  */
  /* AZ_find_index() will function properly. */
  AZ_read_update(&N_update, &update, proc_config, N_grid_pts, num_PDE_eqns,
                 AZ_linear);

  AZ_read_msr_matrix(update, &val, &bindx, N_update, proc_config);

  /* This code is to fix things up so that we are sure we have */
  /* all block (including the ghost nodes the same size.       */
  AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update);

  /* Localize column indices and build the communication pattern. */
  AZ_transform(proc_config, &external, bindx, val, update, &update_index,
               &extern_index, &data_org, N_update, 0, 0, 0, &cpntr,
               AZ_MSR_MATRIX);

  Amat = AZ_matrix_create( leng );

#ifndef VBR_VERSION
  AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL);
  Amat->matrix_type = data_org[AZ_matrix_type];
  data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border];
#else
  /* Convert the MSR matrix into VBR format with num_PDE_eqns x
     num_PDE_eqns blocks. */
  total_blk_rows = N_update/num_PDE_eqns;
  total_blk_cols = total_blk_rows;
  blk_space = total_blk_rows*20;
  nz_space = blk_space*num_PDE_eqns*num_PDE_eqns;

  vbr_cnptr = (int    *) ML_allocate(sizeof(int )*(total_blk_cols+1));
  vbr_rnptr = (int    *) ML_allocate(sizeof(int )*(total_blk_cols+1));
  vbr_bnptr = (int    *) ML_allocate(sizeof(int )*(total_blk_cols+2));
  vbr_indx  = (int    *) ML_allocate(sizeof(int )*(blk_space+1));
  vbr_bindx = (int    *) ML_allocate(sizeof(int )*(blk_space+1));
  vbr_val   = (double *) ML_allocate(sizeof(double)*(nz_space+1));

  for (i = 0; i <= total_blk_cols; i++) vbr_cnptr[i] = num_PDE_eqns;

  AZ_msr2vbr(vbr_val, vbr_indx, vbr_rnptr, vbr_cnptr, vbr_bnptr, vbr_bindx,
             bindx, val, total_blk_rows, total_blk_cols, blk_space,
             nz_space, -1);

  data_org[AZ_N_rows]      = data_org[AZ_N_internal] + data_org[AZ_N_border];
  data_org[AZ_N_int_blk]   = data_org[AZ_N_internal]/num_PDE_eqns;
  data_org[AZ_N_bord_blk]  = data_org[AZ_N_bord_blk]/num_PDE_eqns;
  data_org[AZ_N_ext_blk]   = data_org[AZ_N_ext_blk]/num_PDE_eqns;
  data_org[AZ_matrix_type] = AZ_VBR_MATRIX;

  AZ_set_VBR(Amat, vbr_rnptr, vbr_cnptr, vbr_bnptr, vbr_indx, vbr_bindx,
             vbr_val, data_org, 0, NULL, AZ_LOCAL);
  Amat->matrix_type = data_org[AZ_matrix_type];
#endif

  start_time = AZ_second();

  ML_Create(&ml, N_levels);
  ML_Set_PrintLevel(3);

  /* set up discretization matrix and matrix vector function */
  AZ_ML_Set_Amat(ml, N_levels-1, N_update, N_update, Amat, proc_config);

  ML_Aggregate_Create( &ag );
  ML_Aggregate_Set_Threshold(ag,0.0);
  ML_Set_SpectralNormScheme_PowerMethod(ml);

  /*
     To run SA:
       a) set damping factor to 1 and use power method
          ML_Aggregate_Set_DampingFactor(ag, 4./3.);
     To run NSA:
       a) set damping factor to 0
          ML_Aggregate_Set_DampingFactor(ag, 0.);
     To run NSR
       a) set damping factor to 1 and use power method
          ML_Aggregate_Set_DampingFactor(ag, 1.);
          ag->Restriction_smoothagg_transpose = ML_FALSE;
          ag->keep_agg_information=1;
          ag->keep_P_tentative=1;
       b) hack code so it calls the energy minimizing restriction
          line 2973 of ml_agg_genP.c
       c) turn on the NSR flag in ml_agg_energy_min.cpp
     To run Emin
       a) set min_eneryg = 2 and keep_agg_info = 1;
          ag->minimizing_energy=2;
          ag->keep_agg_information=1;
          ag->cheap_minimizing_energy = 0;
          ag->block_scaled_SA = 1;
  */
  ag->minimizing_energy=2;
  ag->keep_agg_information=1;
  ag->block_scaled_SA = 1;

  ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, num_PDE_eqns, NULL, N_update);
  ML_Aggregate_Set_MaxCoarseSize( ag, 20);

  /*
  ML_Aggregate_Set_RandomOrdering( ag );
  ML_Aggregate_Set_DampingFactor(ag, .1);
  ag->drop_tol_for_smoothing = 1.0e-3;
  ML_Aggregate_Set_Threshold(ag, 1.0e-3);
  ML_Aggregate_Set_MaxCoarseSize( ag, 300);
  */

  /* ML returns the number of levels actually generated; convert it into
     the index of the coarsest level (levels are numbered DECREASING from
     the fine grid at N_levels-1). */
  coarsest_level =
    ML_Gen_MultiLevelHierarchy_UsingAggregation(ml, N_levels-1,
                                                ML_DECREASING, ag);
  coarsest_level = N_levels - coarsest_level;
  if ( proc_config[AZ_node] == 0 )
    printf("Coarse level = %d \n", coarsest_level);

  /* set up smoothers */
  AZ_defaults(options, params);

  for (level = N_levels-1; level > coarsest_level; level--) {

    /* This is the Aztec domain decomp/ilu smoother that we */
    /* usually use for this problem.                        */

    /*
    options[AZ_precond] = AZ_dom_decomp;
    options[AZ_subdomain_solve] = AZ_ilut;
    params[AZ_ilut_fill] = 1.0;
    options[AZ_reorder] = 1;
    ML_Gen_SmootherAztec(ml, level, options, params, proc_config, status,
                         AZ_ONLY_PRECONDITIONER, ML_PRESMOOTHER,NULL);
    */

    /* Sparse approximate inverse smoother that acutally does both */
    /* pre and post smoothing.                                     */

    /*
    ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth,
                              parasails_sym, parasails_thresh,
                              parasails_nlevels, parasails_filter,
                              parasails_loadbal, parasails_factorized);
    parasails_thresh /= 4.;
    */

    /* This is the symmetric Gauss-Seidel smoothing. In parallel,    */
    /* it is not a true Gauss-Seidel in that each processor          */
    /* does a Gauss-Seidel on its local submatrix independent of the */
    /* other processors.                                             */

    /*
    ML_Gen_Smoother_SymGaussSeidel(ml,level,ML_PRESMOOTHER, nsmooth,1.);
    ML_Gen_Smoother_SymGaussSeidel(ml,level,ML_POSTSMOOTHER,nsmooth,1.);
    */

    /* Block Gauss-Seidel with block size equal to #DOF per node.   */
    /* Not a true Gauss-Seidel in that each processor does a        */
    /* Gauss-Seidel on its local submatrix independent of the other */
    /* processors.                                                  */

    /*
    ML_Gen_Smoother_BlockGaussSeidel(ml,level,ML_PRESMOOTHER, nsmooth,0.67,
                                     num_PDE_eqns);
    ML_Gen_Smoother_BlockGaussSeidel(ml,level,ML_POSTSMOOTHER, nsmooth, 0.67,
                                     num_PDE_eqns);
    */

    ML_Gen_Smoother_SymBlockGaussSeidel(ml,level,ML_POSTSMOOTHER, 1, 1.0,
                                        num_PDE_eqns);
  }

  ML_Gen_CoarseSolverSuperLU( ml, coarsest_level);
  ML_Gen_Solver(ml, ML_MGW, N_levels-1, coarsest_level);

  /* GMRES with the ML hierarchy as a user-supplied preconditioner. */
  AZ_defaults(options, params);

  options[AZ_solver]   = AZ_gmres;
  options[AZ_scaling]  = AZ_none;
  options[AZ_precond]  = AZ_user_precond;
  /* options[AZ_conv] = AZ_r0; */
  options[AZ_output]   = 1;
  options[AZ_max_iter] = 1500;
  options[AZ_poly_ord] = 5;
  options[AZ_kspace]   = 130;
  params[AZ_tol]       = 1.0e-8;

  /*
  options[AZ_precond] = AZ_dom_decomp;
  options[AZ_subdomain_solve] = AZ_ilut;
  params[AZ_ilut_fill] = 2.0;
  */

  AZ_set_ML_preconditioner(&Pmat, Amat, ml, options);
  setup_time = AZ_second() - start_time;

  xxx = (double *) malloc( leng*sizeof(double));
  rhs=(double *)malloc(leng*sizeof(double));

  for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0;

  /* Set rhs */
  fp = fopen("AZ_capture_rhs.mat","r");
  if (fp == NULL) {
    if (proc_config[AZ_node] == 0) printf("taking random vector for rhs\n");
    AZ_random_vector(rhs, data_org, proc_config);
    AZ_reorder_vec(rhs, data_org, update_index, NULL);
  }
  else {
    fclose(fp);
    ivec =(int *)malloc((leng+1)*sizeof(int));
    AZ_input_msr_matrix("AZ_capture_rhs.mat", update, &rhs, &ivec, N_update,
                        proc_config);
    free(ivec);
    AZ_reorder_vec(rhs, data_org, update_index, NULL);
  }

  /* Set x */
  fp = fopen("AZ_capture_init_guess.mat","r");
  if (fp != NULL) {
    fclose(fp);
    ivec =(int *)malloc((leng+1)*sizeof(int));
    AZ_input_msr_matrix("AZ_capture_init_guess.mat",update, &xxx, &ivec,
                        N_update, proc_config);
    free(ivec);
    AZ_reorder_vec(xxx, data_org, update_index, NULL);
  }

  /* if Dirichlet BC ... put the answer in */
  /* (a diagonal entry of exactly 1 is taken to mark a Dirichlet row) */
  for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) {
    if ( (val[i] > .99999999) && (val[i] < 1.0000001))
      xxx[i] = rhs[i];
  }

  fp = fopen("AZ_no_multilevel.dat","r");
  scaling = AZ_scaling_create();
  start_time = AZ_second();

  if (fp != NULL) {
    /* one-level solve: no ML preconditioner, symmetric diagonal scaling */
    fclose(fp);
    options[AZ_precond]        = AZ_none;
    options[AZ_scaling]        = AZ_sym_diag;
    options[AZ_ignore_scaling] = AZ_TRUE;
    options[AZ_keep_info]      = 1;

    AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL,
               scaling);

    /*
    options[AZ_pre_calc] = AZ_reuse;
    options[AZ_conv] = AZ_expected_values;
    if (proc_config[AZ_node] == 0)
      printf("\n-------- Second solve with improved convergence test -----\n");
    AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL,
               scaling);
    if (proc_config[AZ_node] == 0)
      printf("\n-------- Third solve with improved convergence test -----\n");
    AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL,
               scaling);
    */
  }
  else {
    /* multilevel-preconditioned solve */
    options[AZ_keep_info] = 1;

    AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat,
               scaling);

    options[AZ_pre_calc] = AZ_reuse;
    options[AZ_conv] = AZ_expected_values;
    /*
    if (proc_config[AZ_node] == 0)
      printf("\n-------- Second solve with improved convergence test -----\n");
    AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat,
               scaling);
    if (proc_config[AZ_node] == 0)
      printf("\n-------- Third solve with improved convergence test -----\n");
    AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat,
               scaling);
    */
  }

  solve_time = AZ_second() - start_time;

  if (proc_config[AZ_node] == 0)
    printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time);

  ML_Aggregate_Destroy(&ag);
  ML_Destroy(&ml);

  /* NOTE(review): the MSR arrays handed to Amat via AZ_set_MSR are freed
     by hand here before AZ_matrix_destroy; this assumes AZ_matrix_destroy
     does not free them again -- confirm against the Aztec ownership rules
     for AZ_set_MSR's 'local' storage flag. */
  AZ_free((void *) Amat->data_org);
  AZ_free((void *) Amat->val);
  AZ_free((void *) Amat->bindx);
  AZ_free((void *) update);
  AZ_free((void *) external);
  AZ_free((void *) extern_index);
  AZ_free((void *) update_index);
  AZ_scaling_destroy(&scaling);
  if (Amat != NULL) AZ_matrix_destroy(&Amat);
  if (Pmat != NULL) AZ_precond_destroy(&Pmat);
  free(xxx);
  free(rhs);

#ifdef ML_MPI
  MPI_Finalize();
#endif

  return 0;
}
/* Build the local edge-element ("Ke") stiffness matrix in Aztec MSR format
 * for the 2-D periodic Maxwell example.
 *
 * The matrix is first assembled with GLOBAL column indices (one row per
 * locally-owned edge, at most 6 off-diagonals per row) and then converted
 * to local indexing via AZ_transform_norowreordering(), which also creates
 * the communication structure Ke_data_org.
 *
 * Side effect: sets Edge_Partition->Nghost from the transform output.
 * Returns a newly created AZ_MATRIX wrapping the MSR arrays (caller owns).
 *
 * NOTE(review): malloc results are not checked; the 7*Nlocal_edges+1
 * allocation presumes at most 6 off-diagonals per row plus the MSR
 * row-pointer block -- consistent with the stencil below.
 */
AZ_MATRIX *user_Ke_build(struct user_partition *Edge_Partition)
{
  double dcenter, doff, sigma = .0001;
  int ii,jj, horv, i, nx, global_id, nz_ptr, Nlocal_edges;

  /* Aztec matrix and temp variables */
  int       *Ke_bindx, *Ke_data_org = NULL;
  double    *Ke_val;
  AZ_MATRIX *Ke_mat;
  int       proc_config[AZ_PROC_SIZE], *cpntr = NULL;
  int       *reordered_glob_edges = NULL, *reordered_edge_externs = NULL;

  Nlocal_edges = Edge_Partition->Nlocal;
  /* nx = grid dimension; Nglobal counts both horizontal and vertical
     edges, hence the /2 before the square root. */
  nx = (int) sqrt( ((double) Edge_Partition->Nglobal/2) + .00001);

  Ke_bindx = (int    *) malloc((7*Nlocal_edges+1)*sizeof(int));
  Ke_val   = (double *) malloc((7*Nlocal_edges+1)*sizeof(double));
  Ke_bindx[0] = Nlocal_edges+1;   /* MSR: off-diagonals start here */

  dcenter = 2 + 2.*sigma/((double) ( 3 * nx * nx));
  doff = -1 + sigma/((double) ( 6 * nx * nx));

  for (i = 0; i < Nlocal_edges; i++) {
    global_id = (Edge_Partition->my_global_ids)[i];
    /* recover (ii,jj) grid coordinates and orientation of this edge */
    invindex(global_id, &ii, &jj, nx, &horv);
    nz_ptr = Ke_bindx[i];

    Ke_val[i] = dcenter;

    if (horv == HORIZONTAL) {
      if (jj != 0) {
        Ke_bindx[nz_ptr] = north(ii,jj,nx);  Ke_val[nz_ptr++] = doff;
        Ke_bindx[nz_ptr] = east(ii,jj,nx);   Ke_val[nz_ptr++] = -1.;
        if (ii != 0) {Ke_bindx[nz_ptr]=west(ii,jj,nx); Ke_val[nz_ptr++]= 1.;}
        jj--;   /* drop to the row of cells below this edge */
      }
      else {
        /* bottom boundary row gets a modified diagonal */
        Ke_val[i] = 1. + 2.*sigma/((double) ( 3 * nx * nx));
        jj = nx-1;
      }
      Ke_bindx[nz_ptr] = east(ii,jj,nx);  Ke_val[nz_ptr++] = 1.;
      if (ii != 0){ Ke_bindx[nz_ptr]=west(ii,jj,nx);  Ke_val[nz_ptr++]=-1.;}
      if (jj != 0){ Ke_bindx[nz_ptr]=south(ii,jj,nx); Ke_val[nz_ptr++]=doff;}
    }
    else {
      if (ii != 0) {
        Ke_bindx[nz_ptr] = north(ii,jj,nx);  Ke_val[nz_ptr++] = -1.;
        Ke_bindx[nz_ptr] = east(ii,jj,nx);   Ke_val[nz_ptr++] = doff;
        if (jj != 0) {Ke_bindx[nz_ptr]=south(ii,jj,nx); Ke_val[nz_ptr++]=1.;}
        ii--;   /* step to the column of cells left of this edge */
      }
      else {
        /* left boundary column gets a modified diagonal */
        Ke_val[i] = 1 + 2.*sigma/((double) ( 3 * nx * nx));
        ii = nx-1;
      }
      Ke_bindx[nz_ptr] = north(ii,jj,nx);  Ke_val[nz_ptr++] = 1.;
      if (ii != 0) {Ke_bindx[nz_ptr]=west(ii,jj,nx);  Ke_val[nz_ptr++]=doff;}
      if (jj != 0) {Ke_bindx[nz_ptr]=south(ii,jj,nx); Ke_val[nz_ptr++]=-1.;}
    }
    Ke_bindx[i+1] = nz_ptr;   /* close this MSR row */
  }

  AZ_set_proc_config(proc_config, COMMUNICATOR);

  /* Replace global column ids by local ids and build Ke_data_org. */
  AZ_transform_norowreordering(proc_config,
                               &(Edge_Partition->needed_external_ids),
                               Ke_bindx, Ke_val,
                               Edge_Partition->my_global_ids,
                               &reordered_glob_edges, &reordered_edge_externs,
                               &Ke_data_org, Nlocal_edges, 0, 0, 0,
                               &cpntr, AZ_MSR_MATRIX);
  AZ_free(reordered_glob_edges);
  AZ_free(reordered_edge_externs);
  Edge_Partition->Nghost = Ke_data_org[AZ_N_external];

  Ke_mat = AZ_matrix_create( Nlocal_edges );
  AZ_set_MSR(Ke_mat, Ke_bindx, Ke_val, Ke_data_org, 0, NULL, AZ_LOCAL);

  return(Ke_mat);
}
void AZ_matvec_mult(double *val, int *indx, int *bindx, int *rpntr, int *cpntr, int *bpntr, double *b, register double *c, int exchange_flag, int *data_org) /****************************************************************************** c = Ab: Sparse (square) overlapped matrix-vector multiply, using the distributed variable block row (DVBR) data structure (A = val). Author: Scott A. Hutchinson, SNL, 1421 ======= Return code: void ============ Parameter list: =============== m: Number of (block) rows in A. val: Array containing the entries of the matrix. The matrix is stored block-row-by-block-row. Each block entry is dense and stored by columns (VBR). indx: The ith element of indx points to the location in val of the (0,0) entry of the ith block entry. The last element is the number of nonzero entries of matrix A plus one. bindx: Contains the block column indices of the non-zero block entries. rpntr: The ith element of rpntr indicates the first point row in the i'th block row. The last element is the number of block rows plus one. cpntr: The jth element of cpntr indicates the first point column in the jth block column. The last element is the number of bpntr: The ith element of bpntr points to the first block entry of the ith row in bindx. The last element is the number of nonzero blocks of matrix A plus one. b: Contains the vector b. c: Contains the result vector c. exchange_flag: Flag which controls call to exchange_bdry. data_org: Array containing information on the distribution of the matrix to this processor as well as communication parameters (see Aztec User's Guide). 
******************************************************************************/ { /* local variables */ AZ_MATRIX Amat; int proc_config[AZ_PROC_SIZE]; static int first_time = 1; if (exchange_flag != 1) { printf("Warning: exchange_flag is no longer used in AZ_matvec_mult().\n"); printf(" Set to '1' to avoid this message.\n"); } Amat.rpntr = rpntr; Amat.cpntr = cpntr; Amat.bpntr = bpntr; Amat.bindx = bindx; Amat.indx = indx; Amat.val = val; Amat.data_org = data_org; Amat.aux_ival = NULL; Amat.aux_dval = NULL; Amat.aux_matrix = NULL; Amat.matrix_type = data_org[AZ_matrix_type]; #ifdef AZTEC_MPI AZ_set_comm(proc_config, MPI_COMM_WORLD); if (first_time == 1) { AZ_set_proc_config(proc_config, MPI_COMM_WORLD); #else if (first_time == 1) { AZ_set_proc_config(proc_config, AZ_NOT_MPI); #endif if (proc_config[AZ_node] == 0) { printf("Warning: AZ_matvec_mult() should be replaced with either\n"); printf(" AZ_MSR_matvec_mult or AZ_VBR_matvec_mult()\n"); } } first_time = 0; if (Amat.matrix_type == AZ_MSR_MATRIX) Amat.matvec = AZ_MSR_matvec_mult; else if (Amat.matrix_type == AZ_VBR_MATRIX) Amat.matvec = AZ_VBR_matvec_mult; Amat.matvec(b, c, &Amat, proc_config); }
/* Assign unknowns to processors */
/*
 * Partition the EDGE unknowns of the example grid across processors.
 *
 * With AZTEC defined, Aztec's AZ_input_update() does a linear partition.
 * Otherwise a hand-rolled partition is built that supports ONLY 1 or 2
 * processors (the code exits for nprocs > 2, or for 2 procs with an odd
 * global count).  Fills: mypid, nprocs, Nlocal, Nghost, my_global_ids
 * (locally owned global ids) and my_local_ids (global -> local map over
 * ALL global ids; -1 marks "not visible here"; entries >= Nlocal are
 * ghosts).
 */
void partition_edges(struct user_partition *Partition)
{
  int i, nx, Nloc;
  ML_Comm *comm;
  int *my_local_ids;

#ifdef AZTEC
  int proc_config[AZ_PROC_SIZE];
  AZ_set_proc_config(proc_config, COMMUNICATOR);

  Partition->mypid = proc_config[AZ_node];
  Partition->nprocs= proc_config[AZ_N_procs];

  AZ_input_update(NULL,&(Partition->Nlocal), &(Partition->my_global_ids),
                  proc_config, Partition->Nglobal, 1, AZ_linear);
#else
  Partition->Nghost = 0;
  /* nx = grid dimension (Nglobal counts two edge orientations). */
  nx = (int) sqrt( ((double) Partition->Nglobal/2) + .00001);

  ML_Comm_Create( &comm);
  Partition->mypid = comm->ML_mypid;
  Partition->nprocs= comm->ML_nprocs;

  if (comm->ML_nprocs > 2) {
    if (comm->ML_mypid == 0)
      printf("This example only works for one or two processors\n");
    exit(1);
  }
  if ((comm->ML_nprocs == 2) && (Partition->Nglobal%2 == 1)) {
    if (comm->ML_mypid == 0)
      printf("Two processor case requires an even number of points.\n");
    exit(1);
  }

  if (comm->ML_nprocs == 1) {
    /* 1 processor case is simple */
    Partition->Nlocal = Partition->Nglobal;
    Partition->my_global_ids = (int *) malloc(sizeof(int)*Partition->Nlocal);
    for (i = 0; i < Partition->Nlocal; i++)
      (Partition->my_global_ids)[i] = i;

    Partition->my_local_ids = (int *) malloc(sizeof(int)*Partition->Nglobal);
    for (i = 0; i < Partition->Nglobal; i++)
      (Partition->my_local_ids)[i] = i;
  }
  else {
    /* allocate space */
    Partition->Nlocal = Partition->Nglobal/2;
    Partition->my_global_ids = (int *) malloc(sizeof(int)*Partition->Nlocal);
    my_local_ids = (int *) malloc(sizeof(int)*Partition->Nglobal);
    Partition->my_local_ids = my_local_ids;

    /* initialize local ids to '-1' (not owned by me) */
    for (i = 0; i < Partition->Nglobal; i++)
      (Partition->my_local_ids)[i] = -1;

    /* set global ids: each processor owns half of each orientation
       block (first half = one orientation, second half = the other). */
    for (i = 0; i < Partition->Nlocal/2; i++) {
      (Partition->my_global_ids)[i] = i + comm->ML_mypid*Partition->Nlocal/2;
    }
    for (i = 0; i < Partition->Nlocal/2; i++) {
      (Partition->my_global_ids)[i + Partition->Nlocal/2] =
        Partition->Nlocal+ i + comm->ML_mypid*Partition->Nlocal/2;
    }

    /* set local ids of nonghost unknowns */
    for (i = 0; i < Partition->Nlocal; i++)
      my_local_ids[(Partition->my_global_ids)[i]] = i;

    /* set the ghost unknowns */
    /* (three strips of nx edges each; the global offsets differ between
       the two processors -- see the mirrored index arithmetic below) */
    Partition->Nghost = 3*nx;
    Nloc = Partition->Nlocal;
    if (comm->ML_mypid == 0) {
      for (i = 0; i < nx; i++) {
        my_local_ids[Nloc/2 + i      ] = Nloc + i;
        my_local_ids[Nloc - nx + i   ] = Nloc + i + nx;
        my_local_ids[2*Nloc - nx + i ] = Nloc + i + 2*nx;
      }
    }
    else {
      for (i = 0; i < nx; i++) {
        my_local_ids[Nloc/2 - nx + i   ] = Nloc + i;
        my_local_ids[ i                ] = Nloc + i + nx;
        my_local_ids[3*Nloc/2 - nx + i ] = Nloc + i + 2*nx;
      }
    }
  }
#endif
}
/* Assign unknowns to processors */
/*
 * Partition the NODE unknowns of the example grid across processors.
 *
 * Mirrors partition_edges(): with AZTEC defined, Aztec does a linear
 * partition; otherwise a hand-rolled partition supporting ONLY 1 or 2
 * processors is built.  Fills: mypid, nprocs, Nlocal, Nghost,
 * my_global_ids and my_local_ids (global -> local map, -1 = not visible,
 * entries >= Nlocal = ghosts; two strips of nx ghost nodes each).
 */
void partition_nodes(struct user_partition *Partition)
{
  int i, nx, Nloc;
  ML_Comm *comm;
  int *my_local_ids;

  Partition->Nghost = 0;
#ifdef AZTEC
  int proc_config[AZ_PROC_SIZE];
  AZ_set_proc_config(proc_config, COMMUNICATOR);

  AZ_input_update(NULL,&(Partition->Nlocal), &(Partition->my_global_ids),
                  proc_config, Partition->Nglobal, 1, AZ_linear);
  Partition->mypid = proc_config[AZ_node];
  Partition->nprocs= proc_config[AZ_N_procs];
#else
  /* nx = grid dimension (one node unknown per grid point). */
  nx = (int) sqrt( ((double) Partition->Nglobal) + .00001);

  ML_Comm_Create( &comm);
  Partition->mypid = comm->ML_mypid;
  Partition->nprocs= comm->ML_nprocs;
  Partition->Nghost = 2*nx;

  if (comm->ML_nprocs > 2) {
    if (comm->ML_mypid == 0)
      printf("This example only works for one or two processors\n");
    exit(1);
  }
  if ((comm->ML_nprocs == 2) && (Partition->Nglobal%2 == 1)) {
    if (comm->ML_mypid == 0)
      printf("Two processor case requires an even number of points.\n");
    exit(1);
  }

  if (comm->ML_nprocs == 1) {
    /* serial: own everything, identity global->local map */
    Partition->Nlocal = Partition->Nglobal;
    Partition->my_global_ids = (int *) malloc(sizeof(int)*Partition->Nlocal);
    for (i = 0; i < Partition->Nlocal; i++)
      (Partition->my_global_ids)[i] = i;

    Partition->my_local_ids = (int *) malloc(sizeof(int)*Partition->Nglobal);
    for (i = 0; i < Partition->Nglobal; i++)
      (Partition->my_local_ids)[i] = i;
  }
  else {
    /* two processors: contiguous halves of the global numbering */
    Partition->Nlocal = Partition->Nglobal/2;
    Partition->my_global_ids = (int *) malloc(sizeof(int)*Partition->Nlocal);
    my_local_ids = (int *) malloc(sizeof(int)*Partition->Nglobal);
    Partition->my_local_ids = my_local_ids;

    /* -1 marks global ids not visible on this processor */
    for (i = 0; i < Partition->Nglobal; i++)
      (Partition->my_local_ids)[i] = -1;

    for (i = 0; i < Partition->Nlocal; i++) {
      (Partition->my_global_ids)[i] = i + comm->ML_mypid*Partition->Nlocal;
      my_local_ids[(Partition->my_global_ids)[i]] = i;
    }

    /* ghost nodes: two strips of nx, with mirrored offsets per rank */
    Nloc = Partition->Nlocal;
    if (comm->ML_mypid == 0) {
      for (i = 0; i < nx; i++) {
        my_local_ids[Nloc + i        ] = Nloc + i;
        my_local_ids[2*Nloc - nx + i ] = Nloc + i + nx;
      }
    }
    else {
      for (i = 0; i < nx; i++) {
        my_local_ids[Nloc - nx + i ] = Nloc + i;
        my_local_ids[ i            ] = Nloc + i + nx;
      }
    }
  }
#endif
}
AZ_MATRIX *user_Ke_build(struct user_partition *Edge_Partition) { double dcenter, doffdiag, sigma = .0001; int ii,jj, horv, i, nx, global_id, nz_ptr, Nlocal_edges; /* Aztec matrix and temp variables */ int *Ke_bindx, *Ke_data_org = NULL; double *Ke_val; AZ_MATRIX *Ke_mat; int proc_config[AZ_PROC_SIZE], *cpntr = NULL; int *reordered_glob_edges = NULL, *reordered_edge_externs = NULL; Nlocal_edges = Edge_Partition->Nlocal; nx = (int) sqrt( ((double) Edge_Partition->Nglobal/2) + .00001); Ke_bindx = (int *) malloc((7*Nlocal_edges+1)*sizeof(int)); Ke_val = (double *) malloc((7*Nlocal_edges+1)*sizeof(double)); Ke_bindx[0] = Nlocal_edges+1; dcenter = 2 + 2.*sigma/((double) ( 3 * nx * nx)); doffdiag = -1 + sigma/((double) ( 6 * nx * nx)); /* Create a DMSR matrix with global column indices */ for (i = 0; i < Nlocal_edges; i++) { global_id = (Edge_Partition->my_global_ids)[i]; Ke_val[i] = dcenter; Ke_bindx[i+1] = Ke_bindx[i] + 6; inv2dindex(global_id, &ii, &jj, nx, &horv); nz_ptr = Ke_bindx[global_id]; if (horv == HORIZONTAL) { Ke_bindx[nz_ptr] = north2d(ii,jj,nx); Ke_val[nz_ptr++] = doffdiag; Ke_bindx[nz_ptr] = west2d(ii,jj,nx); Ke_val[nz_ptr++] = 1.; Ke_bindx[nz_ptr] = east2d(ii,jj,nx); Ke_val[nz_ptr++] = -1.; if (jj == 0) jj = nx-1; else jj--; Ke_bindx[nz_ptr] = west2d(ii,jj,nx); Ke_val[nz_ptr++] = -1.; Ke_bindx[nz_ptr] = south2d(ii,jj,nx); Ke_val[nz_ptr++] = doffdiag; Ke_bindx[nz_ptr] = east2d(ii,jj,nx); Ke_val[nz_ptr++] = 1.; } else { Ke_bindx[nz_ptr] = north2d(ii,jj,nx); Ke_val[nz_ptr++] = -1.; Ke_bindx[nz_ptr] = east2d(ii,jj,nx); Ke_val[nz_ptr++] = doffdiag; Ke_bindx[nz_ptr] = south2d(ii,jj,nx); Ke_val[nz_ptr++] = 1.; if (ii == 0) ii = nx-1; else ii--; Ke_bindx[nz_ptr] = west2d(ii,jj,nx); Ke_val[nz_ptr++] = doffdiag; Ke_bindx[nz_ptr] = south2d(ii,jj,nx); Ke_val[nz_ptr++] = -1.; Ke_bindx[nz_ptr] = north2d(ii,jj,nx); Ke_val[nz_ptr++] = 1.; } } /* Transform the global Aztec matrix into a local Aztec matrix. 
That is, */ /* replace global column indices by local indices and set up communication */ /* data structure 'Ke_data_org' that will be used for matvec's. */ AZ_set_proc_config(proc_config, COMMUNICATOR); AZ_transform_norowreordering(proc_config, &(Edge_Partition->needed_external_ids), Ke_bindx, Ke_val, Edge_Partition->my_global_ids, &reordered_glob_edges, &reordered_edge_externs, &Ke_data_org, Nlocal_edges, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); /* Convert old style Aztec matrix to newer style Aztec matrix */ Ke_mat = AZ_matrix_create( Nlocal_edges ); AZ_set_MSR(Ke_mat, Ke_bindx, Ke_val, Ke_data_org, 0, NULL, AZ_LOCAL); return(Ke_mat); }
int test_AZ_iterate_then_AZ_scale_f(Epetra_Comm& Comm, bool verbose) { (void)Comm; if (verbose) { cout << "testing AZ_iterate/AZ_scale_f with 'old' Aztec"<<endl; } int* proc_config = new int[AZ_PROC_SIZE]; #ifdef EPETRA_MPI AZ_set_proc_config(proc_config, MPI_COMM_WORLD); AZ_set_comm(proc_config, MPI_COMM_WORLD); #else AZ_set_proc_config(proc_config, 0); #endif int *external, *update_index, *external_index; int i, N = 5; AZ_MATRIX* Amat = NULL; int err = create_and_transform_simple_matrix(AZ_MSR_MATRIX, N, 3.0, proc_config, Amat, external, update_index, external_index); if (err != 0) { return(err); } int* options = new int[AZ_OPTIONS_SIZE]; double* params = new double[AZ_PARAMS_SIZE]; double* status = new double[AZ_STATUS_SIZE]; AZ_defaults(options, params); options[AZ_scaling] = AZ_sym_diag; if (verbose) { options[AZ_output] = AZ_warnings; } else { options[AZ_output] = 0; } int N_update = N+Amat->data_org[AZ_N_border]; double* x = new double[N_update]; double* b = new double[N_update]; for(i=0; i<N_update; ++i) { x[i] = 0.0; b[i] = 1.0; } AZ_PRECOND* Pmat = AZ_precond_create(Amat, AZ_precondition, NULL); AZ_SCALING* Scal = AZ_scaling_create(); options[AZ_keep_info] = 1; AZ_iterate(x, b, options, params, status, proc_config, Amat, Pmat, Scal); //now set options[AZ_pre_calc] = AZ_reuse and try to call AZ_scale_f. options[AZ_pre_calc] = AZ_reuse; AZ_scale_f(AZ_SCALE_MAT_RHS_SOL, Amat, options, b, x, proc_config, Scal); AZ_scaling_destroy(&Scal); AZ_precond_destroy(&Pmat); destroy_matrix(Amat); delete [] x; delete [] b; delete [] options; delete [] params; delete [] status; delete [] proc_config; free(update_index); free(external); free(external_index); return(0); }
int main(int argc, char *argv[])

/* Set up and solve a test problem defined in the
   subroutine init_matrix_vector_structures().

   Interactive Aztec example driver: node 0 prompts for the application
   problem, size, equations per grid point and partitioning option,
   broadcasts the choices, builds the matrix, then solves with AZ_solve().

   NOTE(review): 'application', 'N_grid_pts' and 'num_PDE_eqns' are
   file-scope variables defined elsewhere in this file.  Also note the
   final 'return(1)' -- a nonzero exit status conventionally signals
   failure to the shell; confirm whether any harness depends on it before
   changing.

   Author: Ray Tuminaro, Div 1422, Sandia National Labs
   date:   11/10/94

 ******************************************************************************/

{
  double *ax,*x;          /* ax is the right hand side for the test problem.
                             x is the approximate solution obtained using
                             AZTEC.                                          */
  int    i,input_option;

  /* See Aztec User's Guide for more information   */
  /* on the variables that follow.                 */

  int    proc_config[AZ_PROC_SIZE];/* Processor information:                 */
                                   /*  proc_config[AZ_node] = node name     */
                                   /*  proc_config[AZ_N_procs] = # of nodes */

  int    options[AZ_OPTIONS_SIZE]; /* Array used to select solver options.  */
  double params[AZ_PARAMS_SIZE];   /* User selected solver paramters.       */
  int    *data_org;                /* Array to specify data layout          */
  double status[AZ_STATUS_SIZE];   /* Information returned from AZ_solve()  */
                                   /* indicating success or failure.        */

  int    *update,                  /* vector elements (global index) updated*/
                                   /* on this processor.                    */
         *external;                /* vector elements needed by this node.  */

  int    *update_index;            /* ordering of update[] and external[]   */
  int    *extern_index;            /* locally on this processor. For example*/
                                   /* update_index[i] gives the index       */
                                   /* location of the vector element which  */
                                   /* has the global index 'update[i]'.     */

  /* Sparse matrix to be solved is stored in these arrays. */
  int    *rpntr,*cpntr,*indx, *bpntr, *bindx;
  double *val;

  /* ----------------------- execution begins --------------------------------*/

  /* Put the # of processors, the node id,    */
  /* and an MPI communicator into proc_config */

#ifdef AZTEC_MPI
  MPI_Init(&argc,&argv);
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI );
#endif

  /*
   * Read and broadcast: problem choice, problem size, equations per grid point
   * and how we wish to initialize 'update'.
   */

  if (proc_config[AZ_node] == 0) {
    (void) printf("enter the application problem number\n");
    (void) printf(" = 0: Finite Difference MSR Poisson on n x n x n grid.\n");
    (void) printf(" = 1: Finite Difference VBR Poisson on n x n x n grid.\n");
    (void) printf(" = 2: Finite Element MSR Poisson\n");
    (void) printf(" = 3: Use AZ_read_msr_matrix() to read file '.data'\n");
    scanf("%d",&application);
    if ((application < 0) || (application > 3)){
      (void) fprintf(stderr, "Error: Invalid application (%d) selected\n",
                     application);
      exit(1);
    }

    if (application == 0) {
      (void) printf("\nNote: To try other problems, change add_row_3D()");
      (void) printf("\n in create_msr_matrix() to add_row_5pt() or");
      (void) printf("\n add_row_9pt().\n\n");
    }
    if (application == 2) {
      (void) printf("\nNote: Input files are provided for 1 finite element ");
      (void) printf("\n problem. This problem can be run on either 1 ");
      (void) printf("\n or 4 processors. To run on 1 processor, copy ");
      (void) printf("\n the file fe_1proc_grid_0 to fe_grid_0. To run on");
      (void) printf("\n 4 processors, copy the files fe_4proc_grid_k to ");
      (void) printf("\n fe_grid_k (k = 0,1,2,3). In both cases enter 197");
      (void) printf("\n when prompted for the number of grid points and ");
      (void) printf("\n linear when prompted for the partitioning!!!\n\n");
    }

    if (application == 3)
      (void) printf("enter the total number of matrix rows\n");
    else
      (void) printf("enter the total number of grid points\n");

    scanf("%d", &N_grid_pts);

    num_PDE_eqns = 1;
    if (application < 2) {
      (void) printf("enter the number of equations per grid point\n");
      scanf("%d", &num_PDE_eqns);
    }

    (void) printf("partition option \n");
    (void) printf(" = %d: linear\n", AZ_linear);
    (void) printf(" = %d: update pts from file '.update'\n", AZ_file);
    if (application < 2)
      (void) printf(" = %d: box decomposition\n", AZ_box);
    scanf("%d", &input_option);
  }

  /* ship node 0's choices to every other processor */
  AZ_broadcast((char *) &N_grid_pts  , sizeof(int), proc_config, AZ_PACK);
  AZ_broadcast((char *) &num_PDE_eqns, sizeof(int), proc_config, AZ_PACK);
  AZ_broadcast((char *) &input_option, sizeof(int), proc_config, AZ_PACK);
  AZ_broadcast((char *) &application , sizeof(int), proc_config, AZ_PACK);
  AZ_broadcast((char *) NULL         , 0          , proc_config, AZ_SEND);

  /* create an application matrix for AZTEC */

  init_matrix_vector_structures(proc_config, &update_index, &update,
                                &data_org, &external, &extern_index,
                                input_option, &val, &bindx, &indx, &bpntr,
                                &rpntr, &cpntr);

  /* initialize AZTEC options */

  init_options(options,params);

  if ( (i = AZ_check_input(data_org, options, params, proc_config) ) < 0) {
    AZ_print_error(i);
    exit(-1);
  }

  /* Matrix fill for finite element example (see Aztec User's Guide). */

  if (application == 2)
    fill_fe_matrix(val, bindx, update, update_index, external, extern_index,
                   data_org);

  /* Initialize right hand side and initial guess                  */
  /* NOTE: STORAGE ALLOCATED FOR 'x' IS GREATER THAN THE NUMBER    */
  /*       OF MATRIX ROWS PER PROCESSOR. 'x' INCLUDES SPACE FOR    */
  /*       EXTERNAL (GHOST) ELEMENTS. THUS, THE SIZE OF 'x' IS     */
  /*       'data_org[AZ_N_internal] + data_org[AZ_N_border] +      */
  /*       data_org[AZ_N_external]'.                               */

  init_guess_and_rhs(update_index, update, &x, &ax, data_org, val, indx,
                     bindx, rpntr, cpntr, bpntr, proc_config);

  /* update[], update_index[], external[], extern_index[] are used to map
   * between Aztec's ordering of the equations and the user's ordering
   * (see the User's guide for more details). If these mapping arrays
   * are not needed by the user, they can be deallocated as they are not
   * used by AZ_solve().
   */

  free((void *) update);
  free((void *) update_index);
  free((void *) external);
  free((void *) extern_index);

  /* solve the system of equations using ax as the right hand side */

  AZ_solve(x,ax, options, params, indx, bindx, rpntr, cpntr, bpntr, val,
           data_org, status, proc_config);

  /* Free allocated memory */

  free((void *) x);
  free((void *) ax);
  free((void *) indx);
  free((void *) bindx);
  free((void *) rpntr);
  free((void *) cpntr);
  free((void *) bpntr);
  free((void *) val);
  free((void *) data_org);

#ifdef AZTEC_MPI
  MPI_Finalize();
#endif

  return(1);
}
int goma_init_(dbl *time1, int *nnodes, int *nelems,
               int *nnv_in, int *nev_in, int *i_soln, int *i_post)

/*
 * Initial main driver for GOMA when called as a library from a coupled
 * (JAS) host program rather than from a stand-alone main().
 *
 * Derived from a (1/93) release of the rf_salsa program by
 * John Shadid (1421), Scott Hutchinson (1421), Harry Moffat (1421),
 * 12/3/92.  Updated 2/93 - 10/99 by R. Schunk, P. A. Sackinger,
 * R. R. Rao, R. A. Cairncross, and I. Gates (continuation support).
 *
 * Side effects: initializes MPI (guarded by first_goma_call), reads the
 * hardwired input file "input", reads the EXODUS II mesh, allocates the
 * global problem-description / material / assembly structures, and
 * writes preliminary output to the EXODUS database.
 *
 * Out parameters (counts returned to the calling program):
 *   time1   -- start time: MPI_Wtime() in parallel builds, otherwise
 *              seconds into the current UTC year
 *   nnodes  -- number of nodes in the mesh read
 *   nelems  -- number of elements in the mesh read
 *   nnv_in  -- number of nodal variables Goma will import
 *   nev_in  -- number of element variables Goma will import
 *   i_soln  -- number of solution variables Goma will export
 *   i_post  -- number of post-processing variables Goma will export
 *
 * Returns 0 on success, or -1 if the EXODUS II mesh could not be read
 * (after MPI_Finalize in parallel builds).
 */
{
  /* Local Declarations */

  double time_start, total_time;   /* timing variables (total_time is not
                                    * used in this routine) */
#ifndef PARALLEL
  struct tm *tm_ptr;               /* additional serial timing variables */
  time_t the_time;
#endif

  int error;                       /* NOTE(review): return codes from the MPI
                                    * and MPE calls below are stored here but
                                    * never checked. */
  int i;
  int j;                           /* unused; retained from the original driver */

  static int first_goma_call=TRUE; /* NOTE(review): never set to FALSE, so the
                                    * MPI_Init block below would run again on a
                                    * second call -- confirm single-call use. */
  char **ptmp;                     /* unused; was for the legal-notice loop */
  static const char *yo="goma_init";

  struct Command_line_command **clc=NULL; /* point to command line structure */
  int           nclc = 0;                 /* number of command line commands */

/********************** BEGIN EXECUTION ***************************************/

#ifdef PARALLEL
  /*
   * First call only: fabricate an argv for MPI_Init, since the host code
   * owns the real command line.
   */
  if( first_goma_call ) {
    Argc = 1;
    Argv = (char **) smalloc( Argc*sizeof(char *) );
    Argv[0] = (char *) yo;
    MPI_Init(&Argc, &Argv);  /*PRS will have to fix this. Too late TAB already did. */
  }
  time_start = MPI_Wtime();
#else /* PARALLEL */
  (void) time(&the_time);
  tm_ptr = gmtime(&the_time);
  /* Seconds since the start of the (UTC) year, built from day-of-year. */
  time_start = (double) ( tm_ptr->tm_sec
                + 60. * ( 60. * ( tm_ptr->tm_yday * 24. + tm_ptr->tm_hour )
                + tm_ptr->tm_min ) );
#endif /* PARALLEL */
  *time1 = time_start;

  time_goma_started = time_start;

#ifdef PARALLEL
  /*
   * Determine the parallel processing status: are we "one of many"
   * or the only process?
   */
  error = MPI_Comm_size(MPI_COMM_WORLD, &Num_Proc);
  error = MPI_Comm_rank(MPI_COMM_WORLD, &ProcID);

  /*
   * Setup a default Proc_config so we can use utility routines
   * from Aztec.
   */
  AZ_set_proc_config(Proc_Config, MPI_COMM_WORLD);

  /* set the output limit flag if need be */
  if( Num_Proc > DP_PROC_PRINT_LIMIT ) Unlimited_Output = FALSE;

#ifdef HAVE_MPE_H
  error = MPE_Init_log();
#endif /* HAVE_MPE_H */

  Dim = 0;    /* for any hypercube legacy code... */
#endif /* PARALLEL */

#ifndef PARALLEL
  Dim      = 0;
  ProcID   = 0;
  Num_Proc = 1;
#endif /* PARALLEL */

  /*
   * HKM - Change the ieee exception handling based on the machine and
   * the level of debugging/speed desired.  This call currently causes
   * core dumps for floating point exceptions.
   */
  handle_ieee();

  log_msg("--------------");
  log_msg("GOMA begins...");

#ifdef USE_CGM
  cgm_initialize();
#endif

  /*
   * PRS: the command-line preprocessing the stand-alone driver performs
   * here (translate_command_line(), legal-notice printing, clc allocation)
   * is disabled for the JAS coupled version; the input file name is
   * hardwired to "input" instead.
   */
  strcpy(Input_File, "input");

  /*
   * Allocate the uniform problem description structure and
   * the problem description structures on all processors.
   */
  error = pd_alloc();
  EH(error, "pd_alloc problem");

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier after pd_alloc\n", ProcID);
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif

  log_msg("Allocating mp, gn, ...");
  error = mp_alloc();
  EH(error, "mp_alloc problem");
  error = gn_alloc();
  EH(error, "gn_alloc problem");
  error = ve_alloc();
  EH(error, "ve_alloc problem");
  error = elc_alloc();
  EH(error, "elc_alloc problem");
  error = elc_rs_alloc();
  EH(error, "elc_alloc problem");  /* NOTE(review): message says "elc_alloc" but
                                    * this guards elc_rs_alloc() -- confirm. */
  error = cr_alloc();
  EH(error, "cr_alloc problem");
  error = evp_alloc();
  EH(error, "evp_alloc problem");
  error = tran_alloc();
  EH(error, "tran_alloc problem");
  error = libio_alloc();
  EH(error, "libio_alloc problem");
  error = eigen_alloc();
  EH(error, "eigen_alloc problem");
  error = cont_alloc();
  EH(error, "cont_alloc problem");
  error = loca_alloc();
  EH(error, "loca_alloc problem");
  error = efv_alloc();
  EH(error, "efv_alloc problem");

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before read_input_file()\n", ProcID);
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif

  /*
   * Read ASCII input file, data files, related exodusII FEM databases
   * (processor 0 only; results are broadcast to the others below).
   */
  if ( ProcID == 0 ) {
    log_msg("Reading input file ...");
    read_input_file(clc, nclc);
  }

#ifdef PARALLEL
  /*
   * Make sure the input file was successully processed before moving on.
   */
  check_parallel_error("Input file error");

  /*
   * Dynamically allocated data from the input-file scan is broadcast in
   * stages so the other processors can allocate space to hold it:
   * raven = sizing info, ark = main body, dove = remaining arrays.
   */
#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before noahs_raven()\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
  noahs_raven();

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before MPI_Bcast of Noahs_Raven\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Raven->new_type, 0, MPI_COMM_WORLD);

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier after Bcast/before raven_landing()\n",
          ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
  /*
   * Get the other processors ready to handle ark data.
   */
  raven_landing();

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before noahs_ark()\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
  /*
   * This is the main body of communicated information, including some
   * whose sizes were determined because of advanced legwork by the raven.
   */
  noahs_ark();
  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Ark->new_type, 0, MPI_COMM_WORLD);

  /*
   * Chemkin was initialized on processor zero during the input file
   * process.  Now, distribute it to all processors.
   */
#ifdef USE_CHEMKIN
  if (Chemkin_Needed) {
    chemkin_initialize_mp();
  }
#endif

  /*
   * Once the ark has landed, there are additional things that will need
   * to be sent by dove.  Example: BC_Types[]->u-BC arrays.
   */
  ark_landing();

  noahs_dove();
  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Dove->new_type, 0, MPI_COMM_WORLD);
#endif          /* End of ifdef PARALLEL */

  /*
   * Step through the packed geometry-creation commands and create
   * geometry as we go (possibly reading an ACIS .sat file).
   */
#ifdef USE_CGM
  create_cgm_geometry();
#endif

  /*
   * Allocate skeleton structures holding the EXODUS II finite element
   * database information and the Distributed Processing information;
   * interior arrays get allocated in the rd_exoII and rd_dpi routines.
   * Free those interior arrays before freeing the major pointers.
   */
  EXO_ptr = alloc_struct_1(Exo_DB, 1);
  init_exo_struct(EXO_ptr);
  DPI_ptr = alloc_struct_1(Dpi, 1);
  init_dpi_struct(DPI_ptr);

  log_msg("Reading mesh from EXODUS II file...");
  error = read_mesh_exoII(EXO_ptr, DPI_ptr);

  /*
   * Missing files on any processor are detected at a lower level,
   * forcing a return to this level.  Shut down now if any of the
   * exodus files weren't found.
   */
  if (error < 0) {
#ifdef PARALLEL
    MPI_Finalize();
#endif
    return(-1);
  }

  /*
   * All of the MPI_Type_commit() calls behind the dove, ark and raven
   * really allocated memory.  Free it now that the initial information
   * has been communicated.
   */
#ifdef PARALLEL
  MPI_Type_free(&(Noahs_Raven->new_type));
  MPI_Type_free(&(Noahs_Ark->new_type));
  MPI_Type_free(&(Noahs_Dove->new_type));
#endif

  /*
   * Setup the rest of the Problem Description structure that depends on
   * the mesh that was read in from the EXODUS II file.  Note that memory
   * allocation and some setup has already been performed in mm_input().
   */
  error = setup_pd();
  EH( error, "Problem setting up Problem_Description.");

  /*
   * Check whether we need the large elasto-plastic global tensors
   * and allocate them if so.
   */
  error = evp_tensor_alloc(EXO_ptr);
  EH( error, "Problems setting up evp tensors");

  /*
   * Now that we know what kind of problem we're solving and the mesh
   * information, allocate space for elemental assembly structures.
   */
#ifdef DEBUG
  DPRINTF(stderr, "About to assembly_alloc()...\n");
#endif
  log_msg("Assembly allocation...");
  error = assembly_alloc(EXO_ptr);
  EH( error, "Problem from assembly_alloc");

  if (Debug_Flag) {
    DPRINTF(stderr, "%s: setting up EXODUS II output files...\n", yo);
  }

  /*
   * Not critical - just niceties: capped-verbiage QA/info stamps on
   * the output database.
   */
  add_qa_stamp(EXO_ptr);
  add_info_stamp(EXO_ptr);

#ifdef DEBUG
  fprintf(stderr, "added qa and info stamps\n");
#endif

  /*
   * For parallel runs the output file names must be multiplexed
   * (e.g. out.exoII -> out_3of15.exoII) before use.
   */
  if ( Num_Proc > 1 ) {
    multiname(ExoFileOut,     ProcID, Num_Proc);
    multiname(Init_GuessFile, ProcID, Num_Proc);

    if ( strcmp( Soln_OutFile, "" ) != 0 ) {
      multiname(Soln_OutFile, ProcID, Num_Proc);
    }

    if( strcmp( ExoAuxFile, "" ) != 0 ) {
      multiname(ExoAuxFile, ProcID, Num_Proc);
    }

    if( efv->Num_external_field != 0 ) {
      for( i=0; i<efv->Num_external_field; i++ ) {
        multiname(efv->file_nm[i], ProcID, Num_Proc);
      }
    }
  }

  /*
   * Preprocess the exodus mesh:
   *  -> allocate First_Elem_Side_BC_Array and First_Elem_Edge_BC_Array
   *  -> determine the Unique_Element_Types[] array
   */
#ifdef DEBUG
  fprintf(stderr, "pre_process()...\n");
#endif
  log_msg("Pre processing of mesh...");
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
  pre_process(EXO_ptr);

  /*
   * Load key indeces in the bfd prototype basis function structures and
   * make sure each active eqn/vbl has a bf[v] that points to the right
   * bfd[] (needs the unique element types found by pre_process).
   */
#ifdef DEBUG
  fprintf(stderr, "bf_init()...\n");
#endif
  log_msg("Basis function initialization...");
  error = bf_init(EXO_ptr);
  EH( error, "Problem from bf_init");

  /*
   * check for parallel errors before continuing
   */
  check_parallel_error("Error encountered in problem setup");

  /*
   * Allocate space for each communication exchange description.
   */
#ifdef PARALLEL
#ifdef DEBUG
  fprintf(stderr, "P_%d: Parallel cx allocation\n", ProcID);
#endif
  if (DPI_ptr->num_neighbors > 0) {
    cx = alloc_struct_1(Comm_Ex, DPI_ptr->num_neighbors);
    Request = alloc_struct_1(MPI_Request,
                             Num_Requests * DPI_ptr->num_neighbors);
    Status = alloc_struct_1(MPI_Status,
                            Num_Requests * DPI_ptr->num_neighbors);
  }
#endif

  /*
   * SET UP THE PROBLEM: node-based structures, boundary-condition
   * handling, unknown maps, and the fast interprocessor update pattern.
   */
  (void) setup_problem(EXO_ptr, DPI_ptr);

  /*
   * check for parallel errors before continuing
   */
  check_parallel_error("Error encountered in problem setup");

  /*
   * WRITE OUT INITIAL INFO TO EXODUS FILE.  Only needed when the output
   * file differs from the input mesh file.  Temporarily renumber nodes
   * and elements to be 1-based for the write, then return to the
   * 0-based indexing that is more convenient in C.
   */
  if (strcmp(ExoFile, ExoFileOut)) {
#ifdef DEBUG
    fprintf(stderr, "1-base; wr_mesh; 0-base\n");
#endif
    one_base(EXO_ptr);
    wr_mesh_exo(EXO_ptr, ExoFileOut, 0);
    zero_base(EXO_ptr);

    /*
     * If running on a distributed computer, augment the plain finite
     * element information of EXODUS with the description of how this
     * piece fits into the global problem.
     */
    if (Num_Proc > 1) {
#ifdef PARALLEL
#ifdef DEBUG
      fprintf(stderr, "P_%d at barrier before wr_dpi()\n", ProcID);
      fprintf(stderr, "P_%d ExoFileOut = \"%s\"\n", ProcID, ExoFileOut);
      error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif
      wr_dpi(DPI_ptr, ExoFileOut, 0);
    }
  }

  if (Num_Import_NV > 0 || Num_Import_EV > 0) printf
    (" Goma will import %d nodal and %d element variables.\n",
     Num_Import_NV, Num_Import_EV);
  if (Num_Export_XS > 0 || Num_Export_XP > 0) printf
    (" Goma will export %d solution and %d post-processing variables.\n",
     Num_Export_XS, Num_Export_XP);

  /* Return counts to calling program */
  *nnodes = EXO_ptr->num_nodes;
  *nelems = EXO_ptr->num_elems;
  *nnv_in = Num_Import_NV;
  *nev_in = Num_Import_EV;
  *i_soln = Num_Export_XS;
  *i_post = Num_Export_XP;

  return (0); /* Back to animas*/
}
int main(int argc, char *argv[]) { int proc_config[AZ_PROC_SIZE];/* Processor information. */ int options[AZ_OPTIONS_SIZE]; /* Array used to select solver options. */ double params[AZ_PARAMS_SIZE]; /* User selected solver paramters. */ double status[AZ_STATUS_SIZE]; /* Information returned from AZ_solve(). */ int *bindx_real; /* index and values arrays for MSR matrices */ double *val_real, *val_imag; int * update; /* List of global eqs owned by the processor */ double *x_real, *b_real; /* initial guess/solution, RHS */ double *x_imag, *b_imag; unsigned int N_local; /* Number of equations on this node */ double residual; /* Used for computing residual */ double *xx_real, *xx_imag, *xx; /* Known exact solution */ int myPID, nprocs; AZ_MATRIX *Amat_real; /* Real matrix structure */ AZ_MATRIX *Amat; /* Komplex matrix to be solved. */ AZ_PRECOND *Prec; /* Komplex preconditioner */ double *x, *b; /* Komplex Initial guess and RHS */ int i; /******************************/ /* First executable statement */ /******************************/ #ifdef AZTEC_MPI MPI_Init(&argc,&argv); #endif /* Get number of processors and the name of this processor */ #ifdef AZTEC_MPI AZ_set_proc_config(proc_config,MPI_COMM_WORLD); #else AZ_set_proc_config(proc_config,0); #endif nprocs = proc_config[AZ_N_procs]; myPID = proc_config[AZ_node]; printf("proc %d of %d is alive\n",myPID, nprocs); /* Define two real diagonal matrices. 
Will use as real and imaginary parts */ /* Get the number of local equations from the command line */ if (argc!=2) { if (myPID==0) printf("Usage: %s number_of_local_equations\n",argv[0]); exit(1); } N_local = atoi(argv[1]); const unsigned int N_local_max = 1000000; if (N_local > N_local_max) { if (myPID==0) printf("No more than %d local equation allowed\n", N_local_max); exit(1); } /* Need N_local+1 elements for val/bindx arrays */ val_real = malloc((N_local+1)*sizeof(double)); val_imag = malloc((N_local+1)*sizeof(double)); /* bindx_imag is not needed since real/imag have same pattern */ bindx_real = malloc((N_local+1)*sizeof(int)); update = malloc((N_local+1)*sizeof(int)); /* Malloc equation update list */ b_real = malloc((N_local+1)*sizeof(double)); /* Malloc x and b arrays */ b_imag = malloc((N_local+1)*sizeof(double)); x_real = malloc((N_local+1)*sizeof(double)); x_imag = malloc((N_local+1)*sizeof(double)); xx_real = malloc((N_local+1)*sizeof(double)); xx_imag = malloc((N_local+1)*sizeof(double)); for (i=0; i<N_local; i++) { val_real[i] = 10 + i/(N_local/10); /* Some very fake diagonals */ val_imag[i] = 10 - i/(N_local/10); /* Should take exactly 20 GMRES steps */ x_real[i] = 0.0; /* Zero initial guess */ x_imag[i] = 0.0; xx_real[i] = 1.0; /* Let exact solution = 1 */ xx_imag[i] = 0.0; /* Generate RHS to match exact solution */ b_real[i] = val_real[i]*xx_real[i] - val_imag[i]*xx_imag[i]; b_imag[i] = val_imag[i]*xx_real[i] + val_real[i]*xx_imag[i]; /* All bindx[i] have same value since no off-diag terms */ bindx_real[i] = N_local + 1; /* each processor owns equations myPID*N_local through myPID*N_local + N_local - 1 */ update[i] = myPID*N_local + i; } bindx_real[N_local] = N_local+1; /* Need this last index */ /* Register Aztec Matrix for Real Part, only imaginary values are needed*/ Amat_real = AZ_matrix_create(N_local); AZ_set_MSR(Amat_real, bindx_real, val_real, NULL, N_local, update, AZ_GLOBAL); /* initialize AZTEC options */ AZ_defaults(options, params); 
options[AZ_solver] = AZ_gmres; /* Use CG with no preconditioning */ options[AZ_precond] = AZ_none; options[AZ_kspace] = 21; options[AZ_max_iter] = 21; params[AZ_tol] = 1.e-14; /**************************************************************/ /* Construct linear system. Form depends on input parameters */ /**************************************************************/ /**************************************************************/ /* Method 1: Construct A, x, and b in one call. */ /* Useful if using A,x,b only one time. Equivalent to Method 2*/ /**************************************************************/ AZK_create_linsys_ri2k (x_real, x_imag, b_real, b_imag, options, params, proc_config, Amat_real, val_imag, &x, &b, &Amat); /**************************************************************/ /* Method 2: Construct A, x, and b in separate calls. */ /* Useful for having more control over the construction. */ /* Note that the matrix must be constructed first. */ /**************************************************************/ /* Uncomment these three calls and comment out the above call AZK_create_matrix_ri2k (options, params, proc_config, Amat_real, val_imag, &Amat); AZK_create_vector_ri2k(options, params, proc_config, Amat, x_real, x_imag, &x); AZK_create_vector_ri2k(options, params, proc_config, Amat, b_real, b_imag, &b); */ /**************************************************************/ /* Build exact solution vector. 
*/ /* Check residual of init guess and exact solution */ /**************************************************************/ AZK_create_vector_ri2k(options, params, proc_config, Amat, xx_real, xx_imag, &xx); residual = AZK_residual_norm(x, b, options, params, proc_config, Amat); if (proc_config[AZ_node]==0) printf("\n\n\nNorm of residual using initial guess = %12.4g\n",residual); residual = AZK_residual_norm(xx, b, options, params, proc_config, Amat); AZK_destroy_vector(options, params, proc_config, Amat, &xx); if (proc_config[AZ_node]==0) printf("\n\n\nNorm of residual using exact solution = %12.4g\n",residual); /**************************************************************/ /* Create preconditioner */ /**************************************************************/ AZK_create_precon(options, params, proc_config, x, b, Amat, &Prec); /**************************************************************/ /* Solve linear system using Aztec. */ /**************************************************************/ AZ_iterate(x, b, options, params, status, proc_config, Amat, Prec, NULL); /**************************************************************/ /* Extract solution. */ /**************************************************************/ AZK_extract_solution_k2ri(options, params, proc_config, Amat, Prec, x, x_real, x_imag); /**************************************************************/ /* Destroy Preconditioner. */ /**************************************************************/ AZK_destroy_precon (options, params, proc_config, Amat, &Prec); /**************************************************************/ /* Destroy linear system. 
*/ /**************************************************************/ AZK_destroy_linsys (options, params, proc_config, &x, &b, &Amat); if (proc_config[AZ_node]==0) { printf("True residual norm squared = %22.16g\n",status[AZ_r]); printf("True scaled res norm squared = %22.16g\n",status[AZ_scaled_r]); printf("Computed res norm squared = %22.16g\n",status[AZ_rec_r]); } /* Print comparison between known exact and computed solution */ {double sum = 0.0; for (i=0; i<N_local; i++) sum += fabs(x_real[i]-xx_real[i]); for (i=0; i<N_local; i++) sum += fabs(x_imag[i]-xx_imag[i]); printf("Processor %d: Difference between exact and computed solution = %12.4g\n", proc_config[AZ_node],sum); } /* Free memory allocated */ free((void *) val_real ); free((void *) bindx_real ); free((void *) val_imag ); free((void *) update ); free((void *) b_real ); free((void *) b_imag ); free((void *) x_real ); free((void *) x_imag ); free((void *) xx_real ); free((void *) xx_imag ); AZ_matrix_destroy(&Amat_real); #ifdef AZTEC_MPI MPI_Finalize(); #endif return 0 ; }
int main(int argc, char *argv[])
{
  // Total number of nodes in the problem; 'Nnodes' must be a
  // perfect square (the example uses a 32x32 node grid).
  int Nnodes=32*32;

  struct user_partition Edge_Partition = {NULL, NULL,0,0,NULL,0,0,0},
                        Node_Partition = {NULL, NULL,0,0,NULL,0,0,0};

  int proc_config[AZ_PROC_SIZE];

#ifdef ML_MPI
  MPI_Init(&argc,&argv);
#endif
  AZ_set_proc_config(proc_config, COMMUNICATOR);
  ML_Comm* comm;
  ML_Comm_Create(&comm);

  // Twice as many global edges as nodes in this example.
  Node_Partition.Nglobal = Nnodes;
  Edge_Partition.Nglobal = Node_Partition.Nglobal*2;

  user_partition_nodes(&Node_Partition);
  user_partition_edges(&Edge_Partition, &Node_Partition);

  // Build the edge (Ke) and node (Kn) Aztec matrices for the problem.
  AZ_MATRIX * AZ_Ke = user_Ke_build(&Edge_Partition);
  AZ_MATRIX * AZ_Kn = user_Kn_build(&Node_Partition);

  // convert (put wrappers) from Aztec matrices to ML_Operator's
  ML_Operator * ML_Ke, * ML_Kn, * ML_Tmat;

  ML_Ke = ML_Operator_Create( comm );
  ML_Kn = ML_Operator_Create( comm );

  AZ_convert_aztec_matrix_2ml_matrix(AZ_Ke,ML_Ke,proc_config);
  AZ_convert_aztec_matrix_2ml_matrix(AZ_Kn,ML_Kn,proc_config);

  // T operator connecting the node and edge spaces.
  ML_Tmat = user_T_build(&Edge_Partition, &Node_Partition,
                         ML_Kn, comm);

  // Convert the ML operators to Epetra_CrsMatrix.
  // NOTE(review): these pointers are uninitialized here and are assumed
  // to be allocated/assigned inside ML_Operator2EpetraCrsMatrix
  // (passed by reference) -- confirm against that routine's signature.
  Epetra_CrsMatrix * Epetra_Kn, * Epetra_Ke, * Epetra_T;

  int MaxNumNonzeros;
  double CPUTime;
  ML_Operator2EpetraCrsMatrix(ML_Ke,Epetra_Ke, MaxNumNonzeros,
                              true,CPUTime);
  ML_Operator2EpetraCrsMatrix(ML_Kn, Epetra_Kn,MaxNumNonzeros,
                              true,CPUTime);
  ML_Operator2EpetraCrsMatrix(ML_Tmat,Epetra_T,MaxNumNonzeros,
                              true,CPUTime);

  // Multilevel preconditioner parameters, starting from the
  // "maxwell" defaults.
  Teuchos::ParameterList MLList;
  ML_Epetra::SetDefaults("maxwell", MLList);

  MLList.set("ML output", 0);
  MLList.set("aggregation: type", "Uncoupled");
  MLList.set("coarse: max size", 30);
  MLList.set("aggregation: threshold", 0.0);
  MLList.set("coarse: type", "Amesos-KLU");

  ML_Epetra::MultiLevelPreconditioner * MLPrec =
    new ML_Epetra::MultiLevelPreconditioner(*Epetra_Ke, *Epetra_T,
                                            *Epetra_Kn, MLList);

  // Random initial guess, constant right-hand side.
  Epetra_Vector LHS(Epetra_Ke->DomainMap()); LHS.Random();
  Epetra_Vector RHS(Epetra_Ke->DomainMap()); RHS.PutScalar(1.0);

  Epetra_LinearProblem Problem(Epetra_Ke,&LHS,&RHS);

  AztecOO solver(Problem);
  solver.SetPrecOperator(MLPrec);

  // CG with condition number estimation; report every 32 iterations.
  solver.SetAztecOption(AZ_solver, AZ_cg_condnum);
  solver.SetAztecOption(AZ_output, 32);
  solver.Iterate(500, 1e-8);

  // ========================= //
  // compute the real residual //
  // ========================= //

  Epetra_Vector RHScomp(Epetra_Ke->DomainMap());
  int ierr;
  ierr = Epetra_Ke->Multiply(false, LHS, RHScomp);
  assert(ierr==0);

  Epetra_Vector resid(Epetra_Ke->DomainMap());
  // resid = 1.0*RHS - 1.0*RHScomp
  ierr = resid.Update(1.0, RHS, -1.0, RHScomp, 0.0);
  assert(ierr==0);

  double residual;
  ierr = resid.Norm2(&residual);
  assert(ierr==0);
  if (proc_config[AZ_node] == 0) {
    std::cout << std::endl;
    std::cout << "==> Residual = " << residual << std::endl;
    std::cout << std::endl;
  }

  // =============== //
  // C L E A N U P   //
  // =============== //

  delete MLPrec;    // destroy phase prints out some information

  delete Epetra_Kn;
  delete Epetra_Ke;
  delete Epetra_T;

  ML_Operator_Destroy( &ML_Ke );
  ML_Operator_Destroy( &ML_Kn );
  ML_Comm_Destroy( &comm );

  if (Edge_Partition.my_local_ids != NULL) free(Edge_Partition.my_local_ids);
  if (Node_Partition.my_local_ids != NULL) free(Node_Partition.my_local_ids);
  if (Node_Partition.my_global_ids != NULL) free(Node_Partition.my_global_ids);
  if (Edge_Partition.my_global_ids != NULL) free(Edge_Partition.my_global_ids);
  if (Node_Partition.needed_external_ids != NULL)
    free(Node_Partition.needed_external_ids);
  if (Edge_Partition.needed_external_ids != NULL)
    free(Edge_Partition.needed_external_ids);

  if (AZ_Ke!= NULL) {
    AZ_free(AZ_Ke->bindx);
    AZ_free(AZ_Ke->val);
    AZ_free(AZ_Ke->data_org);
    AZ_matrix_destroy(&AZ_Ke);
  }
  if (AZ_Kn!= NULL) {
    AZ_free(AZ_Kn->bindx);
    AZ_free(AZ_Kn->val);
    AZ_free(AZ_Kn->data_org);
    AZ_matrix_destroy(&AZ_Kn);
  }

  ML_Operator_Destroy(&ML_Tmat);

  // NOTE(review): this failure path exits before MPI_Finalize() --
  // confirm whether MPI_Abort/Finalize is wanted here under ML_MPI.
  if (residual > 1e-5) {
    std::cout << "`MultiLevelPreconditioner_Maxwell.exe' failed!" << std::endl;
    exit(EXIT_FAILURE);
  }

#ifdef ML_MPI
  MPI_Finalize();
#endif

  if (proc_config[AZ_node] == 0)
    std::cout << "`MultiLevelPreconditioner_Maxwell.exe' passed!" << std::endl;

  exit(EXIT_SUCCESS);
}
void continue_problem (Comm_Ex *cx, /* array of communications structures */ Exo_DB *exo, /* ptr to the finite element mesh database */ Dpi *dpi) /* distributed processing information */ { int *ija=NULL; /* column pointer array */ double *a=NULL; /* nonzero array */ double *a_old=NULL; /* nonzero array */ double *x=NULL; /* solution vector */ int iAC; /* COUNTER */ double *x_AC = NULL; /* SOLUTION VECTOR OF EXTRA UNKNOWNS */ double *x_AC_old=NULL; /* old SOLUTION VECTOR OF EXTRA UNKNOWNS */ double *x_AC_dot=NULL; int *ija_attic=NULL; /* storage for external dofs */ int eb_indx, ev_indx; /* * variables for path traversal */ double *x_old=NULL; /* old solution vector */ double *x_older=NULL; /* older solution vector */ double *x_oldest=NULL; /* oldest solution vector saved */ double *xdot=NULL; /* current path derivative of soln */ double *xdot_old=NULL; double *x_update=NULL; double *x_sens=NULL; /* solution sensitivity */ double *x_sens_temp=NULL; /* MMH thinks we need another one, so * that when the solution is updated * on a failure, it doesn't use the * last computed x_sens b/c that might * be crappy. We should use the last * known good one... I haven't done * the same thing with x_sens_p. 
*/ double **x_sens_p=NULL; /* solution sensitivity for parameters */ int num_pvector=0; /* number of solution sensitivity vectors */ #ifdef COUPLED_FILL struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL}; #else /* COUPLED_FILL */ struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL, NULL}; #endif /* COUPLED_FILL */ /* sl_util_structs.h */ double *resid_vector=NULL; /* residual */ double *resid_vector_sens=NULL;/* residual sensitivity */ double *scale=NULL; /* scale vector for modified newton */ int *node_to_fill = NULL; int n; /* total number of path steps attempted */ int ni; /* total number of nonlinear solves */ int nt; /* total number of successful path steps */ int path_step_reform; /* counter for jacobian reformation stride */ int converged; /* success or failure of Newton iteration */ int success_ds; /* success or failure of path step */ int i, nprint=0, num_total_nodes; int numProcUnknowns; int const_delta_s, step_print; double path_print, i_print; double path, /* Current value (should have solution here) */ path1; /* New value (would like to get solution here) */ double delta_s, delta_s_new, delta_s_old, delta_s_older, delta_s_oldest; double delta_t; double theta=0.0; double damp; double eps; double lambda, lambdaEnd; double timeValueRead = 0.0; /* * ALC management variables */ int aldALC, /* direction of continuation, == -1 => beginning value is greater than ending value. */ alqALC; /* is -1 when we're on our last step. 
*/ /* * Other local variables */ int error, err, is_steady_state, inewton; int *gindex = NULL, gsize; int *p_gsize=NULL; double *gvec=NULL; double ***gvec_elem=NULL; double err_dbl; FILE *cl_aux=NULL, *file=NULL; struct Results_Description *rd=NULL; int tnv; /* total number of nodal variables and kinds */ int tev; /* total number of elem variables and kinds */ int tnv_post; /* total number of nodal variables and kinds for post processing */ int tev_post; /* total number of elem variables and kinds for post processing */ int iUC; /* User-defined continuation condition index */ int max_unk_elem, one, three; /* variables used as mf_setup arguments*/ unsigned int matrix_systems_mask; double evol_local=0.0; #ifdef PARALLEL double evol_global=0.0; #endif static const char yo[]="continue_problem"; /* * BEGIN EXECUTION */ #ifdef DEBUG fprintf(stderr, "%s() begins...\n", yo); #endif is_steady_state = TRUE; p_gsize = &gsize; /* * set aside space for gather global vectors to print to exoII file * note: this is temporary * * For 2D prototype problem: allocate space for T, dx, dy arrays */ if( strlen(Soln_OutFile) ) { file = fopen(Soln_OutFile, "w"); if (file == NULL) { DPRINTF(stderr, "%s: opening soln file for writing\n", yo); EH(-1, "\t"); } } #ifdef PARALLEL check_parallel_error("Soln output file error"); #endif /* * Some preliminaries to help setup EXODUS II database output. 
*/ #ifdef DEBUG fprintf(stderr, "cnt_nodal_vars() begins...\n"); #endif /* * tnv_post is calculated in load_nodal_tkn * tev_post is calculated in load_elem_tkn */ tnv = cnt_nodal_vars(); tev = cnt_elem_vars(); #ifdef DEBUG fprintf(stderr, "Found %d total primitive nodal variables to output.\n", tnv); fprintf(stderr, "Found %d total primitive elem variables to output.\n", tev); #endif if (tnv < 0) { DPRINTF(stderr, "%s:\tbad tnv.\n", yo); EH(-1, "\t"); } rd = (struct Results_Description *) smalloc(sizeof(struct Results_Description)); if (rd == NULL) EH(-1, "Could not grab Results Description."); (void) memset((void *) rd, 0, sizeof(struct Results_Description)); rd->nev = 0; /* number element variables in results */ rd->ngv = 0; /* number global variables in results */ rd->nhv = 0; /* number history variables in results */ rd->ngv = 5; /* number global variables in results see load_global_var_info for names*/ error = load_global_var_info(rd, 0, "CONV"); error = load_global_var_info(rd, 1, "NEWT_IT"); error = load_global_var_info(rd, 2, "MAX_IT"); error = load_global_var_info(rd, 3, "CONVRATE"); error = load_global_var_info(rd, 4, "MESH_VOLUME"); /* load nodal types, kinds, names */ error = load_nodal_tkn(rd, &tnv, &tnv_post); if (error) { DPRINTF(stderr, "%s: problem with load_nodal_tkn()\n", yo); EH(-1,"\t"); } /* load elem types, names */ error = load_elem_tkn(rd, exo, tev, &tev_post); if (error) { DPRINTF(stderr, "%s: problem with load_elem_tkn()\n", yo); EH(-1,"\t"); } #ifdef PARALLEL check_parallel_error("Results file error"); #endif /* * Write out the names of the nodal variables that we will be sending to * the EXODUS II output file later. 
*/ #ifdef DEBUG fprintf(stderr, "wr_result_prelim() starts...\n", tnv); #endif gvec_elem = (double ***) smalloc ( (exo->num_elem_blocks)*sizeof(double **)); for (i = 0; i < exo->num_elem_blocks; i++) gvec_elem[i] = (double **) smalloc ( (tev + tev_post)*sizeof(double *)); wr_result_prelim_exo(rd, exo, ExoFileOut, gvec_elem ); #ifdef DEBUG fprintf(stderr, "P_%d: wr_result_prelim_exo() ends...\n", ProcID, tnv); #endif /* * This gvec workhorse transports output variables as nodal based vectors * that are gather from the solution vector. Note: it is NOT a global * vector at all and only carries this processor's nodal variables to * the exodus database. */ asdv(&gvec, Num_Node); /* * Allocate space and manipulate for all the nodes that this processor * is aware of... */ num_total_nodes = dpi->num_universe_nodes; numProcUnknowns = NumUnknowns + NumExtUnknowns; /* allocate memory for Volume Constraint Jacobian */ if ( nAC > 0) for(iAC=0;iAC<nAC;iAC++) augc[iAC].d_evol_dx = (double*) malloc(numProcUnknowns*sizeof(double)); asdv(&resid_vector, numProcUnknowns); asdv(&resid_vector_sens, numProcUnknowns); asdv(&scale, numProcUnknowns); for (i = 0; i < NUM_ALSS; i++) { ams[i] = alloc_struct_1(struct Aztec_Linear_Solver_System, 1); } #ifdef MPI AZ_set_proc_config( ams[0]->proc_config, MPI_COMM_WORLD ); #ifndef COUPLED_FILL if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, MPI_COMM_WORLD ); #endif /* not COUPLED_FILL */ #else /* MPI */ AZ_set_proc_config( ams[0]->proc_config, 0 ); #ifndef COUPLED_FILL if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, 0 ); #endif /* not COUPLED_FILL */ #endif /* MPI */ /* * allocate space for and initialize solution arrays */ asdv(&x, numProcUnknowns); asdv(&x_old, numProcUnknowns); asdv(&x_older, numProcUnknowns); asdv(&x_oldest, numProcUnknowns); asdv(&xdot, numProcUnknowns); asdv(&xdot_old, numProcUnknowns); asdv(&x_update, numProcUnknowns); asdv(&x_sens, numProcUnknowns); asdv(&x_sens_temp, numProcUnknowns); /* * 
Initialize solid inertia flag */ set_solid_inertia(); /* * FRIENDLY COMMAND LINE EQUIV */ if( ProcID == 0 ) { cl_aux = fopen("goma-cl.txt", "w+"); fprintf(cl_aux, "goma -a -i input "); fprintf(cl_aux, "-cb %10.6e ", cont->BegParameterValue); fprintf(cl_aux, "-ce %10.6e ", cont->EndParameterValue); fprintf(cl_aux, "-cd %10.6e ", cont->Delta_s0); fprintf(cl_aux, "-cn %d ", cont->MaxPathSteps); fprintf(cl_aux, "-cmin %10.6e ", cont->Delta_s_min); fprintf(cl_aux, "-cmax %10.6e ", cont->Delta_s_max); fprintf(cl_aux, "-cm %d ", Continuation); fprintf(cl_aux, "-ct %d ", cont->upType); switch (cont->upType) { case 1: /* BC TYPE */ fprintf(cl_aux, "-c_bc %d ", cont->upBCID); fprintf(cl_aux, "-c_df %d ", cont->upDFID); break; case 2: /* MAT TYPE */ fprintf(cl_aux, "-c_mn %d ", cont->upMTID+1); fprintf(cl_aux, "-c_mp %d ", cont->upMPID); break; case 3: /* AC TYPE */ fprintf(cl_aux, "-c_ac %d ", cont->upBCID); fprintf(cl_aux, "-c_df %d ", cont->upDFID); break; case 4: /* USER MAT TYPE */ fprintf(cl_aux, "-c_mn %d ", cont->upMTID+1); fprintf(cl_aux, "-c_mp %d ", cont->upMPID); fprintf(cl_aux, "-c_md %d ", cont->upMDID); break; case 5: /* USER-DEFINED FUNCTION TYPE */ /* NOTE: This is not available via the command line! */ break; case 6: /* ANGULAR CONTINUATION TYPE */ /* NOTE: This requires LOCA and is not available via the command line! 
*/ EH(-1, "Angular continuation is available only in LOCA!"); break; default: fprintf(stderr, "%s: Bad cont->upType, %d\n", yo, cont->upType); EH(-1,"Bad cont->upType"); break; /* duh */ } fprintf(cl_aux, "\n"); fclose(cl_aux); } #ifdef PARALLEL check_parallel_error("Continuation setup error"); #endif /* * FIRST ORDER CONTINUATION */ lambda = cont->BegParameterValue; lambdaEnd = cont->EndParameterValue; if (lambdaEnd > lambda) aldALC = +1; else aldALC = -1; delta_s_new = 0.0; Delta_s0 = cont->Delta_s0; Delta_s_min = cont->Delta_s_min; Delta_s_max = cont->Delta_s_max; MaxPathSteps = cont->MaxPathSteps; PathMax = cont->PathMax; eps = cont->eps; if (Delta_s0 < 0.0 ) { Delta_s0 = -Delta_s0; const_delta_s = 1; } else const_delta_s = 0; damp = 1.0; path = path1 = lambda; if (Debug_Flag && ProcID == 0) { fprintf(stderr,"MaxPathSteps: %d \tlambdaEnd: %f\n", MaxPathSteps, lambdaEnd); fprintf(stderr,"continuation in progress\n"); } nprint = 0; if (Delta_s0 > Delta_s_max) Delta_s0 = Delta_s_max; delta_s = delta_s_old = delta_s_older = Delta_s0; delta_t = 0.0; tran->delta_t = 0.0; /*for Newmark-Beta terms in Lagrangian Solid*/ /* Call prefront (or mf_setup) if necessary */ if (Linear_Solver == FRONT) { /* Also got to define these because it wants pointers to these numbers */ max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE; one = 1; three = 3; /* NOTE: We need a overall flag in the vn_glob struct that tells whether FULL_DG is on anywhere in domain. This assumes only one material. See sl_front_setup for test. that test needs to be in the input parser. */ if(vn_glob[0]->dg_J_model == FULL_DG) max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE + 4*vn_glob[0]->modes*4*MDE; #ifdef PARALLEL if (Num_Proc > 1) EH(-1, "Whoa. 
No front allowed with nproc>1"); check_parallel_error("Front solver not allowed with nprocs>1"); #endif #ifdef HAVE_FRONT err = mf_setup(&exo->num_elems, &NumUnknowns, &max_unk_elem, &three, &one, exo->elem_order_map, fss->el_proc_assign, fss->level, fss->nopdof, fss->ncn, fss->constraint, front_scratch_directory, &fss->ntra); EH(err,"problems in frontal setup "); #else EH(-1,"Don't have frontal solver compiled and linked in"); #endif } /* * if computing parameter sensitivities, allocate space for solution * sensitivity vectors */ for(i=0;i<nn_post_fluxes_sens;i++) { num_pvector=MAX(num_pvector,pp_fluxes_sens[i]->vector_id); } for(i=0;i<nn_post_data_sens;i++) { num_pvector=MAX(num_pvector,pp_data_sens[i]->vector_id); } if((nn_post_fluxes_sens + nn_post_data_sens) > 0) { num_pvector++; num_pvector = MAX(num_pvector,2); x_sens_p = Dmatrix_birth(num_pvector,numProcUnknowns); } else x_sens_p = NULL; if (nAC > 0) { asdv(&x_AC, nAC); asdv(&x_AC_old, nAC); asdv(&x_AC_dot, nAC); } /* * ADJUST NATURAL PARAMETER */ update_parameterC(0, path1, x, xdot, x_AC, delta_s, cx, exo, dpi); /* Allocate sparse matrix */ if( strcmp( Matrix_Format, "msr" ) == 0) { log_msg("alloc_MSR_sparse_arrays..."); alloc_MSR_sparse_arrays(&ija, &a, &a_old, 0, node_to_fill, exo, dpi); /* * An attic to store external dofs column names is needed when * running in parallel. */ alloc_extern_ija_buffer(num_universe_dofs, num_internal_dofs+num_boundary_dofs, ija, &ija_attic); /* * Any necessary one time initialization of the linear * solver package (Aztec). */ ams[JAC]->bindx = ija; ams[JAC]->val = a; ams[JAC]->belfry = ija_attic; ams[JAC]->val_old = a_old; /* * These point to nowhere since we're using MSR instead of VBR * format. 
*/ ams[JAC]->indx = NULL; ams[JAC]->bpntr = NULL; ams[JAC]->rpntr = NULL; ams[JAC]->cpntr = NULL; ams[JAC]->npn = dpi->num_internal_nodes + dpi->num_boundary_nodes; ams[JAC]->npn_plus = dpi->num_internal_nodes + dpi->num_boundary_nodes + dpi->num_external_nodes; ams[JAC]->npu = num_internal_dofs+num_boundary_dofs; ams[JAC]->npu_plus = num_universe_dofs; ams[JAC]->nnz = ija[num_internal_dofs+num_boundary_dofs] - 1; ams[JAC]->nnz_plus = ija[num_universe_dofs]; } else if( strcmp( Matrix_Format, "vbr" ) == 0) { log_msg("alloc_VBR_sparse_arrays..."); alloc_VBR_sparse_arrays (ams[JAC], exo, dpi); ija_attic = NULL; ams[JAC]->belfry = ija_attic; a = ams[JAC]->val; if( !save_old_A ) a_old = ams[JAC]->val_old = NULL; } else if ( strcmp( Matrix_Format, "front") == 0 ) { /* Don't allocate any sparse matrix space when using front */ ams[JAC]->bindx = NULL; ams[JAC]->val = NULL; ams[JAC]->belfry = NULL; ams[JAC]->val_old = NULL; ams[JAC]->indx = NULL; ams[JAC]->bpntr = NULL; ams[JAC]->rpntr = NULL; ams[JAC]->cpntr = NULL; } else EH(-1,"Attempted to allocate unknown sparse matrix format"); init_vec(x, cx, exo, dpi, x_AC, nAC, &timeValueRead); /* if read ACs, update data floats */ if (nAC > 0) if(augc[0].iread == 1) { for(iAC=0 ; iAC<nAC ; iAC++) { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi);} } vzero(numProcUnknowns, &x_sens[0]); vzero(numProcUnknowns, &x_sens_temp[0]); /* * set boundary conditions on the initial conditions */ nullify_dirichlet_bcs(); find_and_set_Dirichlet(x, xdot, exo, dpi); exchange_dof(cx, dpi, x); dcopy1(numProcUnknowns,x,x_old); dcopy1(numProcUnknowns,x_old,x_older); dcopy1(numProcUnknowns,x_older,x_oldest); if(nAC > 0) dcopy1(nAC,x_AC, x_AC_old); /* * initialize the counters for when to print out data */ path_print = path1; step_print = 1; matrix_systems_mask = 1; log_msg("sl_init()..."); sl_init(matrix_systems_mask, ams, exo, dpi, cx); /* * Make sure the solver was properly initialized on all processors. 
*/ #ifdef PARALLEL check_parallel_error("Solver initialization problems"); #endif ams[JAC]->options[AZ_keep_info] = 1; /* * set the number of successful path steps to zero */ nt = 0; /* * LOOP THROUGH PARAMETER UNTIL MAX NUMBER * OF STEPS SURPASSED */ for(n = 0; n < MaxPathSteps; n++) { alqALC = 1; switch (aldALC) { case -1: /* REDUCING PARAMETER DIRECTION */ if (path1 <= lambdaEnd) { DPRINTF(stderr,"\n\t ******** LAST PATH STEP!\n"); alqALC = -1; path1 = lambdaEnd; delta_s = path-path1; } break; case +1: /* RISING PARAMETER DIRECTION */ if (path1 >= lambdaEnd) { DPRINTF(stderr,"\n\t ******** LAST PATH STEP!\n"); alqALC = -1; path1 = lambdaEnd; delta_s = path1-path; } break; default: DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC); EH(-1,"\t"); break; /* duh */ } #ifdef PARALLEL check_parallel_error("Bad aldALC"); #endif /* * ADJUST NATURAL PARAMETER */ update_parameterC(0, path1, x, xdot, x_AC, delta_s, cx, exo, dpi); /* * IF STEP CHANGED, REDO FIRST ORDER PREDICTION */ if(alqALC == -1) { dcopy1(NumUnknowns,x_old,x); switch (Continuation) { case ALC_ZEROTH: break; case ALC_FIRST: switch (aldALC) { case -1: v1add(NumUnknowns, &x[0], -delta_s, &x_sens[0]); break; case +1: v1add(NumUnknowns, &x[0], +delta_s, &x_sens[0]); break; default: DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC); EH(-1,"\t"); break; /* duh */ } break; default: DPRINTF(stderr, "%s: Bad Continuation, %d\n", yo, Continuation); EH(-1,"\t"); break; /* duh */ } } #ifdef PARALLEL check_parallel_error("Bad Continuation"); #endif find_and_set_Dirichlet (x, xdot, exo, dpi); exchange_dof(cx, dpi, x); if (ProcID == 0) { fprintf(stderr, "\n\t----------------------------------"); switch (Continuation) { case ALC_ZEROTH: DPRINTF(stderr, "\n\tZero Order Continuation:"); break; case ALC_FIRST: DPRINTF(stderr, "\n\tFirst Order Continuation:"); break; default: DPRINTF(stderr, "%s: Bad Continuation, %d\n", yo, Continuation); EH(-1,"\t"); break; /* duh */ } DPRINTF(stderr, "\n\tStep number: %4d of %4d 
(max)", n+1, MaxPathSteps); DPRINTF(stderr, "\n\tAttempting solution at:"); switch (cont->upType) { case 1: /* BC */ case 3: /* AC */ DPRINTF(stderr, "\n\tBCID=%3d DFID=%5d", cont->upBCID, cont->upDFID); break; case 2: /* MT */ DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d", cont->upMTID, cont->upMPID); break; case 4: /* UM */ DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d MDID=%3d", cont->upMTID, cont->upMPID, cont->upMDID); break; /* This case requires an inner switch block */ case 5: /* UF */ for (iUC=0; iUC<nUC; iUC++) { switch (cpuc[iUC].Type) { case 1: /* BC */ case 3: /* AC */ DPRINTF(stderr, "\n\tBCID=%3d DFID=%5d", cpuc[iUC].BCID, cpuc[iUC].DFID); break; case 2: /* MT */ DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d", cpuc[iUC].MTID, cpuc[iUC].MPID); break; case 4: /* UM */ DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d MDID=%3d", cpuc[iUC].MTID, cpuc[iUC].MPID, cpuc[iUC].MDID); break; default: DPRINTF(stderr, "%s: Bad user continuation type, %d\n", yo, cont->upType); EH(-1,"\t"); break; } DPRINTF(stderr, " Parameter= % 10.6e delta_s= %10.6e", cpuc[iUC].value, (cpuc[iUC].value-cpuc[iUC].old_value) ); } break; default: DPRINTF(stderr, "%s: Bad cont->upType, %d\n", yo, cont->upType); EH(-1,"\t"); break; /* duh */ } if (cont->upType != 5) { DPRINTF(stderr, " Parameter= % 10.6e delta_s= %10.6e", path1, delta_s); } } #ifdef PARALLEL check_parallel_error("Bad cont->upType"); #endif ni = 0; do { #ifdef DEBUG DPRINTF(stderr, "%s: starting solve_nonlinear_problem\n", yo); #endif err = solve_nonlinear_problem(ams[JAC], x, delta_t, theta, x_old, x_older, xdot, xdot_old, resid_vector, x_update, scale, &converged, &nprint, tev, tev_post, NULL, rd, gindex, p_gsize, gvec, gvec_elem, path1, exo, dpi, cx, 0, &path_step_reform, is_steady_state, x_AC, x_AC_dot, path1, resid_vector_sens, x_sens_temp, x_sens_p, NULL); #ifdef DEBUG fprintf(stderr, "%s: returned from solve_nonlinear_problem\n", yo); #endif if (err == -1) converged = 0; inewton = err; if (converged) { if (Write_Intermediate_Solutions == 0) { 
#ifdef DEBUG DPRINTF(stderr, "%s: write_solution call WIS\n", yo); #endif write_solution(ExoFileOut, resid_vector, x, x_sens_p, x_old, xdot, xdot_old, tev, tev_post, NULL, rd, gindex, p_gsize, gvec, gvec_elem, &nprint, delta_s, theta, path1, NULL, exo, dpi); #ifdef DEBUG fprintf(stderr, "%s: write_solution end call WIS\n", yo); #endif } #ifdef PARALLEL check_parallel_error("Error writing exodusII file"); #endif /* * PRINT OUT VALUES OF EXTRA UNKNOWNS * FROM AUGMENTING CONDITIONS */ if (nAC > 0) { DPRINTF(stderr, "\n------------------------------\n"); DPRINTF(stderr, "Augmenting Conditions: %4d\n", nAC); DPRINTF(stderr, "Number of extra unknowns: %4d\n\n", nAC); for (iAC = 0; iAC < nAC; iAC++) { if (augc[iAC].Type == AC_USERBC) { DPRINTF(stderr, "\tBC[%4d] DF[%4d] = %10.6e\n", augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]); } else if (augc[iAC].Type == AC_USERMAT || augc[iAC].Type == AC_FLUX_MAT ) { DPRINTF(stderr, "\tMT[%4d] MP[%4d] = %10.6e\n", augc[iAC].MTID, augc[iAC].MPID, x_AC[iAC]); } else if(augc[iAC].Type == AC_VOLUME) { evol_local = augc[iAC].evol; #ifdef PARALLEL if( Num_Proc > 1 ) { MPI_Allreduce( &evol_local, &evol_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } evol_local = evol_global; #endif DPRINTF(stderr, "\tMT[%4d] VC[%4d]=%10.6e Param=%10.6e\n", augc[iAC].MTID, augc[iAC].VOLID, evol_local, x_AC[iAC]); } else if(augc[iAC].Type == AC_POSITION) { evol_local = augc[iAC].evol; #ifdef PARALLEL if( Num_Proc > 1 ) { MPI_Allreduce( &evol_local, &evol_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } evol_local = evol_global; #endif DPRINTF(stderr, "\tMT[%4d] XY[%4d]=%10.6e Param=%10.6e\n", augc[iAC].MTID, augc[iAC].VOLID, evol_local, x_AC[iAC]); } else if(augc[iAC].Type == AC_FLUX) { DPRINTF(stderr, "\tBC[%4d] DF[%4d]=%10.6e\n", augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]); } } } /* * INTEGRATE FLUXES, FORCES */ for (i = 0; i < nn_post_fluxes; i++) err_dbl = evaluate_flux (exo, dpi, pp_fluxes[i]->ss_id, pp_fluxes[i]->flux_type , pp_fluxes[i]->flux_type_name 
, pp_fluxes[i]->blk_id , pp_fluxes[i]->species_number, pp_fluxes[i]->flux_filenm, pp_fluxes[i]->profile_flag, x,xdot,NULL, delta_s,path1,1); /* * COMPUTE FLUX, FORCE SENSITIVITIES */ for (i = 0; i < nn_post_fluxes_sens; i++) err_dbl = evaluate_flux_sens (exo, dpi, pp_fluxes_sens[i]->ss_id, pp_fluxes_sens[i]->flux_type , pp_fluxes_sens[i]->flux_type_name , pp_fluxes_sens[i]->blk_id , pp_fluxes_sens[i]->species_number, pp_fluxes_sens[i]->sens_type, pp_fluxes_sens[i]->sens_id, pp_fluxes_sens[i]->sens_flt, pp_fluxes_sens[i]->sens_flt2, pp_fluxes_sens[i]->vector_id, pp_fluxes_sens[i]->flux_filenm, pp_fluxes_sens[i]->profile_flag, x,xdot,x_sens_p,delta_s,path1,1); /* * Compute global volumetric quantities */ for (i = 0; i < nn_volume; i++ ) { evaluate_volume_integral(exo, dpi, pp_volume[i]->volume_type, pp_volume[i]->volume_name, pp_volume[i]->blk_id, pp_volume[i]->species_no, pp_volume[i]->volume_fname, pp_volume[i]->params, NULL, x, xdot, delta_s, path1, 1); } } /* end of if converged block */ /* * INCREMENT COUNTER */ ni++; /* * DID IT CONVERGE ? * IF NOT, REDUCE STEP SIZE AND TRY AGAIN */ if (!converged) { if (ni > 5) { puts(" "); puts(" ************************************"); puts(" W: Did not converge in Newton steps."); puts(" Find better initial guess. "); puts(" ************************************"); /* This needs to have a return value of 0, indicating * success, for the continuation script to not treat this * as a failed command. */ exit(0); } /* * ADJUST STEP SIZE */ DPRINTF(stderr, "\n\tFailed to converge:\n"); delta_s *= 0.5; switch (aldALC) { case -1: path1 = path - delta_s; break; case +1: path1 = path + delta_s; break; default: DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC); EH(-1,"\t"); break; /* duh */ } #ifdef PARALLEL check_parallel_error("Bad aldALC"); #endif /* * RESET */ alqALC = 1; /* If necessary, don't call this the last step... 
*/ DPRINTF(stderr, "\n\tDecreasing step-length to %10.6e.\n", delta_s); if (delta_s < Delta_s_min) { puts("\n X: C step-length reduced below minimum."); puts("\n Program terminated.\n"); /* This needs to have a return value of 0, indicating * success, for the continuation script to not treat this * as a failed command. */ exit(0); } #ifdef PARALLEL check_parallel_error("\t"); #endif /* * ADJUST NATURAL PARAMETER */ dcopy1(numProcUnknowns, x_old, x); update_parameterC(0, path1, x, xdot, x_AC, delta_s, cx, exo, dpi); /* * GET ZERO OR FIRST ORDER PREDICTION */ switch (Continuation) { case ALC_ZEROTH: break; case ALC_FIRST: switch (aldALC) { case -1: v1add(numProcUnknowns, &x[0], -delta_s, &x_sens[0]); break; case +1: v1add(numProcUnknowns, &x[0], +delta_s, &x_sens[0]); break; default: DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC); EH(-1,"\t"); break; /* duh */ } break; default: DPRINTF(stderr, "%s: Bad Continuation, %d\n", yo, Continuation); EH(-1,"\t"); break; /* duh */ } #ifdef PARALLEL check_parallel_error("Bad Continuation"); #endif /* MMH: Needed to put this in, o/w it may find that the * solution and residual HAPPEN to satisfy the convergence * criterion for the next newton solve... 
*/ find_and_set_Dirichlet(x, xdot, exo, dpi); exchange_dof(cx, dpi, x); /* Should be doing first order prediction on ACs * but for now, just reset the AC variables */ if( nAC > 0) { dcopy1(nAC, x_AC_old, x_AC); for(iAC=0 ; iAC<nAC ; iAC++) { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi);} } } /* end of !converged */ } while (converged == 0); /* * CONVERGED */ nt++; if( Continuation == ALC_ZEROTH ) { DPRINTF(stderr, "\n\tStep accepted, parameter = %10.6e\n", path1); } else { DPRINTF(stderr, "\tStep accepted, parameter = %10.6e\n", path1); } /* * check path step error, if too large do not enlarge path step */ if ((ni == 1) && (n != 0) && (!const_delta_s)) { delta_s_new = path_step_control(num_total_nodes, delta_s, delta_s_old, x, eps, &success_ds, cont->use_var_norm, inewton); if (delta_s_new > Delta_s_max) delta_s_new = Delta_s_max; } else { success_ds = 1; delta_s_new = delta_s; } /* * determine whether to print out the data or not */ i_print = 0; if (nt == step_print) { i_print = 1; step_print += cont->print_freq; } if (alqALC == -1) i_print = 1; if (i_print) { error = write_ascii_soln(x, resid_vector, numProcUnknowns, x_AC, nAC, path1, file); if (error) { DPRINTF(stdout, "%s: error writing ASCII soln file\n", yo); } if (Write_Intermediate_Solutions == 0 ) { write_solution(ExoFileOut, resid_vector, x, x_sens_p, x_old, xdot, xdot_old, tev, tev_post, NULL, rd, gindex, p_gsize, gvec, gvec_elem, &nprint, delta_s, theta, path1, NULL, exo, dpi); nprint++; } } /* * backup old solutions * can use previous solutions for prediction one day */ dcopy1(numProcUnknowns,x_older,x_oldest); dcopy1(numProcUnknowns,x_old,x_older); dcopy1(numProcUnknowns, x, x_old); dcopy1(numProcUnknowns, x_sens_temp, x_sens); delta_s_oldest = delta_s_older; delta_s_older = delta_s_old; delta_s_old = delta_s; delta_s = delta_s_new; if( nAC > 0) dcopy1(nAC, x_AC, x_AC_old); /* * INCREMENT/DECREMENT PARAMETER */ path = path1; switch (aldALC) { case -1: path1 = path - delta_s; break; case +1: 
path1 = path + delta_s; break; default: DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC); EH(-1,"\t"); break; /* duh */ } #ifdef PARALLEL check_parallel_error("Bad aldALC"); #endif /* * ADJUST NATURAL PARAMETER */ update_parameterC(0, path1, x, xdot, x_AC, delta_s, cx, exo, dpi); /* display_parameterC(path1, x, xdot, delta_s, cx, exo, dpi); */ /* * GET FIRST ORDER PREDICTION */ switch (Continuation) { case ALC_ZEROTH: break; case ALC_FIRST: switch (aldALC) { case -1: v1add(numProcUnknowns, &x[0], -delta_s, &x_sens[0]); break; case +1: v1add(numProcUnknowns, &x[0], +delta_s, &x_sens[0]); break; default: DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC); EH(-1,"\t"); break; /* duh */ } break; } #ifdef PARALLEL check_parallel_error("Bad aldALC"); #endif /* * CHECK END CONTINUATION */ /* if (alqALC == -1) alqALC = 0; else alqALC = 1; */ if (alqALC == -1) { DPRINTF(stderr,"\n\n\t I will continue no more!\n\t No more continuation for you!\n"); goto free_and_clear; } } /* for(n = 0; n < MaxPathSteps; n++) */ if(n == MaxPathSteps && aldALC * (lambdaEnd - path) > 0) { DPRINTF(stderr, "\n\tFailed to reach end of hunt in maximum number of successful steps (%d).\n\tSorry.\n", MaxPathSteps); /* EH(-1,"\t"); */ } #ifdef PARALLEL check_parallel_error("Continuation error"); #endif /* * DONE CONTINUATION */ free_and_clear: /* * Transform the node point coordinates according to the * displacements and write out all the results using the * displaced coordinates. Set the displacement field to * zero, too. 
*/ if (Anneal_Mesh) { #ifdef DEBUG fprintf(stderr, "%s: anneal_mesh()...\n", yo); #endif err = anneal_mesh(x, tev, tev_post, NULL, rd, path1, exo, dpi); #ifdef DEBUG fprintf(stderr, "%s: anneal_mesh()-done\n", yo); #endif EH(err, "anneal_mesh() bad return."); } #ifdef PARALLEL check_parallel_error("Trouble annealing mesh"); #endif /* * Free a bunch of variables that aren't needed anymore */ safer_free((void **) &ROT_Types); safer_free((void **) &node_to_fill); safer_free( (void **) &resid_vector); safer_free( (void **) &resid_vector_sens); safer_free( (void **) &scale); safer_free( (void **) &x); if (nAC > 0) { safer_free( (void **) &x_AC); safer_free( (void **) &x_AC_old); safer_free( (void **) &x_AC_dot); } safer_free( (void **) &x_old); safer_free( (void **) &x_older); safer_free( (void **) &x_oldest); safer_free( (void **) &xdot); safer_free( (void **) &xdot_old); safer_free( (void **) &x_update); safer_free( (void **) &x_sens); safer_free( (void **) &x_sens_temp); if((nn_post_data_sens+nn_post_fluxes_sens) > 0) Dmatrix_death(x_sens_p,num_pvector,numProcUnknowns); for(i = 0; i < MAX_NUMBER_MATLS; i++) { for(n = 0; n < MAX_MODES; n++) { safer_free((void **) &(ve_glob[i][n]->gn)); safer_free((void **) &(ve_glob[i][n])); } safer_free((void **) &(vn_glob[i])); } sl_free(matrix_systems_mask, ams); for (i = 0; i < NUM_ALSS; i++) safer_free((void **) &(ams[i])); safer_free( (void **) &gvec); i = 0; for ( eb_indx = 0; eb_indx < exo->num_elem_blocks; eb_indx++ ) { for ( ev_indx = 0; ev_indx < rd->nev; ev_indx++ ) { if (exo->elem_var_tab[i++] == 1) { safer_free((void **) &(gvec_elem [eb_indx][ev_indx])); } } safer_free((void **) &(gvec_elem [eb_indx])); } safer_free( (void **) &gvec_elem); if (cpcc != NULL) safer_free( (void **) &cpcc); safer_free( (void **) &rd); safer_free( (void **) &Local_Offset); safer_free( (void **) &Dolphin); if (file != NULL) fclose(file); return; } /* END of routine continue_problem */
int main(int argc, char *argv[]) { int num_PDE_eqns=1, N_levels=3, nsmooth=2; int leng, level, N_grid_pts, coarsest_level; int leng1,leng2; /* See Aztec User's Guide for more information on the */ /* variables that follow. */ int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE]; double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE]; /* data structure for matrix corresponding to the fine grid */ double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time; AZ_MATRIX *Amat; AZ_PRECOND *Pmat = NULL; ML *ml; FILE *fp; int i, j, Nrigid, *garbage, nblocks=0, *blocks = NULL, *block_pde=NULL; struct AZ_SCALING *scaling; ML_Aggregate *ag; double *mode, *rigid=NULL, alpha; char filename[80]; int one = 1; int proc,nprocs; char pathfilename[100]; #ifdef ML_MPI MPI_Init(&argc,&argv); /* get number of processors and the name of this processor */ AZ_set_proc_config(proc_config, MPI_COMM_WORLD); proc = proc_config[AZ_node]; nprocs = proc_config[AZ_N_procs]; #else AZ_set_proc_config(proc_config, AZ_NOT_MPI); proc = 0; nprocs = 1; #endif if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/inputfile",argv[1]); ML_Reader_ReadInput(pathfilename, &context); } else context = (struct reader_context *) ML_allocate(sizeof(struct reader_context)); AZ_broadcast((char *) context, sizeof(struct reader_context), proc_config, AZ_PACK); AZ_broadcast((char *) NULL , 0 , proc_config, AZ_SEND); N_levels = context->N_levels; printf("N_levels %d\n",N_levels); nsmooth = context->nsmooth; num_PDE_eqns = context->N_dofPerNode; printf("num_PDE_eqns %d\n",num_PDE_eqns); ML_Set_PrintLevel(context->output_level); /* read in the number of matrix equations */ leng = 0; if (proc_config[AZ_node] == 0) { sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); fp=fopen(pathfilename,"r"); if (fp==NULL) { printf("**ERR** couldn't open file data_matrix.txt\n"); exit(1); } fscanf(fp,"%d",&leng); fclose(fp); } leng = AZ_gsum_int(leng, proc_config); N_grid_pts=leng/num_PDE_eqns; /* initialize the list of 
global indices. NOTE: the list of global */ /* indices must be in ascending order so that subsequent calls to */ /* AZ_find_index() will function properly. */ #if 0 if (proc_config[AZ_N_procs] == 1) i = AZ_linear; else i = AZ_file; #endif i = AZ_linear; /* cannot use AZ_input_update for variable blocks (forgot why, but debugged through it)*/ /* make a linear distribution of the matrix */ /* if the linear distribution does not align with the blocks, */ /* this is corrected in ML_AZ_Reader_ReadVariableBlocks */ leng1 = leng/nprocs; leng2 = leng-leng1*nprocs; if (proc >= leng2) { leng2 += (proc*leng1); } else { leng1++; leng2 = proc*leng1; } N_update = leng1; update = (int*)AZ_allocate((N_update+1)*sizeof(int)); if (update==NULL) { (void) fprintf (stderr, "Not enough space to allocate 'update'\n"); fflush(stderr); exit(EXIT_FAILURE); } for (i=0; i<N_update; i++) update[i] = i+leng2; #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_vblocks.txt",argv[1]); ML_AZ_Reader_ReadVariableBlocks(pathfilename,&nblocks,&blocks,&block_pde, &N_update,&update,proc_config); #if 0 /* debug */ printf("proc %d N_update %d\n",proc_config[AZ_node],N_update); fflush(stdout); #endif sprintf(pathfilename,"%s/data_matrix.txt",argv[1]); AZ_input_msr_matrix(pathfilename,update, &val, &bindx, N_update, proc_config); /* This code is to fix things up so that we are sure we have */ /* all blocks (including the ghost nodes) the same size. 
*/ /* not sure, whether this is a good idea with variable blocks */ /* the examples inpufiles (see top of this file) don't need it */ /* anyway */ /* AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update); */ AZ_transform_norowreordering(proc_config, &external, bindx, val, update, &update_index, &extern_index, &data_org, N_update, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); Amat = AZ_matrix_create( leng ); AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL); Amat->matrix_type = data_org[AZ_matrix_type]; data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border]; start_time = AZ_second(); options[AZ_scaling] = AZ_none; ML_Create(&ml, N_levels); /* set up discretization matrix and matrix vector function */ AZ_ML_Set_Amat(ml, 0, N_update, N_update, Amat, proc_config); ML_Set_ResidualOutputFrequency(ml, context->output); ML_Set_Tolerance(ml, context->tol); ML_Aggregate_Create( &ag ); if (ML_strcmp(context->agg_coarsen_scheme,"Mis") == 0) { ML_Aggregate_Set_CoarsenScheme_MIS(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Uncoupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Coupled") == 0) { ML_Aggregate_Set_CoarsenScheme_Coupled(ag); } else if (ML_strcmp(context->agg_coarsen_scheme,"Metis") == 0) { ML_Aggregate_Set_CoarsenScheme_METIS(ag); for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else if (ML_strcmp(context->agg_coarsen_scheme,"VBMetis") == 0) { /* when no blocks read, use standard metis assuming constant block sizes */ if (!blocks) ML_Aggregate_Set_CoarsenScheme_METIS(ag); else { ML_Aggregate_Set_CoarsenScheme_VBMETIS(ag); ML_Aggregate_Set_Vblocks_CoarsenScheme_VBMETIS(ag,0,N_levels,nblocks, blocks,block_pde,N_update); } for (i=0; i<N_levels; i++) ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9); } else { printf("**ERR** ML: Unknown aggregation scheme %s\n",context->agg_coarsen_scheme); exit(-1); } ML_Aggregate_Set_DampingFactor(ag, context->agg_damping); 
ML_Aggregate_Set_MaxCoarseSize( ag, context->maxcoarsesize); ML_Aggregate_Set_Threshold(ag, context->agg_thresh); if (ML_strcmp(context->agg_spectral_norm,"Calc") == 0) { ML_Set_SpectralNormScheme_Calc(ml); } else if (ML_strcmp(context->agg_spectral_norm,"Anorm") == 0) { ML_Set_SpectralNormScheme_Anorm(ml); } else { printf("**WRN** ML: Unknown spectral norm scheme %s\n",context->agg_spectral_norm); } /* read in the rigid body modes */ Nrigid = 0; if (proc_config[AZ_node] == 0) { sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); while( (fp = fopen(pathfilename,"r")) != NULL) { fclose(fp); Nrigid++; sprintf(filename,"data_nullsp%d.txt",Nrigid); sprintf(pathfilename,"%s/%s",argv[1],filename); } } Nrigid = AZ_gsum_int(Nrigid,proc_config); if (Nrigid != 0) { rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) ); if (rigid == NULL) { printf("Error: Not enough space for rigid body modes\n"); } } /* Set rhs */ sprintf(pathfilename,"%s/data_rhs.txt",argv[1]); fp = fopen(pathfilename,"r"); if (fp == NULL) { rhs=(double *)ML_allocate(leng*sizeof(double)); if (proc_config[AZ_node] == 0) printf("taking linear vector for rhs\n"); for (i = 0; i < N_update; i++) rhs[i] = (double) update[i]; } else { fclose(fp); if (proc_config[AZ_node] == 0) printf("reading rhs from a file\n"); AZ_input_msr_matrix(pathfilename, update, &rhs, &garbage, N_update, proc_config); } AZ_reorder_vec(rhs, data_org, update_index, NULL); for (i = 0; i < Nrigid; i++) { sprintf(filename,"data_nullsp%d.txt",i); sprintf(pathfilename,"%s/%s",argv[1],filename); AZ_input_msr_matrix(pathfilename, update, &mode, &garbage, N_update, proc_config); AZ_reorder_vec(mode, data_org, update_index, NULL); #if 0 /* test the given rigid body mode, output-vector should be ~0 */ Amat->matvec(mode, rigid, Amat, proc_config); for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]); #endif for (j = 0; j < i; j++) { alpha = -AZ_gdot(N_update, mode, 
&(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, mode, &one); } /* rhs orthogonalization */ alpha = -AZ_gdot(N_update, mode, rhs, proc_config)/ AZ_gdot(N_update, mode, mode, proc_config); DAXPY_F77(&N_update, &alpha, mode, &one, rhs, &one); for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j]; free(mode); free(garbage); } for (j = 0; j < Nrigid; j++) { alpha = -AZ_gdot(N_update, rhs, &(rigid[j*N_update]), proc_config)/ AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config); DAXPY_F77(&N_update, &alpha, &(rigid[j*N_update]), &one, rhs, &one); } #if 0 /* for testing the default nullsp */ ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, 6, NULL, N_update); #else if (Nrigid != 0) { ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update); } #endif if (rigid) ML_free(rigid); ag->keep_agg_information = 1; coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, 0, ML_INCREASING, ag); coarsest_level--; if ( proc_config[AZ_node] == 0 ) printf("Coarse level = %d \n", coarsest_level); #if 0 /* set up smoothers */ if (!blocks) blocks = (int *) ML_allocate(sizeof(int)*N_update); #endif for (level = 0; level < coarsest_level; level++) { num_PDE_eqns = ml->Amat[level].num_PDEs; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->smoother,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } /* This is the symmetric Gauss-Seidel smoothing that we usually use. */ /* In parallel, it is not a true Gauss-Seidel in that each processor */ /* does a Gauss-Seidel on its local submatrix independent of the */ /* other processors. 
*/ else if (ML_strcmp(context->smoother,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->smoother,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->smoother,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->smoother,"VBSymGaussSeidel") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); blocks = NULL; block_pde = NULL; nblocks = 0; ML_Aggregate_Get_Vblocks_CoarsenScheme_VBMETIS(ag,level,N_levels,&nblocks, &blocks,&block_pde); if (blocks==NULL) ML_Gen_Blocks_Aggregates(ag, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } /* This is a true Gauss Seidel in parallel. This seems to work for */ /* elasticity problems. However, I don't believe that this is very */ /* efficient in parallel. */ /* nblocks = ml->Amat[level].invec_leng; for (i =0; i < nblocks; i++) blocks[i] = i; ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER, nsmooth, 1., nblocks, blocks); ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER, nsmooth, 1., nblocks, blocks); */ /* Jacobi Smoothing */ else if (ML_strcmp(context->smoother,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth,.4); ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,.4); } /* This does a block Gauss-Seidel (not true GS in parallel) */ /* where each processor has 'nblocks' blocks. 
*/ /* */ else if (ML_strcmp(context->smoother,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1., nblocks, blocks); } else { printf("unknown smoother %s\n",context->smoother); exit(1); } } /* set coarse level solver */ nsmooth = context->coarse_its; /* Sparse approximate inverse smoother that acutally does both */ /* pre and post smoothing. */ if (ML_strcmp(context->coarse_solve,"Parasails") == 0) { ML_Gen_Smoother_ParaSails(ml , coarsest_level, ML_PRESMOOTHER, nsmooth, parasails_sym, parasails_thresh, parasails_nlevels, parasails_filter, (int) parasails_loadbal, parasails_factorized); } else if (ML_strcmp(context->coarse_solve,"GaussSeidel") == 0) { ML_Gen_Smoother_GaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"Poly") == 0) { ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth); } else if (ML_strcmp(context->coarse_solve,"SymGaussSeidel") == 0) { ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); } else if (ML_strcmp(context->coarse_solve,"BlockGaussSeidel") == 0) { ML_Gen_Smoother_BlockGaussSeidel(ml, coarsest_level, ML_BOTH, nsmooth,1., num_PDE_eqns); } else if (ML_strcmp(context->coarse_solve,"Aggregate") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); ML_Gen_Blocks_Aggregates(ag, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"Jacobi") == 0) { ML_Gen_Smoother_Jacobi(ml , coarsest_level, ML_BOTH, nsmooth,.5); } else if (ML_strcmp(context->coarse_solve,"Metis") == 0) { if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); nblocks = 250; ML_Gen_Blocks_Metis(ml, coarsest_level, &nblocks, &blocks); ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, 
ML_BOTH, nsmooth,1., nblocks, blocks); } else if (ML_strcmp(context->coarse_solve,"SuperLU") == 0) { ML_Gen_CoarseSolverSuperLU( ml, coarsest_level); } else if (ML_strcmp(context->coarse_solve,"Amesos") == 0) { ML_Gen_Smoother_Amesos(ml,coarsest_level,ML_AMESOS_KLU,-1, 0.0); } else { printf("unknown coarse grid solver %s\n",context->coarse_solve); exit(1); } ML_Gen_Solver(ml, ML_MGV, 0, coarsest_level); AZ_defaults(options, params); if (ML_strcmp(context->krylov,"Cg") == 0) { options[AZ_solver] = AZ_cg; } else if (ML_strcmp(context->krylov,"Bicgstab") == 0) { options[AZ_solver] = AZ_bicgstab; } else if (ML_strcmp(context->krylov,"Tfqmr") == 0) { options[AZ_solver] = AZ_tfqmr; } else if (ML_strcmp(context->krylov,"Gmres") == 0) { options[AZ_solver] = AZ_gmres; } else { printf("unknown krylov method %s\n",context->krylov); } if (blocks) ML_free(blocks); if (block_pde) ML_free(block_pde); options[AZ_scaling] = AZ_none; options[AZ_precond] = AZ_user_precond; options[AZ_conv] = AZ_r0; options[AZ_output] = 1; options[AZ_max_iter] = context->max_outer_its; options[AZ_poly_ord] = 5; options[AZ_kspace] = 130; params[AZ_tol] = context->tol; options[AZ_output] = context->output; ML_free(context); AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); setup_time = AZ_second() - start_time; xxx = (double *) malloc( leng*sizeof(double)); for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0; /* Set x */ /* there is no initguess supplied with these examples for the moment.... */ fp = fopen("initguessfile","r"); if (fp != NULL) { fclose(fp); if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n"); AZ_input_msr_matrix("data_initguess.txt", update, &xxx, &garbage, N_update, proc_config); options[AZ_conv] = AZ_expected_values; } else if (proc_config[AZ_node]== 0) printf("taking 0 initial guess \n"); AZ_reorder_vec(xxx, data_org, update_index, NULL); /* if Dirichlet BC ... 
put the answer in */ for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) { if ( (val[i] > .99999999) && (val[i] < 1.0000001)) xxx[i] = rhs[i]; } fp = fopen("AZ_no_multilevel.dat","r"); scaling = AZ_scaling_create(); start_time = AZ_second(); if (fp != NULL) { fclose(fp); options[AZ_precond] = AZ_none; options[AZ_scaling] = AZ_sym_diag; options[AZ_ignore_scaling] = AZ_TRUE; options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); /* options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); */ } else { options[AZ_keep_info] = 1; AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); options[AZ_pre_calc] = AZ_reuse; options[AZ_conv] = AZ_expected_values; /* if (proc_config[AZ_node] == 0) printf("\n-------- Second solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); if (proc_config[AZ_node] == 0) printf("\n-------- Third solve with improved convergence test -----\n"); AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); */ } solve_time = AZ_second() - start_time; if (proc_config[AZ_node] == 0) printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time); if (proc_config[AZ_node] == 0) printf("Printing out a few entries of the solution ...\n"); for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for 
(j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} j = AZ_gsum_int(7, proc_config); /* sync processors */ for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++) if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n", update[j],xxx[update_index[j]]); fflush(stdout);} ML_Aggregate_Destroy(&ag); ML_Destroy(&ml); AZ_free((void *) Amat->data_org); AZ_free((void *) Amat->val); AZ_free((void *) Amat->bindx); AZ_free((void *) update); AZ_free((void *) external); AZ_free((void *) extern_index); AZ_free((void *) update_index); AZ_scaling_destroy(&scaling); if (Amat != NULL) AZ_matrix_destroy(&Amat); if (Pmat != NULL) AZ_precond_destroy(&Pmat); free(xxx); free(rhs); #ifdef ML_MPI MPI_Finalize(); #endif return 0; }
/* Regression test: solve the same simple linear system three ways --
 * (1) AztecOO on an Epetra_CrsMatrix, (2) "old" Aztec with an MSR matrix,
 * (3) "old" Aztec with a VBR matrix -- using the given convergence test
 * (conv_option) and scaling (scaling_option), and verify that the right-hand
 * sides and the computed solutions agree.
 *
 * Returns 0 on success, -1 on any mismatch.
 *
 * NOTE(review): the early 'return(-1)' paths leak Acrs, the new[]'d arrays
 * and the Aztec matrices -- tolerable for a test driver, but worth fixing.
 */
int test_azoo_conv_with_scaling(int conv_option, int scaling_option,
                                const Epetra_Comm& comm, bool verbose)
{
  int localN = 20;
  int numprocs = comm.NumProc();
  int globalN = numprocs*localN;

  /* Build the Epetra system: x=1, b=A*x, then zero x for the solve. */
  Epetra_Map emap(globalN, 0, comm);
  Epetra_CrsMatrix* Acrs = create_and_fill_crs_matrix(emap);
  Epetra_Vector x_crs(emap), b_crs(emap);
  x_crs.PutScalar(1.0);
  Acrs->Multiply(false, x_crs, b_crs);
  x_crs.PutScalar(0.0);

  /* Solve with AztecOO (CG, requested conv test and scaling). */
  AztecOO azoo(Acrs, &x_crs, &b_crs);
  azoo.SetAztecOption(AZ_conv, conv_option);
  azoo.SetAztecOption(AZ_solver, AZ_cg);
  azoo.SetAztecOption(AZ_scaling, scaling_option);
  azoo.Iterate(100, 1.e-9);

  //now, do the same thing with 'old-fashioned Aztec', and compare
  //the solutions.
  int* proc_config = new int[AZ_PROC_SIZE];
#ifdef EPETRA_MPI
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
  AZ_set_comm(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, 0);
#endif

  int *external, *update_index, *external_index;
  int *external2, *update_index2, *external_index2;
  AZ_MATRIX* Amsr = NULL;
  AZ_MATRIX* Avbr = NULL;
  /* NOTE(review): 'err' is assigned but never checked. */
  int err = create_and_transform_simple_matrix(AZ_MSR_MATRIX, localN, 4.0,
                                               proc_config, Amsr,
                                               external, update_index,
                                               external_index);

  /* Local rows plus border rows; the b_* buffers are double-length so the
     upper half (b_*_u) can hold the inverse-reordered copy. */
  int N_update = localN+Amsr->data_org[AZ_N_border];
  double* x_msr = new double[N_update];
  double* b_msr = new double[N_update*2];
  double* b_msr_u = b_msr+N_update;

  double* x_vbr = new double[N_update];
  double* b_vbr = new double[N_update*2];
  double* b_vbr_u = b_vbr+N_update;

  err = create_and_transform_simple_matrix(AZ_VBR_MATRIX, localN, 4.0,
                                           proc_config, Avbr,
                                           external2, update_index2,
                                           external_index2);

  /* Same recipe as the Epetra system: x=1, b=A*x, then zero x. */
  for(int i=0; i<N_update; ++i) {
    x_msr[i] = 1.0;
    b_msr[i] = 0.0;
    b_msr_u[i] = 0.0;
    x_vbr[i] = 1.0;
    b_vbr[i] = 0.0;
    b_vbr_u[i] = 0.0;
  }

  Amsr->matvec(x_msr, b_msr, Amsr, proc_config);
  Avbr->matvec(x_vbr, b_vbr, Avbr, proc_config);

  for(int i=0; i<N_update; ++i) {
    x_msr[i] = 0.0;
    x_vbr[i] = 0.0;
  }

  //check that the rhs's are the same.
  double max_rhs_diff1 = 0.0;
  double max_rhs_diff2 = 0.0;
  double* bptr_crs = b_crs.Values();

  /* Undo Aztec's internal row reordering so entries line up with Epetra's. */
  AZ_invorder_vec(b_msr, Amsr->data_org, update_index, NULL, b_msr_u);
  AZ_invorder_vec(b_vbr, Avbr->data_org, update_index2, Avbr->rpntr, b_vbr_u);

  for(int i=0; i<localN; ++i) {
    if (std::abs(bptr_crs[i] - b_msr_u[i]) > max_rhs_diff1) {
      max_rhs_diff1 = std::abs(bptr_crs[i] - b_msr_u[i]);
    }
    if (std::abs(bptr_crs[i] - b_vbr_u[i]) > max_rhs_diff2) {
      max_rhs_diff2 = std::abs(bptr_crs[i] - b_vbr_u[i]);
    }
  }

  if (max_rhs_diff1> 1.e-12) {
    cout << "AztecOO rhs not equal to Aztec msr rhs "<<max_rhs_diff1<<endl;
    return(-1);
  }
  if (max_rhs_diff2> 1.e-12) {
    cout << "AztecOO rhs not equal to Aztec vbr rhs "<<max_rhs_diff2<<endl;
    return(-1);
  }

  /* Now solve both old-Aztec systems with matching options. */
  int* az_options = new int[AZ_OPTIONS_SIZE];
  double* params = new double[AZ_PARAMS_SIZE];
  double* status = new double[AZ_STATUS_SIZE];
  AZ_defaults(az_options, params);
  az_options[AZ_solver] = AZ_cg;
  az_options[AZ_conv] = conv_option;
  az_options[AZ_scaling] = scaling_option;
  az_options[AZ_max_iter] = 100;
  params[AZ_tol] = 1.e-9;

  AZ_iterate(x_msr, b_msr, az_options, params, status, proc_config,
             Amsr, NULL, NULL);
  AZ_iterate(x_vbr, b_vbr, az_options, params, status, proc_config,
             Avbr, NULL, NULL);

  /* Un-reorder the solutions (reusing the b_*_u halves as scratch). */
  AZ_invorder_vec(x_msr, Amsr->data_org, update_index, NULL, b_msr_u);
  AZ_invorder_vec(x_vbr, Avbr->data_org, update_index2, Avbr->rpntr, b_vbr_u);

  double max_diff1 = 0.0;
  double max_diff2 = 0.0;
  double* xptr_crs = x_crs.Values();

  for(int i=0; i<localN; ++i) {
    if (std::abs(xptr_crs[i] - b_msr_u[i]) > max_diff1) {
      max_diff1 = std::abs(xptr_crs[i] - b_msr_u[i]);
    }
    if (std::abs(xptr_crs[i] - b_vbr_u[i]) > max_diff2) {
      max_diff2 = std::abs(xptr_crs[i] - b_vbr_u[i]);
    }
  }

  /* NOTE(review): messages mention "Anorm conv" although conv_option is a
     parameter -- the text may be stale for other convergence options. */
  if (max_diff1 > 1.e-7) {
    cout << "AztecOO failed to match Aztec msr with scaling and Anorm conv."
         << endl;
    return(-1);
  }
  if (max_diff2 > 1.e-7) {
    cout << "AztecOO failed to match Aztec vbr with scaling and Anorm conv."
         << endl;
    return(-1);
  }

  /* Success: release everything. */
  delete Acrs;
  delete [] x_msr;
  delete [] b_msr;
  delete [] x_vbr;
  delete [] b_vbr;
  destroy_matrix(Amsr);
  destroy_matrix(Avbr);

  delete [] proc_config;
  free(update_index);
  free(external);
  free(external_index);
  free(update_index2);
  free(external2);
  free(external_index2);
  delete [] az_options;
  delete [] params;
  delete [] status;

  return(0);
}
AZ_MATRIX *user_Kn_build(struct user_partition *Node_Partition) { int *Kn_bindx; double *Kn_val; int proc_config[AZ_PROC_SIZE]; AZ_MATRIX *Kn_mat; int *reordered_glob_nodes = NULL, *cpntr = NULL, *Kn_data_org = NULL; int i, ii, jj, nx, global_id, Nlocal_nodes, nz_ptr; Nlocal_nodes = Node_Partition->Nlocal; Kn_bindx = (int *) malloc((27*Nlocal_nodes+5)*sizeof(int)); Kn_val = (double *) malloc((27*Nlocal_nodes+5)*sizeof(double)); Kn_bindx[0] = Nlocal_nodes+1; nx = (int) sqrt( ((double) Node_Partition->Nglobal) + .00001); #ifdef periodic nx = (int) sqrt( ((double) Node_Partition->Nglobal - 2) + .00001); Nlocal_nodes -= 2; #endif for (i = 0; i < Nlocal_nodes; i++) { global_id = (Node_Partition->my_global_ids)[i]; Kn_bindx[i+1] = Kn_bindx[i] + 8; Kn_val[i] = 8.; inv2dnodeindex(global_id, &ii, &jj, nx); nz_ptr = Kn_bindx[i]; Kn_bindx[nz_ptr] = southeast2d(ii,jj,nx); Kn_val[nz_ptr++] = -1.; Kn_bindx[nz_ptr] = northwest2d(ii,jj,nx); Kn_val[nz_ptr++] = -1.; Kn_bindx[nz_ptr] = northeast2d(ii,jj,nx); Kn_val[nz_ptr++] = -.00000001; if (ii == 0) ii = nx-1; else ii--; Kn_bindx[nz_ptr] = northwest2d(ii,jj,nx); Kn_val[nz_ptr++] = -.00000001; if (jj == 0) jj = nx-1; else jj--; Kn_bindx[nz_ptr] = southeast2d(ii,jj,nx); Kn_val[nz_ptr++] = -1.; Kn_bindx[nz_ptr] = northwest2d(ii,jj,nx); Kn_val[nz_ptr++] = -1.; Kn_bindx[nz_ptr] = southwest2d(ii,jj,nx); Kn_val[nz_ptr++] = -.00000001; if (ii == nx-1) ii = 0; else ii++; Kn_bindx[nz_ptr] = southeast2d(ii,jj,nx); Kn_val[nz_ptr++] = -.00000001; } #ifdef periodic i = Nlocal_nodes; Kn_bindx[i+1] = Kn_bindx[i]; Kn_val[i] = 1.; i++; Kn_bindx[i+1] = Kn_bindx[i]; Kn_val[i] = 1.; Nlocal_nodes += 2; #endif /* Transform the global Aztec matrix into a local Aztec matrix. That is, */ /* replace global column indices by local indices and set up communication */ /* data structure 'Ke_data_org' that will be used for matvec's. 
*/ AZ_set_proc_config(proc_config, COMMUNICATOR); AZ_transform_norowreordering(proc_config,&(Node_Partition->needed_external_ids), Kn_bindx, Kn_val, Node_Partition->my_global_ids, &reordered_glob_nodes, &reordered_node_externs, &Kn_data_org, Nlocal_nodes, 0, 0, 0, &cpntr, AZ_MSR_MATRIX); /* Convert old style Aztec matrix to newer style Aztec matrix */ Kn_mat = AZ_matrix_create( Nlocal_nodes ); AZ_set_MSR(Kn_mat, Kn_bindx, Kn_val, Kn_data_org, 0, NULL, AZ_LOCAL); return(Kn_mat); }
/* Exercise Aztec's preconditioner caching (AZ_keep_info / AZ_pre_calc ==
 * AZ_calc vs AZ_reuse) with plain "old" Aztec, interleaving solves on an
 * MSR matrix and a VBR matrix.  The exact sequence of calls below -- which
 * solve keeps its factorization, which one reuses it, and when
 * AZ_free_memory is called -- IS the test; do not reorder.
 *
 * Returns 0 (failures would surface inside call_AZ_iterate/Aztec itself).
 */
int test_AZ_iterate_AZ_pre_calc_AZ_reuse(Epetra_Comm& Comm,
                                         int* options,
                                         bool verbose)
{
  (void)Comm;
  if (verbose) {
    cout << "testing AZ_keep_info/AZ_reuse with 'old' Aztec (solver "
        <<options[AZ_solver] <<", precond "<<options[AZ_precond]<<"/"
        << options[AZ_subdomain_solve]<<")"<<endl;
  }

  int* proc_config = new int[AZ_PROC_SIZE];
#ifdef EPETRA_MPI
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
  AZ_set_comm(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, 0);
#endif

  //We're going to create 2 Aztec matrices, one MSR and one VBR. We're going
  //to do 2 solves with each, reusing the preconditioner for the second solve.

  int *external, *update_index, *external_index;
  int *external2, *update_index2, *external_index2;

  int i, N = 5;
  AZ_MATRIX* Amsr = NULL;
  AZ_MATRIX* Avbr = NULL;
  /* NOTE(review): 'err' is accumulated but never checked. */
  int err = create_and_transform_simple_matrix(AZ_MSR_MATRIX, N, 3.0,
                                               proc_config, Amsr,
                                               external, update_index,
                                               external_index);

  err += create_and_transform_simple_matrix(AZ_VBR_MATRIX, N, 3.0,
                                            proc_config, Avbr,
                                            external2, update_index2,
                                            external_index2);

  int* az_options = new int[AZ_OPTIONS_SIZE];
  double* params = new double[AZ_PARAMS_SIZE];
  double* status = new double[AZ_STATUS_SIZE];
  AZ_defaults(az_options, params);
  /* Copy the caller's solver/preconditioner choices; quiet unless verbose. */
  az_options[AZ_solver] = options[AZ_solver];
  az_options[AZ_precond] = options[AZ_precond];
  az_options[AZ_subdomain_solve] = options[AZ_subdomain_solve];
  az_options[AZ_scaling] = AZ_sym_diag;
  if (verbose) {
    az_options[AZ_output] = AZ_warnings;
  }
  else {
    az_options[AZ_output] = 0;
  }

  /* x = 0, b = 1 over local plus border rows. */
  int N_update = N+Amsr->data_org[AZ_N_border];
  double* x = new double[N_update];
  double* b = new double[N_update];
  for(i=0; i<N_update; ++i) {
    x[i] = 0.0;
    b[i] = 1.0;
  }

  AZ_PRECOND* Pmsr = AZ_precond_create(Amsr, AZ_precondition, NULL);
  AZ_SCALING* Smsr = AZ_scaling_create();
  AZ_PRECOND* Pvbr = AZ_precond_create(Avbr, AZ_precondition, NULL);
  AZ_SCALING* Svbr = AZ_scaling_create();

  // Amsr->data_org[AZ_name] = 1;
  // Avbr->data_org[AZ_name] = 2;

  //First solve with the first matrix (Amsr).
  //(call_AZ_iterate's trailing args are: keepinfo flag, AZ_pre_calc value.)
  if (verbose)
    cout << "solve Amsr, name: "<<Amsr->data_org[AZ_name]<<endl;
  call_AZ_iterate(Amsr, Pmsr, Smsr, x, b, az_options, params, status,
                  proc_config, 1, AZ_calc, verbose);

  //First solve with the second matrix (Avbr).
  if (verbose)
    cout << "solve Avbr, name: " <<Avbr->data_org[AZ_name]<<endl;
  call_AZ_iterate(Avbr, Pvbr, Svbr, x, b, az_options, params, status,
                  proc_config, 0, AZ_calc, verbose);

  //Second solve with Amsr, reusing preconditioner
  if (verbose)
    cout << "solve Amsr (first reuse)"<<endl;
  call_AZ_iterate(Amsr, Pmsr, Smsr, x, b, az_options, params, status,
                  proc_config, 1, AZ_reuse, verbose);

  //Second solve with Avbr, not reusing preconditioner
  if (verbose)
    cout << "solve Avbr (keepinfo==0), name: " <<Avbr->data_org[AZ_name]<<endl;
  call_AZ_iterate(Avbr, Pvbr, Svbr, x, b, az_options, params, status,
                  proc_config, 0, AZ_calc, verbose);

  //Drop any cached factorizations before the next round of solves.
  if (verbose)
    std::cout << "calling AZ_free_memory..."<<std::endl;
  AZ_free_memory(Amsr->data_org[AZ_name]);
  AZ_free_memory(Avbr->data_org[AZ_name]);

  //solve with Amsr again, not reusing preconditioner
  if (verbose)
    cout << "solve Amsr (keepinfo==0)"<<endl;
  call_AZ_iterate(Amsr, Pmsr, Smsr, x, b, az_options, params, status,
                  proc_config, 0, AZ_calc, verbose);

  //Second solve with Avbr, this time with keepinfo==1
  if (verbose)
    cout << "solve Avbr (keepinfo==1), name: " <<Avbr->data_org[AZ_name]<<endl;
  call_AZ_iterate(Avbr, Pvbr, Svbr, x, b, az_options, params, status,
                  proc_config, 1, AZ_calc, verbose);

  //Second solve with Amsr, not reusing preconditioner
  if (verbose)
    cout << "solve Amsr (keepinfo==0, calc)"<<endl;
  call_AZ_iterate(Amsr, Pmsr, Smsr, x, b, az_options, params, status,
                  proc_config, 0, AZ_calc, verbose);

  //Second solve with Avbr, not reusing preconditioner
  if (verbose)
    cout << "solve Avbr (keepinfo==1, reuse), name: "
         <<Avbr->data_org[AZ_name]<<endl;
  call_AZ_iterate(Avbr, Pvbr, Svbr, x, b, az_options, params, status,
                  proc_config, 1, AZ_reuse, verbose);

  AZ_free_memory(Amsr->data_org[AZ_name]);
  AZ_free_memory(Avbr->data_org[AZ_name]);

  /* Tear down everything that was created above. */
  AZ_scaling_destroy(&Smsr);
  AZ_precond_destroy(&Pmsr);
  AZ_scaling_destroy(&Svbr);
  AZ_precond_destroy(&Pvbr);
  destroy_matrix(Amsr);
  destroy_matrix(Avbr);

  delete [] x;
  delete [] b;
  delete [] az_options;
  delete [] params;
  delete [] status;
  delete [] proc_config;
  free(update_index);
  free(external);
  free(external_index);
  free(update_index2);
  free(external2);
  free(external_index2);

  return(0);
}
int main(int argc, char *argv[]) { int Nnodes=16*16; /* Total number of nodes in the problem.*/ /* 'Nnodes' must be a perfect square. */ int MaxMgLevels=6; /* Maximum number of Multigrid Levels */ int Nits_per_presmooth=1; /* # of pre & post smoothings per level */ double tolerance = 1.0e-8; /* At convergence: */ /* ||r_k||_2 < tolerance ||r_0||_2 */ int smoothPe_flag = ML_YES; /* ML_YES: smooth tentative prolongator */ /* ML_NO: don't smooth prolongator */ /***************************************************************************/ /* Select Hiptmair relaxation subsmoothers for the nodal and edge problems */ /* Choices include */ /* 1) ML_Gen_Smoother_SymGaussSeidel: this corresponds to a processor */ /* local version of symmetric Gauss-Seidel/SOR. The number of sweeps */ /* can be set via either 'edge_its' or 'nodal_its'. The damping can */ /* be set via 'edge_omega' or 'nodal_omega'. When set to ML_DDEFAULT, */ /* the damping is set to '1' on one processor. On multiple processors */ /* a lower damping value is set. This is needed to converge processor */ /* local SOR. */ /* 2) ML_Gen_Smoother_Cheby: this corresponds to polynomial relaxation. */ /* The degree of the polynomial is set via 'edge_its' or 'nodal_its'. */ /* If the degree is '-1', Marian Brezina's MLS polynomial is chosen. */ /* Otherwise, a Chebyshev polynomial is used over high frequencies */ /* [ lambda_max/alpha , lambda_max]. Lambda_max is computed. 'alpha' */ /* is hardwired in this example to correspond to twice the ratio of */ /* unknowns in the fine and coarse meshes. */ /* */ /* Using 'hiptmair_type' (see comments below) it is also possible to choose*/ /* when edge and nodal problems are relaxed within the Hiptmair smoother. 
*/ /***************************************************************************/ void *edge_smoother=(void *) /* Edge relaxation: */ ML_Gen_Smoother_Cheby; /* ML_Gen_Smoother_Cheby */ /* ML_Gen_Smoother_SymGaussSeidel */ void *nodal_smoother=(void *) /* Nodal relaxation */ ML_Gen_Smoother_Cheby;/* ML_Gen_Smoother_Cheby */ /* ML_Gen_Smoother_SymGaussSeidel */ int edge_its = 3; /* Iterations or polynomial degree for */ int nodal_its = 3; /* edge/nodal subsmoothers. */ double nodal_omega = ML_DDEFAULT, /* SOR damping parameter for noda/edge */ edge_omega = ML_DDEFAULT; /* subsmoothers (see comments above). */ int hiptmair_type=HALF_HIPTMAIR;/* FULL_HIPTMAIR: each invokation */ /* smoothes on edges, then nodes, */ /* and then once again on edges. */ /* HALF_HIPTMAIR: each pre-invokation */ /* smoothes on edges, then nodes. */ /* Each post-invokation smoothes */ /* on nodes then edges. . */ ML_Operator *Tmat, *Tmat_trans, **Tmat_array, **Tmat_trans_array; ML *ml_edges, *ml_nodes; ML_Aggregate *ag; int Nfine_edge, Ncoarse_edge, Nfine_node, Ncoarse_node, Nlevels; int level, coarsest_level, itmp; double edge_coarsening_rate, node_coarsening_rate, *rhs, *xxx; void **edge_args, **nodal_args; struct user_partition Edge_Partition = {NULL, NULL,0,0}, Node_Partition = {NULL, NULL,0,0}; struct Tmat_data Tmat_data; int i, Ntotal; ML_Comm *comm; /* See Aztec User's Guide for information on these variables */ #ifdef AZTEC AZ_MATRIX *Ke_mat, *Kn_mat; AZ_PRECOND *Pmat = NULL; int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE]; double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE]; #endif /* get processor information (proc id & # of procs) and set ML's printlevel. */ #ifdef ML_MPI MPI_Init(&argc,&argv); #endif #ifdef AZTEC AZ_set_proc_config(proc_config, COMMUNICATOR); #endif ML_Set_PrintLevel(10); /* set ML's output level: 0 gives least output */ /* Set the # of global nodes/edges and partition both the edges and the */ /* nodes over the processors. 
NOTE: I believe we assume that if an edge */ /* is assigned to a processor at least one of its nodes must be also */ /* assigned to that processor. */ Node_Partition.Nglobal = Nnodes; Edge_Partition.Nglobal = Node_Partition.Nglobal*2; Node_Partition.type = NODE; Edge_Partition.type = EDGE; #define perxodic #ifdef periodic Node_Partition.Nglobal += 2; #endif partition_edges(&Edge_Partition); partition_nodes(&Node_Partition); xxx = (double *) ML_allocate((Edge_Partition.Nlocal+100)*sizeof(double)); rhs = (double *) ML_allocate((Edge_Partition.Nlocal+100)*sizeof(double)); for (i = 0; i < Edge_Partition.Nlocal + 100; i++) xxx[i] = -1.; for (i = 0; i < Edge_Partition.Nlocal; i++) xxx[i] = (double) Edge_Partition.my_global_ids[i]; update_ghost_edges(xxx, (void *) &Edge_Partition); /* Create an empty multigrid hierarchy and set the 'MaxMGLevels-1'th */ /* level discretization within this hierarchy to the ML matrix */ /* representing Ke (Maxwell edge discretization). */ ML_Create(&ml_edges, MaxMgLevels); #ifdef AZTEC /* Build Ke as an Aztec matrix. Use built-in function AZ_ML_Set_Amat() */ /* to convert to an ML matrix and put in hierarchy. */ Ke_mat = user_Ke_build(&Edge_Partition); AZ_ML_Set_Amat(ml_edges, MaxMgLevels-1, Edge_Partition.Nlocal, Edge_Partition.Nlocal, Ke_mat, proc_config); #else /* Build Ke directly as an ML matrix. */ ML_Init_Amatrix (ml_edges, MaxMgLevels-1, Edge_Partition.Nlocal, Edge_Partition.Nlocal, &Edge_Partition); Ntotal = Edge_Partition.Nlocal; if (Edge_Partition.nprocs == 2) Ntotal += Edge_Partition.Nghost; ML_Set_Amatrix_Getrow(ml_edges, MaxMgLevels-1, Ke_getrow, update_ghost_edges, Ntotal); ML_Set_Amatrix_Matvec(ml_edges, MaxMgLevels-1, Ke_matvec); #endif /* Build an Aztec matrix representing an auxiliary nodal PDE problem. */ /* This should be a variable coefficient Poisson problem (with unknowns*/ /* at the nodes). The coefficients should be chosen to reflect the */ /* conductivity of the original edge problems. 
*/ /* Create an empty multigrid hierarchy. Convert the Aztec matrix to an */ /* ML matrix and put it in the 'MaxMGLevels-1' level of the hierarchy. */ /* Note it is possible to multiply T'*T for get this matrix though this*/ /* will not incorporate material properties. */ ML_Create(&ml_nodes, MaxMgLevels); #ifdef AZTEC Kn_mat = user_Kn_build( &Node_Partition); AZ_ML_Set_Amat(ml_nodes, MaxMgLevels-1, Node_Partition.Nlocal, Node_Partition.Nlocal, Kn_mat, proc_config); #else ML_Init_Amatrix (ml_nodes, MaxMgLevels-1 , Node_Partition.Nlocal, Node_Partition.Nlocal, &Node_Partition); Ntotal = Node_Partition.Nlocal; if (Node_Partition.nprocs == 2) Ntotal += Node_Partition.Nghost; ML_Set_Amatrix_Getrow(ml_nodes, MaxMgLevels-1, Kn_getrow, update_ghost_nodes, Ntotal); #endif /* Build an ML matrix representing the null space of the PDE problem. */ /* This should be a discrete gradient (nodes to edges). */ #ifdef AZTEC Tmat = user_T_build (&Edge_Partition, &Node_Partition, &(ml_nodes->Amat[MaxMgLevels-1])); #else Tmat = ML_Operator_Create(ml_nodes->comm); Tmat_data.edge = &Edge_Partition; Tmat_data.node = &Node_Partition; Tmat_data.Kn = &(ml_nodes->Amat[MaxMgLevels-1]); ML_Operator_Set_ApplyFuncData( Tmat, Node_Partition.Nlocal, Edge_Partition.Nlocal, ML_EMPTY, (void *) &Tmat_data, Edge_Partition.Nlocal, NULL, 0); ML_Operator_Set_Getrow( Tmat, ML_INTERNAL, Edge_Partition.Nlocal,Tmat_getrow); ML_Operator_Set_ApplyFunc(Tmat, ML_INTERNAL, Tmat_matvec); ML_Comm_Create( &comm); ML_CommInfoOP_Generate( &(Tmat->getrow->pre_comm), update_ghost_nodes, &Node_Partition,comm, Tmat->invec_leng, Node_Partition.Nghost); #endif /********************************************************************/ /* Set some ML parameters. 
*/ /*------------------------------------------------------------------*/ ML_Set_ResidualOutputFrequency(ml_edges, 1); ML_Set_Tolerance(ml_edges, 1.0e-8); ML_Aggregate_Create( &ag ); ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag); ML_Aggregate_Set_DampingFactor(ag, 0.0); /* must use 0 for maxwell */ ML_Aggregate_Set_MaxCoarseSize(ag, 30); ML_Aggregate_Set_Threshold(ag, 0.0); /********************************************************************/ /* Set up Tmat_trans */ /*------------------------------------------------------------------*/ Tmat_trans = ML_Operator_Create(ml_edges->comm); ML_Operator_Transpose_byrow(Tmat, Tmat_trans); Nlevels=ML_Gen_MGHierarchy_UsingReitzinger(ml_edges, &ml_nodes,MaxMgLevels-1, ML_DECREASING,ag,Tmat,Tmat_trans, &Tmat_array,&Tmat_trans_array, smoothPe_flag, 1.5); /* Set the Hiptmair subsmoothers */ if (nodal_smoother == (void *) ML_Gen_Smoother_SymGaussSeidel) { nodal_args = ML_Smoother_Arglist_Create(2); ML_Smoother_Arglist_Set(nodal_args, 0, &nodal_its); ML_Smoother_Arglist_Set(nodal_args, 1, &nodal_omega); } if (edge_smoother == (void *) ML_Gen_Smoother_SymGaussSeidel) { edge_args = ML_Smoother_Arglist_Create(2); ML_Smoother_Arglist_Set(edge_args, 0, &edge_its); ML_Smoother_Arglist_Set(edge_args, 1, &edge_omega); } if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) { nodal_args = ML_Smoother_Arglist_Create(2); ML_Smoother_Arglist_Set(nodal_args, 0, &nodal_its); Nfine_node = Tmat_array[MaxMgLevels-1]->invec_leng; Nfine_node = ML_gsum_int(Nfine_node, ml_edges->comm); } if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) { edge_args = ML_Smoother_Arglist_Create(2); ML_Smoother_Arglist_Set(edge_args, 0, &edge_its); Nfine_edge = Tmat_array[MaxMgLevels-1]->outvec_leng; Nfine_edge = ML_gsum_int(Nfine_edge, ml_edges->comm); } /**************************************************** * Set up smoothers for all levels but the coarsest. 
* ****************************************************/ coarsest_level = MaxMgLevels - Nlevels; for (level = MaxMgLevels-1; level > coarsest_level; level--) { if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) { Ncoarse_edge = Tmat_array[level-1]->outvec_leng; Ncoarse_edge = ML_gsum_int(Ncoarse_edge, ml_edges->comm); edge_coarsening_rate = 2.*((double) Nfine_edge)/ ((double) Ncoarse_edge); ML_Smoother_Arglist_Set(edge_args, 1, &edge_coarsening_rate); Nfine_edge = Ncoarse_edge; } if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) { Ncoarse_node = Tmat_array[level-1]->invec_leng; Ncoarse_node = ML_gsum_int(Ncoarse_node, ml_edges->comm); node_coarsening_rate = 2.*((double) Nfine_node)/ ((double) Ncoarse_node); ML_Smoother_Arglist_Set(nodal_args, 1, &node_coarsening_rate); Nfine_node = Ncoarse_node; } ML_Gen_Smoother_Hiptmair(ml_edges, level, ML_BOTH, Nits_per_presmooth, Tmat_array, Tmat_trans_array, NULL, edge_smoother, edge_args, nodal_smoother,nodal_args, hiptmair_type); } /******************************************* * Set up coarsest level smoother *******************************************/ if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) { edge_coarsening_rate = (double) Nfine_edge; ML_Smoother_Arglist_Set(edge_args, 1, &edge_coarsening_rate); } if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) { node_coarsening_rate = (double) Nfine_node; ML_Smoother_Arglist_Set(nodal_args,1,&node_coarsening_rate); } ML_Gen_CoarseSolverSuperLU( ml_edges, coarsest_level); /* Must be called before invoking the preconditioner */ ML_Gen_Solver(ml_edges, ML_MGV, MaxMgLevels-1, coarsest_level); /* Set the initial guess and the right hand side. Invoke solver */ xxx = (double *) ML_allocate(Edge_Partition.Nlocal*sizeof(double)); ML_random_vec(xxx, Edge_Partition.Nlocal, ml_edges->comm); rhs = (double *) ML_allocate(Edge_Partition.Nlocal*sizeof(double)); ML_random_vec(rhs, Edge_Partition.Nlocal, ml_edges->comm); #ifdef AZTEC /* Choose the Aztec solver and criteria. 
Also tell Aztec that */ /* ML will be supplying the preconditioner. */ AZ_defaults(options, params); options[AZ_solver] = AZ_fixed_pt; options[AZ_solver] = AZ_gmres; options[AZ_kspace] = 80; params[AZ_tol] = tolerance; AZ_set_ML_preconditioner(&Pmat, Ke_mat, ml_edges, options); options[AZ_conv] = AZ_noscaled; AZ_iterate(xxx, rhs, options, params, status, proc_config, Ke_mat, Pmat, NULL); #else ML_Iterate(ml_edges, xxx, rhs); #endif /* clean up. */ ML_Smoother_Arglist_Delete(&nodal_args); ML_Smoother_Arglist_Delete(&edge_args); ML_Aggregate_Destroy(&ag); ML_Destroy(&ml_edges); ML_Destroy(&ml_nodes); #ifdef AZTEC AZ_free((void *) Ke_mat->data_org); AZ_free((void *) Ke_mat->val); AZ_free((void *) Ke_mat->bindx); if (Ke_mat != NULL) AZ_matrix_destroy(&Ke_mat); if (Pmat != NULL) AZ_precond_destroy(&Pmat); if (Kn_mat != NULL) AZ_matrix_destroy(&Kn_mat); #endif free(xxx); free(rhs); ML_Operator_Destroy(&Tmat); ML_Operator_Destroy(&Tmat_trans); ML_MGHierarchy_ReitzingerDestroy(MaxMgLevels-2, &Tmat_array, &Tmat_trans_array); #ifdef ML_MPI MPI_Finalize(); #endif return 0; }
/*
 * Driver (ml_read_elas-style example): read a distributed MSR matrix from the
 * file ".data", build a smoothed-aggregation algebraic multigrid hierarchy
 * with ML (optionally injecting rigid-body-mode near-null-space vectors read
 * from "rigid_body_mode*" files), and solve the system with Aztec CG
 * preconditioned by the ML V-cycle.
 *
 * NOTE(review): this function reads and writes several identifiers that are
 * not declared locally (N_update, update, bindx, external, update_index,
 * extern_index, data_org, cpntr, iii, which_filename, and -- under SCALE_ME --
 * scaling_vect).  Presumably these are file-scope globals defined earlier in
 * this file; confirm against the full source.
 *
 * Returns 0 on success; calls exit(1) on fatal input errors.
 */
int main(int argc, char *argv[])
{
  int num_PDE_eqns=6, N_levels=4, nsmooth=2;   /* 6 dofs/node; 4 MG levels; 2 smoothing sweeps */
  int leng, level, N_grid_pts, coarsest_level;

  /* See Aztec User's Guide for more information on the */
  /* variables that follow.                             */
  int    proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];

  /* data structure for matrix corresponding to the fine grid */
  double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time;
  AZ_MATRIX *Amat;
  AZ_PRECOND *Pmat = NULL;
  ML *ml;
  FILE *fp;
  int i, j, Nrigid, *garbage = NULL;
#ifdef ML_partition
  int nblocks;
  int *block_list = NULL;
  int k;
#endif
  struct AZ_SCALING *scaling;
  ML_Aggregate *ag;
  double *mode, *rigid;                /* one rigid body mode / the packed set of all modes */
  char filename[80];
  double alpha;
  int allocated = 0;                   /* scratch-buffer size for ML_get_matrix_row() */
  int old_prec, old_sol;
  double old_tol;                      /* saved Aztec settings, restored after coarse-solver setup */
  /* double *Amode, beta, biggest; int big_ind = -1, ii; */
  ML_Operator *Amatrix;
  int *rowi_col = NULL, rowi_N, count2, ccc;
  double *rowi_val = NULL;
  double max_diag, min_diag, max_sum, sum;
  int nBlocks, *blockIndices, Ndof;    /* NOTE(review): Ndof is assigned below but never read */

#ifdef ML_partition
  FILE *fp2;
  int count;

  /* Partitioning mode: single required argument is the target processor count. */
  if (argc != 2) {
    printf("Usage: ml_read_elas num_processors\n");
    exit(1);
  }
  else sscanf(argv[1],"%d",&nblocks);
#endif

#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  /* get number of processors and the name of this processor */
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI);
#endif

  /* read in the number of matrix equations (root reads, then broadcast-sums) */
  leng = 0;
  if (proc_config[AZ_node] == 0) {
#ifdef binary
     fp=fopen(".data","rb");
#else
     fp=fopen(".data","r");
#endif
     if (fp==NULL) {
        printf("couldn't open file .data\n");
        exit(1);
     }
#ifdef binary
     fread(&leng, sizeof(int), 1, fp);
#else
     fscanf(fp,"%d",&leng);
#endif
     fclose(fp);
  }
  /* only rank 0 read a nonzero value, so the global sum distributes it */
  leng = AZ_gsum_int(leng, proc_config);

  N_grid_pts=leng/num_PDE_eqns;

  /* initialize the list of global indices. NOTE: the list of global */
  /* indices must be in ascending order so that subsequent calls to  */
  /* AZ_find_index() will function properly.                         */
  if (proc_config[AZ_N_procs] == 1) i = AZ_linear;
  else i = AZ_file;        /* reuse of 'i' as the partitioning-mode flag here */
  AZ_read_update(&N_update, &update, proc_config, N_grid_pts, num_PDE_eqns,i);

  AZ_read_msr_matrix(update, &val, &bindx, N_update, proc_config);

  /* This code is to fix things up so that we are sure we have       */
  /* all block (including the ghost nodes the same size.             */
  AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update);

  /* Localize the matrix (global -> local indexing, ghost setup) without
     reordering rows. */
  AZ_transform_norowreordering(proc_config, &external, bindx, val, update,
                               &update_index, &extern_index, &data_org,
                               N_update, 0, 0, 0, &cpntr, AZ_MSR_MATRIX);

  Amat = AZ_matrix_create( leng );
  AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL);

  Amat->matrix_type = data_org[AZ_matrix_type];

  data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border];

#ifdef SCALE_ME
  ML_MSR_sym_diagonal_scaling(Amat, proc_config, &scaling_vect);
#endif

  start_time = AZ_second();

  options[AZ_scaling] = AZ_none;
  ML_Create(&ml, N_levels);
  ML_Set_PrintLevel(10);

  /* set up discretization matrix and matrix vector function */
  AZ_ML_Set_Amat(ml, N_levels-1, N_update, N_update, Amat, proc_config);

#ifdef ML_partition
  /* this code is meant to partition the matrices so that things can be */
  /* run in parallel later.                                             */
  /* It is meant to be run on only one processor.                       */
#ifdef MB_MODIF
  fp2 = fopen(".update","w");
#else
  fp2 = fopen("partition_file","w");
#endif

  /* Collapse the block structure to nodes, partition with METIS, then emit
     one partition per block: first the dof count, then each dof index. */
  ML_Operator_AmalgamateAndDropWeak(&(ml->Amat[N_levels-1]), num_PDE_eqns, 0.0);
  ML_Gen_Blocks_Metis(ml, N_levels-1, &nblocks, &block_list);

  for (i = 0; i < nblocks; i++) {
     count = 0;
     for (j = 0; j < ml->Amat[N_levels-1].outvec_leng; j++) {
        if (block_list[j] == i) count++;
     }
     fprintf(fp2," %d\n",count*num_PDE_eqns);
     for (j = 0; j < ml->Amat[N_levels-1].outvec_leng; j++) {
        if (block_list[j] == i) {
           for (k = 0; k < num_PDE_eqns; k++) fprintf(fp2,"%d\n",j*num_PDE_eqns+k);
        }
     }
  }
  fclose(fp2);
  ML_Operator_UnAmalgamateAndDropWeak(&(ml->Amat[N_levels-1]),num_PDE_eqns,0.0);
#ifdef MB_MODIF
  printf(" partition file dumped in .update\n");
#endif
  exit(1);   /* partitioning mode only writes the file; never solves */
#endif

  /* ---- Aggregation (coarsening) parameters ---- */
  ML_Aggregate_Create( &ag );
  /* ML_Aggregate_Set_CoarsenScheme_MIS(ag); */
#ifdef MB_MODIF
  ML_Aggregate_Set_DampingFactor(ag,1.50);
#else
  ML_Aggregate_Set_DampingFactor(ag,1.5);
#endif
  ML_Aggregate_Set_CoarsenScheme_METIS(ag);
  ML_Aggregate_Set_NodesPerAggr( ml, ag, -1, 35);
  /* ML_Aggregate_Set_Phase3AggregateCreationAggressiveness(ag, 10.001); */
  ML_Aggregate_Set_Threshold(ag, 0.0);
  ML_Aggregate_Set_MaxCoarseSize( ag, 300);

  /* read in the rigid body modes */
  Nrigid = 0;

  /* to ensure compatibility with RBM dumping software: count files named
     with either a zero-padded ("%02d") or plain ("%d") suffix.  Only rank 0
     probes the filesystem; the count is summed to all ranks below. */
  if (proc_config[AZ_node] == 0) {
     sprintf(filename,"rigid_body_mode%02d",Nrigid+1);
     while( (fp = fopen(filename,"r")) != NULL) {
        which_filename = 1;      /* remember which naming convention matched */
        fclose(fp);
        Nrigid++;
        sprintf(filename,"rigid_body_mode%02d",Nrigid+1);
     }
     sprintf(filename,"rigid_body_mode%d",Nrigid+1);
     while( (fp = fopen(filename,"r")) != NULL) {
        fclose(fp);
        Nrigid++;
        sprintf(filename,"rigid_body_mode%d",Nrigid+1);
     }
  }
  Nrigid = AZ_gsum_int(Nrigid,proc_config);

  if (Nrigid != 0) {
     rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) );
     if (rigid == NULL) {
        /* NOTE(review): allocation failure is reported but execution falls
           through and 'rigid' is dereferenced below -- verify intent. */
        printf("Error: Not enough space for rigid body modes\n");
     }
  }

  rhs = (double *) malloc(leng*sizeof(double));
  xxx = (double *) malloc(leng*sizeof(double));

  for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0;

  /* Read each rigid body mode, reorder it to Aztec's local ordering,
     Gram-Schmidt it against the previously read modes, and pack it into
     'rigid'. */
  for (i = 0; i < Nrigid; i++) {
     if (which_filename == 1) sprintf(filename,"rigid_body_mode%02d",i+1);
     else sprintf(filename,"rigid_body_mode%d",i+1);
     AZ_input_msr_matrix(filename,update,&mode,&garbage,N_update,proc_config);
     AZ_reorder_vec(mode, data_org, update_index, NULL);

     /* here is something to stick a rigid body mode as the initial */
     /* The idea is to solve A x = 0 without smoothing with a two   */
     /* level method. If everything is done properly, we should     */
     /* converge in 2 iterations.                                   */
     /* Note: we must also zero out components of the rigid body    */
     /* mode that correspond to Dirichlet bcs.                      */
     /* NOTE(review): 'i == -4' can never be true for i >= 0 -- this is a
        deliberately disabled debug path; flip the constant to re-enable. */
     if (i == -4) {
        for (iii = 0; iii < leng; iii++) xxx[iii] = mode[iii];

        ccc = 0;
        Amatrix = &(ml->Amat[N_levels-1]);
        for (iii = 0; iii < Amatrix->outvec_leng; iii++) {
           ML_get_matrix_row(Amatrix,1,&iii,&allocated,&rowi_col,&rowi_val,
                             &rowi_N, 0);
           count2 = 0;
           for (j = 0; j < rowi_N; j++) if (rowi_val[j] != 0.) count2++;
           /* a row with <= 1 nonzero is treated as a Dirichlet row */
           if (count2 <= 1) { xxx[iii] = 0.; ccc++; }
        }
        free(rowi_col); free(rowi_val);
        allocated = 0; rowi_col = NULL; rowi_val = NULL;
     }

     /*
      *  Rescale matrix/rigid body modes and checking
      *
      AZ_sym_rescale_sl(mode, Amat->data_org, options, proc_config, scaling);
      Amat->matvec(mode, rigid, Amat, proc_config);

      for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]);
      */

     /* Here is some code to check that the rigid body modes are  */
     /* really rigid body modes. The idea is to multiply by A and */
     /* then to zero out things that we "think" are boundaries.   */
     /* In this hardwired example, things near boundaries         */
     /* correspond to matrix rows that do not have 81 nonzeros.   */
     /*
     Amode = (double *) malloc(leng*sizeof(double));
     Amat->matvec(mode, Amode, Amat, proc_config);
     j = 0;
     biggest = 0.0;
     for (ii = 0; ii < N_update; ii++) {
        if ( Amat->bindx[ii+1] - Amat->bindx[ii] != 80) {
           Amode[ii] = 0.; j++;
        }
        else {
           if ( fabs(Amode[ii]) > biggest) {
              biggest=fabs(Amode[ii]); big_ind = ii;
           }
        }
     }
     printf("%d entries zeroed out of %d elements\n",j,N_update);
     alpha = AZ_gdot(N_update, Amode, Amode, proc_config);
     beta = AZ_gdot(N_update, mode, mode, proc_config);
     printf("||A r||^2 =%e, ||r||^2 = %e, ratio = %e\n",
            alpha,beta,alpha/beta);
     printf("the biggest is %e at row %d\n",biggest,big_ind);
     free(Amode);
     */

     /* orthogonalize mode with respect to previous modes. */
     /* NOTE(review): the projection coefficient 'alpha' is computed but the
        daxpy that would apply it is commented out, so no orthogonalization
        actually happens -- confirm this is intended. */
     for (j = 0; j < i; j++) {
        alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/
                 AZ_gdot(N_update, &(rigid[j*N_update]),
                         &(rigid[j*N_update]), proc_config);
        /* daxpy_(&N_update,&alpha,&(rigid[j*N_update]), &one, mode, &one); */
     }
#ifndef MB_MODIF
     printf(" after mb %e %e %e\n",mode[0],mode[1],mode[2]);
#endif

     for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j];
     free(mode);
     free(garbage); garbage = NULL;
  }

  /* Hand the packed modes to ML as the near-null space for aggregation. */
  if (Nrigid != 0) {
     ML_Aggregate_Set_BlockDiagScaling(ag);
     ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update);
     free(rigid);
  }
#ifdef SCALE_ME
  ML_Aggregate_Scale_NullSpace(ag, scaling_vect, N_update);
#endif

  coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, N_levels-1,
                                                       ML_DECREASING, ag);
  AZ_defaults(options, params);
  /* ML returns the number of generated levels; convert to a level index. */
  coarsest_level = N_levels - coarsest_level;
  if ( proc_config[AZ_node] == 0 )
     printf("Coarse level = %d \n", coarsest_level);

  /* set up smoothers */
  for (level = N_levels-1; level > coarsest_level; level--) {

     /* ML_Gen_Smoother_BlockGaussSeidel(ml, level,ML_BOTH, 1, 1., num_PDE_eqns); */

     /* Sparse approximate inverse smoother that actually does both */
     /* pre and post smoothing.                                     */
     /*
      ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth,
                                parasails_sym, parasails_thresh,
                                parasails_nlevels, parasails_filter,
                                parasails_loadbal, parasails_factorized);
      */

     /* This is the symmetric Gauss-Seidel smoothing that we usually use. */
     /* In parallel, it is not a true Gauss-Seidel in that each processor */
     /* does a Gauss-Seidel on its local submatrix independent of the     */
     /* other processors.                                                 */

     /* ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth); */
     Ndof = ml->Amat[level].invec_leng;

     /* Active smoother: block-diagonally scaled Chebyshev over aggregates. */
     ML_Gen_Blocks_Aggregates(ag, level, &nBlocks, &blockIndices);

     ML_Gen_Smoother_BlockDiagScaledCheby(ml, level, ML_BOTH, 30.,nsmooth,
                                          nBlocks, blockIndices);

     /* ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.); */

     /* This is a true Gauss Seidel in parallel. This seems to work for  */
     /* elasticity problems. However, I don't believe that this is very  */
     /* efficient in parallel.                                           */
     /*
      nblocks = ml->Amat[level].invec_leng/num_PDE_eqns;
      blocks = (int *) ML_allocate(sizeof(int)*N_update);
      for (i =0; i < ml->Amat[level].invec_leng; i++) blocks[i] = i/num_PDE_eqns;
      ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER,
                                                     nsmooth, 1., nblocks, blocks);
      ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER,
                                                     nsmooth, 1., nblocks, blocks);
      free(blocks);
      */

     /* Block Jacobi Smoothing */
     /*
      nblocks = ml->Amat[level].invec_leng/num_PDE_eqns;
      blocks = (int *) ML_allocate(sizeof(int)*N_update);
      for (i =0; i < ml->Amat[level].invec_leng; i++) blocks[i] = i/num_PDE_eqns;
      ML_Gen_Smoother_VBlockJacobi(ml , level, ML_BOTH, nsmooth, ML_ONE_STEP_CG,
                                   nblocks, blocks);
      free(blocks);
      */

     /* Jacobi Smoothing */
     /*
      ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth, ML_ONE_STEP_CG);
      ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,ML_ONE_STEP_CG);
      */

     /* This does a block Gauss-Seidel (not true GS in parallel)  */
     /* where each processor has 'nblocks' blocks.                */
     /*
      nblocks = 250;
      ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks);
      ML_Gen_Smoother_VBlockJacobi(ml , level, ML_BOTH, nsmooth,ML_ONE_STEP_CG,
                                   nblocks, blocks);
      free(blocks);
      */
     num_PDE_eqns = 6;
  }

  /* Choose coarse grid solver: mls, superlu, symGS, or Aztec */
  /*
  ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth);
  ML_Gen_CoarseSolverSuperLU( ml, coarsest_level);
  */
  /* ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.); */

  /* Coarse solve via an embedded Aztec CG/Jacobi run; save the outer Aztec
     settings first so they can be restored afterwards. */
  old_prec = options[AZ_precond];
  old_sol = options[AZ_solver];
  old_tol = params[AZ_tol];
  /* NOTE(review): tol is assigned twice -- the 1.0e-9 value is dead and
     1.0e-5 is what takes effect. */
  params[AZ_tol] = 1.0e-9;
  params[AZ_tol] = 1.0e-5;
  options[AZ_precond] = AZ_Jacobi;
  options[AZ_solver] = AZ_cg;
  options[AZ_poly_ord] = 1;
  options[AZ_conv] = AZ_r0;
  options[AZ_orth_kvecs] = AZ_TRUE;

  j = AZ_gsum_int(ml->Amat[coarsest_level].outvec_leng, proc_config);

  options[AZ_keep_kvecs] = j - 6;
  options[AZ_max_iter] = options[AZ_keep_kvecs];

  ML_Gen_SmootherAztec(ml, coarsest_level, options, params,
                       proc_config, status, options[AZ_keep_kvecs],
                       ML_PRESMOOTHER, NULL);

  /* restore the outer solver's settings */
  options[AZ_conv] = AZ_noscaled;
  options[AZ_keep_kvecs] = 0;
  options[AZ_orth_kvecs] = 0;
  options[AZ_precond] = old_prec;
  options[AZ_solver] = old_sol;
  params[AZ_tol] = old_tol;

  /* */

#ifdef RST_MODIF
  ML_Gen_Solver(ml, ML_MGV, N_levels-1, coarsest_level);
#else
#ifdef MB_MODIF
  ML_Gen_Solver(ml, ML_SAAMG, N_levels-1, coarsest_level);
#else
  ML_Gen_Solver(ml, ML_MGFULLV, N_levels-1, coarsest_level);
#endif
#endif

  /* Outer Aztec solve settings.  NOTE(review): AZ_solver and AZ_conv are
     each assigned twice; the second assignment wins (CG, unscaled residual)
     and the first is leftover from experimentation. */
  options[AZ_solver] = AZ_GMRESR;
  options[AZ_solver] = AZ_cg;
  options[AZ_scaling] = AZ_none;
  options[AZ_precond] = AZ_user_precond;
  options[AZ_conv] = AZ_r0;
  options[AZ_conv] = AZ_noscaled;
  options[AZ_output] = 1;
  options[AZ_max_iter] = 500;
  options[AZ_poly_ord] = 5;
  options[AZ_kspace] = 40;
  params[AZ_tol] = 4.8e-6;

  AZ_set_ML_preconditioner(&Pmat, Amat, ml, options);
  setup_time = AZ_second() - start_time;

  /* Set rhs */
  /* NOTE(review): the FILE* from this fopen is only used as an existence
     probe and is never fclose'd on the non-NULL path -- verify. */
  fp = fopen("AZ_capture_rhs.dat","r");
  if (fp == NULL) {
     AZ_random_vector(rhs, data_org, proc_config);
     if (proc_config[AZ_node] == 0)
        printf("taking random vector for rhs\n");
     /* NOTE(review): the '-' in the bound makes this loop a no-op;
        removing it would overwrite the random rhs with the constant 7. */
     for (i = 0; i < -N_update; i++) {
        rhs[i] = (double) update[i]; rhs[i] = 7.;
     }
  }
  else {
     if (proc_config[AZ_node]== 0) printf("reading rhs guess from file\n");
     AZ_input_msr_matrix("AZ_capture_rhs.dat", update, &rhs, &garbage,
                         N_update, proc_config);
     free(garbage);
  }
  AZ_reorder_vec(rhs, data_org, update_index, NULL);

  /* Replace rhs with A*rhs (manufactured right-hand side). */
  printf("changing rhs by multiplying with A\n");
  Amat->matvec(rhs, xxx, Amat, proc_config);
  for (i = 0; i < N_update; i++) rhs[i] = xxx[i];

  fp = fopen("AZ_capture_init_guess.dat","r");
  if (fp != NULL) {
     fclose(fp);
     if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n");
     AZ_input_msr_matrix("AZ_capture_init_guess.dat", update, &xxx, &garbage,
                         N_update, proc_config);
     free(garbage);
     /* grow xxx to hold ghost entries needed by the matvec/solver */
     xxx = (double *) realloc(xxx, sizeof(double)*(
                                Amat->data_org[AZ_N_internal]+
                                Amat->data_org[AZ_N_border] +
                                Amat->data_org[AZ_N_external]));
  }
  AZ_reorder_vec(xxx, data_org, update_index, NULL);

  /* if Dirichlet BC ... put the answer in */
  /*
  for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) {
     if ( (val[i] > .99999999) && (val[i] < 1.0000001))
        xxx[i] = rhs[i];
  }
  */

  /* Existence of AZ_no_multilevel.dat selects a plain (non-ML) Aztec solve. */
  fp = fopen("AZ_no_multilevel.dat","r");
  scaling = AZ_scaling_create();
  start_time = AZ_second();

  if (fp != NULL) {
     fclose(fp);
     options[AZ_precond] = AZ_none;
     options[AZ_scaling] = AZ_sym_diag;
     options[AZ_ignore_scaling] = AZ_TRUE;
     options[AZ_keep_info] = 1;

     AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);

     /*
     options[AZ_pre_calc] = AZ_reuse;
     options[AZ_conv] = AZ_expected_values;
     if (proc_config[AZ_node] == 0)
        printf("\n-------- Second solve with improved convergence test -----\n");
     AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);
     if (proc_config[AZ_node] == 0)
        printf("\n-------- Third solve with improved convergence test -----\n");
     AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);
     */
  }
  else {
     options[AZ_keep_info] = 1;
     /* NOTE(review): AZ_conv set twice -- AZ_r0 is what takes effect. */
     options[AZ_conv] = AZ_noscaled;
     options[AZ_conv] = AZ_r0;
     params[AZ_tol] = 1.0e-7;

     /* ML_Iterate(ml, xxx, rhs); */
     alpha = sqrt(AZ_gdot(N_update, xxx, xxx, proc_config));
     printf("init guess = %e\n",alpha);
     alpha = sqrt(AZ_gdot(N_update, rhs, rhs, proc_config));
     printf("rhs = %e\n",alpha);

#ifdef SCALE_ME
     ML_MSR_scalerhs(rhs, scaling_vect, data_org[AZ_N_internal] +
                     data_org[AZ_N_border]);
     ML_MSR_scalesol(xxx, scaling_vect, data_org[AZ_N_internal] +
                     data_org[AZ_N_border]);
#endif

     /* Diagnostic sweep over the MSR diagonal (val[0..N_update-1] holds the
        diagonal in MSR format) and row sums. */
     max_diag = 0.;
     min_diag = 1.e30;
     max_sum = 0.;
     for (i = 0; i < N_update; i++) {
        if (Amat->val[i] < 0.)
           printf("woops negative diagonal A(%d,%d) = %e\n",
                  i,i,Amat->val[i]);
        if (Amat->val[i] > max_diag) max_diag = Amat->val[i];
        if (Amat->val[i] < min_diag) min_diag = Amat->val[i];
        sum = fabs(Amat->val[i]);
        for (j = Amat->bindx[i]; j < Amat->bindx[i+1]; j++) {
           sum += fabs(Amat->val[j]);
        }
        if (sum > max_sum) max_sum = sum;
     }
     printf("Largest diagonal = %e, min diag = %e large abs row sum = %e\n",
            max_diag, min_diag, max_sum);

     /* The actual ML-preconditioned solve. */
     AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);

     options[AZ_pre_calc] = AZ_reuse;
     options[AZ_conv] = AZ_expected_values;
     /*
     if (proc_config[AZ_node] == 0)
        printf("\n-------- Second solve with improved convergence test -----\n");
     AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);
     if (proc_config[AZ_node] == 0)
        printf("\n-------- Third solve with improved convergence test -----\n");
     AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);
     */
  }
  solve_time = AZ_second() - start_time;

  if (proc_config[AZ_node] == 0)
     printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time);

  /* Print the solution entries at a few hard-wired global ids; the dummy
     AZ_gsum_int(7, ...) calls act as processor synchronization points so the
     output is not interleaved. */
  if (proc_config[AZ_node] == 0)
     printf("Printing out a few entries of the solution ...\n");

  for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n",
                                 update[j],xxx[update_index[j]]); fflush(stdout);}
  j = AZ_gsum_int(7, proc_config); /* sync processors */
  for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n",
                                  update[j],xxx[update_index[j]]); fflush(stdout);}
  j = AZ_gsum_int(7, proc_config); /* sync processors */
  for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n",
                                  update[j],xxx[update_index[j]]); fflush(stdout);}
  j = AZ_gsum_int(7, proc_config); /* sync processors */
  for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n",
                                   update[j],xxx[update_index[j]]); fflush(stdout);}
  j = AZ_gsum_int(7, proc_config); /* sync processors */
  for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n",
                                   update[j],xxx[update_index[j]]); fflush(stdout);}

  /* Tear down ML and Aztec structures and release the matrix arrays. */
  ML_Aggregate_Destroy(&ag);
  ML_Destroy(&ml);
  AZ_free((void *) Amat->data_org);
  AZ_free((void *) Amat->val);
  AZ_free((void *) Amat->bindx);
  AZ_free((void *) update);
  AZ_free((void *) external);
  AZ_free((void *) extern_index);
  AZ_free((void *) update_index);
  AZ_scaling_destroy(&scaling);
  if (Amat != NULL)
     AZ_matrix_destroy(&Amat);
  if (Pmat != NULL)
     AZ_precond_destroy(&Pmat);
  free(xxx);
  free(rhs);

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return 0;
}
int main(int argc, char **argv) /* * Initial main driver for GOMA. Derived from a (1/93) release of * the rf_salsa program by * * Original Authors: John Shadid (1421) * Scott Hutchinson (1421) * Harry Moffat (1421) * * Date: 12/3/92 * * * Updates and Changes by: * Randy Schunk (9111) * P. A. Sackinger (9111) * R. R. Rao (9111) * R. A. Cairncross (Univ. of Delaware) * Dates: 2/93 - 6/96 * * Modified for continuation * Ian Gates * Dates: 2/98 - 10/98 * Dates: 7/99 - 8/99 * * Last modified: Wed June 26 14:21:35 MST 1994 [email protected] * Hello. * * Note: Many modifications from an early 2/93 pre-release * version of rf_salsa were made by various persons * in order to test ideas about moving/deforming meshes... */ { /* Local Declarations */ double time_start, total_time; /* timing variables */ #ifndef PARALLEL /* struct tm *tm_ptr; additional serial timing variables */ time_t now; #endif int error; int i; int j; char **ptmp; char *yo; struct Command_line_command **clc=NULL; /* point to command line structure */ int nclc = 0; /* number of command line commands */ /********************** BEGIN EXECUTION ***************************************/ #ifdef FP_EXCEPT feenableexcept ((FE_OVERFLOW | FE_DIVBYZERO | FE_INVALID)); #endif /* assume number of commands is less than or equal to the number of * arguments in the command line minus 1 (1st is program name) */ /* * Get the name of the executable, yo */ yo = argv[0]; #ifdef PARALLEL MPI_Init(&argc, &argv); time_start = MPI_Wtime(); #endif /* PARALLEL */ #ifndef PARALLEL (void)time(&now); time_start = (double)now; #endif /* PARALLEL */ time_goma_started = time_start; Argv = argv; Argc = argc; #ifdef PARALLEL /* * Determine the parallel processing status, if any. We need to know * pretty early if we're "one of many" or the only process. 
*/ error = MPI_Comm_size(MPI_COMM_WORLD, &Num_Proc); error = MPI_Comm_rank(MPI_COMM_WORLD, &ProcID); /* * Setup a default Proc_config so we can use utility routines * from Aztec */ AZ_set_proc_config(Proc_Config, MPI_COMM_WORLD); /* set the output limit flag if need be */ if( Num_Proc > DP_PROC_PRINT_LIMIT ) Unlimited_Output = FALSE; #ifdef HAVE_MPE_H error = MPE_Init_log(); #endif /* HAVE_MPE_H */ Dim = 0; /* for any hypercube legacy code... */ #endif /* PARALLEL */ #ifndef PARALLEL Dim = 0; ProcID = 0; Num_Proc = 1; #endif /* PARALLEL */ /* * HKM - Change the ieee exception handling based on the machine and * the level of debugging/speed desired. This call currently causes * core dumps for floating point exceptions. */ handle_ieee(); log_msg("--------------"); log_msg("GOMA begins..."); /* * Some initial stuff that only the master process does. */ if ( ProcID == 0 ) { if (argc > 1) { log_msg("Preprocessing command line options."); clc = (struct Command_line_command **) smalloc( argc * sizeof(struct Command_line_command *)); for (i=0; i<argc; i++) { clc[i] = (struct Command_line_command *) smalloc(sizeof(struct Command_line_command)); clc[i]->type = 0; /* initialize command line structure */ clc[i]->i_val = 0; clc[i]->r_val = 0.; clc[i]->string = (char *) smalloc(MAX_COMMAND_LINE_LENGTH*sizeof(char)); for ( j=0; j<MAX_COMMAND_LINE_LENGTH; j++) { clc[i]->string[j] = '\0'; } #ifdef DEBUG fprintf(stderr, "clc[%d]->string is at 0x%x\n", i, clc[i]->string); fprintf(stderr, "clc[%d] is at 0x%x\n", i, clc[i]); #endif } } strcpy(Input_File, "input"); strcpy(Echo_Input_File , "echo_input"); if (argc > 1) translate_command_line(argc, argv, clc, &nclc); ECHO("OPEN", Echo_Input_File); echo_command_line( argc, argv, Echo_Input_File ); print_code_version(); ptmp = legal_notice; while ( strcmp(*ptmp, LAST_LEGAL_STRING) != 0 ) { fprintf(stderr, "%s", *ptmp++); } } /* * Allocate the uniform problem description structure and * the problem description structures on all processors */ 
error = pd_alloc(); EH(error, "pd_alloc problem"); #ifdef DEBUG fprintf(stderr, "P_%d at barrier after pd_alloc\n", ProcID); #ifdef PARALLEL error = MPI_Barrier(MPI_COMM_WORLD); #endif #endif log_msg("Allocating mp, gn, ..."); error = mp_alloc(); EH(error, "mp_alloc problem"); error = gn_alloc(); EH(error, "gn_alloc problem"); error = ve_alloc(); EH(error, "ve_alloc problem"); error = elc_alloc(); EH(error, "elc_alloc problem"); error = elc_rs_alloc(); EH(error, "elc_alloc problem"); error = cr_alloc(); EH(error, "cr_alloc problem"); error = evp_alloc(); EH(error, "evp_alloc problem"); error = tran_alloc(); EH(error, "tran_alloc problem"); error = eigen_alloc(); EH(error, "eigen_alloc problem"); error = cont_alloc(); EH(error, "cont_alloc problem"); error = loca_alloc(); EH(error, "loca_alloc problem"); error = efv_alloc(); EH(error, "efv_alloc problem"); #ifdef DEBUG fprintf(stderr, "P_%d at barrier before read_input_file()\n", ProcID); #ifdef PARALLEL error = MPI_Barrier(MPI_COMM_WORLD); #endif #endif /* * Read ASCII input file, data files, related exodusII FEM databases. */ if ( ProcID == 0 ) { log_msg("Reading input file ..."); read_input_file(clc, nclc); /* Read ascii input file get file names */ /* update inputed data to account for command line arguments that * might override the input deck... */ log_msg("Overriding any input file specs w/ any command line specs..."); if (argc > 1) apply_command_line(clc, nclc); #ifdef DEBUG DPRINTF(stderr, "apply_command_line() is done.\n"); #endif } /* * The user-defined material properties, etc. available to goma users * mean that some dynamically allocated data needs to be communicated. * * To handle this, sizing information from the input file scan is * broadcast in stages so that the other processors can allocate space * accordingly to hold the data. * * Note: instead of handpacking a data structure, use MPI derived datatypes * to gather and scatter. Pray this is done efficiently. 
Certainly it costs * less from a memory standpoint. */ #ifdef PARALLEL /* * Make sure the input file was successully processed before moving on */ check_parallel_error("Input file error"); /* * This is some sizing information that helps fit a little bit more * onto the ark later on. */ #ifdef DEBUG fprintf(stderr, "P_%d at barrier before noahs_raven()\n", ProcID); error = MPI_Barrier(MPI_COMM_WORLD); #endif noahs_raven(); #ifdef DEBUG fprintf(stderr, "P_%d at barrier before MPI_Bcast of Noahs_Raven\n", ProcID); error = MPI_Barrier(MPI_COMM_WORLD); #endif MPI_Bcast(MPI_BOTTOM, 1, Noahs_Raven->new_type, 0, MPI_COMM_WORLD); #ifdef DEBUG fprintf(stderr, "P_%d at barrier after Bcast/before raven_landing()\n", ProcID); error = MPI_Barrier(MPI_COMM_WORLD); #endif /* * Get the other processors ready to handle ark data. */ raven_landing(); #ifdef DEBUG fprintf(stderr, "P_%d at barrier before noahs_ark()\n", ProcID); error = MPI_Barrier(MPI_COMM_WORLD); #endif /* * This is the main body of communicated information, including some * whose sizes were determined because of advanced legwork by the raven. */ noahs_ark(); MPI_Bcast(MPI_BOTTOM, 1, Noahs_Ark->new_type, 0, MPI_COMM_WORLD); /* * Chemkin was initialized on processor zero during the input file * process. Now, distribute it to all processors */ #ifdef USE_CHEMKIN if (Chemkin_Needed) { chemkin_initialize_mp(); } #endif /* * Once the ark has landed, there are additional things that will need to * be sent by dove. Example: BC_Types[]->u-BC arrays. * */ ark_landing(); noahs_dove(); MPI_Bcast(MPI_BOTTOM, 1, Noahs_Dove->new_type, 0, MPI_COMM_WORLD); #endif /* End of ifdef PARALLEL */ /* * We sent the packed line to all processors that contained geometry * creation commands. Now we need to step through it and create * geometry as we go (including possibly reading an ACIS .sat file). 
* */ /* Check to see if BRK File option exists and if so check if file exits */ if (Brk_Flag == 1) { check_for_brkfile(Brk_File); } check_parallel_error("Error encountered in check for brkfile"); /* Now break the exodus files */ if (Num_Proc > 1 && ProcID == 0 && Brk_Flag == 1) { call_brk(); } check_parallel_error("Error in brking exodus files"); MPI_Barrier(MPI_COMM_WORLD); /* * For parallel execution, assume the following variables will be changed * to reflect the multiple file aspect of the problem. * * FEM file = file.exoII --> file_3of15.exoII * * Output EXODUS II file = out.exoII --> out_3of15.exoII * */ /* * Allocate space for structures holding the EXODUS II finite element * database information and for the Distributed Processing information. * * These are mostly skeletons with pointers that get allocated in the * rd_exoII and rd_dpi routines. Remember to free up those arrays first * before freeing the major pointers. */ EXO_ptr = alloc_struct_1(Exo_DB, 1); init_exo_struct(EXO_ptr); DPI_ptr = alloc_struct_1(Dpi, 1); init_dpi_struct(DPI_ptr); log_msg("Reading mesh from EXODUS II file..."); error = read_mesh_exoII(EXO_ptr, DPI_ptr); /* * Missing files on any processor are detected at a lower level * forcing a return to the higher level * rd_exo --> rd_mesh --> main * Shutdown now, if any of the exodus files weren't found */ if (error < 0) { #ifdef PARALLEL MPI_Finalize(); #endif return(-1); } /* * All of the MPI_Type_commit() calls called behind the scenes that build * the dove, ark and raven really allocated memory. Let's free it up now that * the initial information has been communicated. */ #ifdef PARALLEL MPI_Type_free(&(Noahs_Raven->new_type)); MPI_Type_free(&(Noahs_Ark->new_type)); MPI_Type_free(&(Noahs_Dove->new_type)); #endif /* * Setup the rest of the Problem Description structure that depends on * the mesh that was read in from the EXODUS II file... * * Note that memory allocation and some setup has already been performed * in mm_input()... 
*/ error = setup_pd(); EH( error, "Problem setting up Problem_Description."); /* * Let's check to see if we need the large elasto-plastic global tensors * and allocate them if so */ error = evp_tensor_alloc(EXO_ptr); EH( error, "Problems setting up evp tensors"); /* * Now that we know about what kind of problem we're solving and the * mesh information, let's allocate space for elemental assembly structures * */ #ifdef DEBUG DPRINTF(stderr, "About to assembly_alloc()...\n"); #endif log_msg("Assembly allocation..."); error = assembly_alloc(EXO_ptr); EH( error, "Problem from assembly_alloc"); if (Debug_Flag) { DPRINTF(stderr, "%s: setting up EXODUS II output files...\n", yo); } /* * These are not critical - just niceties. Also, they should not overburden * your db with too much of this - they're capped verbiage compliant routines. */ add_qa_stamp(EXO_ptr); add_info_stamp(EXO_ptr); #ifdef DEBUG fprintf(stderr, "added qa and info stamps\n"); #endif /* * If the output EXODUS II database file is different from the input * file, then we'll need to replicate all the basic mesh information. * But, remember that if we're parallel, that the output file names must * be multiplexed first... 
*/ if ( Num_Proc > 1 ) { multiname(ExoFileOut, ProcID, Num_Proc); multiname(Init_GuessFile, ProcID, Num_Proc); if ( strcmp( Soln_OutFile, "" ) != 0 ) { multiname(Soln_OutFile, ProcID, Num_Proc); } if( strcmp( ExoAuxFile, "" ) != 0 ) { multiname(ExoAuxFile, ProcID, Num_Proc); } if( efv->Num_external_field != 0 ) { for( i=0; i<efv->Num_external_field; i++ ) { multiname(efv->file_nm[i], ProcID, Num_Proc); } } } /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * Preprocess the exodus mesh * -> Allocate pointers to structures containing element * side bc info, First_Elem_Side_BC_Array, and * element edge info, First_Elem_Edge_BC_Array. * -> Determine Unique_Element_Types[] array */ #ifdef DEBUG fprintf(stderr, "pre_process()...\n"); #endif log_msg("Pre processing of mesh..."); #ifdef PARALLEL error = MPI_Barrier(MPI_COMM_WORLD); #endif pre_process(EXO_ptr); /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * Load up a few key indeces in the bfd prototype basis function structures * and make sure that each active eqn/vbl has a bf[v] that points to the * right bfd[]...needs pre_process to find out the number of unique * element types in the problem. 
*/ #ifdef DEBUG fprintf(stderr, "bf_init()...\n"); #endif log_msg("Basis function initialization..."); error = bf_init(EXO_ptr); EH( error, "Problem from bf_init"); /* * check for parallel errors before continuing */ check_parallel_error("Error encountered in problem setup"); /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * Allocate space for each communication exchange description. */ #ifdef PARALLEL #ifdef DEBUG fprintf(stderr, "P_%d: Parallel cx allocation\n", ProcID); #endif if (DPI_ptr->num_neighbors > 0) { cx = alloc_struct_1(Comm_Ex, DPI_ptr->num_neighbors); Request = alloc_struct_1(MPI_Request, Num_Requests * DPI_ptr->num_neighbors); Status = alloc_struct_1(MPI_Status, Num_Requests * DPI_ptr->num_neighbors); } #endif /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * SET UP THE PROBLEM * * Setup node-based structures * Finalise how boundary conditions are to be handled * Determine what unknowns are at each owned node and then tell * neighboring processors about your nodes * Set up communications pattern for fast unknown updates between * processors. */ (void) setup_problem(EXO_ptr, DPI_ptr); /* * check for parallel errors before continuing */ check_parallel_error("Error encountered in problem setup"); /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * CREATE BRK_FILE IF ONE DOES NOT EXIST * * If no Brk_File exists but the option was configured in the input or * optional command we create one now and exit from goma. 
*/ if ( Brk_Flag == 2 ) { write_brk_file(Brk_File, EXO_ptr); exit(0); } /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * WRITE OUT INITIAL INFO TO EXODUS FILE */ /* * Only have to initialize the exodus file if we are using different * files for the output versus the input mesh */ if (strcmp(ExoFile, ExoFileOut)) { /* * Temporarily we'll need to renumber the nodes and elements in the * mesh to be 1-based. After writing, return to the 0 based indexing * that is more convenient in C. */ #ifdef DEBUG fprintf(stderr, "1-base; wr_mesh; 0-base\n"); #endif one_base(EXO_ptr); wr_mesh_exo(EXO_ptr, ExoFileOut, 0); zero_base(EXO_ptr); /* * If running on a distributed computer, augment the plain finite * element information of EXODUS with the description of how this * piece fits into the global problem. */ if (Num_Proc > 1) { #ifdef PARALLEL #ifdef DEBUG fprintf(stderr, "P_%d at barrier before wr_dpi()\n", ProcID); fprintf(stderr, "P_%d ExoFileOut = \"%s\"\n", ProcID, ExoFileOut); error = MPI_Barrier(MPI_COMM_WORLD); #endif #endif wr_dpi(DPI_ptr, ExoFileOut, 0); } } /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * SOLVE THE PROBLEM */ if (Debug_Flag) { switch (Continuation) { case ALC_ZEROTH: P0PRINTF("%s: continue_problem (zeroth order) ...\n", yo); break; case ALC_FIRST: P0PRINTF("%s: continue_problem (first order) ...\n", yo); break; case HUN_ZEROTH: P0PRINTF("%s: hunt_problem (zeroth order) ...\n", yo); break; case HUN_FIRST: P0PRINTF("%s: hunt_problem (first order) ...\n", yo); break; case LOCA: P0PRINTF("%s: do_loca ...\n", yo); break; default: P0PRINTF("%s: solve_problem...\n", yo); break; } } #ifdef DEBUG switch 
(Continuation) { case ALC_ZEROTH: DPRINTF(stderr, "%s: continue_problem (zeroth order) ...\n", yo); break; case ALC_FIRST: DPRINTF(stderr, "%s: continue_problem (first order) ...\n", yo); break; case HUN_ZEROTH: DPRINTF(stderr, "%s: hunt_problem (zeroth order) ...\n", yo); break; case HUN_FIRST: DPRINTF(stderr, "%s: hunt_problem (first order) ...\n", yo); break; case LOCA: DPRINTF(stderr, "%s: do_loca ...\n", yo); break; default: DPRINTF(stderr, "%s: solve_problem...\n", yo); break; } #endif if( TimeIntegration == TRANSIENT) { Continuation = ALC_NONE; if (Debug_Flag) { P0PRINTF("%s: solve_problem...TRANSIENT superceded Continuation...\n", yo); } #ifdef DEBUG DPRINTF(stderr, "%s: solve_problem...TRANSIENT superceded Continuation...\n", yo); #endif solve_problem(EXO_ptr, DPI_ptr, NULL); } switch (Continuation) { case ALC_ZEROTH: case ALC_FIRST: log_msg("Solving continuation problem"); continue_problem(cx, EXO_ptr, DPI_ptr); break; case HUN_ZEROTH: case HUN_FIRST: log_msg("Solving hunt problem"); hunt_problem(cx, EXO_ptr, DPI_ptr); break; case LOCA: log_msg("Solving continuation problem with LOCA"); error = do_loca(cx, EXO_ptr, DPI_ptr); break; default: log_msg("Solving problem"); if (loca_in->Cont_Alg == LOCA_LSA_ONLY) { error = do_loca(cx, EXO_ptr, DPI_ptr); } else if(TimeIntegration != TRANSIENT) { solve_problem(EXO_ptr, DPI_ptr, NULL); } break; } #ifdef PARALLEL MPI_Barrier(MPI_COMM_WORLD); #endif if (ProcID == 0 && Brk_Flag == 1 && Num_Proc > 1) { fix_output(); } /***********************************************************************/ /***********************************************************************/ /***********************************************************************/ /* * PRINT A MESSAGE TO STDOUT SAYING WE ARE DONE */ P0PRINTF("\n-done\n\n"); /***********************************************************************/ /***********************************************************************/ 
/***********************************************************************/ /* * FREE MEMORY ALLOCATED BY THE PROGRAM */ /* * free the element block / element based structures */ free_element_blocks(EXO_ptr); /* * free nodal based structures */ free_nodes(); #ifdef FREE_PROBLEM free_problem ( EXO_ptr, DPI_ptr ); #endif /* * Free command line stuff */ if ( ProcID == 0 ) { if ( argc > 1 ) { for (i=0; i<argc; i++) { #ifdef DEBUG fprintf(stderr, "clc[%d]->string &= 0x%x\n", i, clc[i]->string); fprintf(stderr, "clc[%d] &= 0x%x\n", i, clc[i]); #endif safer_free((void **) &(clc[i]->string)); safer_free((void **) (clc + i)); } safer_free((void **) &clc); } } /* * Free exodus database structures */ free_exo(EXO_ptr); safer_free((void **) &EXO_ptr); if ( Num_Proc > 1 ) { free_dpi(DPI_ptr); } else { free_dpi_uni(DPI_ptr); } safer_free((void **) &DPI_ptr); /* * Remove front scratch file [/tmp/lu.'pid'.0] */ if (Linear_Solver == FRONT) { unlerr = unlink(front_scratch_directory); WH(unlerr, "Unlink problem with front scratch file"); } #ifdef PARALLEL total_time = ( MPI_Wtime() - time_start )/ 60. ; DPRINTF(stderr, "\nProc 0 runtime: %10.2f Minutes.\n\n",total_time); MPI_Finalize(); #endif #ifndef PARALLEL (void)time(&now); total_time = (double)(now) - time_start; fprintf(stderr, "\nProc 0 runtime: %10.2f Minutes.\n\n",total_time/60); #endif fflush(stdout); fflush(stderr); log_msg("GOMA ends normally."); return (0); }
int main(int argc, char *argv[]) { char global[]="global"; char local[]="local"; int proc_config[AZ_PROC_SIZE];/* Processor information. */ int options[AZ_OPTIONS_SIZE]; /* Array used to select solver options. */ double params[AZ_PARAMS_SIZE]; /* User selected solver paramters. */ int *data_org; /* Array to specify data layout */ double status[AZ_STATUS_SIZE]; /* Information returned from AZ_solve(). */ int *update; /* vector elements updated on this node. */ int *external; /* vector elements needed by this node. */ int *update_index; /* ordering of update[] and external[] */ int *extern_index; /* locally on this processor. */ int *indx; /* MSR format of real and imag parts */ int *bindx; int *bpntr; int *rpntr; int *cpntr; AZ_MATRIX *Amat; AZ_PRECOND *Prec; double *val; double *x, *b, *xexact, *xsolve; int n_nonzeros, n_blk_nonzeros; int N_update; /* # of block unknowns updated on this node */ int N_local; /* Number scalar equations on this node */ int N_global, N_blk_global; /* Total number of equations */ int N_external, N_blk_eqns; double *val_msr; int *bindx_msr; double norm, d ; int matrix_type; int has_global_indices, option; int i, j, m, mp ; int ione = 1; #ifdef TEST_SINGULAR double * xnull; /* will contain difference of given exact solution and computed solution*/ double * Axnull; /* Product of A time xnull */ double norm_Axnull; #endif #ifdef AZTEC_MPI double MPI_Wtime(void) ; #endif double time ; #ifdef AZTEC_MPI MPI_Init(&argc,&argv); #endif /* get number of processors and the name of this processor */ #ifdef AZTEC_MPI AZ_set_proc_config(proc_config,MPI_COMM_WORLD); #else AZ_set_proc_config(proc_config,0); #endif printf("proc %d of %d is alive\n", proc_config[AZ_node],proc_config[AZ_N_procs]) ; #ifdef AZTEC_MPI MPI_Barrier(MPI_COMM_WORLD) ; #endif #ifdef VBRMATRIX if(argc != 3) perror("error: enter name of data and partition file on command line") ; #else if(argc != 2) perror("error: enter name of data file on command line") ; #endif /* Set exact 
solution to NULL */ xexact = NULL; /* Read matrix file and distribute among processors. Returns with this processor's set of rows */ #ifdef VBRMATRIX read_hb(argv[1], proc_config, &N_global, &n_nonzeros, &val_msr, &bindx_msr, &x, &b, &xexact); create_vbr(argv[2], proc_config, &N_global, &N_blk_global, &n_nonzeros, &n_blk_nonzeros, &N_update, &update, bindx_msr, val_msr, &val, &indx, &rpntr, &cpntr, &bpntr, &bindx); if(proc_config[AZ_node] == 0) { free ((void *) val_msr); free ((void *) bindx_msr); free ((void *) cpntr); } matrix_type = AZ_VBR_MATRIX; #ifdef AZTEC_MPI MPI_Barrier(MPI_COMM_WORLD) ; #endif distrib_vbr_matrix( proc_config, N_global, N_blk_global, &n_nonzeros, &n_blk_nonzeros, &N_update, &update, &val, &indx, &rpntr, &cpntr, &bpntr, &bindx, &x, &b, &xexact); #else read_hb(argv[1], proc_config, &N_global, &n_nonzeros, &val, &bindx, &x, &b, &xexact); #ifdef AZTEC_MPI MPI_Barrier(MPI_COMM_WORLD) ; #endif distrib_msr_matrix(proc_config, N_global, &n_nonzeros, &N_update, &update, &val, &bindx, &x, &b, &xexact); #ifdef DEBUG for (i = 0; i<N_update; i++) if (val[i] == 0.0 ) printf("Zero diagonal at row %d\n",i); #endif matrix_type = AZ_MSR_MATRIX; #endif /* convert matrix to a local distributed matrix */ cpntr = NULL; AZ_transform(proc_config, &external, bindx, val, update, &update_index, &extern_index, &data_org, N_update, indx, bpntr, rpntr, &cpntr, matrix_type); printf("Processor %d: Completed AZ_transform\n",proc_config[AZ_node]) ; has_global_indices = 0; option = AZ_LOCAL; #ifdef VBRMATRIX N_local = rpntr[N_update]; #else N_local = N_update; #endif Amat = AZ_matrix_create(N_local); #ifdef VBRMATRIX AZ_set_VBR(Amat, rpntr, cpntr, bpntr, indx, bindx, val, data_org, N_update, update, option); #else AZ_set_MSR(Amat, bindx, val, data_org, N_update, update, option); #endif printf("proc %d Completed AZ_create_matrix\n",proc_config[AZ_node]) ; #ifdef AZTEC_MPI MPI_Barrier(MPI_COMM_WORLD) ; #endif /* initialize AZTEC options */ AZ_defaults(options, params); 
options[AZ_solver] = AZ_gmres; options[AZ_precond] = AZ_sym_GS; options[AZ_poly_ord] = 1; options[AZ_graph_fill] = 1; params[AZ_rthresh] = 0.0E-7; params[AZ_athresh] = 0.0E-7; options[AZ_overlap] = 1; /* params[AZ_ilut_fill] = 2.0; params[AZ_drop] = 0.01; options[AZ_overlap] = 0; options[AZ_reorder] = 0; params[AZ_rthresh] = 1.0E-1; params[AZ_athresh] = 1.0E-1; options[AZ_precond] = AZ_dom_decomp ; options[AZ_subdomain_solve] = AZ_bilu_ifp; options[AZ_reorder] = 0; options[AZ_graph_fill] = 0; params[AZ_rthresh] = 1.0E-7; params[AZ_athresh] = 1.0E-7; options[AZ_poly_ord] = 1; options[AZ_precond] = AZ_Jacobi; params[AZ_omega] = 1.0; options[AZ_precond] = AZ_none ; options[AZ_poly_ord] = 1; options[AZ_precond] = AZ_Jacobi ; options[AZ_scaling] = AZ_sym_row_sum ; options[AZ_scaling] = AZ_sym_diag; options[AZ_conv] = AZ_noscaled; options[AZ_scaling] = AZ_Jacobi ; options[AZ_precond] = AZ_dom_decomp ; options[AZ_subdomain_solve] = AZ_icc ; options[AZ_subdomain_solve] = AZ_ilut ; params[AZ_omega] = 1.2; params[AZ_ilut_fill] = 2.0; params[AZ_drop] = 0.01; options[AZ_reorder] = 0; options[AZ_overlap] = 0; options[AZ_type_overlap] = AZ_symmetric; options[AZ_precond] = AZ_dom_decomp ; options[AZ_subdomain_solve] = AZ_bilu ; options[AZ_graph_fill] = 0; options[AZ_overlap] = 0; options[AZ_precond] = AZ_dom_decomp ; options[AZ_subdomain_solve] = AZ_bilu_ifp ; options[AZ_graph_fill] = 0; options[AZ_overlap] = 0; params[AZ_rthresh] = 1.0E-3; params[AZ_athresh] = 1.0E-3; options[AZ_poly_ord] = 1; options[AZ_precond] = AZ_Jacobi ; */ options[AZ_kspace] = 600 ; options[AZ_max_iter] = 600 ; params[AZ_tol] = 1.0e-14; #ifdef BGMRES options[AZ_gmres_blocksize] = 3; options[AZ_gmres_num_rhs] = 1; #endif #ifdef DEBUG if (proc_config[AZ_N_procs]==1) write_vec("rhs.dat", N_local, b); #endif /* xsolve is a little longer vector needed to account for external entries. Make it and copy x (initial guess) into it. 
*/ if (has_global_indices) { N_external = 0; } else { N_external = data_org[AZ_N_external]; } xsolve = (double *) calloc(N_local + N_external, sizeof(double)) ; for (i=0; i<N_local; i++) xsolve[i] = x[i]; /* Reorder rhs and xsolve to match matrix ordering from AZ_transform */ if (!has_global_indices) { AZ_reorder_vec(b, data_org, update_index, rpntr) ; AZ_reorder_vec(xsolve, data_org, update_index, rpntr) ; } #ifdef VBRMATRIX AZ_check_vbr(N_update, data_org[AZ_N_ext_blk], AZ_LOCAL, bindx, bpntr, cpntr, rpntr, proc_config); #else AZ_check_msr(bindx, N_update, N_external, AZ_LOCAL, proc_config); #endif printf("Processor %d of %d N_local = %d N_external = %d NNZ = %d\n", proc_config[AZ_node],proc_config[AZ_N_procs],N_local,N_external, n_nonzeros); /* solve the system of equations using b as the right hand side */ Prec = AZ_precond_create(Amat,AZ_precondition, NULL); AZ_iterate(xsolve, b, options, params, status, proc_config, Amat, Prec, NULL); /*AZ_ifpack_iterate(xsolve, b, options, params, status, proc_config, Amat);*/ if (proc_config[AZ_node]==0) { printf("True residual norm = %22.16g\n",status[AZ_r]); printf("True scaled res = %22.16g\n",status[AZ_scaled_r]); printf("Computed res norm = %22.16g\n",status[AZ_rec_r]); } #ifdef TEST_SINGULAR xnull = (double *) calloc(N_local + N_external, sizeof(double)) ; Axnull = (double *) calloc(N_local + N_external, sizeof(double)) ; for (i=0; i<N_local; i++) xnull[i] = xexact[i]; if (!has_global_indices) AZ_reorder_vec(xnull, data_org, update_index, rpntr); for (i=0; i<N_local; i++) xnull[i] -= xsolve[i]; /* fill with nullerence */ Amat->matvec(xnull, Axnull, Amat, proc_config); norm_Axnull = AZ_gvector_norm(N_local, 2, Axnull, proc_config); if (proc_config[AZ_node]==0) printf("Norm of A(xexact-xsolve) = %12.4g\n",norm_Axnull); free((void *) xnull); free((void *) Axnull); #endif /* Get solution back into original ordering */ if (!has_global_indices) { AZ_invorder_vec(xsolve, data_org, update_index, rpntr, x); free((void *) 
xsolve); } else { free((void *) x); x = xsolve; } #ifdef DEBUG if (proc_config[AZ_N_procs]==1) write_vec("solution.dat", N_local, x); #endif if (xexact != NULL) { double sum = 0.0; double largest = 0.0; for (i=0; i<N_local; i++) sum += fabs(x[i]-xexact[i]); printf("Processor %d: Difference between exact and computed solution = %12.4g\n", proc_config[AZ_node],sum); for (i=0; i<N_local; i++) largest = AZ_MAX(largest,fabs(xexact[i])); printf("Processor %d: Difference divided by max abs value of exact = %12.4g\n", proc_config[AZ_node],sum/largest); } free((void *) val); free((void *) bindx); #ifdef VBRMATRIX free((void *) rpntr); free((void *) bpntr); free((void *) indx); #endif free((void *) b); free((void *) x); if (xexact!=NULL) free((void *) xexact); AZ_free((void *) update); AZ_free((void *) update_index); AZ_free((void *) external); AZ_free((void *) extern_index); AZ_free((void *) data_org); if (cpntr!=NULL) AZ_free((void *) cpntr); AZ_precond_destroy(&Prec); AZ_matrix_destroy(&Amat); #ifdef AZTEC_MPI MPI_Finalize() ; #endif /* end main */ return 0 ; }
int main(int argc, char *argv[]) { /* See Aztec User's Guide for the variables that follow: */ int proc_config[AZ_PROC_SIZE];/* Processor information. */ int N_update; /* # of unknowns updated on this node */ int *update; /* vector elements updated on this node */ int *data_orgA; /* Array to specify data layout */ int *externalA; /* vector elements needed by this node. */ int *update_indexA; /* ordering of update[] and external[] */ int *extern_indexA; /* locally on this processor. */ int *bindxA; /* Sparse matrix to be solved is stored */ double *valA; /* in these MSR arrays. */ AZ_MATRIX *mat_curl_edge; /* curl operator matrix */ int *data_orgB; /* Array to specify data layout */ int *externalB; /* vector elements needed by this node. */ int *update_indexB; /* ordering of update[] and external[] */ int *extern_indexB; /* locally on this processor. */ int *bindxB; /* Sparse matrix to be solved is stored */ double *valB; /* in these MSR arrays. */ AZ_MATRIX *mat_curl_face; /* curl operator matrix */ int *bc_indx; int n_bc; double *efield; double *bfield; double *epsilon; double *tmp_vec; double *tmp_vec2; int i, nrow, x, y, z; int k, t; long startTime, endTime; int myrank; int vec_len; /* get number of processors and the name of this processor */ #ifdef AZ_MPI MPI_Init(&argc,&argv); AZ_set_proc_config(proc_config, MPI_COMM_WORLD); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); #else myrank = 0; AZ_set_proc_config(proc_config, AZ_NOT_MPI); #endif nrow = ncomp * nx * ny * nz; /* overll number of matrix rows */ // Define partitioning: matrix rows (ascending order) owned by this node // Here it is done automatically, but it can also be specified by hand AZ_read_update(&N_update, &update, proc_config, nrow, 1, AZ_linear); // In the following we set up the matrix for the edge centered curl operator // All the steps are described in detail in the AZTEC manual. // first: allocate space for the first matrix. 
bindxA = (int *) malloc((N_update*MAX_NZ_ROW+1)*sizeof(int)); valA = (double *) malloc((N_update*MAX_NZ_ROW+1)*sizeof(double)); if (valA == NULL) perror("Error: Not enough space to create matrix"); // Initialize the index for the first off diagonal element bindxA[0] = N_update+1; // Create the matrix row by row. Each processor creates only rows appearing // in update[] (using global col. numbers). for (i = 0; i < N_update; i++) create_curl_matrix_row_edge(update[i], i, valA, bindxA); // convert matrix to a local distributed matrix AZ_transform(proc_config, &externalA, bindxA, valA, update, &update_indexA, &extern_indexA, &data_orgA, N_update, NULL, NULL, NULL, NULL, AZ_MSR_MATRIX); // convert the matrix arrays into a matrix structure, used in the // matrix vector multiplication mat_curl_edge = AZ_matrix_create(data_orgA[AZ_N_internal] + data_orgA[AZ_N_border]); AZ_set_MSR(mat_curl_edge, bindxA, valA, data_orgA, 0, NULL, AZ_LOCAL); // at this point the edge centered curl matrix is completed. // In the following we set up the matrix for the face centered curl operator // All the steps are described in detail in the AZTEC manual. // first: allocate space for the first matrix. bindxB = (int *) malloc((N_update*MAX_NZ_ROW+1)*sizeof(int)); valB = (double *) malloc((N_update*MAX_NZ_ROW+1)*sizeof(double)); if (valB == NULL) perror("Error: Not enough space to create matrix"); // Initialize the index for the first off diagonal element bindxB[0] = N_update+1; // Create the matrix row by row. Each processor creates only rows appearing // in update[] (using global col. numbers). 
for (i = 0; i < N_update; i++) create_curl_matrix_row_face(update[i], i, valB, bindxB); // convert matrix to a local distributed matrix AZ_transform(proc_config, &externalB, bindxB, valB, update, &update_indexB, &extern_indexB, &data_orgB, N_update, NULL, NULL, NULL, NULL, AZ_MSR_MATRIX); // convert the matrix arrays into a matrix structure, used in the // matrix vector multiplication mat_curl_face = AZ_matrix_create(data_orgB[AZ_N_internal] + data_orgB[AZ_N_border]); AZ_set_MSR(mat_curl_face, bindxB, valB, data_orgB, 0, NULL, AZ_LOCAL); // at this point the face centered curl matrix is completed. // allocate memory for the fields and a temporary vector vec_len = N_update + data_orgA[AZ_N_external]; efield = (double *) malloc(vec_len*sizeof(double)); bfield = (double *) malloc(vec_len*sizeof(double)); epsilon = (double *) malloc(vec_len*sizeof(double)); tmp_vec = (double *) malloc(vec_len*sizeof(double)); tmp_vec2 = (double *) malloc(vec_len*sizeof(double)); // setup the boundary condition. We will get an arry that tells us // which positions need to be updated and where the results needs // to be stored in the E field. setup_bc(update, update_indexB, N_update, &bc_indx, &n_bc); // initialize the field vectors for(k = 0; k < vec_len; k++){ efield[k] = 0.; bfield[k] = 0.; epsilon[k] = 1.; tmp_vec[k] = 0.; } // initialize the dielectric structure. Ugly hard-coded stuff, // needs to be cleaned out... for(y=45; y<55; y++){ for(x = y; x<100; x++) epsilon[compZ + pos_to_row(x, y, 0)] = 0.95; } // reorder the dielectric vector in order to align with the B field AZ_reorder_vec(epsilon, data_orgA, update_indexA, NULL); printf("Begin iteration \n"); // just some timing ... 
startTime = currentTimeMillis(); // ******************* // begin of the time stepping loop // ******************* for( t = 0; t < nsteps; t++){ // first we do the e field update // convert the B field to the H field for(k = 0 ; k < vec_len; k++) bfield[k] *= epsilon[k]; // setup the initial condition for( k = 0; k < n_bc; k++){ x = bc_indx[4*k]; y = bc_indx[4*k+1]; z = bc_indx[4*k+2]; efield[bc_indx[4*k+3]] = sin((double) y * 5. * 3.14159 / (double) ny) * sin(omega * dt * (double) (t + 1)); } // E field update: // tmp_vec = Curl_Op * bfield // efield = efield + c^2 * dt * tmp_vec AZ_MSR_matvec_mult( bfield, tmp_vec, mat_curl_edge, proc_config); // reorder the result in tmp_vec so that it aligns with the // decomposition of the E field AZ_invorder_vec(tmp_vec, data_orgA, update_indexA, NULL, tmp_vec2); AZ_reorder_vec(tmp_vec2, data_orgB, update_indexB, NULL); // update the efield for(k = 0 ; k < N_update; k++) efield[k] = efield[k] + c2 * tmp_vec2[k] * dt; // bfield update : // tmp_vec = DualCurl_Op * efield // bfield = bfield - tmp_vec * dt AZ_MSR_matvec_mult( efield, tmp_vec, mat_curl_face, proc_config); // reorder the result so that it fits the decomposition of the bfield AZ_invorder_vec(tmp_vec, data_orgB, update_indexB, NULL, tmp_vec2); AZ_reorder_vec(tmp_vec2, data_orgA, update_indexA, NULL); // update the b field for(k = 0; k < N_update; k++) bfield[k] = bfield[k] - tmp_vec2[k] * dt; if(myrank == 0) printf("Taking step %d at time %g\n", t, (double) (currentTimeMillis() - startTime) / 1000.); } // ****************** // end of timestepping loop // ***************** endTime = currentTimeMillis(); printf("After iteration: %g\n", (double)(endTime - startTime) / 1000. 
); #if 1 system("rm efield.txt bfield.txt"); // dump filed data: efield AZ_invorder_vec(efield, data_orgB, update_indexB, NULL, tmp_vec); write_file("efield.txt", tmp_vec, N_update); // dump filed data: bfield AZ_invorder_vec(bfield, data_orgA, update_indexA, NULL, tmp_vec); write_file("bfield.txt", tmp_vec, N_update); #endif /* Free allocated memory */ AZ_matrix_destroy( &mat_curl_edge); free((void *) update); free((void *) update_indexA); free((void *) externalA); free((void *) extern_indexA); free((void *) bindxA); free((void *) valA); free((void *) data_orgA); AZ_matrix_destroy( &mat_curl_face); free((void *) externalB); free((void *) extern_indexB); free((void *) bindxB); free((void *) valB); free((void *) data_orgB); free((void *) efield); free((void *) bfield); free((void *) tmp_vec); free((void *) tmp_vec2); #ifdef AZ_MPI MPI_Finalize(); #endif return(1); }
/*
 * Driver: read an MSR matrix from the file ".data", build a smoothed-
 * aggregation multigrid hierarchy with ML, optionally load rigid-body
 * modes (files "rigid_body_mode1", "rigid_body_mode2", ...) as the
 * aggregation null space, and solve with Aztec GMRESR using the ML
 * V-cycle as preconditioner.  Optional capture files
 * ("AZ_capture_rhs.dat", "AZ_capture_init_guess.dat",
 * "AZ_no_multilevel.dat") override the rhs / initial guess / solver mode.
 */
int main(int argc, char *argv[])
{
  int num_PDE_eqns=3, N_levels=3, nsmooth=1;
  int leng, level, N_grid_pts, coarsest_level;

  /* See Aztec User's Guide for more information on the */
  /* variables that follow.                             */
  int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];

  /* data structure for matrix corresponding to the fine grid */
  int *data_org = NULL, *update = NULL, *external = NULL;
  int *update_index = NULL, *extern_index = NULL;
  int *cpntr = NULL;
  int *bindx = NULL, N_update, iii;
  double *val = NULL;
  double *xxx, *rhs;
  AZ_MATRIX *Amat;
  AZ_PRECOND *Pmat = NULL;
  ML *ml;
  FILE *fp;
  int ch,i,j, Nrigid, *garbage;
  struct AZ_SCALING *scaling;
  double solve_time, setup_time, start_time, *mode, *rigid;
  ML_Aggregate *ag;
  int nblocks, *blocks;   /* only used by commented-out block smoothers */
  char filename[80];
  double alpha;
  int one = 1;            /* unit stride for the daxpy_ calls below */

#ifdef ML_MPI
  MPI_Init(&argc,&argv);
  /* get number of processors and the name of this processor */
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI);
#endif

  /* Rank 0 reads the global problem size from ".data"; the AZ_gsum_int
     below broadcasts it (sum over ranks, all others contribute 0). */
  leng = 0;
  if (proc_config[AZ_node] == 0) {
#ifdef binary
    fp=fopen(".data","rb");
#else
    fp=fopen(".data","r");
#endif
    if (fp==NULL) { printf("couldn't open file .data\n"); exit(1); }
#ifdef binary
    fread(&leng, sizeof(int), 1, fp);
#else
    fscanf(fp,"%d",&leng);
#endif
    fclose(fp);
  }
  leng = AZ_gsum_int(leng, proc_config);

  N_grid_pts=leng/num_PDE_eqns;

  /* Initialize the list of global indices.  NOTE: the list of global
     indices must be in ascending order so that subsequent calls to
     AZ_find_index() will function properly. */
  AZ_read_update(&N_update, &update, proc_config, N_grid_pts,
                 num_PDE_eqns, AZ_linear);

  AZ_read_msr_matrix(update, &val, &bindx, N_update, proc_config);

  /* Convert to a local distributed matrix. */
  AZ_transform(proc_config, &external, bindx, val, update, &update_index,
               &extern_index, &data_org, N_update, 0, 0, 0, &cpntr,
               AZ_MSR_MATRIX);

  Amat = AZ_matrix_create( leng );
  AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL);

  Amat->matrix_type = data_org[AZ_matrix_type];
  data_org[AZ_N_rows] = data_org[AZ_N_internal] + data_org[AZ_N_border];

  start_time = AZ_second();

  AZ_defaults(options, params);
  /* (A commented-out diagonal-scaling experiment using AZ_iterate with
     AZ_sym_diag was removed here for clarity.) */
  options[AZ_scaling] = AZ_none;

  /* Build the ML hierarchy: fine grid lives at level N_levels-1. */
  ML_Create(&ml, N_levels);

  /* set up discretization matrix and matrix vector function */
  AZ_ML_Set_Amat(ml, N_levels-1, N_update, N_update, Amat, proc_config);

  ML_Aggregate_Create( &ag );

  /* Rank 0 counts how many rigid-body-mode files exist on disk;
     the count is then summed (broadcast) to all ranks. */
  Nrigid = 0;
  if (proc_config[AZ_node] == 0) {
    sprintf(filename,"rigid_body_mode%d",Nrigid+1);
    while( (fp = fopen(filename,"r")) != NULL) {
      fclose(fp);
      Nrigid++;
      sprintf(filename,"rigid_body_mode%d",Nrigid+1);
    }
  }
  Nrigid = AZ_gsum_int(Nrigid,proc_config);

  if (Nrigid != 0) {
    rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) );
    if (rigid == NULL) {
      /* NOTE(review): execution continues with rigid == NULL here;
         the loops below would then dereference it -- confirm intent. */
      printf("Error: Not enough space for rigid body modes\n");
    }
  }

  rhs=(double *)malloc(leng*sizeof(double));
  AZ_random_vector(rhs, data_org, proc_config);

  /* Read each rigid body mode; Gram-Schmidt it against the previously
     stored modes, project it out of the rhs, then store it in rigid[]. */
  for (i = 0; i < Nrigid; i++) {
    sprintf(filename,"rigid_body_mode%d",i+1);
    AZ_input_msr_matrix(filename, update, &mode, &garbage, N_update,
                        proc_config);

    /* Orthogonalize mode against the already-stored modes. */
    for (j = 0; j < i; j++) {
      alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/
               AZ_gdot(N_update, &(rigid[j*N_update]),
                       &(rigid[j*N_update]), proc_config);
      daxpy_(&N_update, &alpha, &(rigid[j*N_update]), &one, mode, &one);
      printf("alpha1 is %e\n",alpha);
    }

    /* Remove the component of rhs along this mode. */
    alpha = -AZ_gdot(N_update, mode, rhs, proc_config)/
             AZ_gdot(N_update, mode, mode, proc_config);
    printf("alpha2 is %e\n",alpha);
    daxpy_(&N_update, &alpha, mode, &one, rhs, &one);

    for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j];

    free(mode);
    free(garbage);
  }

  /* Project all stored modes out of the rhs once more. */
  for (j = 0; j < Nrigid; j++) {
    alpha = -AZ_gdot(N_update, rhs, &(rigid[j*N_update]), proc_config)/
             AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]),
                     proc_config);
    daxpy_(&N_update, &alpha, &(rigid[j*N_update]), &one, rhs, &one);
    printf("alpha4 is %e\n",alpha);
  }

  /* Diagnostic: remaining rhs components along each mode (should be ~0). */
  for (i = 0; i < Nrigid; i++) {
    alpha = -AZ_gdot(N_update, &(rigid[i*N_update]), rhs, proc_config);
    printf("alpha is %e\n",alpha);
  }

  if (Nrigid != 0) {
    /* Hand the rigid body modes to ML as the aggregation null space. */
    ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update);
    /* free(rigid); */
    /* NOTE(review): rigid is intentionally not freed (commented out) --
       it is still referenced below in the post-solve projection loop. */
  }

  coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, N_levels-1,
                                                       ML_DECREASING, ag);
  coarsest_level = N_levels - coarsest_level;
  if ( proc_config[AZ_node] == 0 )
    printf("Coarse level = %d \n", coarsest_level);

  /* Set up smoothers on every level above the coarsest: 10 iterations
     of Aztec CG with symmetric Gauss-Seidel pre and post. */
  for (level = N_levels-1; level > coarsest_level; level--) {
    j = 10;
    if (level == N_levels-1) j = 10;

    options[AZ_solver] = AZ_cg;
    options[AZ_precond]=AZ_sym_GS;
    options[AZ_subdomain_solve]=AZ_icc;
    /* options[AZ_precond] = AZ_none; */
    options[AZ_poly_ord] = 5;
    ML_Gen_SmootherAztec(ml, level, options, params, proc_config, status,
                         j, ML_PRESMOOTHER,NULL);
    ML_Gen_SmootherAztec(ml, level, options, params, proc_config, status,
                         j, ML_POSTSMOOTHER,NULL);
    /* (Several large blocks of commented-out alternative smoothers --
       SymGaussSeidel, VBlockSymGaussSeidel, VBlockJacobi, GaussSeidel,
       BlockGaussSeidel, Jacobi -- were condensed away here.) */
  }

  /* Coarsest-level solver; note this relies on `level` having the value
     coarsest_level left over from the loop above. */
  ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_PRESMOOTHER, 2*nsmooth,
                                   1.0, num_PDE_eqns);

  ML_Gen_Solver(ml, ML_MGV, N_levels-1, coarsest_level);

  /* Outer Krylov solver: GMRESR with the ML V-cycle as preconditioner. */
  AZ_defaults(options, params);

  options[AZ_solver]   = AZ_GMRESR;
  options[AZ_scaling]  = AZ_none;
  options[AZ_precond]  = AZ_user_precond;
  options[AZ_conv]     = AZ_rhs;
  options[AZ_output]   = 1;
  options[AZ_max_iter] = 1500;
  options[AZ_poly_ord] = 5;
  options[AZ_kspace]   = 130;
  params[AZ_tol]       = 1.0e-8;

  AZ_set_ML_preconditioner(&Pmat, Amat, ml, options);
  setup_time = AZ_second() - start_time;

  xxx = (double *) malloc( leng*sizeof(double));

  /* Set rhs: replay a captured rhs file if present, otherwise keep the
     random (mode-projected) vector built above. */
  fp = fopen("AZ_capture_rhs.dat","r");
  if (fp == NULL) {
    if (proc_config[AZ_node] == 0) printf("taking random vector for rhs\n");
    /* (Commented-out alternative: random rhs/x with matvec, removed.) */
  }
  else {
    /* A leading 'S' line is a header; skip it. */
    ch = getc(fp);
    if (ch == 'S') { while ( (ch = getc(fp)) != '\n') ; }
    else ungetc(ch,fp);
    for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++)
      fscanf(fp,"%lf",&(rhs[i]));
    fclose(fp);
  }

  for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0;

  /* Set x: replay a captured initial guess if present; switch the
     convergence test to AZ_expected_values in that case. */
  fp = fopen("AZ_capture_init_guess.dat","r");
  if (fp != NULL) {
    ch = getc(fp);
    if (ch == 'S') { while ( (ch = getc(fp)) != '\n') ; }
    else ungetc(ch,fp);
    for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++)
      fscanf(fp,"%lf",&(xxx[i]));
    fclose(fp);
    options[AZ_conv] = AZ_expected_values;
  }

  /* If Dirichlet BC (diagonal entry ~= 1) ... put the answer in. */
  for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) {
    if ( (val[i] > .99999999) && (val[i] < 1.0000001)) xxx[i] = rhs[i];
  }

  /* Presence of AZ_no_multilevel.dat selects a plain (non-ML) solve. */
  fp = fopen("AZ_no_multilevel.dat","r");
  scaling = AZ_scaling_create();
  start_time = AZ_second();

  if (fp != NULL) {
    fclose(fp);
    options[AZ_precond] = AZ_none;
    options[AZ_scaling] = AZ_sym_diag;
    options[AZ_ignore_scaling] = AZ_TRUE;
    options[AZ_keep_info] = 1;
    AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat,
               NULL, scaling);
    /* (Commented-out second/third solves with AZ_pre_calc = AZ_reuse
       and AZ_expected_values were condensed away.) */
  }
  else {
    options[AZ_keep_info] = 1;
    /* options[AZ_max_iter] = 40; */
    AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat,
               Pmat, scaling);

    /* Project the rigid body modes out of the computed solution. */
    for (j = 0; j < Nrigid; j++) {
      alpha = -AZ_gdot(N_update, xxx, &(rigid[j*N_update]), proc_config)/
               AZ_gdot(N_update, &(rigid[j*N_update]),
                       &(rigid[j*N_update]), proc_config);
      daxpy_(&N_update, &alpha, &(rigid[j*N_update]), &one, xxx, &one);
      printf("alpha5 is %e\n",alpha);
    }

    /* Second solve restarting from the projected solution. */
    AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat,
               Pmat, scaling);
    options[AZ_pre_calc] = AZ_reuse;
    options[AZ_conv] = AZ_expected_values;
    /* (Commented-out second/third solves with improved convergence test
       were condensed away.) */
  }

  solve_time = AZ_second() - start_time;

  if (proc_config[AZ_node] == 0)
    printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time);

  /* Tear down in reverse order of construction. */
  ML_Aggregate_Destroy(&ag);
  ML_Destroy(&ml);

  AZ_free((void *) Amat->data_org);
  AZ_free((void *) Amat->val);
  AZ_free((void *) Amat->bindx);
  AZ_free((void *) update);
  AZ_free((void *) external);
  AZ_free((void *) extern_index);
  AZ_free((void *) update_index);
  if (Amat  != NULL) AZ_matrix_destroy(&Amat);
  if (Pmat  != NULL) AZ_precond_destroy(&Pmat);
  free(xxx);
  free(rhs);
  /* NOTE(review): `scaling` (AZ_scaling_create) and `rigid` are not
     released here -- confirm whether the leak is acceptable for a demo. */

#ifdef ML_MPI
  MPI_Finalize();
#endif

  return 0;
}