Example #1
int main(int argc, char *argv[])
{
  int    proc_config[AZ_PROC_SIZE]; // Processor information.

#ifdef EPETRA_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm comm(MPI_COMM_WORLD);
  AZ_set_proc_config(proc_config,MPI_COMM_WORLD);
#else
  Epetra_SerialComm comm;
  AZ_set_proc_config(proc_config,0);
#endif

  // Wait for input on proc 0 (handy for attaching a debugger), then sync.
  int temp;
  if (comm.MyPID() == 0) {
    cout << "Type 1 and enter to continue" << endl;
    cin >> temp;
  }
  comm.Barrier();
AZ_MATRIX *user_Kn_build(struct user_partition *Node_Partition)
{
  int *Kn_bindx;
  double *Kn_val;
  int    proc_config[AZ_PROC_SIZE];
  AZ_MATRIX *Kn_mat;
  int    *reordered_glob_nodes = NULL, *reordered_node_externs = NULL;
  int    *cpntr = NULL, *Kn_data_org = NULL;
  int i, ii, jj, nx, gid, Nlocal_nodes, nz_ptr;


  Nlocal_nodes = Node_Partition->Nlocal;
  Kn_bindx = (int    *) malloc((27*Nlocal_nodes+5)*sizeof(int));
  Kn_val   = (double *) malloc((27*Nlocal_nodes+5)*sizeof(double));
  Kn_bindx[0] = Nlocal_nodes+1;

  nx = (int) sqrt( ((double) Node_Partition->Nglobal) + .00001); /* nx x nx node grid */

  for (i = 0; i < Nlocal_nodes; i++) {
    gid = (Node_Partition->my_global_ids)[i];

    nz_ptr = Kn_bindx[i];
    ii = gid%nx;
    jj = (gid - ii)/nx;


    if (ii != nx-1) { Kn_bindx[nz_ptr] = gid+ 1; Kn_val[nz_ptr++] = -1.;}
    if (jj != nx-1) { Kn_bindx[nz_ptr] = gid+nx; Kn_val[nz_ptr++] = -1.;}
    if (jj !=    0) { Kn_bindx[nz_ptr] = gid-nx; Kn_val[nz_ptr++] = -1.;}
    if (ii !=    0) { Kn_bindx[nz_ptr] = gid- 1; Kn_val[nz_ptr++] = -1.;}

    if ((ii != nx-1) && (jj !=    0)) 
      {Kn_bindx[nz_ptr] = gid-nx+1; Kn_val[nz_ptr++] = -1.;}
    if ((ii != nx-1) && (jj != nx-1)) 
      {Kn_bindx[nz_ptr] = gid+nx+1; Kn_val[nz_ptr++] = -1.;}
    if ((ii !=    0) && (jj != nx-1)) 
      {Kn_bindx[nz_ptr] = gid+nx-1; Kn_val[nz_ptr++] = -1.;}
    if ((ii !=    0) && (jj !=    0)) 
      {Kn_bindx[nz_ptr] = gid-nx-1; Kn_val[nz_ptr++] = -1.;}
    Kn_val[i] = (double) (nz_ptr - Kn_bindx[i]);
    Kn_bindx[i+1] = nz_ptr;
  }

  AZ_set_proc_config(proc_config, COMMUNICATOR); /* COMMUNICATOR: communicator macro from the example headers */

  AZ_transform_norowreordering(proc_config,&(Node_Partition->needed_external_ids),
			       Kn_bindx, Kn_val, Node_Partition->my_global_ids,
			       &reordered_glob_nodes, &reordered_node_externs, 
			       &Kn_data_org, Nlocal_nodes, 0, 0, 0, 
			       &cpntr, AZ_MSR_MATRIX);
  Node_Partition->Nghost = Kn_data_org[AZ_N_external];
  AZ_free(reordered_glob_nodes);

  /* Convert old style Aztec matrix to newer style Aztec matrix */

  Kn_mat = AZ_matrix_create( Nlocal_nodes );
  AZ_set_MSR(Kn_mat, Kn_bindx, Kn_val, Kn_data_org, 0, NULL, AZ_LOCAL);

  return(Kn_mat);
}
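
user_Kn_build() assembles the 2D 9-point stencil directly in Aztec's MSR (modified sparse row) format: bindx[0..N] holds row pointers with bindx[0] = N+1, val[0..N-1] holds the diagonal entries, and the off-diagonal column indices and values share the tails of bindx and val. A minimal standalone sketch of that layout (hypothetical 2 x 2 matrix, not from the example above):

#include <stdio.h>

/* Print one row of an N x N matrix stored in MSR format.
 * bindx[0..N] are row pointers (bindx[0] = N+1); val[0..N-1] are the
 * diagonal entries; bindx[k]/val[k] for k >= N+1 hold the off-diagonal
 * column indices and values of each row in turn. */
static void print_msr_row(int N, const int *bindx, const double *val, int row)
{
  int k;
  printf("row %d: diag = %g", row, val[row]);
  for (k = bindx[row]; k < bindx[row + 1]; k++)
    printf("  (col %d) %g", bindx[k], val[k]);
  printf("\n");
}

int main(void)
{
  /* Hypothetical 2 x 2 matrix [ 2 -1; -1 2 ] in MSR storage.
   * Slot N (= 2) of val is unused padding, per Aztec's convention. */
  int    bindx[] = { 3, 4, 5, 1, 0 };
  double val[]   = { 2.0, 2.0, 0.0, -1.0, -1.0 };

  print_msr_row(2, bindx, val, 0);
  print_msr_row(2, bindx, val, 1);
  return 0;
}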
void user_partition_nodes(struct user_partition *Partition)
{
  int    proc_config[AZ_PROC_SIZE];

  AZ_set_proc_config(proc_config, COMMUNICATOR);

  AZ_input_update(NULL,&(Partition->Nlocal), &(Partition->my_global_ids),
		  proc_config, Partition->Nglobal, 1, AZ_linear);
  Partition->Nghost = 0;
}
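
For context, the two helpers above are used in order: partition the nodes first, then assemble the local matrix. An illustrative calling sequence (hypothetical values, not from the original source; assumes <string.h> for memset):

  struct user_partition node_part;
  AZ_MATRIX *Kn;

  memset(&node_part, 0, sizeof(node_part));  /* requires <string.h> */
  node_part.Nglobal = 32*32;           /* hypothetical square node grid */
  user_partition_nodes(&node_part);    /* fills Nlocal and my_global_ids */
  Kn = user_Kn_build(&node_part);      /* assembles the local MSR matrix */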
Example #4
void
hunt_problem(Comm_Ex *cx,	/* array of communications structures */
	     Exo_DB *exo,	/* ptr to the finite element mesh database */
	     Dpi *dpi)	        /* distributed processing information */
{
  int    *ija=NULL;           /* column pointer array                         */
  double *a=NULL;             /* nonzero array                                */
  double *a_old=NULL;         /* nonzero array                                */
  double *x=NULL;             /* solution vector                              */

  int     iAC;                /* COUNTER                                      */
  double *x_AC = NULL;        /* SOLUTION VECTOR OF EXTRA UNKNOWNS            */
  double *x_AC_old=NULL;      /* old SOLUTION VECTOR OF EXTRA UNKNOWNS        */
  double *x_AC_dot = NULL; 

  int     iHC;                /* COUNTER                                      */
  
  int    *ija_attic=NULL;     /* storage for external dofs                    */

  int eb_indx, ev_indx;

  /* 
   * variables for path traversal 
   */
  
  double *x_old=NULL;         /* old solution vector                          */
  double *x_older=NULL;       /* older solution vector                        */
  double *x_oldest=NULL;      /* oldest solution vector saved                 */
  double *xdot=NULL;          /* current path derivative of soln              */
  double *xdot_old=NULL;
  double *x_update=NULL;

  double *x_sens=NULL;        /* solution sensitivity */
  double **x_sens_p=NULL;     /* solution sensitivity for parameters */
  int num_pvector=0;          /*  number of solution sensitivity vectors */
#ifdef COUPLED_FILL
  struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL}; 
#else /* COUPLED_FILL */
  struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL, NULL}; 
#endif /* COUPLED_FILL */
                              /* sl_util_structs.h */

  double *resid_vector=NULL;  /* residual */
  double *resid_vector_sens=NULL;    /* residual sensitivity */
  double *scale=NULL;      /* scale vector for modified newton */

  int 	 *node_to_fill = NULL;	

  int		n;            /* total number of path steps attempted */
  int		ni;           /* total number of nonlinear solves */
  int		nt;           /* total number of successful path steps */
  int		path_step_reform; /* counter for jacobian reformation stride */
  int		converged;    /* success or failure of Newton iteration */
  int		success_ds;   /* success or failure of path step */

  int           i;

  int           nprint=0, num_total_nodes;

  int           numProcUnknowns;
  int           *const_delta_s=NULL;
  int           step_print;
  double        i_print;
  int good_mesh = TRUE;
  double	*path=NULL, *path1=NULL;
  double	*delta_s=NULL, *delta_s_new=NULL, *delta_s_old=NULL;
  double        *delta_s_older=NULL, *delta_s_oldest=NULL;
  double        *hDelta_s0=NULL, *hDelta_s_min=NULL, *hDelta_s_max=NULL;
  double        delta_t;
  double	theta=0.0;
  double        damp;
  double        eps;
  double        *lambda=NULL, *lambdaEnd=NULL;
  double	hunt_par, dhunt_par, hunt_par_old;	/* hunting continuation parameter */
  double        timeValueRead = 0.0;

  /* 
   * ALC management variables
   */

  int           alqALC;
  int           *aldALC=NULL; 

  /*
   * Other local variables 
   */
  
  int	        error, err, is_steady_state, inewton;
  int 		*gindex = NULL, gsize;
  int		*p_gsize=NULL;
  double	*gvec=NULL;
  double        ***gvec_elem;
  double	err_dbl;
  FILE          *file=NULL;
  double 	toler_org[3],damp_org;
  
  struct Results_Description  *rd=NULL;
  
  int		tnv;		/* total number of nodal variables and kinds */
  int		tev;		/* total number of elem variables and kinds */
  int		tnv_post;	/* total number of nodal variables and kinds 
					   for post processing */
  int		tev_post;	/* total number of elem variables and kinds 
					   for post processing */

  int max_unk_elem, one, three; /* variables used as mf_setup arguments*/

  unsigned int  matrix_systems_mask;

  double evol_local=0.0;
#ifdef PARALLEL
  double evol_global=0.0;
#endif

  static char yo[]="hunt_problem"; 

  /*
   * 		BEGIN EXECUTION
   */

#ifdef DEBUG
  fprintf(stderr, "hunt_problem() begins...\n");
#endif

  toler_org[0] = custom_tol1;
  toler_org[1] = custom_tol2;
  toler_org[2] = custom_tol3;
  damp_org = damp_factor1;

  is_steady_state = TRUE;

  p_gsize = &gsize;
  
  /* 
   * set aside space for gathering global vectors to print to the exoII file
   * note: this is temporary
   *
   * For 2D prototype problem:  allocate space for T, dx, dy arrays
   */

  if( strlen( Soln_OutFile)  )
    {
#ifdef DEBUG
      printf("Trying to open \"%s\" for writing.\n", Soln_OutFile);
#endif
      file = fopen(Soln_OutFile, "w");
      if (file == NULL)  {
	DPRINTF(stderr, "%s:  error opening soln file for writing\n", yo);
        EH(-1, "\t");
      }
    }
#ifdef PARALLEL
  check_parallel_error("Soln output file error");
#endif

  /*
   * Some preliminaries to help setup EXODUS II database output.
   */

#ifdef DEBUG
  fprintf(stderr, "cnt_nodal_vars() begins...\n");
#endif

  tnv = cnt_nodal_vars();
  /*  tnv_post is calculated in load_nodal_tkn*/
  tev = cnt_elem_vars();
  /*  tev_post is calculated in load_elem_tkn*/
  
#ifdef DEBUG
  fprintf(stderr, "Found %d total primitive nodal variables to output.\n", tnv);
  fprintf(stderr, "Found %d total primitive elem variables to output.\n", tev);
#endif
  
  if ( tnv < 0 )
    {
      DPRINTF(stderr, "%s:\tbad tnv.\n", yo);
      EH(-1, "\t");
    }

  if ( tev < 0 )
    {
      DPRINTF(stderr, "%s:\tMaybe bad tev? See goma design committee ;) \n", yo);
/*       exit(-1); */
    }
  
  rd = (struct Results_Description *) 
    smalloc(sizeof(struct Results_Description));

  if (rd == NULL) 
    { EH(-1, "Could not grab Results Description."); }
  (void) memset((void *) rd, 0, sizeof(struct Results_Description));
  
  rd->nev = 0;			/* number element variables in results */
  rd->ngv = 0;			/* number global variables in results */
  rd->nhv = 0;			/* number history variables in results */
  
  if ( is_steady_state == TRUE ) {
    rd->ngv = 5;			/* number global variables in results 
					   see load_global_var_info for names*/
    error = load_global_var_info(rd, 0, "CONV");
    error = load_global_var_info(rd, 1, "NEWT_IT");
    error = load_global_var_info(rd, 2, "MAX_IT");
    error = load_global_var_info(rd, 3, "CONVRATE");
    error = load_global_var_info(rd, 4, "MESH_VOLUME");
  }
  
  /* load nodal types, kinds, names */
  error = load_nodal_tkn( rd,
			  &tnv,
			  &tnv_post); /* load nodal types, kinds, names */
  
  if (error !=0)
    {
      DPRINTF(stderr, "%s:  problem with load_nodal_tkn()\n", yo);
      EH(-1,"\t");
    }

  /* load elem types, names */
  error = load_elem_tkn( rd,
			 exo,
			 tev, 
			 &tev_post); /* load elem types, names */
  
  if ( error !=0 )
    {
      DPRINTF(stderr, "%s:  problem with load_elem_tkn()\n", yo);
      EH(-1,"\t");
    }

  /* 
   * Write out the names of the nodal variables that we will be sending to
   * the EXODUS II output file later.
   */

#ifdef DEBUG
  fprintf(stderr, "wr_result_prelim() starts...\n", tnv);
#endif

  gvec_elem = (double ***) smalloc ( (exo->num_elem_blocks)*sizeof(double **));
  for (i = 0; i < exo->num_elem_blocks; i++) {
    gvec_elem[i] = (double **) smalloc ( (tev + tev_post)*sizeof(double *));
  }

  wr_result_prelim_exo( rd, 
                        exo, 
                        ExoFileOut,
                        gvec_elem );

#ifdef DEBUG
  fprintf(stderr, "P_%d: wr_result_prelim_exo() ends...\n", ProcID, tnv);
#endif

  /* 
   * This gvec workhorse transports output variables as nodal based vectors
   * that are gathered from the solution vector. Note: it is NOT a global
   * vector at all and only carries this processor's nodal variables to
   * the exodus database.
   */

  asdv(&gvec, Num_Node);

  /*
   * Allocate space and manipulate for all the nodes that this processor
   * is aware of...
   */

  num_total_nodes = dpi->num_universe_nodes;

  numProcUnknowns = NumUnknowns + NumExtUnknowns;

  /* allocate memory for Volume Constraint Jacobian. ACS 2/99 */

  if ( nAC > 0)
    {
      for (iAC = 0; iAC < nAC; iAC++) {
	augc[iAC].d_evol_dx = (double *) malloc(numProcUnknowns*sizeof(double));
      }
    }
  
  asdv(&resid_vector, numProcUnknowns);
  asdv(&resid_vector_sens, numProcUnknowns);
  asdv(&scale, numProcUnknowns);

  for (i=0;i<NUM_ALSS;i++) 
    {
      ams[i] = (struct Aztec_Linear_Solver_System *) 
	array_alloc(1, 1, sizeof(struct Aztec_Linear_Solver_System )); 
    }

#ifdef MPI
  AZ_set_proc_config( ams[0]->proc_config, MPI_COMM_WORLD );
#ifndef COUPLED_FILL
  if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, MPI_COMM_WORLD );
#endif /* not COUPLED_FILL */
#else /* MPI */
  AZ_set_proc_config( ams[0]->proc_config, 0 );
#ifndef COUPLED_FILL
  if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, 0 );
#endif /* not COUPLED_FILL */
#endif /* MPI */

  /*
   * allocate space for and initialize solution arrays
   */

  asdv(&x,        numProcUnknowns);
  asdv(&x_old,    numProcUnknowns);
  asdv(&x_older,  numProcUnknowns);
  asdv(&x_oldest, numProcUnknowns);
  asdv(&xdot,     numProcUnknowns);
  asdv(&xdot_old, numProcUnknowns);
  asdv(&x_update, numProcUnknowns);

  asdv(&x_sens, numProcUnknowns);

  /*
   * Initialize solid inertia flag
   */
  set_solid_inertia();

  /*
   * ALLOCATE ALL THOSE WORK VECTORS FOR HUNTING
   */

  asdv(&lambda,         nHC);
  asdv(&lambdaEnd,      nHC);
  asdv(&path,           nHC);
  asdv(&path1,          nHC);
  asdv(&hDelta_s0,      nHC);
  asdv(&hDelta_s_min,   nHC);
  asdv(&hDelta_s_max,   nHC);
  asdv(&delta_s,        nHC);
  asdv(&delta_s_new,    nHC);
  asdv(&delta_s_old,    nHC);
  asdv(&delta_s_older,  nHC);
  asdv(&delta_s_oldest, nHC);

  aldALC        = Ivector_birth(nHC);
  const_delta_s = Ivector_birth(nHC);

  /*

   HUNTING BY ZERO AND FIRST ORDER CONTINUATION

  */

  alqALC = 1;

  damp = 1.0;

  delta_t = 0.0;
  tran->delta_t = 0.0;      /*for Newmark-Beta terms in Lagrangian Solid*/

  nprint = 0;

  MaxPathSteps      = cont->MaxPathSteps;
  eps               = cont->eps;

  for (iHC=0;iHC<nHC;iHC++) {

    const_delta_s[iHC] = 0;

    lambda[iHC]       = hunt[iHC].BegParameterValue;
    lambdaEnd[iHC]    = hunt[iHC].EndParameterValue;

    if ((lambdaEnd[iHC]-lambda[iHC]) > 0.0)
    {
      aldALC[iHC] = +1;
    }
    else
    {
      aldALC[iHC] = -1;
    } 

    if (hunt[iHC].ramp == 1) {
      hunt[iHC].Delta_s0 = fabs(lambdaEnd[iHC]-lambda[iHC])/((double)(MaxPathSteps-1));
      const_delta_s[iHC] = 1;
    }

    hDelta_s0[iHC]     = hunt[iHC].Delta_s0;
    hDelta_s_min[iHC]  = hunt[iHC].Delta_s_min;
    hDelta_s_max[iHC]  = hunt[iHC].Delta_s_max;

    path[iHC] = path1[iHC] = lambda[iHC];

    if (Debug_Flag && ProcID == 0) {
      fprintf(stderr,"MaxPathSteps: %d \tlambdaEnd: %f\n", MaxPathSteps, lambdaEnd[iHC]);
      fprintf(stderr,"continuation in progress\n");
    }

    if (hDelta_s0[iHC] > hDelta_s_max[iHC]) 
    {
      hDelta_s0[iHC] = hDelta_s_max[iHC];
    }

    delta_s[iHC] = delta_s_old[iHC] = delta_s_older[iHC] = hDelta_s0[iHC];
      
    /*
     * ADJUST NATURAL PARAMETER
     */
	
    update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); 
  }

  /*  define continuation parameter */

  if (hunt[0].EndParameterValue == hunt[0].BegParameterValue)
    {
      hunt_par = 1.0;
    }
  else
    {
      hunt_par = (path1[0] - hunt[0].BegParameterValue)
               / (hunt[0].EndParameterValue - hunt[0].BegParameterValue);
      hunt_par = fabs(hunt_par);
    }
  hunt_par_old = hunt_par;

  /* Call prefront (or mf_setup) if necessary */
  if (Linear_Solver == FRONT)
  {
    /* We also have to define these because mf_setup() wants pointers to them */
	  
    max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE;
    one = 1;
    three = 3;

    /* NOTE: We need an overall flag in the vn_glob struct that tells whether FULL_DG
       is on anywhere in the domain.  This assumes only one material.  See sl_front_setup
       for the test; that test needs to be in the input parser.  */

    if(vn_glob[0]->dg_J_model == FULL_DG) 
    {
      max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE + 4*vn_glob[0]->modes*4*MDE;
    }

    if (Num_Proc > 1) EH(-1, "Whoa.  No front allowed with nproc>1");  
	  
#ifdef HAVE_FRONT  
    err = mf_setup(&exo->num_elems, 
		   &NumUnknowns, 
		   &max_unk_elem, 
		   &three,
		   &one,
		   exo->elem_order_map,
		   fss->el_proc_assign,
		   fss->level,
		   fss->nopdof,
		   fss->ncn,
		   fss->constraint,
		   front_scratch_directory,
		   &fss->ntra);
    EH(err,"problems in frontal setup ");

#else
    EH(-1,"Don't have frontal solver compiled and linked in");
#endif
  }


  /*
   * If computing parameter sensitivities, allocate space for solution
   * sensitivity vectors.
   */

  for (i = 0; i < nn_post_fluxes_sens; i++) {
    num_pvector = MAX(num_pvector, pp_fluxes_sens[i]->vector_id);
  }
  for (i = 0; i < nn_post_data_sens; i++) {
    num_pvector = MAX(num_pvector, pp_data_sens[i]->vector_id);
  }

  if ((nn_post_fluxes_sens + nn_post_data_sens) > 0)
  {
    num_pvector++;
    num_pvector = MAX(num_pvector, 2);
    x_sens_p = Dmatrix_birth(num_pvector, numProcUnknowns);
  }
  else
  {
    x_sens_p = NULL;
  }


  if (nAC > 0)
  {
    asdv(&x_AC, nAC);
    asdv(&x_AC_old, nAC);
    asdv(&x_AC_dot, nAC);
  }

  /* Allocate sparse matrix */

  if( strcmp( Matrix_Format, "msr" ) == 0)
  {
    log_msg("alloc_MSR_sparse_arrays...");
    alloc_MSR_sparse_arrays(&ija, 
			    &a, 
			    &a_old, 
			    0, 
			    node_to_fill, 
			    exo, 
			    dpi);
    /*
     * An attic to store external dofs column names is needed when
     * running in parallel.
     */

    alloc_extern_ija_buffer(num_universe_dofs, 
			    num_internal_dofs+num_boundary_dofs, 
			    ija, &ija_attic);
    /*
     * Any necessary one time initialization of the linear
     * solver package (Aztec).
     */
      
    ams[JAC]->bindx   = ija;
    ams[JAC]->val     = a;
    ams[JAC]->belfry  = ija_attic;
    ams[JAC]->val_old = a_old;
	  
    /*
     * These point to nowhere since we're using MSR instead of VBR
     * format.
     */
      
    ams[JAC]->indx  = NULL;
    ams[JAC]->bpntr = NULL;
    ams[JAC]->rpntr = NULL;
    ams[JAC]->cpntr = NULL;
    ams[JAC]->npn      = dpi->num_internal_nodes + dpi->num_boundary_nodes;
    ams[JAC]->npn_plus = dpi->num_internal_nodes + dpi->num_boundary_nodes + dpi->num_external_nodes;

    ams[JAC]->npu      = num_internal_dofs+num_boundary_dofs;
    ams[JAC]->npu_plus = num_universe_dofs;

    ams[JAC]->nnz = ija[num_internal_dofs+num_boundary_dofs] - 1;
    ams[JAC]->nnz_plus = ija[num_universe_dofs];

  }
  else if(  strcmp( Matrix_Format, "vbr" ) == 0)
  {
    log_msg("alloc_VBR_sparse_arrays...");
    alloc_VBR_sparse_arrays ( ams[JAC],
			      exo,
			      dpi);
    ija_attic = NULL;
    ams[JAC]->belfry  = ija_attic;

    a = ams[JAC]->val;
    if( !save_old_A ) a_old = ams[JAC]->val_old;
  }
  else if ( strcmp( Matrix_Format, "front") == 0 )
    {
      /* Don't allocate any sparse matrix space when using front */
      ams[JAC]->bindx   = NULL;
      ams[JAC]->val     = NULL;
      ams[JAC]->belfry  = NULL;
      ams[JAC]->val_old = NULL;
      ams[JAC]->indx  = NULL;
      ams[JAC]->bpntr = NULL;
      ams[JAC]->rpntr = NULL;
      ams[JAC]->cpntr = NULL;

    }
  else
  {
    EH(-1,"Attempted to allocate unknown sparse matrix format");
  }

  init_vec(x, cx, exo, dpi, x_AC, nAC, &timeValueRead);

  /* if augmenting conditions (ACs) were read in, update their data floats */
  if (nAC > 0)
  {
    if(augc[0].iread == 1)
      {
	for(iAC=0 ; iAC<nAC ; iAC++)	
	  { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); }
      }
  }


  /*
   * set boundary conditions on the initial conditions
   */

  find_and_set_Dirichlet(x, xdot, exo, dpi);

  exchange_dof(cx, dpi, x);

  dcopy1(numProcUnknowns,x,x_old);
  dcopy1(numProcUnknowns,x_old,x_older);
  dcopy1(numProcUnknowns,x_older,x_oldest);

  if( nAC > 0)
  {
    dcopy1(nAC,x_AC, x_AC_old);}

  /*
   * initialize the counters for when to print out data
   */

  step_print = 1;

  matrix_systems_mask = 1;
      
  log_msg("sl_init()...");
  sl_init(matrix_systems_mask, ams, exo, dpi, cx);

#ifdef PARALLEL
  /*
  * Make sure the solver was properly initialized on all processors.
  */
  check_parallel_error("Solver initialization problems");
#endif

  ams[JAC]->options[AZ_keep_info] = 1;

  DPRINTF(stderr, "\nINITIAL ELEMENT QUALITY CHECK---\n");
  good_mesh = element_quality(exo, x, ams[0]->proc_config);

  /*
   * set the number of successful path steps to zero
   */

  nt = 0;

  /*
   * LOOP THROUGH PARAMETER UNTIL MAX NUMBER
   * OF STEPS SURPASSED
   */

  for (n=0;n<MaxPathSteps;n++) {

    alqALC = 1;

    for (iHC=0;iHC<nHC;iHC++) {
	
      switch (aldALC[iHC]) {
      case -1: /* REDUCING PARAMETER DIRECTION */
	  if (path1[iHC] <= lambdaEnd[iHC]) { 
	    alqALC = -1;
	    path1[iHC] = lambdaEnd[iHC];
	    delta_s[iHC] = path[iHC]-path1[iHC];
	  } 
	  break;
      case +1: /* RISING PARAMETER DIRECTION */
	  if (path1[iHC] >= lambdaEnd[iHC]) { 
	    alqALC = -1;
	    path1[iHC] = lambdaEnd[iHC];
	    delta_s[iHC] = path1[iHC]-path[iHC];
	  } 
	  break;
      }

      /*
       * ADJUST NATURAL PARAMETER
       */

      update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); 
    }   /*  end of iHC loop */

    if (hunt[0].EndParameterValue == hunt[0].BegParameterValue)
      {
	hunt_par = 1.0;
      }
    else
      {
	hunt_par = (path1[0] - hunt[0].BegParameterValue)
	         / (hunt[0].EndParameterValue - hunt[0].BegParameterValue);
	hunt_par = fabs(hunt_par);
      }

    /*
     * IF STEP CHANGED, REDO FIRST ORDER PREDICTION
     */

    if(alqALC == -1)
    {
      DPRINTF(stderr,"\n\t ******** LAST PATH STEP!\n");
      dcopy1(numProcUnknowns,x_old,x);

      dhunt_par = hunt_par-hunt_par_old;
      switch (Continuation) {
      case HUN_ZEROTH:
          break;
      case  HUN_FIRST:
          v1add(numProcUnknowns, &x[0], dhunt_par, &x_sens[0]);
	  break;
      }
    }

    /* 
     * reset Dirichlet condition Mask, node->DBC to -1 where it
     * is set in order for Dirichlet conditions to be 
     * set appropriately for each path step 
     */
	  
    nullify_dirichlet_bcs();
	  
    find_and_set_Dirichlet (x, xdot, exo, dpi); 

    exchange_dof(cx, dpi, x);

    if(ProcID ==0) {
      DPRINTF(stderr, "\n\t----------------------------------");
      switch (Continuation) {
      case HUN_ZEROTH:
	  DPRINTF(stderr, "\n\tZero Order Hunting:");
	  break;
      case  HUN_FIRST:
	  DPRINTF(stderr, "\n\tFirst Order Hunting:");
	  break; }
      DPRINTF(stderr, "\n\tStep number: %4d of %4d (max)", n+1, MaxPathSteps);
      DPRINTF(stderr, "\n\tAttempting solution at: theta = %g",hunt_par);
      for (iHC=0;iHC<nHC;iHC++) {
	switch (hunt[iHC].Type) {
	case 1: /* BC */
	    DPRINTF(stderr, "\n\tBCID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID);
	    break;
	case 2: /* MT */
	    DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d", hunt[iHC].MTID, hunt[iHC].MPID);
	    break;
 	case 3: /* AC */
 	    DPRINTF(stderr, "\n\tACID=%3d DFID=%5d", hunt[iHC].BCID, hunt[iHC].DFID);
 	    break;
	}
	DPRINTF(stderr, " Parameter= % 10.6e delta_s= %10.6e", path1[iHC], delta_s[iHC]);
      }
    }
	
    ni = 0;
    do {

#ifdef DEBUG
      fprintf(stderr, "%s: starting solve_nonlinear_problem\n", yo);
#endif
      err = solve_nonlinear_problem(ams[JAC], 
				    x, 
				    delta_t, 
				    theta,
				    x_old,
				    x_older, 
				    xdot,
				    xdot_old,
				    resid_vector,
				    x_update,
				    scale, 
				    &converged, 
				    &nprint, 
				    tev, 
				    tev_post,
				    NULL,
				    rd,
				    gindex,
				    p_gsize,
				    gvec, 
				    gvec_elem, 
 				    path1[0],
				    exo, 
				    dpi, 
				    cx, 
				    0, 
				    &path_step_reform,
				    is_steady_state,
				    x_AC,
 				    x_AC_dot,
				    hunt_par,
				    resid_vector_sens,
				    x_sens,
				    x_sens_p,
                                    NULL);

#ifdef DEBUG
      fprintf(stderr, "%s: returned from solve_nonlinear_problem\n", yo);
#endif

      if (err == -1) converged = 0;
      inewton = err;
      if (converged)
      {
	EH(error, "error writing ASCII soln file."); /* srs need to check */

	if (Write_Intermediate_Solutions == 0) {    
#ifdef DEBUG
	  fprintf(stderr, "%s: write_solution call WIS\n", yo);
#endif
	  write_solution(ExoFileOut, resid_vector, x, x_sens_p, x_old, 
			 xdot, xdot_old, tev, tev_post,NULL,  rd, gindex,
			 p_gsize, gvec, gvec_elem, &nprint, delta_s[0], 
 			 theta, path1[0], NULL, exo, dpi);
#ifdef DEBUG
	  fprintf(stderr, "%s: write_solution end call WIS\n", yo);
#endif
	}

	/*
	 * PRINT OUT VALUES OF EXTRA UNKNOWNS 
	 * FROM AUGMENTING CONDITIONS 
	 */

	if (nAC > 0) 
          {
	    
	    DPRINTF(stderr, "\n------------------------------\n");
	    DPRINTF(stderr, "Augmenting Conditions:    %4d\n", nAC);
	    DPRINTF(stderr, "Number of extra unknowns: %4d\n\n", nAC);

            for (iAC = 0; iAC < nAC; iAC++)
             {
              if (augc[iAC].Type == AC_USERBC)
               {
                DPRINTF(stderr, "\tAC[%4d] DF[%4d] = %10.6e\n",
                        augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
               }
              else if (augc[iAC].Type == AC_USERMAT  ||
                       augc[iAC].Type == AC_FLUX_MAT )
               {
                DPRINTF(stderr, "\n MT[%4d] MP[%4d] = %10.6e\n",
                        augc[iAC].MTID, augc[iAC].MPID, x_AC[iAC]);
               }
              else if(augc[iAC].Type == AC_VOLUME)
               {
                evol_local = augc[iAC].evol;
#ifdef PARALLEL
                if( Num_Proc > 1 ) {
                     MPI_Allreduce( &evol_local, &evol_global, 1,
                                    MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                }
                evol_local = evol_global;
#endif
                DPRINTF(stderr, "\tMT[%4d] VC[%4d]=%10.6e Param=%10.6e\n",
                        augc[iAC].MTID, augc[iAC].VOLID, evol_local,
                        x_AC[iAC]);
               }
	      else if(augc[iAC].Type == AC_POSITION)
               {
                evol_local = augc[iAC].evol;
#ifdef PARALLEL
                if( Num_Proc > 1 ) {
                     MPI_Allreduce( &evol_local, &evol_global, 1,
                                    MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                }
                evol_local = evol_global;
#endif
                DPRINTF(stderr, "\tMT[%4d] XY[%4d]=%10.6e Param=%10.6e\n",
                        augc[iAC].MTID, augc[iAC].VOLID, evol_local,
                        x_AC[iAC]);
               }
               else if(augc[iAC].Type == AC_FLUX)
               {
                DPRINTF(stderr, "\tBC[%4d] DF[%4d]=%10.6e\n",
                        augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
               }
             }
	  }

      /* Check element quality */
      good_mesh = element_quality(exo, x, ams[0]->proc_config);

	/*
	     
	  INTEGRATE FLUXES, FORCES  

	*/

	for (i = 0; i < nn_post_fluxes; i++)
	{
	  err_dbl = evaluate_flux ( exo, dpi, 
                                    pp_fluxes[i]->ss_id, 
				    pp_fluxes[i]->flux_type ,
                                    pp_fluxes[i]->flux_type_name ,
				    pp_fluxes[i]->blk_id , 
				    pp_fluxes[i]->species_number, 
				    pp_fluxes[i]->flux_filenm,
                                    pp_fluxes[i]->profile_flag,
 				    x,xdot,NULL,delta_s[0],path1[0],1); 
	}


	/*
	  COMPUTE FLUX, FORCE SENSITIVITIES
	*/


	for (i = 0; i < nn_post_fluxes_sens; i++)
	{
	  err_dbl = evaluate_flux_sens ( exo, dpi,
                                         pp_fluxes_sens[i]->ss_id,
					 pp_fluxes_sens[i]->flux_type ,
                                         pp_fluxes_sens[i]->flux_type_name ,
					 pp_fluxes_sens[i]->blk_id ,
					 pp_fluxes_sens[i]->species_number,
					 pp_fluxes_sens[i]->sens_type,
					 pp_fluxes_sens[i]->sens_id,
					 pp_fluxes_sens[i]->sens_flt,
					 pp_fluxes_sens[i]->sens_flt2,
					 pp_fluxes_sens[i]->vector_id,
					 pp_fluxes_sens[i]->flux_filenm,
                                         pp_fluxes_sens[i]->profile_flag,
 					 x,xdot,x_sens_p,delta_s[0],path1[0],1);
	}
 	/*
      	 * Compute global volumetric quantities
      	 */
     	 for (i = 0; i < nn_volume; i++ ) {
       		evaluate_volume_integral(exo, dpi,
                                pp_volume[i]->volume_type,
                                pp_volume[i]->volume_name,
                                pp_volume[i]->blk_id,
                                pp_volume[i]->species_no,
                                pp_volume[i]->volume_fname,
                                pp_volume[i]->params,
                                NULL,  x, xdot, delta_s[0],
                                path1[0], 1);
     		}

      } /* end of if converged block */

      /*
       * INCREMENT COUNTER
       */
   
      ni++;

      /*
       * 
       * DID IT CONVERGE ? 
       * IF NOT, REDUCE STEP SIZE AND TRY AGAIN
       * 
       */

      if (!converged) {

	if (ni > 10) {
 	  DPRINTF(stderr,"\n ************************************\n");
 	  DPRINTF(stderr," W: Did not converge in Newton steps.\n");
 	  DPRINTF(stderr,"    Find better initial guess.       \n");
 	  DPRINTF(stderr," ************************************\n"); 
 	  exit(0);
	}

        /*
         * ADJUST STEP SIZE - unless failed on first step
         */

        if ( nt != 0 )
        {
	DPRINTF(stderr, "\n\tFailed to converge:\n");

	for (iHC=0;iHC<nHC;iHC++) {

	  delta_s[iHC] *= 0.5;

	  switch (aldALC[iHC]) {
	  case -1: 
	      path1[iHC] = path[iHC] - delta_s[iHC];
	      break;
	  case +1: 
	      path1[iHC] = path[iHC] + delta_s[iHC];
	      break;
	  }

	  /*
	   * RESET
	   */

	  alqALC = 1;

	  DPRINTF(stderr, "Decreasing step-length to %10.6e.\n", delta_s[iHC]);

	  if (delta_s[iHC] < hDelta_s_min[iHC]) {
 	    DPRINTF(stderr,"\n X: C step-length reduced below minimum.");
 	    DPRINTF(stderr,"\n    Program terminated.\n");
	    /* This needs to have a return value of 0, indicating
	     * success, for the continuation script to not treat this
	     * as a failed command. */
	    exit(0);
	  } 
#ifdef PARALLEL
              check_parallel_error("\t");
#endif

	  /*
	   * ADJUST NATURAL PARAMETER
	   */
	    
	  update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi);
	}  /* end of iHC loop  */

	if (hunt[0].EndParameterValue == hunt[0].BegParameterValue)
	  {
	    hunt_par = 1.0;
	  }
	else
	  {
	    hunt_par = (path1[0] - hunt[0].BegParameterValue)
	             / (hunt[0].EndParameterValue - hunt[0].BegParameterValue);
	    hunt_par = fabs(hunt_par);
	  }

	/*
	 * GET ZERO OR FIRST ORDER PREDICTION
	 */

	dhunt_par = hunt_par-hunt_par_old;

	switch (Continuation) {
	case HUN_ZEROTH:
	    vcopy(numProcUnknowns, &x[0], 1.0, &x_old[0]);
	    break;
	case  HUN_FIRST:
	    v2sum(numProcUnknowns, &x[0], 1.0, &x_old[0], dhunt_par, &x_sens[0]);
            break;
	}
	
	/* MMH: Needed to put this in, o/w it may find that the
         * solution and residual HAPPEN to satisfy the convergence
         * criterion for the next newton solve...
         */
        find_and_set_Dirichlet(x, xdot, exo, dpi);
	
        exchange_dof(cx, dpi, x);

	if (nAC > 0)
          {
	    dcopy1(nAC, x_AC_old, x_AC);
	    for(iAC=0 ; iAC<nAC ; iAC++)	
	      { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); }
	  }

	if (hunt[0].EndParameterValue == hunt[0].BegParameterValue)
	  {
	    hunt_par = 1.0;
	  }
	else
	  {
	    hunt_par = (path1[0] - hunt[0].BegParameterValue)
	             / (hunt[0].EndParameterValue - hunt[0].BegParameterValue);
	    hunt_par = fabs(hunt_par);
	  }

 	}
      else if (inewton == -1)
      {
	DPRINTF(stderr, "\nHmm... trouble on first step \n  Let's try some more relaxation  \n");
	if ((damp_factor1 <= 1. && damp_factor1 >= 0.) &&
	    (damp_factor2 <= 1. && damp_factor2 >= 0.) &&
	    (damp_factor3 <= 1. && damp_factor3 >= 0.))
	  {
	    custom_tol1 *= 0.01;
	    custom_tol2 *= 0.01;
	    custom_tol3 *= 0.01;
	    DPRINTF(stderr, "  custom tolerances %g %g %g  \n",
		    custom_tol1, custom_tol2, custom_tol3);
	  }
	else
	  {
	    damp_factor1 *= 0.5;
	    DPRINTF(stderr, "  damping factor %g  \n", damp_factor1);
	  }

	vcopy(numProcUnknowns, &x[0], 1.0, &x_old[0]);

	/* MMH: Needed to put this in, o/w it may find that the
	 * solution and residual HAPPEN to satisfy the convergence
	 * criterion for the next newton solve...
	 */
	find_and_set_Dirichlet(x, xdot, exo, dpi);

	exchange_dof(cx, dpi, x);

	if (nAC > 0)
	  {
	    dcopy1(nAC, x_AC_old, x_AC);
	    for (iAC = 0; iAC < nAC; iAC++)
	      { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi); }
	  }
      }
      else
      {
	DPRINTF(stderr, "\nHmm... could not converge on first step\n Let's try some more iterations\n");
	if ((damp_factor1 <= 1. && damp_factor1 >= 0.) &&
	    (damp_factor2 <= 1. && damp_factor2 >= 0.) &&
	    (damp_factor3 <= 1. && damp_factor3 >= 0.))
	  {
	    custom_tol1 *= 100.;
	    custom_tol2 *= 100.;
	    custom_tol3 *= 100.;
	    DPRINTF(stderr, "  custom tolerances %g %g %g  \n",
		    custom_tol1, custom_tol2, custom_tol3);
	  }
	else
	  {
	    damp_factor1 *= 2.0;
	    damp_factor1 = MIN(damp_factor1, 1.0);
	    DPRINTF(stderr, "  damping factor %g  \n", damp_factor1);
	  }
      }
 

      }  /* end of !converged */

    } while (converged == 0);

    /*
     * CONVERGED
     */
    nt++;
    custom_tol1 = toler_org[0];
    custom_tol2 = toler_org[1];
    custom_tol3 = toler_org[2];
    damp_factor1 = damp_org;
    DPRINTF(stderr,
	    "\n\tStep accepted, theta (proportion complete) = %10.6e\n",
	    hunt_par);
    for (iHC=0;iHC<nHC;iHC++) {
      switch (hunt[iHC].Type) {
      case 1:		/* BC */
	  DPRINTF(stderr, "\tStep accepted, BCID=%3d DFID=%5d",
		  hunt[iHC].BCID, hunt[iHC].DFID);
	  break;
      case 2:		/* MT */
	  DPRINTF(stderr, "\tStep accepted, MTID=%3d MPID=%5d",
		  hunt[iHC].MTID, hunt[iHC].MPID);
	  break;
      case 3:		/* AC */
	  DPRINTF(stderr, "\tStep accepted, ACID=%3d DFID=%5d",
 		  hunt[iHC].BCID, hunt[iHC].DFID);
 	  break;
      }
      DPRINTF(stderr, " Parameter= % 10.6e\n", path1[iHC]);
    }

    /* 
     * check path step error, if too large do not enlarge path step 
     */

    for (iHC=0;iHC<nHC;iHC++) {

      if ((ni == 1) && (n != 0) && (!const_delta_s[iHC])) 
      {
	delta_s_new[iHC] = path_step_control(num_total_nodes, 
					     delta_s[iHC], delta_s_old[iHC], 
					     x, 
					     eps, 
					     &success_ds, 
					     cont->use_var_norm, inewton);
	if (delta_s_new[iHC] > hDelta_s_max[iHC]) {delta_s_new[iHC] = hDelta_s_max[iHC];}
      }
      else 
      {
	success_ds = 1;
	delta_s_new[iHC] = delta_s[iHC];
      }
    }
	  
    /* 
     * determine whether to print out the data or not 
     */

    i_print = 0;
    if (nt == step_print) {
      i_print = 1;
      step_print += cont->print_freq; }

    if (alqALC == -1) 
    { i_print = 1; }
	  
    if (i_print) {
      error = write_ascii_soln(x, resid_vector, numProcUnknowns,
 			       x_AC, nAC, path1[0], file);
      if (error) {
	DPRINTF(stderr, "%s:  error writing ASCII soln file\n", yo);
      }	  
      if ( Write_Intermediate_Solutions == 0 ) {
	write_solution(ExoFileOut, resid_vector, x, x_sens_p, 
		       x_old, xdot, xdot_old, tev, tev_post, NULL, 
		       rd, gindex, p_gsize, gvec, gvec_elem, &nprint,
 		       delta_s[0], theta, path1[0], NULL, exo, dpi);
	nprint++;
      }
    }
	  
    /*
     * backup old solutions
     * can use previous solutions for prediction one day
     */
	  
    dcopy1(numProcUnknowns,x_older,x_oldest);
    dcopy1(numProcUnknowns,x_old,x_older);
    dcopy1(numProcUnknowns,x,x_old);

    dcopy1(nHC,delta_s_older,delta_s_oldest);
    dcopy1(nHC,delta_s_old  ,delta_s_older );
    dcopy1(nHC,delta_s      ,delta_s_old   );
    dcopy1(nHC,delta_s_new  ,delta_s       );
/*
    delta_s_oldest = delta_s_older;
    delta_s_older = delta_s_old;
    delta_s_old = delta_s;
    delta_s = delta_s_new;
*/
    hunt_par_old=hunt_par;
    if ( nAC > 0) {
      dcopy1(nAC, x_AC, x_AC_old);
    }

    /*
     * INCREMENT/DECREMENT PARAMETER
     */


    for (iHC=0;iHC<nHC;iHC++) {

      path[iHC]  = path1[iHC];
	  
      switch (aldALC[iHC]) {
      case -1: 
	  path1[iHC] = path[iHC] - delta_s[iHC];
	  break;
      case +1: 
	  path1[iHC] = path[iHC] + delta_s[iHC];
	  break;
      }
	  
      /*
       * ADJUST NATURAL PARAMETER
       */
	
      update_parameterHC(iHC, path1[iHC], x, xdot, x_AC, delta_s[iHC], cx, exo, dpi); 
    }  /*  end of iHC loop */

    /*
     * GET FIRST ORDER PREDICTION
     */

    if (hunt[0].EndParameterValue == hunt[0].BegParameterValue)
      {
	hunt_par = 1.0;
      }
    else
      {
	hunt_par = (path1[0] - hunt[0].BegParameterValue)
	         / (hunt[0].EndParameterValue - hunt[0].BegParameterValue);
	hunt_par = fabs(hunt_par);
      }
    dhunt_par = hunt_par - hunt_par_old;
    switch (Continuation) {
    case HUN_ZEROTH:
	break;
    case HUN_FIRST:
	v1add(numProcUnknowns, &x[0], dhunt_par, &x_sens[0]);
	break;
    }

    if (!good_mesh) goto free_and_clear;

    /*
     * 
     * CHECK END CONTINUATION
     *  
     */

    if (alqALC == -1)
    { alqALC = 0; }
    else
    { alqALC = 1; }

    if (alqALC == 0) {
      DPRINTF(stderr,"\n\n\t I will continue no more!\n\t No more continuation for you!\n");
      goto free_and_clear;
    }
	
  } /* n */

  if (n == MaxPathSteps &&
      aldALC[0] * (lambdaEnd[0] - path[0]) > 0)
    {
      DPRINTF(stderr,
	      "\n\tFailed to reach end of hunt in maximum number of successful steps (%d).\n\tSorry.\n",
	      MaxPathSteps);
      exit(0);
    }
#ifdef PARALLEL
  check_parallel_error("Hunting error");
#endif

  /*
   * DONE CONTINUATION
   */

 free_and_clear: 

  /*
   * Transform the node point coordinates according to the
   * displacements and write out all the results using the
   * displaced coordinates. Set the displacement field to
   * zero, too.
   */

  if (Anneal_Mesh) {
#ifdef DEBUG
    fprintf(stderr, "%s: anneal_mesh()...\n", yo);
#endif
    err = anneal_mesh(x, tev, tev_post, NULL, rd, path1[0], exo, dpi);
#ifdef DEBUG
    DPRINTF(stderr, "%s: anneal_mesh()-done\n", yo);
#endif
    EH(err, "anneal_mesh() bad return.");
  }

  /* 
   * Free a bunch of variables that aren't needed anymore 
   */
  safer_free((void **) &ROT_Types);
  safer_free((void **) &node_to_fill);

  safer_free( (void **) &resid_vector);
  safer_free( (void **) &resid_vector_sens);
  safer_free( (void **) &scale);
  safer_free( (void **) &x);

  if (nAC > 0) {
    safer_free( (void **) &x_AC);
    safer_free( (void **) &x_AC_old);
    safer_free( (void **) &x_AC_dot);
  }

  safer_free( (void **) &x_old); 
  safer_free( (void **) &x_older); 
  safer_free( (void **) &x_oldest); 
  safer_free( (void **) &xdot); 
  safer_free( (void **) &xdot_old); 
  safer_free( (void **) &x_update); 

  safer_free( (void **) &x_sens); 

  if((nn_post_data_sens+nn_post_fluxes_sens) > 0)
          Dmatrix_death(x_sens_p,num_pvector,numProcUnknowns);

  for(i = 0; i < MAX_NUMBER_MATLS; i++) {
    for(n = 0; n < MAX_MODES; n++) {
      safer_free((void **) &(ve_glob[i][n]->gn));
      safer_free((void **) &(ve_glob[i][n]));
    }
    safer_free((void **) &(vn_glob[i]));
  }

  sl_free(matrix_systems_mask, ams);

  for (i=0;i<NUM_ALSS;i++) {
    safer_free( (void**) &(ams[i]));
  }					

  safer_free( (void **) &gvec);

  safer_free( (void **) &lambda);
  safer_free( (void **) &lambdaEnd);
  safer_free( (void **) &path);
  safer_free( (void **) &path1);
  safer_free( (void **) &hDelta_s0);
  safer_free( (void **) &hDelta_s_min);
  safer_free( (void **) &hDelta_s_max);
  safer_free( (void **) &delta_s);
  safer_free( (void **) &delta_s_new);
  safer_free( (void **) &delta_s_old);
  safer_free( (void **) &delta_s_older);
  safer_free( (void **) &delta_s_oldest);

  Ivector_death(&aldALC[0], nHC);
  Ivector_death(&const_delta_s[0], nHC);

  i = 0;
  for ( eb_indx = 0; eb_indx < exo->num_elem_blocks; eb_indx++ ) {
    for ( ev_indx = 0; ev_indx < rd->nev; ev_indx++ ) {
      if ( exo->elem_var_tab[i++] == 1 ) {
        safer_free ((void **) &(gvec_elem [eb_indx][ev_indx]) );
      }
    }
    safer_free ((void **) &(gvec_elem [eb_indx]));
  }

  safer_free( (void **) &gvec_elem); 

  safer_free( (void **) &rd);
  safer_free( (void **) &Local_Offset);
  safer_free( (void **) &Dolphin);

  if( strlen( Soln_OutFile)  )
    {
       fclose(file);
    }

  return;

} /* END of routine hunt_problem  */
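
hunt_problem() recomputes the progress fraction hunt_par from the same expression in four separate places. A small helper (illustrative only, not part of the original source) makes the computation explicit:

#include <math.h>

/* Illustrative helper: map the current value of the first hunting
 * condition's path parameter onto [0,1], exactly as the inline blocks in
 * hunt_problem() do. A degenerate (zero-length) parameter range is
 * treated as already complete. */
static double compute_hunt_par(double path, double beg, double end)
{
  if (end == beg) return 1.0;
  return fabs((path - beg) / (end - beg));
}

Each inline block would then reduce to hunt_par = compute_hunt_par(path1[0], hunt[0].BegParameterValue, hunt[0].EndParameterValue);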
Example #5
int main(int argc, char *argv[])
{
  int num_PDE_eqns = 5, N_levels = 3;
  /* int nsmooth = 1; */

  int leng, level, N_grid_pts, coarsest_level;

  /* See Aztec User's Guide for more information on the */
  /* variables that follow.                             */

  int    proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];

  /* data structure for matrix corresponding to the fine grid */

  int    *data_org = NULL, *update = NULL, *external = NULL;
  int    *update_index = NULL, *extern_index = NULL;
  int    *cpntr = NULL;
  int    *bindx = NULL, N_update, iii;
  double *val = NULL;
  double *xxx, *rhs;

  AZ_MATRIX *Amat;
  AZ_PRECOND *Pmat = NULL;
  ML *ml;
  FILE *fp;
  int ch, i;
  struct AZ_SCALING *scaling;
  double solve_time, setup_time, start_time;
  ML_Aggregate *ag;
  int *ivec;
#ifdef VBR_VERSION
ML_Operator *B, *C, *D;
int *vbr_cnptr, *vbr_rnptr, *vbr_indx, *vbr_bindx, *vbr_bnptr, total_blk_rows;
int total_blk_cols, blk_space, nz_space;
double *vbr_val;
struct ML_CSR_MSRdata *csr_data;
#endif


#ifdef ML_MPI
  MPI_Init(&argc,&argv);

  /* get number of processors and the name of this processor */

  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI);
#endif

#ifdef binary
  fp = fopen(".data", "rb");
#else
  fp = fopen(".data", "r");
#endif
  if (fp == NULL)
    {
      printf("couldn't open file .data\n");
      exit(1);
    }
#ifdef binary
  fread(&leng, sizeof(int), 1, fp);
#else
  fscanf(fp, "%d", &leng);
#endif

  fclose(fp);

  N_grid_pts = leng/num_PDE_eqns;



  /* initialize the list of global indices. NOTE: the list of global */
  /* indices must be in ascending order so that subsequent calls to  */
  /* AZ_find_index() will function properly. */

  AZ_read_update(&N_update, &update, proc_config, N_grid_pts, num_PDE_eqns,
                 AZ_linear);

  AZ_read_msr_matrix(update, &val, &bindx, N_update, proc_config);

  /* This code fixes things up so that we are sure all blocks  */
  /* (including the ghost nodes) are the same size.            */

  AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update);


  AZ_transform(proc_config, &external, bindx, val,  update, &update_index,
	       &extern_index, &data_org, N_update, 0, 0, 0, &cpntr,
               AZ_MSR_MATRIX);

  Amat = AZ_matrix_create( leng );

#ifndef VBR_VERSION

  AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL);

  Amat->matrix_type  = data_org[AZ_matrix_type];

  data_org[AZ_N_rows]  = data_org[AZ_N_internal] + data_org[AZ_N_border];

#else

total_blk_rows = N_update/num_PDE_eqns;
total_blk_cols = total_blk_rows;
blk_space      = total_blk_rows*20;
nz_space       = blk_space*num_PDE_eqns*num_PDE_eqns;

vbr_cnptr = (int    *) ML_allocate(sizeof(int   )*(total_blk_cols+1));
vbr_rnptr = (int    *) ML_allocate(sizeof(int   )*(total_blk_cols+1));
vbr_bnptr = (int    *) ML_allocate(sizeof(int   )*(total_blk_cols+2));
vbr_indx  = (int    *) ML_allocate(sizeof(int   )*(blk_space+1));
vbr_bindx = (int    *) ML_allocate(sizeof(int   )*(blk_space+1));
vbr_val   = (double *) ML_allocate(sizeof(double)*(nz_space+1));

for (i = 0; i <= total_blk_cols; i++) vbr_cnptr[i] = num_PDE_eqns;

  AZ_msr2vbr(vbr_val, vbr_indx, vbr_rnptr,  vbr_cnptr, vbr_bnptr,
                vbr_bindx, bindx, val,
                total_blk_rows, total_blk_cols, blk_space,
                nz_space, -1);

  data_org[AZ_N_rows]  = data_org[AZ_N_internal] + data_org[AZ_N_border];
  data_org[AZ_N_int_blk]  = data_org[AZ_N_internal]/num_PDE_eqns;
  data_org[AZ_N_bord_blk] = data_org[AZ_N_bord_blk]/num_PDE_eqns;
  data_org[AZ_N_ext_blk]  = data_org[AZ_N_ext_blk]/num_PDE_eqns;
  data_org[AZ_matrix_type] = AZ_VBR_MATRIX;


  AZ_set_VBR(Amat, vbr_rnptr, vbr_cnptr, vbr_bnptr, vbr_indx, vbr_bindx,
             vbr_val, data_org, 0, NULL, AZ_LOCAL);

  Amat->matrix_type  = data_org[AZ_matrix_type];
#endif

  start_time = AZ_second();

  ML_Create(&ml, N_levels);
  ML_Set_PrintLevel(3);


  /* set up discretization matrix and matrix vector function */

  AZ_ML_Set_Amat(ml, N_levels-1, N_update, N_update, Amat, proc_config);

  ML_Aggregate_Create( &ag );
  ML_Aggregate_Set_Threshold(ag,0.0);
  ML_Set_SpectralNormScheme_PowerMethod(ml);
/*
   To run SA:
     a) set damping factor to 1 and use power method
        ML_Aggregate_Set_DampingFactor(ag, 4./3.);
   To run NSA:
     a) set damping factor to 0
        ML_Aggregate_Set_DampingFactor(ag, 0.);
   To run NSR
     a) set damping factor to 1 and use power method
        ML_Aggregate_Set_DampingFactor(ag, 1.);
        ag->Restriction_smoothagg_transpose = ML_FALSE;
        ag->keep_agg_information=1;
        ag->keep_P_tentative=1;
     b) hack code so it calls the energy minimizing restriction
          line 2973 of ml_agg_genP.c
     c) turn on the NSR flag in ml_agg_energy_min.cpp
   To run Emin
     a) set minimizing_energy = 2 and keep_agg_information = 1;
      ag->minimizing_energy=2;
      ag->keep_agg_information=1;
      ag->cheap_minimizing_energy = 0;
      ag->block_scaled_SA = 1;
*/
  ag->minimizing_energy=2;
  ag->keep_agg_information=1;
  ag->block_scaled_SA = 1;

  ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, num_PDE_eqns, NULL, N_update);
  ML_Aggregate_Set_MaxCoarseSize( ag, 20);

/*
ML_Aggregate_Set_RandomOrdering( ag );
ML_Aggregate_Set_DampingFactor(ag, .1);
ag->drop_tol_for_smoothing = 1.0e-3;
ML_Aggregate_Set_Threshold(ag, 1.0e-3);
ML_Aggregate_Set_MaxCoarseSize( ag, 300);
*/


  coarsest_level = ML_Gen_MultiLevelHierarchy_UsingAggregation(ml, N_levels-1, ML_DECREASING, ag);
  coarsest_level = N_levels - coarsest_level;
  if (proc_config[AZ_node] == 0)
    printf("Coarse level = %d \n", coarsest_level);

  /* set up smoothers */

  AZ_defaults(options, params);

  for (level = N_levels-1; level > coarsest_level; level--) {
          /* This is the Aztec domain decomp/ilu smoother that we */
          /* usually use for this problem.                        */

/*
          options[AZ_precond] = AZ_dom_decomp;
          options[AZ_subdomain_solve] = AZ_ilut;
          params[AZ_ilut_fill] = 1.0;
          options[AZ_reorder] = 1;
          ML_Gen_SmootherAztec(ml, level, options, params,
                        proc_config, status, AZ_ONLY_PRECONDITIONER,
                        ML_PRESMOOTHER,NULL);
*/

          /*  Sparse approximate inverse smoother that actually does both */
          /*  pre and post smoothing.                                     */
          /*

          ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth,
                                parasails_sym, parasails_thresh,
                                parasails_nlevels, parasails_filter,
                                parasails_loadbal, parasails_factorized);

          parasails_thresh /= 4.;
          */


          /* This is the symmetric Gauss-Seidel smoothing. In parallel,    */
          /* it is not a true Gauss-Seidel in that each processor          */
          /* does a Gauss-Seidel on its local submatrix independent of the */
          /* other processors.                                             */
          /*
	  ML_Gen_Smoother_SymGaussSeidel(ml,level,ML_PRESMOOTHER, nsmooth,1.);
	  ML_Gen_Smoother_SymGaussSeidel(ml,level,ML_POSTSMOOTHER,nsmooth,1.);
          */

          /* Block Gauss-Seidel with block size equal to #DOF per node.    */
          /* Not a true Gauss-Seidel in that each processor does a         */
          /* Gauss-Seidel on its local submatrix independent of the other  */
          /* processors.                                                   */
          /*

	  ML_Gen_Smoother_BlockGaussSeidel(ml,level,ML_PRESMOOTHER,
                                           nsmooth,0.67, num_PDE_eqns);
	  ML_Gen_Smoother_BlockGaussSeidel(ml,level,ML_POSTSMOOTHER,
                                           nsmooth, 0.67, num_PDE_eqns);
          */


    ML_Gen_Smoother_SymBlockGaussSeidel(ml, level, ML_POSTSMOOTHER,
                                        1, 1.0, num_PDE_eqns);
  }

  ML_Gen_CoarseSolverSuperLU(ml, coarsest_level);
  ML_Gen_Solver(ml, ML_MGW, N_levels-1, coarsest_level);
  AZ_defaults(options, params);

        options[AZ_solver]   = AZ_gmres;
        options[AZ_scaling]  = AZ_none;
        options[AZ_precond]  = AZ_user_precond;
/*
        options[AZ_conv]     = AZ_r0;
*/
        options[AZ_output]   = 1;
        options[AZ_max_iter] = 1500;
        options[AZ_poly_ord] = 5;
        options[AZ_kspace]   = 130;
        params[AZ_tol]       = 1.0e-8;
/*
options[AZ_precond] = AZ_dom_decomp;
options[AZ_subdomain_solve] = AZ_ilut;
params[AZ_ilut_fill] = 2.0;
*/

  AZ_set_ML_preconditioner(&Pmat, Amat, ml, options);
  setup_time = AZ_second() - start_time;

  xxx = (double *) malloc(leng*sizeof(double));
  rhs = (double *) malloc(leng*sizeof(double));

  for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0;

        /* Set rhs */

        fp = fopen("AZ_capture_rhs.mat","r");
        if (fp == NULL) {
           if (proc_config[AZ_node] == 0) printf("taking random vector for rhs\n");
           AZ_random_vector(rhs, data_org, proc_config);
           AZ_reorder_vec(rhs, data_org, update_index, NULL);
        }
        else {
           fclose(fp);
	   ivec =(int *)malloc((leng+1)*sizeof(int));
           AZ_input_msr_matrix("AZ_capture_rhs.mat", update, &rhs, &ivec,
                                N_update, proc_config);
           free(ivec);
           AZ_reorder_vec(rhs, data_org, update_index, NULL);
        }

        /* Set x */

        fp = fopen("AZ_capture_init_guess.mat","r");
        if (fp != NULL) {
           fclose(fp);
	   ivec =(int *)malloc((leng+1)*sizeof(int));
           AZ_input_msr_matrix("AZ_capture_init_guess.mat",update, &xxx, &ivec,
                                N_update, proc_config);
           free(ivec);
           AZ_reorder_vec(xxx, data_org, update_index, NULL);
        }

        /* if Dirichlet BC ... put the answer in */

        for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) {
           if ( (val[i] > .99999999) && (val[i] < 1.0000001))
              xxx[i] = rhs[i];
        }

        fp = fopen("AZ_no_multilevel.dat","r");
        scaling = AZ_scaling_create();
        start_time = AZ_second();
        if (fp != NULL) {
           fclose(fp);
           options[AZ_precond] = AZ_none;
           options[AZ_scaling] = AZ_sym_diag;
           options[AZ_ignore_scaling] = AZ_TRUE;

           options[AZ_keep_info] = 1;
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);

/*
           options[AZ_pre_calc] = AZ_reuse;
           options[AZ_conv] = AZ_expected_values;
           if (proc_config[AZ_node] == 0)
              printf("\n-------- Second solve with improved convergence test -----\n");
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);
           if (proc_config[AZ_node] == 0)
              printf("\n-------- Third solve with improved convergence test -----\n");
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);
*/
        }
        else {
           options[AZ_keep_info] = 1;
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);
           options[AZ_pre_calc] = AZ_reuse;
           options[AZ_conv] = AZ_expected_values;
/*
           if (proc_config[AZ_node] == 0)
              printf("\n-------- Second solve with improved convergence test -----\n");
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);
           if (proc_config[AZ_node] == 0)
              printf("\n-------- Third solve with improved convergence test -----\n");
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);
*/
        }
   solve_time = AZ_second() - start_time;

   if (proc_config[AZ_node] == 0)
      printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time);

   ML_Aggregate_Destroy(&ag);
   ML_Destroy(&ml);
   AZ_free((void *) Amat->data_org);
   AZ_free((void *) Amat->val);
   AZ_free((void *) Amat->bindx);
   AZ_free((void *) update);
   AZ_free((void *) external);
   AZ_free((void *) extern_index);
   AZ_free((void *) update_index);
   AZ_scaling_destroy(&scaling);
   if (Amat  != NULL) AZ_matrix_destroy(&Amat);
   if (Pmat  != NULL) AZ_precond_destroy(&Pmat);
   free(xxx);
   free(rhs);


#ifdef ML_MPI
  MPI_Finalize();
#endif

  return 0;

}
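
After each AZ_iterate() call above, Aztec fills the status array with convergence information. A small illustrative helper (not in the original example; assumes az_aztec.h and <stdio.h>) that reports the standard AZ_its and AZ_why entries:

/* Illustrative helper: summarize the status vector that AZ_iterate()
 * fills in. AZ_its holds the iteration count and AZ_why the termination
 * reason (AZ_normal means the convergence test was met). */
static void report_az_status(double status[AZ_STATUS_SIZE], int proc_config[])
{
  if (proc_config[AZ_node] != 0) return;   /* report on one processor only */
  printf("iterations taken : %d\n", (int) status[AZ_its]);
  if (status[AZ_why] != AZ_normal)
    printf("warning: solve did not terminate normally (why = %g)\n",
           status[AZ_why]);
}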
AZ_MATRIX *user_Ke_build(struct user_partition *Edge_Partition)
{
  double dcenter, doff, sigma = .0001;
  int ii,jj, horv, i, nx, global_id, nz_ptr, Nlocal_edges;

  /* Aztec matrix and temp variables */

  int       *Ke_bindx, *Ke_data_org = NULL;
  double    *Ke_val;
  AZ_MATRIX *Ke_mat;
  int       proc_config[AZ_PROC_SIZE], *cpntr = NULL;
  int       *reordered_glob_edges = NULL, *reordered_edge_externs = NULL;

  Nlocal_edges = Edge_Partition->Nlocal;
  nx = (int) sqrt( ((double) Edge_Partition->Nglobal/2) + .00001);

  Ke_bindx = (int    *) malloc((7*Nlocal_edges+1)*sizeof(int));
  Ke_val   = (double *) malloc((7*Nlocal_edges+1)*sizeof(double));
  Ke_bindx[0] = Nlocal_edges+1;

  dcenter  = 2 + 2.*sigma/((double) ( 3 * nx * nx));
  doff = -1 + sigma/((double) ( 6 * nx * nx));

  for (i = 0; i < Nlocal_edges; i++) {
    global_id = (Edge_Partition->my_global_ids)[i];
    invindex(global_id, &ii, &jj, nx, &horv);
    nz_ptr = Ke_bindx[i];

    Ke_val[i] = dcenter;

    if (horv == HORIZONTAL) {
      if (jj != 0) {
	Ke_bindx[nz_ptr] = north(ii,jj,nx);     Ke_val[nz_ptr++] = doff;
	Ke_bindx[nz_ptr] = east(ii,jj,nx);      Ke_val[nz_ptr++] = -1.;
	if (ii != 0) {Ke_bindx[nz_ptr]=west(ii,jj,nx); Ke_val[nz_ptr++]= 1.;}
	jj--;
      }
      else {
	Ke_val[i] = 1. +  2.*sigma/((double) ( 3 * nx * nx));
	jj = nx-1;
      }
      Ke_bindx[nz_ptr] = east(ii,jj,nx);      Ke_val[nz_ptr++] = 1.;
      if (ii != 0){ Ke_bindx[nz_ptr]=west(ii,jj,nx);  Ke_val[nz_ptr++]=-1.;}
      if (jj != 0){ Ke_bindx[nz_ptr]=south(ii,jj,nx); Ke_val[nz_ptr++]=doff;}
    }
    else {
      if (ii != 0) {
	Ke_bindx[nz_ptr] = north(ii,jj,nx);     Ke_val[nz_ptr++] = -1.;
	Ke_bindx[nz_ptr] = east(ii,jj,nx);      Ke_val[nz_ptr++] = doff;
	if (jj != 0) {Ke_bindx[nz_ptr]=south(ii,jj,nx); Ke_val[nz_ptr++]=1.;}
	ii--;
      }
      else {
	Ke_val[i]  = 1 +  2.*sigma/((double) ( 3 * nx * nx));
	ii = nx-1;
      }
      Ke_bindx[nz_ptr] = north(ii,jj,nx);     Ke_val[nz_ptr++] = 1.;
      if (ii != 0) {Ke_bindx[nz_ptr]=west(ii,jj,nx);  Ke_val[nz_ptr++]=doff;}
      if (jj != 0) {Ke_bindx[nz_ptr]=south(ii,jj,nx); Ke_val[nz_ptr++]=-1.;}
    }
    Ke_bindx[i+1] = nz_ptr;
  }

  AZ_set_proc_config(proc_config, COMMUNICATOR);

  AZ_transform_norowreordering(proc_config, &(Edge_Partition->needed_external_ids),
			       Ke_bindx, Ke_val, Edge_Partition->my_global_ids,
			       &reordered_glob_edges, &reordered_edge_externs, 
			       &Ke_data_org, Nlocal_edges, 0, 0, 0, 
			       &cpntr,	       AZ_MSR_MATRIX);
  AZ_free(reordered_glob_edges);
  AZ_free(reordered_edge_externs);
  Edge_Partition->Nghost = Ke_data_org[AZ_N_external];

  Ke_mat = AZ_matrix_create( Nlocal_edges );
  AZ_set_MSR(Ke_mat, Ke_bindx, Ke_val, Ke_data_org, 0, NULL, AZ_LOCAL);

  return(Ke_mat);
}
void AZ_matvec_mult(double *val, int *indx, int *bindx, int *rpntr, int *cpntr,
                    int *bpntr, double *b, register double *c,
                    int exchange_flag, int *data_org)

/******************************************************************************

  c = Ab:
  Sparse (square) overlapped matrix-vector multiply, using the distributed
  variable block row (DVBR) data structure (A = val).

  Author:          Scott A. Hutchinson, SNL, 1421
  =======

  Return code:     void
  ============

  Parameter list:
  ===============

  val:             Array containing the entries of the matrix. The matrix is
                   stored block-row-by-block-row. Each block entry is dense and
                   stored by columns (VBR).

  indx:            The ith element of indx points to the location in val of the
                   (0,0) entry of the ith block entry. The last element is the
                   number of nonzero entries of matrix A plus one.

  bindx:           Contains the block column indices of the non-zero block
                   entries.

  rpntr:           The ith element of rpntr indicates the first point row in
                   the i'th block row. The last element is the number of block
                   rows plus one.

  cpntr:           The jth element of cpntr indicates the first point column in
                   the jth block column. The last element is the number of
                   block columns plus one.

  bpntr:           The ith element of bpntr points to the first block entry of
                   the ith row in bindx. The last element is the number of
                   nonzero blocks of matrix A plus one.

  b:               Contains the vector b.

  c:               Contains the result vector c.

  exchange_flag:   Flag which controls call to exchange_bdry.

  data_org:        Array containing information on the distribution of the
                   matrix to this processor as well as communication parameters
                   (see Aztec User's Guide).

******************************************************************************/

{

  /* local variables */

   AZ_MATRIX Amat;
   int proc_config[AZ_PROC_SIZE];
   static int first_time = 1;

   if (exchange_flag != 1) {
      printf("Warning: exchange_flag is no longer used in AZ_matvec_mult().\n");
      printf("         Set to '1' to avoid this message.\n");
   }
   Amat.rpntr      = rpntr;   Amat.cpntr    = cpntr;
   Amat.bpntr      = bpntr;   Amat.bindx    = bindx;
   Amat.indx       = indx;    Amat.val      = val;
   Amat.data_org   = data_org;
   Amat.aux_ival   = NULL;
   Amat.aux_dval   = NULL;
   Amat.aux_matrix = NULL;
   Amat.matrix_type = data_org[AZ_matrix_type];
#ifdef AZTEC_MPI
   AZ_set_comm(proc_config, MPI_COMM_WORLD);
   if (first_time == 1) {
      AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
#else
   if (first_time == 1) {
      AZ_set_proc_config(proc_config, AZ_NOT_MPI);
#endif
      if (proc_config[AZ_node] == 0) {
          printf("Warning: AZ_matvec_mult() should be replaced with either\n");
          printf("          AZ_MSR_matvec_mult or AZ_VBR_matvec_mult()\n");
      }
   }
   first_time = 0;

   if      (Amat.matrix_type == AZ_MSR_MATRIX) Amat.matvec = AZ_MSR_matvec_mult;
   else if (Amat.matrix_type == AZ_VBR_MATRIX) Amat.matvec = AZ_VBR_matvec_mult;

   Amat.matvec(b, c, &Amat, proc_config);
}
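The DVBR description above is easier to digest with concrete arrays in hand. The following stand-alone sketch uses made-up values (a 3 x 3 point matrix with block-row sizes 2 and 1, not taken from the Aztec sources) and walks rpntr/cpntr/bpntr/indx/bindx/val the way a VBR matvec does:

#include <stdio.h>

/* Hypothetical 3 x 3 point matrix stored in VBR form: two block rows of
   point sizes 2 and 1. Block entries: (0,0) is 2x2, (0,1) is 2x1,
   (1,1) is 1x1. Each dense block is stored by columns.                 */
int    rpntr[] = {0, 2, 3};          /* first point row of each block row   */
int    cpntr[] = {0, 2, 3};          /* first point col of each block col   */
int    bpntr[] = {0, 2, 3};          /* first block entry of each block row */
int    bindx[] = {0, 1, 1};          /* block column of each block entry    */
int    indx[]  = {0, 4, 6, 7};       /* start of each dense block in val    */
double val[]   = {4., 1., 1., 4.,    /* block (0,0), stored by columns      */
                  -1., -1.,          /* block (0,1)                         */
                  4.};               /* block (1,1)                         */

int main(void)
{
  double b[3] = {1., 1., 1.}, c[3] = {0., 0., 0.};
  int iblk, k, i, j, m, n;

  for (iblk = 0; iblk < 2; iblk++) {                /* loop over block rows */
    m = rpntr[iblk+1] - rpntr[iblk];                /* point rows in block  */
    for (k = bpntr[iblk]; k < bpntr[iblk+1]; k++) { /* blocks in this row   */
      n = cpntr[bindx[k]+1] - cpntr[bindx[k]];      /* point cols in block  */
      for (j = 0; j < n; j++)                       /* dense block, by cols */
        for (i = 0; i < m; i++)
          c[rpntr[iblk]+i] += val[indx[k] + j*m + i] * b[cpntr[bindx[k]]+j];
    }
  }
  for (i = 0; i < 3; i++) printf("c[%d] = %g\n", i, c[i]);
  return 0;
}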
Example #8
/* Assign unknowns to processors */
void partition_edges(struct user_partition *Partition)
{
  int i, nx, Nloc;
  ML_Comm *comm;
  int *my_local_ids;

#ifdef AZTEC
  int    proc_config[AZ_PROC_SIZE];

  AZ_set_proc_config(proc_config, COMMUNICATOR);
  Partition->mypid = proc_config[AZ_node];
  Partition->nprocs= proc_config[AZ_N_procs];

  AZ_input_update(NULL,&(Partition->Nlocal),
		  &(Partition->my_global_ids),
		    proc_config,     Partition->Nglobal, 1, AZ_linear);
#else
  Partition->Nghost = 0;

  nx = (int) sqrt( ((double) Partition->Nglobal/2) + .00001);

  ML_Comm_Create( &comm);
  Partition->mypid = comm->ML_mypid;
  Partition->nprocs= comm->ML_nprocs;

  if (comm->ML_nprocs > 2) {
    if (comm->ML_mypid == 0)
      printf("This example only works for one or two processors\n");
    exit(1);
  }
  if ((comm->ML_nprocs == 2) && (Partition->Nglobal%2 == 1)) {
    if (comm->ML_mypid == 0)
      printf("Two processor case requires an even number of points.\n");
    exit(1);
  }


  if (comm->ML_nprocs == 1) {  /* 1 processor case is simple */
    Partition->Nlocal = Partition->Nglobal; 
    Partition->my_global_ids = (int *) malloc(sizeof(int)*Partition->Nlocal);
    for (i = 0; i < Partition->Nlocal; i++) 
      (Partition->my_global_ids)[i] = i;
    
    Partition->my_local_ids = (int *) malloc(sizeof(int)*Partition->Nglobal);
    for (i = 0; i < Partition->Nglobal; i++) 
      (Partition->my_local_ids)[i] = i;
  }
  else {

    /* allocate space */

    Partition->Nlocal = Partition->Nglobal/2; 
    Partition->my_global_ids = (int *) malloc(sizeof(int)*Partition->Nlocal);
    my_local_ids =  (int *) malloc(sizeof(int)*Partition->Nglobal);
    Partition->my_local_ids = my_local_ids;

    /* initialize local ids to '-1' (not owned by me) */
    for (i = 0; i < Partition->Nglobal; i++) 
      (Partition->my_local_ids)[i] = -1;

    /* set global ids */
    for (i = 0; i < Partition->Nlocal/2; i++) {
      (Partition->my_global_ids)[i] = i + comm->ML_mypid*Partition->Nlocal/2;
    }
    for (i = 0; i < Partition->Nlocal/2; i++) {
      (Partition->my_global_ids)[i + Partition->Nlocal/2] = 
	Partition->Nlocal+
	i + comm->ML_mypid*Partition->Nlocal/2;
    }

    /* set local ids of nonghost unknowns */

    for (i = 0; i < Partition->Nlocal; i++) 
      my_local_ids[(Partition->my_global_ids)[i]] = i;

    /* set the ghost unknowns */
    Partition->Nghost = 3*nx;

    Nloc = Partition->Nlocal;
    if (comm->ML_mypid == 0) {
      for (i = 0; i < nx; i++) {
	my_local_ids[Nloc/2 + i ] = Nloc + i;
	my_local_ids[Nloc - nx + i ] = Nloc + i + nx;
	my_local_ids[2*Nloc - nx + i ] = Nloc + i + 2*nx;
      }
    }
    else {
      for (i = 0; i < nx; i++) {
	my_local_ids[Nloc/2 - nx + i ] = Nloc + i;
	my_local_ids[              i ] = Nloc + i + nx;
	my_local_ids[3*Nloc/2 - nx + i ] = Nloc + i + 2*nx;
      }
    }

  }

#endif
}
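The my_local_ids convention built above is worth spelling out: owned globals map to local ids 0..Nlocal-1, ghosts are appended at Nlocal..Nlocal+Nghost-1, and -1 marks globals this processor never references. A minimal sketch with hypothetical values (not taken from the ML sources):

#include <stdio.h>

int main(void)
{
  /* Hypothetical 6-unknown problem split across 2 processors; this is
     processor 0's view after the assignments done in partition_edges(). */
  int Nlocal = 3;
  int my_local_ids[6] = {0, 1, 2,     /* globals 0-2 owned: locals 0-2   */
                         3, 4,        /* globals 3-4 are ghosts          */
                         -1};         /* global 5 never referenced here  */
  int g;

  for (g = 0; g < 6; g++)
    printf("global %d -> local %d (%s)\n", g, my_local_ids[g],
           my_local_ids[g] < 0      ? "not on this proc" :
           my_local_ids[g] < Nlocal ? "owned" : "ghost");
  return 0;
}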
Example #9
/* Assign unknowns to processors */
void partition_nodes(struct user_partition *Partition)
{
  int i, nx, Nloc;
  ML_Comm *comm;
  int *my_local_ids;

#ifdef AZTEC
  int    proc_config[AZ_PROC_SIZE];

  Partition->Nghost = 0;

  AZ_set_proc_config(proc_config, COMMUNICATOR);

  AZ_input_update(NULL,&(Partition->Nlocal),
		  &(Partition->my_global_ids),
		    proc_config,     Partition->Nglobal, 1, AZ_linear);
  Partition->mypid = proc_config[AZ_node];
  Partition->nprocs= proc_config[AZ_N_procs];

#else

  nx = (int) sqrt( ((double) Partition->Nglobal) + .00001);

  ML_Comm_Create( &comm);
  Partition->mypid = comm->ML_mypid;
  Partition->nprocs= comm->ML_nprocs;
  Partition->Nghost = 2*nx;

  if (comm->ML_nprocs > 2) {
    if (comm->ML_mypid == 0)
      printf("This example only works for one or two processors\n");
    exit(1);
  }
  if ((comm->ML_nprocs == 2) && (Partition->Nglobal%2 == 1)) {
    if (comm->ML_mypid == 0)
      printf("Two processor case requires an even number of points.\n");
    exit(1);
  }
  if (comm->ML_nprocs == 1) {
    Partition->Nlocal = Partition->Nglobal; 
    Partition->my_global_ids = (int *) malloc(sizeof(int)*Partition->Nlocal);
    for (i = 0; i < Partition->Nlocal; i++) 
      (Partition->my_global_ids)[i] = i;
    
    Partition->my_local_ids = (int *) malloc(sizeof(int)*Partition->Nglobal);
    for (i = 0; i < Partition->Nglobal; i++) 
      (Partition->my_local_ids)[i] = i;
  }
  else {
    Partition->Nlocal = Partition->Nglobal/2; 
    Partition->my_global_ids = (int *) malloc(sizeof(int)*Partition->Nlocal);
    my_local_ids =  (int *) malloc(sizeof(int)*Partition->Nglobal);
    Partition->my_local_ids = my_local_ids;

    for (i = 0; i < Partition->Nglobal; i++) 
      (Partition->my_local_ids)[i] = -1;

    for (i = 0; i < Partition->Nlocal; i++) {
      (Partition->my_global_ids)[i] = i + comm->ML_mypid*Partition->Nlocal;
      my_local_ids[(Partition->my_global_ids)[i]] = i;
    }

    Nloc = Partition->Nlocal;
    if (comm->ML_mypid == 0) {
      for (i = 0; i < nx; i++) {
	my_local_ids[Nloc + i ] = Nloc + i;
	my_local_ids[2*Nloc - nx + i ] = Nloc + i + nx;
      }
    }
    else {
      for (i = 0; i < nx; i++) {
	my_local_ids[Nloc - nx + i ] = Nloc + i;
	my_local_ids[            i ] = Nloc + i + nx;
      }
    }

  }
#endif
}
Example #10
AZ_MATRIX *user_Ke_build(struct user_partition *Edge_Partition)
{
  double dcenter, doffdiag, sigma = .0001;
  int ii,jj, horv, i, nx, global_id, nz_ptr, Nlocal_edges;

  /* Aztec matrix and temp variables */

  int       *Ke_bindx, *Ke_data_org = NULL;
  double    *Ke_val;
  AZ_MATRIX *Ke_mat;
  int       proc_config[AZ_PROC_SIZE], *cpntr = NULL;
  int       *reordered_glob_edges = NULL, *reordered_edge_externs = NULL;

  Nlocal_edges = Edge_Partition->Nlocal;
  nx = (int) sqrt( ((double) Edge_Partition->Nglobal/2) + .00001);

  Ke_bindx = (int    *) malloc((7*Nlocal_edges+1)*sizeof(int));
  Ke_val   = (double *) malloc((7*Nlocal_edges+1)*sizeof(double));
  Ke_bindx[0] = Nlocal_edges+1;

  dcenter  = 2 + 2.*sigma/((double) ( 3 * nx * nx));
  doffdiag = -1 + sigma/((double) ( 6 * nx * nx));

  /* Create a DMSR matrix with global column indices */

  for (i = 0; i < Nlocal_edges; i++) {
    global_id = (Edge_Partition->my_global_ids)[i];
    Ke_val[i] = dcenter;
    Ke_bindx[i+1] = Ke_bindx[i] + 6;
    inv2dindex(global_id, &ii, &jj, nx, &horv);
    nz_ptr = Ke_bindx[i];   /* row offsets in bindx are indexed by the local row */
    if (horv == HORIZONTAL) {
      Ke_bindx[nz_ptr] = north2d(ii,jj,nx);     Ke_val[nz_ptr++] = doffdiag;
      Ke_bindx[nz_ptr] = west2d(ii,jj,nx);      Ke_val[nz_ptr++] = 1.;
      Ke_bindx[nz_ptr] = east2d(ii,jj,nx);      Ke_val[nz_ptr++] = -1.;
      if (jj == 0) jj = nx-1;
      else jj--;
      Ke_bindx[nz_ptr] = west2d(ii,jj,nx);      Ke_val[nz_ptr++] = -1.;
      Ke_bindx[nz_ptr] = south2d(ii,jj,nx);     Ke_val[nz_ptr++] = doffdiag;
      Ke_bindx[nz_ptr] = east2d(ii,jj,nx);      Ke_val[nz_ptr++] = 1.;
    }
    else {
      Ke_bindx[nz_ptr] = north2d(ii,jj,nx);     Ke_val[nz_ptr++] = -1.;
      Ke_bindx[nz_ptr] = east2d(ii,jj,nx);      Ke_val[nz_ptr++] = doffdiag;
      Ke_bindx[nz_ptr] = south2d(ii,jj,nx);     Ke_val[nz_ptr++] = 1.;
      if (ii == 0) ii = nx-1;
      else ii--;
      Ke_bindx[nz_ptr] = west2d(ii,jj,nx);     Ke_val[nz_ptr++] = doffdiag;
      Ke_bindx[nz_ptr] = south2d(ii,jj,nx);     Ke_val[nz_ptr++] = -1.;
      Ke_bindx[nz_ptr] = north2d(ii,jj,nx);     Ke_val[nz_ptr++] = 1.;
    }
  }

  /* Transform the global Aztec matrix into a local Aztec matrix. That is,   */
  /* replace global column indices by local indices and set up communication */
  /* data structure 'Ke_data_org' that will be used for matvec's.            */

  AZ_set_proc_config(proc_config, COMMUNICATOR);

  AZ_transform_norowreordering(proc_config, &(Edge_Partition->needed_external_ids),
			       Ke_bindx, Ke_val, Edge_Partition->my_global_ids,
			       &reordered_glob_edges, &reordered_edge_externs, 
			       &Ke_data_org, Nlocal_edges, 0, 0, 0, 
			       &cpntr,	       AZ_MSR_MATRIX);

  /* Convert old style Aztec matrix to newer style Aztec matrix */

  Ke_mat = AZ_matrix_create( Nlocal_edges );
  AZ_set_MSR(Ke_mat, Ke_bindx, Ke_val, Ke_data_org, 0, NULL, AZ_LOCAL);

  return(Ke_mat);
}
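The DMSR construction above follows the standard MSR conventions: bindx[0] = N+1, val[0..N-1] holds the diagonal (val[N] is unused), and bindx[i]..bindx[i+1]-1 give the off-diagonal column indices of row i, with their values at the same offsets in val. A stand-alone sketch with a made-up 3 x 3 tridiagonal matrix (not from the ML sources):

#include <stdio.h>

/* Hypothetical 3 x 3 tridiagonal matrix in MSR format:
     [ 2 -1  0 ]
     [-1  2 -1 ]
     [ 0 -1  2 ]                                                        */
int    bindx[] = {4, 5, 7, 8,      /* row pointers (first N+1 entries)  */
                  1, 0, 2, 1};     /* off-diagonal column indices       */
double val[]   = {2., 2., 2., 0.,  /* diagonal plus unused val[N]       */
                  -1., -1., -1., -1.};

int main(void)
{
  double b[3] = {1., 1., 1.}, c[3];
  int i, k, N = 3;

  for (i = 0; i < N; i++) {
    c[i] = val[i] * b[i];                     /* diagonal term          */
    for (k = bindx[i]; k < bindx[i+1]; k++)   /* off-diagonal terms     */
      c[i] += val[k] * b[bindx[k]];
  }
  for (i = 0; i < N; i++) printf("c[%d] = %g\n", i, c[i]);
  return 0;
}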
Example #11
int test_AZ_iterate_then_AZ_scale_f(Epetra_Comm& Comm, bool verbose)
{
  (void)Comm;
  if (verbose) {
    cout << "testing AZ_iterate/AZ_scale_f with 'old' Aztec"<<endl;
  }

  int* proc_config = new int[AZ_PROC_SIZE];

#ifdef EPETRA_MPI
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
  AZ_set_comm(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, 0);
#endif

  int *external, *update_index, *external_index;

  int i, N = 5;
  AZ_MATRIX* Amat = NULL;
  int err = create_and_transform_simple_matrix(AZ_MSR_MATRIX, N, 3.0,
                                               proc_config, Amat,
                                                 external, update_index,
                                                 external_index);
  if (err != 0) {
    return(err);
  }
 
  int* options = new int[AZ_OPTIONS_SIZE];
  double* params = new double[AZ_PARAMS_SIZE];
  double* status = new double[AZ_STATUS_SIZE];
  AZ_defaults(options, params);
  options[AZ_scaling] = AZ_sym_diag;
  if (verbose) {
    options[AZ_output] = AZ_warnings;
  }
  else {
    options[AZ_output] = 0;
  }
  
  int N_update = N+Amat->data_org[AZ_N_border];
  double* x = new double[N_update];
  double* b = new double[N_update];

  for(i=0; i<N_update; ++i) {
    x[i] = 0.0;
    b[i] = 1.0;
  }

  AZ_PRECOND* Pmat = AZ_precond_create(Amat, AZ_precondition, NULL);
  AZ_SCALING* Scal = AZ_scaling_create();

  options[AZ_keep_info] = 1;

  AZ_iterate(x, b, options, params, status, proc_config,
             Amat, Pmat, Scal);

  //now set options[AZ_pre_calc] = AZ_reuse and try to call AZ_scale_f.
  options[AZ_pre_calc] = AZ_reuse;

  AZ_scale_f(AZ_SCALE_MAT_RHS_SOL, Amat, options, b, x, proc_config, Scal);

  AZ_scaling_destroy(&Scal);
  AZ_precond_destroy(&Pmat);
  destroy_matrix(Amat);

  delete [] x;
  delete [] b;

  delete [] options;
  delete [] params;
  delete [] status;
  delete [] proc_config;
  free(update_index);
  free(external);
  free(external_index);

  return(0);
}
Example #12
int main(int argc, char *argv[])

/* Set up and solve a test problem defined in the subroutine
   init_matrix_vector_structures().

   Author:   Ray Tuminaro, Div 1422, Sandia National Labs
   date:     11/10/94

 ******************************************************************************/

{

  double *ax,*x;                   /* ax is the right hand side for the test
                                      problem. x is the approximate solution
                                      obtained using AZTEC.                  */

  int    i,input_option;


  /* See Aztec User's Guide for more information   */
  /* on the variables that follow.                 */

  int    proc_config[AZ_PROC_SIZE];/* Processor information:                 */
  /*  proc_config[AZ_node] = node name      */
  /*  proc_config[AZ_N_procs] = # of nodes  */

  int    options[AZ_OPTIONS_SIZE]; /* Array used to select solver options.   */
  double params[AZ_PARAMS_SIZE];   /* User selected solver paramters.        */
  int    *data_org;                /* Array to specify data layout */
  double status[AZ_STATUS_SIZE];   /* Information returned from AZ_solve()
                                      indicating success or failure.         */

  int    *update,                  /* vector elements (global index) updated
                                      on this processor.                     */
    *external;                     /* vector elements needed by this node.   */

  int    *update_index;            /* ordering of update[] and external[]    */
  int    *extern_index;            /* locally on this processor. For example
                                      update_index[i] gives the index
                                      location of the vector element which
                                      has the global index 'update[i]'.      */

                                   /* Sparse matrix to be solved is stored
                                      in these arrays.                       */
  int    *rpntr,*cpntr,*indx, *bpntr, *bindx;
  double *val;


  /* ----------------------- execution begins --------------------------------*/

  /* Put the # of processors, the node id,    */
  /* and an MPI communicator into proc_config */

#ifdef AZTEC_MPI
  MPI_Init(&argc,&argv);
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI );
#endif

  /*
   * Read and broadcast: problem choice, problem size, equations per grid point
   * and how we wish to initialize 'update'.
   */

  if (proc_config[AZ_node] == 0) {
    (void) printf("enter the application problem number\n");
    (void) printf("  = 0: Finite Difference MSR Poisson on n x n x n grid.\n");
    (void) printf("  = 1: Finite Difference VBR Poisson on n x n x n grid.\n");
    (void) printf("  = 2: Finite Element MSR Poisson\n");
    (void) printf("  = 3: Use AZ_read_msr_matrix() to read file '.data'\n");
    scanf("%d",&application);
    if ((application < 0) || (application > 3)){
      (void) fprintf(stderr, "Error: Invalid application (%d) selected\n",
                     application);
      exit(1);
    }

    if (application == 0) {
      (void) printf("\nNote: To try other problems, change add_row_3D()");
      (void) printf("\n      in create_msr_matrix() to add_row_5pt() or");
      (void) printf("\n      add_row_9pt().\n\n");
    }
    if (application == 2) {
      (void) printf("\nNote: Input files are provided for 1 finite element ");
      (void) printf("\n      problem. This problem can be run on either 1  ");
      (void) printf("\n      or 4 processors. To run on 1 processor, copy  ");
      (void) printf("\n      the file fe_1proc_grid_0 to fe_grid_0. To run on");
      (void) printf("\n      4 processors, copy the files fe_4proc_grid_k to ");
      (void) printf("\n      fe_grid_k (k = 0,1,2,3). In both cases enter 197");
      (void) printf("\n      when prompted for the number of grid points and ");
      (void) printf("\n      linear when prompted for the partitioning!!!\n\n");
    }

    if (application == 3)
      (void) printf("enter the total number of matrix rows\n");
    else (void) printf("enter the total number of grid points\n");
    scanf("%d", &N_grid_pts);

    num_PDE_eqns = 1;
    if (application < 2) {
      (void) printf("enter the number of equations per grid point\n");
      scanf("%d", &num_PDE_eqns);
    }

    (void) printf("partition option \n");
    (void) printf("     = %d: linear\n", AZ_linear);
    (void) printf("     = %d: update pts from file '.update'\n", AZ_file);
    if (application < 2)
      (void) printf("     = %d: box decomposition\n", AZ_box);
    scanf("%d", &input_option);
  }
  AZ_broadcast((char *) &N_grid_pts  , sizeof(int), proc_config, AZ_PACK);
  AZ_broadcast((char *) &num_PDE_eqns, sizeof(int), proc_config, AZ_PACK);
  AZ_broadcast((char *) &input_option, sizeof(int), proc_config, AZ_PACK);
  AZ_broadcast((char *) &application , sizeof(int), proc_config, AZ_PACK);
  AZ_broadcast((char *) NULL         , 0          , proc_config, AZ_SEND);

  /* create an application matrix for AZTEC */

  init_matrix_vector_structures(proc_config, &update_index, &update, &data_org,
                                &external, &extern_index, input_option, &val,
                                &bindx, &indx, &bpntr, &rpntr, &cpntr);

  /* initialize AZTEC options */

  init_options(options,params);

  if ( (i = AZ_check_input(data_org, options, params, proc_config) ) < 0) {
    AZ_print_error(i);
    exit(-1);
  }

  /* Matrix fill for finite element example (see Aztec User's Guide). */

  if (application == 2)
    fill_fe_matrix(val, bindx, update, update_index, external, extern_index,
                   data_org);

  /* Initialize right hand side and initial guess */
  /* NOTE: STORAGE ALLOCATED FOR 'x' IS GREATER THAN THE NUMBER */
  /*       OF MATRIX ROWS PER PROCESSOR. 'x' INCLUDES SPACE FOR */
  /*       EXTERNAL (GHOST) ELEMENTS. THUS, THE SIZE OF 'x' IS  */
  /*       'data_org[AZ_N_internal] + data_org[AZ_N_border] +   */
  /*       data_org[AZ_N_external]'.                            */

  init_guess_and_rhs(update_index, update, &x, &ax, data_org, val, indx, bindx,
                     rpntr, cpntr, bpntr, proc_config);

  /* update[], update_index[], external[], extern_index[] are used to map
   * between Aztec's ordering of the equations and the user's ordering
   * (see the User's guide for more details). If these mapping arrays 
   * are not needed by the user, they can be deallocated as they are not 
   * used by AZ_solve().
   */

  free((void *) update);   free((void *) update_index);
  free((void *) external); free((void *) extern_index);

  /* solve the system of equations using ax as the right hand side */

  AZ_solve(x,ax, options, params, indx, bindx, rpntr, cpntr, bpntr, val,
           data_org, status, proc_config);

  /* Free allocated memory */

  free((void *) x);        free((void *) ax);           free((void *) indx);
  free((void *) bindx);    free((void *) rpntr);        free((void *) cpntr);
  free((void *) bpntr);    free((void *) val);          free((void *) data_org);

#ifdef AZTEC_MPI
  MPI_Finalize();
#endif
  return(0);

}
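Before they are freed above, update[] and update_index[] carry the global-to-local mapping: the equation with global index update[i] is stored locally at position update_index[i]. A minimal sketch with hypothetical values showing how user-ordered data would be scattered into Aztec's ordering:

#include <stdio.h>

int main(void)
{
  /* Hypothetical mapping on one processor: it owns global equations
     7, 10 and 42, which Aztec has placed at local slots 2, 0 and 1.   */
  int    update[]       = {7, 10, 42};  /* owned global indices (sorted) */
  int    update_index[] = {2, 0, 1};    /* local slot of each update[i]  */
  double x_user[]  = {7.0, 10.0, 42.0}; /* values in the user's ordering */
  double x_az[3];                       /* values in Aztec's ordering    */
  int i;

  for (i = 0; i < 3; i++)
    x_az[update_index[i]] = x_user[i];  /* scatter into Aztec ordering   */

  for (i = 0; i < 3; i++)
    printf("global %d -> local slot %d (value %g)\n",
           update[i], update_index[i], x_az[update_index[i]]);
  return 0;
}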
Example #13
int
goma_init_(dbl *time1, int *nnodes, int *nelems,
           int *nnv_in, int *nev_in, int *i_soln, int *i_post)
     
     /*
      * Initial main driver for GOMA. Derived from a (1/93) release of
      * the rf_salsa program by
      *        
      *        Original Authors: John  Shadid (1421)
      *		                 Scott Hutchinson (1421)
      *        		         Harry Moffat (1421)
      *       
      *        Date:		12/3/92
      * 
      *
      *        Updates and Changes by:
      *                           Randy Schunk (9111)
      *                           P. A. Sackinger (9111)
      *                           R. R. Rao       (9111)
      *                           R. A. Cairncross (Univ. of Delaware)
      *        Dates:           2/93 - 6/96
      *
      *       Modified for continuation
      *                           Ian Gates
      *       Dates:            2/98 - 10/98
      *       Dates:            7/99 - 8/99
      * 
      * Last modified: Wed  June 26 14:21:35 MST 1994 [email protected]
      * Hello.
      * 
      * Note: Many modifications from an early 2/93 pre-release
      *	      version of rf_salsa were made by various persons 
      *       in order to test ideas about moving/deforming meshes...
      */ 
{
  /* Local Declarations */

  double time_start, total_time;   /* timing variables */
#ifndef PARALLEL
  struct tm *tm_ptr;               /* additional serial timing variables */
  time_t the_time;
#endif

  int error;
  int i;
  int j;
  static int first_goma_call=TRUE;

  char	**ptmp;
  static const char *yo="goma_init";

  struct Command_line_command **clc=NULL; /* point to command line structure */
  int           nclc = 0;		/* number of command line commands */

/********************** BEGIN EXECUTION ***************************************/
  
/* assume number of commands is less than or equal to the number of 
 * arguments in the command line minus 1 (1st is program name) */

  /*
  *  Get the name of the executable, yo
  */

#ifdef PARALLEL
if( first_goma_call ) {
	Argc = 1;
	Argv = (char **) smalloc( Argc*sizeof(char *) );
	Argv[0] = (char *) yo;
	MPI_Init(&Argc, &Argv);  /*PRS will have to fix this.  Too late TAB already did. */
  }
  time_start = MPI_Wtime();
#else /* PARALLEL */
  (void) time(&the_time);
  tm_ptr = gmtime(&the_time);
  time_start = (double)  ( tm_ptr->tm_sec
               + 60. * (   60. * ( tm_ptr->tm_yday * 24. + tm_ptr->tm_hour )
                                                         + tm_ptr->tm_min  )
                         );
#endif /* PARALLEL */
  *time1 = time_start;

/*   Argv = argv; */

/*   Argc = argc; */

  time_goma_started = time_start;

#ifdef PARALLEL
  /*
   * Determine the parallel processing status, if any. We need to know
   * pretty early if we're "one of many" or the only process.
   */

  error = MPI_Comm_size(MPI_COMM_WORLD, &Num_Proc);
  error = MPI_Comm_rank(MPI_COMM_WORLD, &ProcID);

  /*
   * Setup a default Proc_config so we can use utility routines 
   * from Aztec
   */

  AZ_set_proc_config(Proc_Config, MPI_COMM_WORLD);

  /* set the output limit flag if need be */

  if( Num_Proc > DP_PROC_PRINT_LIMIT ) Unlimited_Output = FALSE;

#ifdef HAVE_MPE_H
  error = MPE_Init_log();
#endif /* HAVE_MPE_H */

  Dim = 0;			/* for any hypercube legacy code...  */

#endif /* PARALLEL */
  
#ifndef PARALLEL
  Dim        = 0;
  ProcID     = 0;
  Num_Proc   = 1;
#endif /* PARALLEL */


  /*
  *   HKM - Change the ieee exception handling based on the machine and
  *         the level of debugging/speed desired. This call currently causes
  *         core dumps for floating point exceptions.
  */

  handle_ieee();
  
  log_msg("--------------");
  log_msg("GOMA begins...");

#ifdef USE_CGM
  cgm_initialize();
#endif
  /*
   * Some initial stuff that only the master process does.
   */

/*PRS: Disable this command line stuff for the jas coupled version */
/*-----------------------------------------------------------------*/
/*   if ( ProcID == 0 ) */
/*     { */
/*       if (argc > 1) */
/* 	{ */
/* 	  log_msg("Preprocessing command line options."); */
/* 	  clc = (struct Command_line_command **)  */
/* 	    smalloc( argc * sizeof(struct Command_line_command *)); */
/* 	  for (i=0; i<argc; i++) */
/* 	    { */
/* 	      clc[i] = (struct Command_line_command *)  */
/* 		smalloc(sizeof(struct Command_line_command)); */
/* 	      clc[i]->type   = 0; /\* initialize command line structure *\/ */
/* 	      clc[i]->i_val  = 0; */
/* 	      clc[i]->r_val  = 0.; */
/* 	      clc[i]->string = (char *)  */
/* 		smalloc(MAX_COMMAND_LINE_LENGTH*sizeof(char)); */
/* 	      for ( j=0; j<MAX_COMMAND_LINE_LENGTH; j++) */
/* 		{ */
/* 		  clc[i]->string[j] = '\0'; */
/* 		} */
/* #ifdef DEBUG */
/* 	      fprintf(stderr, "clc[%d]->string is at 0x%x\n", i, clc[i]->string); */
/* 	      fprintf(stderr, "clc[%d]         is at 0x%x\n", i, clc[i]); */
/* #endif */
/* 	    } */
/* 	} */

/* PRS For the JAS version we will use the default input file name "input" */
      strcpy(Input_File, "input");

/* if (argc > 1) translate_command_line(argc, argv, clc, &nclc); */
      
/*       print_code_version(); */
/*       ptmp = legal_notice; */
/*       while ( strcmp(*ptmp, LAST_LEGAL_STRING) != 0 ) */
/* 	{ */
/* 	  fprintf(stderr, "%s", *ptmp++); */
/* 	} */
/* } */

  /*
   *  Allocate the uniform problem description structure and
   *  the problem description structures on all processors
   */
  error = pd_alloc();
  EH(error, "pd_alloc problem");

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier after pd_alloc\n", ProcID);
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif

  log_msg("Allocating mp, gn, ...");

  error = mp_alloc();
  EH(error, "mp_alloc problem");

  error = gn_alloc();
  EH(error, "gn_alloc problem");

  error = ve_alloc();
  EH(error, "ve_alloc problem");

  error = elc_alloc();
  EH(error, "elc_alloc problem");

  error = elc_rs_alloc();
  EH(error, "elc_alloc problem");

  error = cr_alloc();
  EH(error, "cr_alloc problem");

  error = evp_alloc();
  EH(error, "evp_alloc problem");

  error = tran_alloc();
  EH(error, "tran_alloc problem");

  error = libio_alloc();
  EH(error, "libio_alloc problem");

  error = eigen_alloc();
  EH(error, "eigen_alloc problem");

  error = cont_alloc();
  EH(error, "cont_alloc problem");

  error = loca_alloc();
  EH(error, "loca_alloc problem");

  error = efv_alloc();
  EH(error, "efv_alloc problem");

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before read_input_file()\n", ProcID);
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif

/*PRS AGAIN, NO COMMAND LINE OVERRIDES IN THIS JAS3D VERSION */
  /*
   * Read ASCII input file, data files, related exodusII FEM databases.
   */	
   if ( ProcID == 0 ) 
       { 
         log_msg("Reading input file ..."); 
         read_input_file(clc, nclc); 

       }

  /*
   * The user-defined material properties, etc. available to goma users
   * mean that some dynamically allocated data needs to be communicated.
   *
   * To handle this, sizing information from the input file scan is
   * broadcast in stages so that the other processors can allocate space
   * accordingly to hold the data.
   *
   * Note: instead of handpacking a data structure, use MPI derived datatypes
   * to gather and scatter. Pray this is done efficiently. Certainly it costs
   * less from a memory standpoint.
   */

#ifdef PARALLEL

  /*
   *  Make sure the input file was successfully processed before moving on
   */
  check_parallel_error("Input file error");


  /*
   * This is some sizing information that helps fit a little bit more
   * onto the ark later on.
   */

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before noahs_raven()\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif

  noahs_raven();

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before MPI_Bcast of Noahs_Raven\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif

  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Raven->new_type, 0, MPI_COMM_WORLD);

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier after Bcast/before raven_landing()\n", 
	  ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif  
  /*
   * Get the other processors ready to handle ark data.
   */

  raven_landing();

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before noahs_ark()\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
  
  
  /*
   * This is the main body of communicated information, including some
   * whose sizes were determined because of advanced legwork by the raven.
   */

  noahs_ark();
  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Ark->new_type, 0, MPI_COMM_WORLD);

  /*
   * Chemkin was initialized on processor zero during the input file
   * process. Now, distribute it to all processors
   */
#ifdef USE_CHEMKIN
  if (Chemkin_Needed) {
    chemkin_initialize_mp();
  }
#endif 

  /*
   * Once the ark has landed, there are additional things that will need to
   * be sent by dove. Example: BC_Types[]->u-BC arrays.
   *
   */

  ark_landing();

  noahs_dove();
  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Dove->new_type, 0, MPI_COMM_WORLD);


#endif          /* End of ifdef PARALLEL */


  /*
   * We sent the packed line to all processors that contained geometry
   * creation commands.  Now we need to step through it and create
   * geometry as we go (including possibly reading an ACIS .sat file).
   *
   */
#ifdef USE_CGM
  create_cgm_geometry();
#endif

  /*
   * For parallel execution, assume the following variables will be changed
   * to reflect the multiple file aspect of the problem.
   *
   *	FEM file = file.exoII		--> file_3of15.exoII
   *
   *	Output EXODUS II file = out.exoII --> out_3of15.exoII
   *
   */


  /*
   * Allocate space for structures holding the EXODUS II finite element
   * database information and for the Distributed Processing information.
   *
   * These are mostly skeletons with pointers that get allocated in the
   * rd_exoII and rd_dpi routines. Remember to free up those arrays first
   * before freeing the major pointers.
   */

  EXO_ptr = alloc_struct_1(Exo_DB, 1);
  init_exo_struct(EXO_ptr);
  DPI_ptr = alloc_struct_1(Dpi, 1);
  init_dpi_struct(DPI_ptr);  

  log_msg("Reading mesh from EXODUS II file...");
  error = read_mesh_exoII(EXO_ptr, DPI_ptr);

  /*
   *   Missing files on any processor are detected at a lower level
   *   forcing a return to the higher level
   *         rd_exo -->  rd_mesh  -->  main
   *   Shutdown now, if any of the exodus files weren't found
   */
  if (error < 0) {
#ifdef PARALLEL
    MPI_Finalize();
#endif
    return(-1);
  }

  /*
   * All of the MPI_Type_commit() calls called behind the scenes that build
   * the dove, ark and raven really allocated memory. Let's free it up now that
   * the initial information has been communicated.
   */

#ifdef PARALLEL
  MPI_Type_free(&(Noahs_Raven->new_type));
  MPI_Type_free(&(Noahs_Ark->new_type));
  MPI_Type_free(&(Noahs_Dove->new_type));
#endif   

  /*
   * Setup the rest of the Problem Description structure that depends on
   * the mesh that was read in from the EXODUS II file...
   * 
   * Note that memory allocation and some setup has already been performed
   * in mm_input()...
   */

  error = setup_pd();
  EH( error, "Problem setting up Problem_Description.");
  /*
   * Let's check to see if we need the large elasto-plastic global tensors
   * and allocate them if so 
   */
  error = evp_tensor_alloc(EXO_ptr);
  EH( error, "Problems setting up evp tensors");
  
  /*
   * Now that we know about what kind of problem we're solving and the
   * mesh information, let's allocate space for elemental assembly structures
   *
   */
#ifdef DEBUG
  DPRINTF(stderr, "About to assembly_alloc()...\n");
#endif
  log_msg("Assembly allocation...");

  error = assembly_alloc(EXO_ptr);
  EH( error, "Problem from assembly_alloc");

  if (Debug_Flag)  {
    DPRINTF(stderr, "%s:  setting up EXODUS II output files...\n", yo);
  }

  /*
   * These are not critical - just niceties. Also, they should not overburden
   * your db with too much of this - they're capped verbiage compliant routines.
   */

  add_qa_stamp(EXO_ptr);

  add_info_stamp(EXO_ptr);

#ifdef DEBUG
  fprintf(stderr, "added qa and info stamps\n");
#endif

  /*
   * If the output EXODUS II database file is different from the input
   * file, then we'll need to replicate all the basic mesh information.
   * But, remember that if we're parallel, that the output file names must
   * be multiplexed first...
   */
  if ( Num_Proc > 1 )
    {
      multiname(ExoFileOut,     ProcID, Num_Proc);      
      multiname(Init_GuessFile, ProcID, Num_Proc);

      if ( strcmp( Soln_OutFile, "" ) != 0 )
	{
	  multiname(Soln_OutFile,   ProcID, Num_Proc);
	}

      if( strcmp( ExoAuxFile, "" ) != 0 )
        {
          multiname(ExoAuxFile,     ProcID, Num_Proc);
        }

      if( efv->Num_external_field != 0 )
        {
          for( i=0; i<efv->Num_external_field; i++ )
            {
              multiname(efv->file_nm[i], ProcID, Num_Proc);
            }
        }

    }



  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *   Preprocess the exodus mesh
   *        -> Allocate pointers to structures containing element
   *           side bc info, First_Elem_Side_BC_Array, and
   *           element edge info, First_Elem_Edge_BC_Array.
   *        -> Determine Unique_Element_Types[] array
   */
#ifdef DEBUG
  fprintf(stderr, "pre_process()...\n");
#endif
  log_msg("Pre processing of mesh...");
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
  pre_process(EXO_ptr);

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   * Load up a few key indices in the bfd prototype basis function structures
   * and make sure that each active eqn/vbl has a bf[v] that points to the
   * right bfd[]...needs pre_process to find out the number of unique
   * element types in the problem.
   */

#ifdef DEBUG
  fprintf(stderr, "bf_init()...\n");
#endif
  log_msg("Basis function initialization...");
  error = bf_init(EXO_ptr);
  EH( error, "Problem from bf_init");

  /*
   * check for parallel errors before continuing
   */
  check_parallel_error("Error encountered in problem setup");

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/  
  /*
   * Allocate space for each communication exchange description.
   */
#ifdef PARALLEL
#ifdef DEBUG
  fprintf(stderr, "P_%d: Parallel cx allocation\n", ProcID);
#endif
  if (DPI_ptr->num_neighbors > 0) {
    cx = alloc_struct_1(Comm_Ex, DPI_ptr->num_neighbors);
    Request = alloc_struct_1(MPI_Request, 
			     Num_Requests * DPI_ptr->num_neighbors);
    Status = alloc_struct_1(MPI_Status, 
			    Num_Requests * DPI_ptr->num_neighbors);
  }
#endif

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *                           SET UP THE PROBLEM
   *
   * Setup node-based structures
   * Finalise how boundary conditions are to be handled
   * Determine what unknowns are at each owned node and then tell
   *  neighboring processors about your nodes
   * Set up communications pattern for fast unknown updates between
   *  processors.
   */
  (void) setup_problem(EXO_ptr, DPI_ptr);

  /*
   * check for parallel errors before continuing
   */
  check_parallel_error("Error encountered in problem setup");
  
  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *                     WRITE OUT INITIAL INFO TO EXODUS FILE
   */

  /*
   *  Only have to initialize the exodus file if we are using different
   *  files for the output versus the input mesh
   */
  if (strcmp(ExoFile, ExoFileOut)) {
    /*
     * Temporarily we'll need to renumber the nodes and elements in the
     * mesh to be 1-based. After writing, return to the 0 based indexing
     * that is more convenient in C.
     */
#ifdef DEBUG
    fprintf(stderr, "1-base; wr_mesh; 0-base\n");
#endif
    one_base(EXO_ptr);
    wr_mesh_exo(EXO_ptr, ExoFileOut, 0);
    zero_base(EXO_ptr);

    /*
     * If running on a distributed computer, augment the plain finite
     * element information of EXODUS with the description of how this
     * piece fits into the global problem.
     */
    if (Num_Proc > 1) {
#ifdef PARALLEL
#ifdef DEBUG
      fprintf(stderr, "P_%d at barrier before wr_dpi()\n", ProcID);
      fprintf(stderr, "P_%d ExoFileOut = \"%s\"\n", ProcID, ExoFileOut);
      error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif
      wr_dpi(DPI_ptr, ExoFileOut, 0);
    }
  }

  if (Num_Import_NV > 0 || Num_Import_EV > 0) printf
    (" Goma will import %d nodal and %d element variables.\n",
     Num_Import_NV, Num_Import_EV);
  if (Num_Export_XS > 0 || Num_Export_XP > 0) printf
    (" Goma will export %d solution and %d post-processing variables.\n",
     Num_Export_XS, Num_Export_XP);

  /* Return counts to calling program */
  *nnodes = EXO_ptr->num_nodes;
  *nelems = EXO_ptr->num_elems;
  *nnv_in = Num_Import_NV;
  *nev_in = Num_Import_EV;
  *i_soln = Num_Export_XS;
  *i_post = Num_Export_XP;

  return (0); /* Back to  animas*/
}
Example #14
int main(int argc, char *argv[])
{
  int    proc_config[AZ_PROC_SIZE];/* Processor information.                */
  int    options[AZ_OPTIONS_SIZE]; /* Array used to select solver options.  */
  double params[AZ_PARAMS_SIZE];   /* User selected solver paramters.       */
  double status[AZ_STATUS_SIZE];   /* Information returned from AZ_solve(). */

  int    *bindx_real;              /* index and values arrays for MSR matrices */
  double *val_real, *val_imag;

  int * update;                    /* List of global eqs owned by the processor */
  double *x_real, *b_real;         /* initial guess/solution, RHS  */
  double *x_imag, *b_imag;

  unsigned int  N_local;           /* Number of equations on this node */
  double residual;                 /* Used for computing residual */

  double *xx_real, *xx_imag, *xx; /* Known exact solution */
  int myPID, nprocs;

  AZ_MATRIX *Amat_real;             /* Real matrix structure */
  AZ_MATRIX  *Amat;                 /* Komplex matrix to be solved. */
  AZ_PRECOND *Prec;                 /* Komplex preconditioner */
  double *x, *b;                    /* Komplex Initial guess and RHS */

  int i;

  /******************************/
  /* First executable statement */
  /******************************/

#ifdef AZTEC_MPI
  MPI_Init(&argc,&argv);
#endif

  /* Get number of processors and the name of this processor */

#ifdef AZTEC_MPI
  AZ_set_proc_config(proc_config,MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config,0);
#endif

  nprocs = proc_config[AZ_N_procs];
  myPID  = proc_config[AZ_node];

  printf("proc %d of %d is alive\n",myPID, nprocs);

  /* Define two real diagonal matrices. Will use as real and imaginary parts */

  /* Get the number of local equations from the command line */
  if (argc!=2)
  {
    if (myPID==0) printf("Usage: %s number_of_local_equations\n",argv[0]);
    exit(1);
  }
  N_local = atoi(argv[1]);

  const unsigned int N_local_max = 1000000;
  if (N_local > N_local_max) {
    if (myPID==0)
      printf("No more than %d local equation allowed\n", N_local_max);
    exit(1);
  }


  /* Need N_local+1 elements for val/bindx arrays */
  val_real = malloc((N_local+1)*sizeof(double));
  val_imag = malloc((N_local+1)*sizeof(double));

  /* bindx_imag is not needed since real/imag have same pattern  */
  bindx_real = malloc((N_local+1)*sizeof(int));

  update = malloc((N_local+1)*sizeof(int)); /* Malloc equation update list */

  b_real = malloc((N_local+1)*sizeof(double)); /* Malloc x and b arrays */
  b_imag = malloc((N_local+1)*sizeof(double));
  x_real = malloc((N_local+1)*sizeof(double));
  x_imag = malloc((N_local+1)*sizeof(double));
  xx_real = malloc((N_local+1)*sizeof(double));
  xx_imag = malloc((N_local+1)*sizeof(double));

  for (i=0; i<N_local; i++)
  {
    val_real[i] = 10 + i/(N_local/10); /* Some very fake diagonals */
    val_imag[i] = 10 - i/(N_local/10); /* Should take exactly 20 GMRES steps */

    x_real[i] = 0.0;         /* Zero initial guess */
    x_imag[i] = 0.0;

    xx_real[i] = 1.0;        /* Let exact solution = 1 */
    xx_imag[i] = 0.0;

    /* Generate RHS to match exact solution */
    b_real[i] = val_real[i]*xx_real[i] - val_imag[i]*xx_imag[i];
    b_imag[i] = val_imag[i]*xx_real[i] + val_real[i]*xx_imag[i];

    /* All bindx[i] have same value since no off-diag terms */
    bindx_real[i] = N_local + 1;

    /* each processor owns equations
       myPID*N_local through myPID*N_local + N_local - 1 */
    update[i] = myPID*N_local + i;

  }

  bindx_real[N_local] = N_local+1; /* Need this last index */

  /* Register the Aztec matrix for the real part; the imaginary part shares
     the same sparsity pattern, so only its values array (val_imag) is needed */

  Amat_real = AZ_matrix_create(N_local);

  AZ_set_MSR(Amat_real, bindx_real, val_real, NULL, N_local, update, AZ_GLOBAL);

  /* initialize AZTEC options */

  AZ_defaults(options, params);
  options[AZ_solver]  = AZ_gmres; /* Use GMRES with no preconditioning */
  options[AZ_precond] = AZ_none;
  options[AZ_kspace] = 21;
  options[AZ_max_iter] = 21;
  params[AZ_tol] = 1.e-14;


  /**************************************************************/
  /* Construct linear system.  Form depends on input parameters */
  /**************************************************************/

  /**************************************************************/
  /* Method 1:  Construct A, x, and b in one call.              */
  /* Useful if using A,x,b only one time. Equivalent to Method 2*/
  /**************************************************************/

  AZK_create_linsys_ri2k (x_real,  x_imag,  b_real,  b_imag,
      options,  params, proc_config,
      Amat_real, val_imag, &x, &b, &Amat);

  /**************************************************************/
  /* Method 2:  Construct A, x, and b in separate calls.        */
  /* Useful for having more control over the construction.      */
  /* Note that the matrix must be constructed first.            */
  /**************************************************************/

  /* Uncomment these three calls and comment out the above call

     AZK_create_matrix_ri2k (options,  params, proc_config,
     Amat_real, val_imag, &Amat);

     AZK_create_vector_ri2k(options,  params, proc_config, Amat,
     x_real, x_imag, &x);

     AZK_create_vector_ri2k(options,  params, proc_config, Amat,
     b_real, b_imag, &b);
     */

  /**************************************************************/
  /* Build exact solution vector.                               */
  /* Check residual of init guess and exact solution            */
  /**************************************************************/

  AZK_create_vector_ri2k(options,  params, proc_config, Amat,
      xx_real, xx_imag, &xx);

  residual = AZK_residual_norm(x, b, options, params, proc_config, Amat);
  if (proc_config[AZ_node]==0)
    printf("\n\n\nNorm of residual using initial guess = %12.4g\n",residual);

  residual = AZK_residual_norm(xx, b, options, params, proc_config, Amat);
  AZK_destroy_vector(options,  params, proc_config, Amat, &xx);
  if (proc_config[AZ_node]==0)
    printf("\n\n\nNorm of residual using exact solution = %12.4g\n",residual);

  /**************************************************************/
  /* Create preconditioner                                      */
  /**************************************************************/

  AZK_create_precon(options,  params, proc_config, x, b, Amat, &Prec);

  /**************************************************************/
  /* Solve linear system using Aztec.                           */
  /**************************************************************/

  AZ_iterate(x, b, options, params, status, proc_config, Amat, Prec, NULL);

  /**************************************************************/
  /* Extract solution.                                          */
  /**************************************************************/

  AZK_extract_solution_k2ri(options, params, proc_config, Amat, Prec, x,
      x_real,  x_imag);
  /**************************************************************/
  /* Destroy Preconditioner.                                    */
  /**************************************************************/

  AZK_destroy_precon (options,  params, proc_config, Amat, &Prec);

  /**************************************************************/
  /* Destroy linear system.                                     */
  /**************************************************************/

  AZK_destroy_linsys (options,  params, proc_config, &x, &b, &Amat);

  if (proc_config[AZ_node]==0)
  {
    printf("True residual norm squared   = %22.16g\n",status[AZ_r]);
    printf("True scaled res norm squared = %22.16g\n",status[AZ_scaled_r]);
    printf("Computed res norm squared    = %22.16g\n",status[AZ_rec_r]);
  }

  /* Print comparison between known exact and computed solution */
  {double sum = 0.0;

    for (i=0; i<N_local; i++) sum += fabs(x_real[i]-xx_real[i]);
    for (i=0; i<N_local; i++) sum += fabs(x_imag[i]-xx_imag[i]);
    printf("Processor %d:  Difference between exact and computed solution = %12.4g\n",
        proc_config[AZ_node],sum);
  }
  /*  Free memory allocated */

  free((void *) val_real );
  free((void *) bindx_real );
  free((void *) val_imag );
  free((void *) update );
  free((void *) b_real );
  free((void *) b_imag );
  free((void *) x_real );
  free((void *) x_imag );
  free((void *) xx_real );
  free((void *) xx_imag );
  AZ_matrix_destroy(&Amat_real);

#ifdef AZTEC_MPI
  MPI_Finalize();
#endif

  return 0 ;
}
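The RHS assembly in the loop above is complex multiplication written out in real arithmetic; the komplex ("ri2k") routines exploit the same identity, solving the complex system as an equivalent real one built from 2 x 2 blocks of the form [ar -ai; ai ar]. A tiny sketch of that equivalence for one diagonal entry (illustrative only; AZK assembles the actual komplex matrix internally):

#include <stdio.h>

int main(void)
{
  /* One complex equation (ar + i*ai)*(xr + i*xi) = (br + i*bi), written
     as the equivalent real 2x2 block system used by komplex:
         [ ar  -ai ] [ xr ]   [ br ]
         [ ai   ar ] [ xi ] = [ bi ]                                    */
  double ar = 10., ai = 10.;   /* first diagonal of val_real/val_imag   */
  double xr = 1.,  xi = 0.;    /* exact solution used above             */
  double br = ar*xr - ai*xi;   /* identical to the b_real formula       */
  double bi = ai*xr + ar*xi;   /* identical to the b_imag formula       */

  printf("b = %g + %gi\n", br, bi);   /* prints b = 10 + 10i */
  return 0;
}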
Example #15
int main(int argc, char *argv[])
{
  int    Nnodes=32*32;              /* Total number of nodes in the problem.*/
                                    /* 'Nnodes' must be a perfect square.   */

  struct       user_partition Edge_Partition = {NULL, NULL,0,0,NULL,0,0,0}, 
                                Node_Partition = {NULL, NULL,0,0,NULL,0,0,0};

  int          proc_config[AZ_PROC_SIZE];

#ifdef ML_MPI
  MPI_Init(&argc,&argv);
#endif

  AZ_set_proc_config(proc_config, COMMUNICATOR);
  ML_Comm* comm;
  ML_Comm_Create(&comm);

  Node_Partition.Nglobal = Nnodes;
  Edge_Partition.Nglobal = Node_Partition.Nglobal*2;

  user_partition_nodes(&Node_Partition);
  user_partition_edges(&Edge_Partition, &Node_Partition);
  
  AZ_MATRIX * AZ_Ke = user_Ke_build(&Edge_Partition);
  AZ_MATRIX * AZ_Kn = user_Kn_build(&Node_Partition);

  // convert (wrap) the Aztec matrices into ML_Operator objects

  ML_Operator * ML_Ke, * ML_Kn, * ML_Tmat;

  ML_Ke = ML_Operator_Create( comm );
  ML_Kn = ML_Operator_Create( comm );

  AZ_convert_aztec_matrix_2ml_matrix(AZ_Ke,ML_Ke,proc_config);
  AZ_convert_aztec_matrix_2ml_matrix(AZ_Kn,ML_Kn,proc_config);

  ML_Tmat = user_T_build(&Edge_Partition, &Node_Partition, 
		      ML_Kn, comm);

  Epetra_CrsMatrix * Epetra_Kn, * Epetra_Ke, * Epetra_T;
  
  int MaxNumNonzeros;
  double CPUTime;

  ML_Operator2EpetraCrsMatrix(ML_Ke,Epetra_Ke,
			      MaxNumNonzeros,
			      true,CPUTime);

  ML_Operator2EpetraCrsMatrix(ML_Kn,
			      Epetra_Kn,MaxNumNonzeros,
			      true,CPUTime);

  ML_Operator2EpetraCrsMatrix(ML_Tmat,Epetra_T,MaxNumNonzeros,
			      true,CPUTime);  

  Teuchos::ParameterList MLList;
  ML_Epetra::SetDefaults("maxwell", MLList);
  
  MLList.set("ML output", 0);

  MLList.set("aggregation: type", "Uncoupled");
  MLList.set("coarse: max size", 30);
  MLList.set("aggregation: threshold", 0.0);

  MLList.set("coarse: type", "Amesos-KLU");

  ML_Epetra::MultiLevelPreconditioner * MLPrec =
    new ML_Epetra::MultiLevelPreconditioner(*Epetra_Ke, *Epetra_T, *Epetra_Kn,
					    MLList);

  Epetra_Vector LHS(Epetra_Ke->DomainMap()); LHS.Random();
  Epetra_Vector RHS(Epetra_Ke->DomainMap()); RHS.PutScalar(1.0);
  
  Epetra_LinearProblem Problem(Epetra_Ke,&LHS,&RHS);
  AztecOO solver(Problem);
  solver.SetPrecOperator(MLPrec);

  solver.SetAztecOption(AZ_solver, AZ_cg_condnum);
  solver.SetAztecOption(AZ_output, 32);
  solver.Iterate(500, 1e-8);

  // ========================= //
  // compute the real residual //
  // ========================= //

  Epetra_Vector RHScomp(Epetra_Ke->DomainMap());
  int ierr;
  ierr = Epetra_Ke->Multiply(false, LHS, RHScomp);
  assert(ierr==0);

  Epetra_Vector resid(Epetra_Ke->DomainMap());

  ierr = resid.Update(1.0, RHS, -1.0, RHScomp, 0.0);
  assert(ierr==0);

  double residual;
  ierr = resid.Norm2(&residual);
  assert(ierr==0);
  if (proc_config[AZ_node] == 0) {
    std::cout << std::endl;
    std::cout << "==> Residual = " << residual << std::endl;
    std::cout << std::endl;
  }

  // =============== //
  // C L E A N   U P //
  // =============== //
  
  delete MLPrec;    // destroy phase prints out some information
  delete Epetra_Kn;
  delete Epetra_Ke;
  delete Epetra_T;
  
  ML_Operator_Destroy( &ML_Ke );
  ML_Operator_Destroy( &ML_Kn );
  ML_Comm_Destroy( &comm );

  if (Edge_Partition.my_local_ids != NULL) free(Edge_Partition.my_local_ids);
  if (Node_Partition.my_local_ids != NULL) free(Node_Partition.my_local_ids);
  if (Node_Partition.my_global_ids != NULL) free(Node_Partition.my_global_ids);
  if (Edge_Partition.my_global_ids != NULL) free(Edge_Partition.my_global_ids);
  if (Node_Partition.needed_external_ids != NULL) 
    free(Node_Partition.needed_external_ids);
  if (Edge_Partition.needed_external_ids != NULL) 
    free(Edge_Partition.needed_external_ids);

  if (AZ_Ke!= NULL) {
    AZ_free(AZ_Ke->bindx);
    AZ_free(AZ_Ke->val);
    AZ_free(AZ_Ke->data_org);
    AZ_matrix_destroy(&AZ_Ke);
  }
  if (AZ_Kn!= NULL) {
    AZ_free(AZ_Kn->bindx);
    AZ_free(AZ_Kn->val);
    AZ_free(AZ_Kn->data_org);
    AZ_matrix_destroy(&AZ_Kn);
  }

  ML_Operator_Destroy(&ML_Tmat);

  if (residual > 1e-5) {
    std::cout << "`MultiLevelPreconditioner_Maxwell.exe' failed!" << std::endl;
    exit(EXIT_FAILURE);
  }

#ifdef ML_MPI
  MPI_Finalize();
#endif
		
  if (proc_config[AZ_node] == 0)
    std::cout << "`MultiLevelPreconditioner_Maxwell.exe' passed!" << std::endl;
  exit(EXIT_SUCCESS);
		
}
Example #16
void
continue_problem (Comm_Ex *cx,	/* array of communications structures */
		  Exo_DB  *exo, /* ptr to the finite element mesh database */
		  Dpi     *dpi) /* distributed processing information */
{
  int    *ija=NULL;		/* column pointer array                         */
  double *a=NULL;		/* nonzero array                                */
  double *a_old=NULL;		/* nonzero array                                */
  double *x=NULL;		/* solution vector                              */

  int     iAC;			/* COUNTER                                      */
  double *x_AC = NULL;		/* SOLUTION VECTOR OF EXTRA UNKNOWNS            */
  double *x_AC_old=NULL;	/* old SOLUTION VECTOR OF EXTRA UNKNOWNS        */
  double *x_AC_dot=NULL;	
 
  int    *ija_attic=NULL;	/* storage for external dofs                    */

  int eb_indx, ev_indx;

  /* 
   * variables for path traversal 
   */
  double *x_old=NULL;		/* old solution vector                          */
  double *x_older=NULL;		/* older solution vector                        */
  double *x_oldest=NULL;	/* oldest solution vector saved                 */
  double *xdot=NULL;		/* current path derivative of soln              */
  double *xdot_old=NULL;
  double *x_update=NULL;


  double *x_sens=NULL;		/* solution sensitivity */
  double *x_sens_temp=NULL;	/* MMH thinks we need another one, so
				 * that when the solution is updated
				 * on a failure, it doesn't use the
				 * last computed x_sens b/c that might
				 * be crappy.  We should use the last
				 * known good one...  I haven't done
				 * the same thing with x_sens_p.
				 */
  double **x_sens_p=NULL;	/* solution sensitivity for parameters */
  int num_pvector=0;		/*  number of solution sensitivity vectors   */

#ifdef COUPLED_FILL
  struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL}; 
#else /* COUPLED_FILL */
  struct Aztec_Linear_Solver_System *ams[NUM_ALSS]={NULL, NULL}; 
#endif /* COUPLED_FILL */
                 /* sl_util_structs.h */

  double *resid_vector=NULL;	/* residual */
  double *resid_vector_sens=NULL;/* residual sensitivity */

  double *scale=NULL;		/* scale vector for modified newton */

  int 	 *node_to_fill = NULL;	

  int		n;		/* total number of path steps attempted */
  int		ni;		/* total number of nonlinear solves */
  int		nt;		/* total number of successful path steps */
  int		path_step_reform; /* counter for jacobian reformation stride */
  int		converged;	/* success or failure of Newton iteration */
  int		success_ds;	/* success or failure of path step */

  int           i, nprint=0, num_total_nodes;

  int           numProcUnknowns;
  int           const_delta_s, step_print;
  double        path_print, i_print;
  double	path,		/* Current value (should have solution here) */
                path1;		/* New value (would like to get solution here) */
  double	delta_s, delta_s_new, delta_s_old, delta_s_older, delta_s_oldest;
  double        delta_t;
  double	theta=0.0;
  double        damp;
  double        eps;
  double        lambda, lambdaEnd;
  double        timeValueRead = 0.0;

  /* 
   * ALC management variables
   */
  int  aldALC,			/* direction of continuation, == -1 =>
				   beginning value is greater than ending value. */
       alqALC;			/* is -1 when we're on our last step. */

  /*
   * Other local variables 
   */
  int	        error, err, is_steady_state, inewton;
  int 		*gindex = NULL, gsize;
  int		*p_gsize=NULL;
  double	*gvec=NULL;
  double        ***gvec_elem=NULL;
  double	err_dbl;
  FILE          *cl_aux=NULL, *file=NULL;
  
  struct Results_Description  *rd=NULL;
  
  int		tnv;		/* total number of nodal variables and kinds */
  int		tev;		/* total number of elem variables and kinds */
  int		tnv_post;	/* total number of nodal variables and kinds 
				   for post processing */
  int		tev_post;	/* total number of elem variables and kinds 
				   for post processing */
  int           iUC;            /* User-defined continuation condition index */

  int max_unk_elem, one, three; /* variables used as mf_setup arguments*/

  unsigned int matrix_systems_mask;

  double evol_local=0.0;
#ifdef PARALLEL
  double evol_global=0.0;
#endif

  static const char yo[]="continue_problem"; 

  /*
   * 		BEGIN EXECUTION
   */
#ifdef DEBUG
  fprintf(stderr, "%s() begins...\n", yo);
#endif

  is_steady_state = TRUE;

  p_gsize = &gsize;
  
  /* 
   * set aside space for gather global vectors to print to exoII file
   * note: this is temporary
   *
   * For 2D prototype problem:  allocate space for T, dx, dy arrays
   */
  if( strlen(Soln_OutFile) )
    {
      file = fopen(Soln_OutFile, "w");
      if (file == NULL) {
	DPRINTF(stderr, "%s:  opening soln file for writing\n", yo);
        EH(-1, "\t");
      }
    }
#ifdef PARALLEL
  check_parallel_error("Soln output file error");
#endif
  
  /*
   * Some preliminaries to help setup EXODUS II database output.
   */
#ifdef DEBUG
  fprintf(stderr, "cnt_nodal_vars() begins...\n");
#endif

  /*  
   * tnv_post is calculated in load_nodal_tkn
   * tev_post is calculated in load_elem_tkn
   */
  tnv = cnt_nodal_vars();
  tev = cnt_elem_vars();
  
#ifdef DEBUG
  fprintf(stderr, "Found %d total primitive nodal variables to output.\n", tnv);
  fprintf(stderr, "Found %d total primitive elem variables to output.\n", tev);
#endif
  
  if (tnv < 0)
    {
      DPRINTF(stderr, "%s:\tbad tnv.\n", yo);
      EH(-1, "\t");
    }
  
  rd = (struct Results_Description *) 
    smalloc(sizeof(struct Results_Description));

  if (rd == NULL) 
    EH(-1, "Could not grab Results Description.");

  (void) memset((void *) rd, 0, sizeof(struct Results_Description));
  
  rd->nev = 0;			/* number element variables in results */
  rd->ngv = 0;			/* number global variables in results */
  rd->nhv = 0;			/* number history variables in results */

  rd->ngv = 5;			/* number global variables in results 
				   see load_global_var_info for names*/
  error = load_global_var_info(rd, 0, "CONV");
  error = load_global_var_info(rd, 1, "NEWT_IT");
  error = load_global_var_info(rd, 2, "MAX_IT");
  error = load_global_var_info(rd, 3, "CONVRATE");
  error = load_global_var_info(rd, 4, "MESH_VOLUME");

  /* load nodal types, kinds, names */
  error = load_nodal_tkn(rd, 
                         &tnv, 
                         &tnv_post); 
  
  if (error)
    {
      DPRINTF(stderr, "%s:  problem with load_nodal_tkn()\n", yo);
      EH(-1,"\t");
    }

  /* load elem types, names */
  error = load_elem_tkn(rd,
			exo,
                        tev, 
                        &tev_post); 
  
  if (error)
    {
      DPRINTF(stderr, "%s:  problem with load_elem_tkn()\n", yo);
      EH(-1,"\t");
    }
#ifdef PARALLEL
  check_parallel_error("Results file error");
#endif

  /* 
   * Write out the names of the nodal variables that we will be sending to
   * the EXODUS II output file later.
   */
#ifdef DEBUG
  fprintf(stderr, "wr_result_prelim() starts...\n", tnv);
#endif

  gvec_elem = (double ***) smalloc ( (exo->num_elem_blocks)*sizeof(double **));
  for (i = 0; i < exo->num_elem_blocks; i++)
    gvec_elem[i] = (double **) smalloc ( (tev + tev_post)*sizeof(double *));

  wr_result_prelim_exo(rd, 
                       exo, 
                       ExoFileOut,
                       gvec_elem );

#ifdef DEBUG
  fprintf(stderr, "P_%d: wr_result_prelim_exo() ends...\n", ProcID, tnv);
#endif

  /* 
   * This gvec workhorse transports output variables as nodal-based vectors
   * that are gathered from the solution vector. Note: it is NOT a global
   * vector at all; it only carries this processor's nodal variables to
   * the exodus database.
   */
  asdv(&gvec, Num_Node);

  /*
   * Allocate space and manipulate for all the nodes that this processor
   * is aware of...
   */
  num_total_nodes = dpi->num_universe_nodes;

  /* unknowns owned by this processor plus its external (ghost) unknowns */
  numProcUnknowns = NumUnknowns + NumExtUnknowns;

  /* allocate memory for Volume Constraint Jacobian */
  if ( nAC > 0)
    for(iAC=0;iAC<nAC;iAC++)
      augc[iAC].d_evol_dx = (double*) malloc(numProcUnknowns*sizeof(double));
  
  asdv(&resid_vector, numProcUnknowns);
  asdv(&resid_vector_sens, numProcUnknowns);
  asdv(&scale, numProcUnknowns);

  for (i = 0; i < NUM_ALSS; i++) 
    {
      ams[i] = alloc_struct_1(struct Aztec_Linear_Solver_System, 1);
    }

#ifdef MPI
  AZ_set_proc_config( ams[0]->proc_config, MPI_COMM_WORLD );
#ifndef COUPLED_FILL
  if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, MPI_COMM_WORLD );
#endif /* not COUPLED_FILL */
#else /* MPI */
  AZ_set_proc_config( ams[0]->proc_config, 0 );
#ifndef COUPLED_FILL
  if( Explicit_Fill ) AZ_set_proc_config( ams[1]->proc_config, 0 );
#endif /* not COUPLED_FILL */
#endif /* MPI */

  /* 
   * allocate space for and initialize solution arrays 
   */
  asdv(&x,        numProcUnknowns);
  asdv(&x_old,    numProcUnknowns);
  asdv(&x_older,  numProcUnknowns);
  asdv(&x_oldest, numProcUnknowns);
  asdv(&xdot,     numProcUnknowns);
  asdv(&xdot_old, numProcUnknowns);
  asdv(&x_update, numProcUnknowns);
  
  asdv(&x_sens,   numProcUnknowns);
  asdv(&x_sens_temp,   numProcUnknowns);

  /*
   * Initialize solid inertia flag
   */
  set_solid_inertia();
  
  /*
   * FRIENDLY COMMAND LINE EQUIV
   */
  if( ProcID == 0 )
   {
      cl_aux = fopen("goma-cl.txt", "w+");

      fprintf(cl_aux, "goma -a -i input ");
      fprintf(cl_aux, "-cb %10.6e ", cont->BegParameterValue);
      fprintf(cl_aux, "-ce %10.6e ", cont->EndParameterValue);
      fprintf(cl_aux, "-cd %10.6e ", cont->Delta_s0);
      fprintf(cl_aux, "-cn %d ", cont->MaxPathSteps);
      fprintf(cl_aux, "-cmin %10.6e ", cont->Delta_s_min);
      fprintf(cl_aux, "-cmax %10.6e ", cont->Delta_s_max);
      fprintf(cl_aux, "-cm %d ", Continuation);
      fprintf(cl_aux, "-ct %d ", cont->upType);

      switch (cont->upType)
        {
        case 1:			/* BC TYPE */
          fprintf(cl_aux, "-c_bc %d ", cont->upBCID);
          fprintf(cl_aux, "-c_df %d ", cont->upDFID);
          break;
        case 2:			/* MAT TYPE */
          fprintf(cl_aux, "-c_mn %d ", cont->upMTID+1);
          fprintf(cl_aux, "-c_mp %d ", cont->upMPID);
          break;
        case 3:			/* AC TYPE */
          fprintf(cl_aux, "-c_ac %d ", cont->upBCID);
          fprintf(cl_aux, "-c_df %d ", cont->upDFID);
          break;
        case 4:			/* USER MAT TYPE */
          fprintf(cl_aux, "-c_mn %d ", cont->upMTID+1);
          fprintf(cl_aux, "-c_mp %d ", cont->upMPID);
          fprintf(cl_aux, "-c_md %d ", cont->upMDID);
          break;
        case 5:                 /* USER-DEFINED FUNCTION TYPE */
          /* NOTE:  This is not available via the command line! */
          break;
        case 6:                 /* ANGULAR CONTINUATION TYPE */
          /* NOTE:  This requires LOCA and is not available via the command line! */
          EH(-1, "Angular continuation is available only in LOCA!");
          break;
        default:
          fprintf(stderr, "%s: Bad cont->upType, %d\n", yo, cont->upType);
          EH(-1,"Bad cont->upType");
          break;			/* duh */
        }

      fprintf(cl_aux, "\n");

      fclose(cl_aux);
   }
#ifdef PARALLEL
  check_parallel_error("Continuation setup error");
#endif
  /*
   * FIRST ORDER CONTINUATION 
   */
  lambda       = cont->BegParameterValue;
  lambdaEnd    = cont->EndParameterValue;
  
  if (lambdaEnd > lambda)
    aldALC = +1;
  else
    aldALC = -1;

  delta_s_new  = 0.0;
  Delta_s0     = cont->Delta_s0;
  Delta_s_min  = cont->Delta_s_min;
  Delta_s_max  = cont->Delta_s_max;
  MaxPathSteps = cont->MaxPathSteps;
  PathMax      = cont->PathMax;
  eps          = cont->eps;
  
  /* Convention: a negative Delta_s0 on input requests a constant
     (non-adaptive) step size; its magnitude is used as the step. */
  if (Delta_s0 < 0.0)
    {
      Delta_s0 = -Delta_s0;
      const_delta_s = 1;
    } 
  else 
    const_delta_s = 0;
  
  damp = 1.0;

  path = path1 = lambda;

  if (Debug_Flag && ProcID == 0)
    {
      fprintf(stderr,"MaxPathSteps: %d \tlambdaEnd: %f\n", MaxPathSteps, lambdaEnd);
      fprintf(stderr,"continuation in progress\n");
    }

  nprint = 0;

  if (Delta_s0 > Delta_s_max) 
    Delta_s0 = Delta_s_max;

  delta_s = delta_s_old = delta_s_older = Delta_s0;
      
  delta_t = 0.0;
  tran->delta_t = 0.0;      /*for Newmark-Beta terms in Lagrangian Solid*/

  /* Call prefront (or mf_setup) if necessary */
  if (Linear_Solver == FRONT)
    {
      /* Must also define these because mf_setup wants pointers to them */
      max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE;

      one = 1;
      three = 3;

      /* NOTE: We need an overall flag in the vn_glob struct that tells whether
	 FULL_DG is on anywhere in the domain. This assumes only one material.
	 See sl_front_setup for the test; that test belongs in the input parser. */
      if(vn_glob[0]->dg_J_model == FULL_DG) 
	max_unk_elem = (MAX_PROB_VAR + MAX_CONC)*MDE + 4*vn_glob[0]->modes*4*MDE;

#ifdef PARALLEL
      if (Num_Proc > 1) EH(-1, "Whoa.  No front allowed with nproc>1");
      check_parallel_error("Front solver not allowed with nprocs>1");
#endif
	  
#ifdef HAVE_FRONT  
       err = mf_setup(&exo->num_elems, 
		     &NumUnknowns, 
		     &max_unk_elem, 
		     &three,
		     &one,
		     exo->elem_order_map,
		     fss->el_proc_assign,
		     fss->level,
		     fss->nopdof,
		     fss->ncn,
		     fss->constraint,
		     front_scratch_directory,
		     &fss->ntra); 
      EH(err,"problems in frontal setup ");

#else
      EH(-1,"Don't have frontal solver compiled and linked in");
#endif
    }


  /*
   *  if computing parameter sensitivities, allocate space for solution
   *  sensitivity vectors
   */

  for (i = 0; i < nn_post_fluxes_sens; i++)
    num_pvector = MAX(num_pvector, pp_fluxes_sens[i]->vector_id);
  for (i = 0; i < nn_post_data_sens; i++)
    num_pvector = MAX(num_pvector, pp_data_sens[i]->vector_id);

  if ((nn_post_fluxes_sens + nn_post_data_sens) > 0)
    {
      num_pvector++;
      num_pvector = MAX(num_pvector, 2);
      x_sens_p = Dmatrix_birth(num_pvector, numProcUnknowns);
    }
  else
    x_sens_p = NULL;

  if (nAC > 0)
    {
      asdv(&x_AC, nAC);
      asdv(&x_AC_old, nAC);
      asdv(&x_AC_dot, nAC);
    }

  /*
   * ADJUST NATURAL PARAMETER
   */
  update_parameterC(0, path1, x, xdot, x_AC, delta_s, cx, exo, dpi);


  /* Allocate sparse matrix */
  if( strcmp( Matrix_Format, "msr" ) == 0)
    {
      log_msg("alloc_MSR_sparse_arrays...");
      alloc_MSR_sparse_arrays(&ija, 
			      &a, 
			      &a_old, 
			      0, 
			      node_to_fill, 
			      exo, 
			      dpi);
      /*
       * An attic to store external dof column indices is needed when
       * running in parallel.
       */
      alloc_extern_ija_buffer(num_universe_dofs, 
			      num_internal_dofs+num_boundary_dofs, 
			      ija, &ija_attic);
      /*
       * Any necessary one time initialization of the linear
       * solver package (Aztec).
       */
      ams[JAC]->bindx   = ija;
      ams[JAC]->val     = a;
      ams[JAC]->belfry  = ija_attic;
      ams[JAC]->val_old = a_old;
	  
      /*
       * These point to nowhere since we're using MSR instead of VBR
       * format.
       */
      ams[JAC]->indx  = NULL;
      ams[JAC]->bpntr = NULL;
      ams[JAC]->rpntr = NULL;
      ams[JAC]->cpntr = NULL;
      ams[JAC]->npn      = dpi->num_internal_nodes + dpi->num_boundary_nodes;
      ams[JAC]->npn_plus = dpi->num_internal_nodes + dpi->num_boundary_nodes + dpi->num_external_nodes;

      ams[JAC]->npu      = num_internal_dofs+num_boundary_dofs;
      ams[JAC]->npu_plus = num_universe_dofs;

      ams[JAC]->nnz = ija[num_internal_dofs+num_boundary_dofs] - 1;
      ams[JAC]->nnz_plus = ija[num_universe_dofs];
    }
  else if(  strcmp( Matrix_Format, "vbr" ) == 0)
    {
      log_msg("alloc_VBR_sparse_arrays...");
      alloc_VBR_sparse_arrays (ams[JAC],
			       exo,
			       dpi);
      ija_attic = NULL;
      ams[JAC]->belfry  = ija_attic;

      a = ams[JAC]->val;
      if( !save_old_A ) a_old = ams[JAC]->val_old = NULL;
    }
  else if ( strcmp( Matrix_Format, "front") == 0 )
    {
      /* Don't allocate any sparse matrix space when using front */
      ams[JAC]->bindx   = NULL;
      ams[JAC]->val     = NULL;
      ams[JAC]->belfry  = NULL;
      ams[JAC]->val_old = NULL;
      ams[JAC]->indx  = NULL;
      ams[JAC]->bpntr = NULL;
      ams[JAC]->rpntr = NULL;
      ams[JAC]->cpntr = NULL;

    }
  else
    EH(-1,"Attempted to allocate unknown sparse matrix format");

  init_vec(x, cx, exo, dpi, x_AC, nAC, &timeValueRead);

  /* If ACs were read in, update their data floats */
  if (nAC > 0)
    if (augc[0].iread == 1)
      {
	for (iAC = 0; iAC < nAC; iAC++)
	  update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi);
      }

  vzero(numProcUnknowns, &x_sens[0]);
  vzero(numProcUnknowns, &x_sens_temp[0]);

  /* 
   * set boundary conditions on the initial conditions 
   */

  nullify_dirichlet_bcs();

  find_and_set_Dirichlet(x, xdot, exo, dpi);

  exchange_dof(cx, dpi, x);

  dcopy1(numProcUnknowns,x,x_old);
  dcopy1(numProcUnknowns,x_old,x_older);
  dcopy1(numProcUnknowns,x_older,x_oldest);

  if(nAC > 0)
    dcopy1(nAC,x_AC, x_AC_old);

  /* 
   * initialize the counters for when to print out data 
   */
  path_print = path1;
  step_print = 1;
      
  matrix_systems_mask = 1;

  log_msg("sl_init()...");
  sl_init(matrix_systems_mask, ams, exo, dpi, cx);

  /*
  * Make sure the solver was properly initialized on all processors.
  */
#ifdef PARALLEL
  check_parallel_error("Solver initialization problems");
#endif

  ams[JAC]->options[AZ_keep_info] = 1;
  /* 
   * set the number of successful path steps to zero 
   */
  nt = 0;   

  /* 
   * LOOP THROUGH PARAMETER UNTIL MAX NUMBER 
   * OF STEPS SURPASSED
   */

  for(n = 0; n < MaxPathSteps; n++)
    {
      alqALC = 1;

      switch (aldALC)
	{
	case -1:			/* REDUCING PARAMETER DIRECTION */
	  if (path1 <= lambdaEnd)
	    { 
	      DPRINTF(stderr,"\n\t ******** LAST PATH STEP!\n");
	      alqALC = -1;
	      path1 = lambdaEnd;
	      delta_s = path-path1;
	    } 
	  break;
	case +1:			/* RISING PARAMETER DIRECTION */
	  if (path1 >= lambdaEnd)
	    { 
	      DPRINTF(stderr,"\n\t ******** LAST PATH STEP!\n");
	      alqALC = -1;
	      path1 = lambdaEnd;
	      delta_s = path1-path;
	    } 
	  break;
	default:
	  DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC);
          EH(-1,"\t");
	  break;		/* duh */
	}
#ifdef PARALLEL
  check_parallel_error("Bad aldALC");
#endif
	  
      /*
       * ADJUST NATURAL PARAMETER
       */
      update_parameterC(0, path1, x, xdot, x_AC, delta_s, 
			cx, exo, dpi);

      /*
       * IF STEP CHANGED, REDO FIRST ORDER PREDICTION
       */
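      /*
       * For ALC_FIRST the predictor below is x <- x_old +/- delta_s * x_sens,
       * i.e. a tangent step that uses x_sens as the (approximate) derivative
       * of the solution with respect to the continuation parameter; the
       * zeroth-order case simply re-uses x_old.
       */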
      if(alqALC == -1)
	{
	  dcopy1(NumUnknowns,x_old,x);

	  switch (Continuation)
	    {
	    case ALC_ZEROTH:
	      break;
	    case  ALC_FIRST:
	      switch (aldALC)
		{
		case -1:
		  v1add(NumUnknowns, &x[0], -delta_s, &x_sens[0]);
		  break;
		case +1:
		  v1add(NumUnknowns, &x[0], +delta_s, &x_sens[0]);
		  break;
		default:
		  DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC);
                  EH(-1,"\t");
		  break;	/* duh */
		}
	      break;
	    default:
	      DPRINTF(stderr, "%s: Bad Continuation, %d\n", yo, Continuation);
              EH(-1,"\t");
	      break;		/* duh */
	    }
	}
#ifdef PARALLEL
  check_parallel_error("Bad Continuation");
#endif

      find_and_set_Dirichlet (x, xdot, exo, dpi); 

      exchange_dof(cx, dpi, x);

      if (ProcID == 0)
	{
	  fprintf(stderr, "\n\t----------------------------------");
	  switch (Continuation)
	    {
	    case ALC_ZEROTH:
	      DPRINTF(stderr, "\n\tZero Order Continuation:");
	      break;
	    case  ALC_FIRST:
	      DPRINTF(stderr, "\n\tFirst Order Continuation:");
	      break;
	    default:
	      DPRINTF(stderr, "%s: Bad Continuation, %d\n", yo, Continuation);
              EH(-1,"\t");
	      break;		/* duh */
	    }
	  DPRINTF(stderr, "\n\tStep number: %4d of %4d (max)", n+1, MaxPathSteps);
	  DPRINTF(stderr, "\n\tAttempting solution at:");
	  switch (cont->upType)
	    {
	    case 1:		/* BC */
	    case 3:		/* AC */
	      DPRINTF(stderr, "\n\tBCID=%3d DFID=%5d", cont->upBCID, cont->upDFID);
	      break;
	    case 2:		/* MT */
	      DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d", cont->upMTID, cont->upMPID);
	      break;
	    case 4:		/* UM */
	      DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d MDID=%3d", cont->upMTID, cont->upMPID, cont->upMDID);
	      break;

/* This case requires an inner switch block */
            case 5:             /* UF */
              for (iUC=0; iUC<nUC; iUC++)
                {
	          switch (cpuc[iUC].Type)
	            {
	              case 1:		/* BC */
	              case 3:		/* AC */
	                DPRINTF(stderr, "\n\tBCID=%3d DFID=%5d",
                                cpuc[iUC].BCID, cpuc[iUC].DFID);
	                break;
	              case 2:		/* MT */
	                DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d",
                                cpuc[iUC].MTID, cpuc[iUC].MPID);
	                break;
	              case 4:		/* UM */
	                DPRINTF(stderr, "\n\tMTID=%3d MPID=%5d MDID=%3d",
                          cpuc[iUC].MTID, cpuc[iUC].MPID, cpuc[iUC].MDID);
	                break;
	              default:
	                DPRINTF(stderr, "%s: Bad user continuation type, %d\n",
                                yo, cont->upType);
                        EH(-1,"\t");
	                break;
                    }
	          DPRINTF(stderr, " Parameter= % 10.6e delta_s= %10.6e",
                    cpuc[iUC].value, (cpuc[iUC].value-cpuc[iUC].old_value) );
                }
	      break;

	    default:
	      DPRINTF(stderr, "%s: Bad cont->upType, %d\n", yo, cont->upType);
              EH(-1,"\t");
	      break;		/* duh */
	    }
          if (cont->upType != 5)
            {
	      DPRINTF(stderr, " Parameter= % 10.6e delta_s= %10.6e", path1, delta_s);
            }
	}
#ifdef PARALLEL
  check_parallel_error("Bad cont->upType");
#endif
	
      ni = 0;
      do {
	
#ifdef DEBUG
	DPRINTF(stderr, "%s: starting solve_nonlinear_problem\n", yo);
#endif
	err = solve_nonlinear_problem(ams[JAC], 
				      x, 
				      delta_t, 
				      theta,
				      x_old,
				      x_older, 
				      xdot,
				      xdot_old,
				      resid_vector, 
				      x_update,
				      scale, 
				      &converged, 
				      &nprint, 
				      tev, 
				      tev_post,
				      NULL,
				      rd,
				      gindex,
				      p_gsize,
				      gvec, 
				      gvec_elem, 
				      path1,
				      exo, 
				      dpi, 
				      cx, 
				      0, 
				      &path_step_reform,
				      is_steady_state,
				      x_AC, 
 				      x_AC_dot, 
				      path1, 
				      resid_vector_sens, 
				      x_sens_temp,
				      x_sens_p,
                                      NULL);
	  
#ifdef DEBUG
	fprintf(stderr, "%s: returned from solve_nonlinear_problem\n", yo);
#endif

	if (err == -1)
	  converged = 0;
	inewton = err;
	if (converged)
	  {
	    if (Write_Intermediate_Solutions == 0) {
#ifdef DEBUG
	      DPRINTF(stderr, "%s: write_solution call WIS\n", yo);
#endif
	      write_solution(ExoFileOut, resid_vector, x, x_sens_p,
			     x_old, xdot, xdot_old, tev, tev_post, NULL, rd, 
			     gindex, p_gsize, gvec, gvec_elem, &nprint, 
			     delta_s, theta, path1, NULL, exo, dpi);
#ifdef DEBUG
	      fprintf(stderr, "%s: write_solution end call WIS\n", yo);
#endif
	    }
#ifdef PARALLEL
	    check_parallel_error("Error writing exodusII file");
#endif

	    /*
	     * PRINT OUT VALUES OF EXTRA UNKNOWNS
	     * FROM AUGMENTING CONDITIONS
	     */
	    if (nAC > 0)
	      {
		DPRINTF(stderr, "\n------------------------------\n");
		DPRINTF(stderr, "Augmenting Conditions:    %4d\n", nAC);
		DPRINTF(stderr, "Number of extra unknowns: %4d\n\n", nAC);

		for (iAC = 0; iAC < nAC; iAC++)
                 {
		  if (augc[iAC].Type == AC_USERBC)
                   {
		    DPRINTF(stderr, "\tBC[%4d] DF[%4d] = %10.6e\n",
			    augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
                   }
                  else if (augc[iAC].Type == AC_USERMAT ||
                           augc[iAC].Type == AC_FLUX_MAT)
                   {
                    DPRINTF(stderr, "\tMT[%4d] MP[%4d] = %10.6e\n",
                            augc[iAC].MTID, augc[iAC].MPID, x_AC[iAC]);
                   }
                  else if(augc[iAC].Type == AC_VOLUME)
                   {
                    evol_local = augc[iAC].evol;
#ifdef PARALLEL
                    if( Num_Proc > 1 ) {
                         MPI_Allreduce( &evol_local, &evol_global, 1, 
                                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                    }
                    evol_local = evol_global;
#endif
                    DPRINTF(stderr, "\tMT[%4d] VC[%4d]=%10.6e Param=%10.6e\n",
                            augc[iAC].MTID, augc[iAC].VOLID, evol_local,
                            x_AC[iAC]);
                   }
		  else if(augc[iAC].Type == AC_POSITION)
                   {
                    evol_local = augc[iAC].evol;
#ifdef PARALLEL
                    if( Num_Proc > 1 ) {
                         MPI_Allreduce( &evol_local, &evol_global, 1, 
                                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                    }
                    evol_local = evol_global;
#endif
                    DPRINTF(stderr, "\tMT[%4d] XY[%4d]=%10.6e Param=%10.6e\n",
                            augc[iAC].MTID, augc[iAC].VOLID, evol_local,
                            x_AC[iAC]);
                   }
                  else if(augc[iAC].Type == AC_FLUX)
                   {
                    DPRINTF(stderr, "\tBC[%4d] DF[%4d]=%10.6e\n",
                            augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
                   }
                 }
	      }

	    /*
	     * INTEGRATE FLUXES, FORCES
	     */
	    for (i = 0; i < nn_post_fluxes; i++)
	      err_dbl = evaluate_flux (exo, dpi, 
                                       pp_fluxes[i]->ss_id,
				       pp_fluxes[i]->flux_type ,
                                       pp_fluxes[i]->flux_type_name ,
				       pp_fluxes[i]->blk_id ,
				       pp_fluxes[i]->species_number,
				       pp_fluxes[i]->flux_filenm,
                                       pp_fluxes[i]->profile_flag,
				       x,xdot,NULL, delta_s,path1,1);

	    /*
	     * COMPUTE FLUX, FORCE SENSITIVITIES
	     */
	    for (i = 0; i < nn_post_fluxes_sens; i++)
	      err_dbl = evaluate_flux_sens (exo, dpi,
                                            pp_fluxes_sens[i]->ss_id,
					    pp_fluxes_sens[i]->flux_type ,
                                            pp_fluxes_sens[i]->flux_type_name ,
					    pp_fluxes_sens[i]->blk_id ,
					    pp_fluxes_sens[i]->species_number,
					    pp_fluxes_sens[i]->sens_type,
					    pp_fluxes_sens[i]->sens_id,
					    pp_fluxes_sens[i]->sens_flt,
					    pp_fluxes_sens[i]->sens_flt2,
					    pp_fluxes_sens[i]->vector_id,
					    pp_fluxes_sens[i]->flux_filenm,
                                            pp_fluxes_sens[i]->profile_flag,
					    x,xdot,x_sens_p,delta_s,path1,1);

	    /*
	     * Compute global volumetric quantities
	     */
	    for (i = 0; i < nn_volume; i++) {
	      evaluate_volume_integral(exo, dpi,
				       pp_volume[i]->volume_type,
				       pp_volume[i]->volume_name,
				       pp_volume[i]->blk_id,
				       pp_volume[i]->species_no,
				       pp_volume[i]->volume_fname,
				       pp_volume[i]->params,
				       NULL, x, xdot, delta_s,
				       path1, 1);
	    }
 
	  }   /*  end of if converged block  */


	/*
	 * INCREMENT COUNTER
	 */
	ni++;

	/*
	 * DID IT CONVERGE ? 
	 * IF NOT, REDUCE STEP SIZE AND TRY AGAIN
	 */
	if (!converged)
	  {
	    if (ni > 5)
	      {
		puts("                                     ");
		puts(" ************************************");
		puts(" W: Did not converge in Newton steps.");
		puts("    Find better initial guess.       ");
		puts(" ************************************"); 
		/* This needs to have a return value of 0, indicating
		 * success, for the continuation script to not treat this
		 * as a failed command. */
		exit(0);
	      }

	    /*
	     * ADJUST STEP SIZE
	     */
	    DPRINTF(stderr, "\n\tFailed to converge:\n");

	    delta_s *= 0.5;

	    switch (aldALC)
	      {
	      case -1: 
		path1 = path - delta_s;
		break;
	      case +1: 
		path1 = path + delta_s;
		break;
	      default:
		DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC);
                EH(-1,"\t");
		break;		/* duh */
	      }
#ifdef PARALLEL
              check_parallel_error("Bad aldALC");
#endif

	    /*
	     * RESET
	     */
	    alqALC = 1;		/* If necessary, don't call this the last step... */

	    DPRINTF(stderr, "\n\tDecreasing step-length to %10.6e.\n", delta_s);

	    if (delta_s < Delta_s_min)
	      {
		puts("\n X: C step-length reduced below minimum.");
		puts("\n    Program terminated.\n");
		/* This needs to have a return value of 0, indicating
		 * success, for the continuation script to not treat this
		 * as a failed command. */
		exit(0);
	      } 
#ifdef PARALLEL
              check_parallel_error("\t");
#endif

	    /*
	     * ADJUST NATURAL PARAMETER
	     */
	    dcopy1(numProcUnknowns, x_old, x);
	    update_parameterC(0, path1, x, xdot, x_AC, delta_s, 
			      cx, exo, dpi);

	    /*
	     * GET ZERO OR FIRST ORDER PREDICTION
	     */
	    switch (Continuation)
	      {
	      case ALC_ZEROTH:
		break;
	      case  ALC_FIRST:
		switch (aldALC)
		  {
		  case -1: 
		    v1add(numProcUnknowns, &x[0], -delta_s, &x_sens[0]);
		    break;
		  case +1: 
		    v1add(numProcUnknowns, &x[0], +delta_s, &x_sens[0]);
		    break;
		  default:
		    DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC);
                    EH(-1,"\t");
		    break;		/* duh */
		  }
		break;
	      default:
		DPRINTF(stderr, "%s: Bad Continuation, %d\n", yo, Continuation);
                EH(-1,"\t");
		break;		/* duh */
	      }
#ifdef PARALLEL
              check_parallel_error("Bad Continuation");
#endif

	    /* MMH: Needed to put this in, otherwise it may find that the
	     * solution and residual HAPPEN to satisfy the convergence
	     * criterion for the next Newton solve...
	     */
	    find_and_set_Dirichlet(x, xdot, exo, dpi);

            exchange_dof(cx, dpi, x);

	    /*    Should be doing first order prediction on ACs
	     *    but for now, just reset the AC variables
	     */
	    if( nAC > 0)
	      {
		dcopy1(nAC, x_AC_old, x_AC);
		for(iAC=0 ; iAC<nAC ; iAC++)
		  { update_parameterAC(iAC, x, xdot, x_AC, cx, exo, dpi);}
	      }
	  }   /*  end of !converged */
	  
      } while (converged == 0);

      /*
       * CONVERGED
       */
      nt++;

      if( Continuation == ALC_ZEROTH ) {
        DPRINTF(stderr, "\n\tStep accepted, parameter = %10.6e\n", path1);
       }
      else {
        DPRINTF(stderr, "\tStep accepted, parameter = %10.6e\n", path1);
       }

      /* 
       * check path step error, if too large do not enlarge path step 
       */
      if ((ni == 1) && (n != 0) && (!const_delta_s))
	{
	  delta_s_new = path_step_control(num_total_nodes, 
					  delta_s, delta_s_old, 
					  x, 
					  eps, 
					  &success_ds, 
					  cont->use_var_norm, inewton);
	  if (delta_s_new > Delta_s_max) 
	    delta_s_new = Delta_s_max;
	}
      else
	{
	  success_ds = 1;
	  delta_s_new = delta_s;
	}
	  
      /* 
       * determine whether to print out the data or not 
       */
      i_print = 0;
      if (nt == step_print)
	{
	  i_print = 1;
	  step_print += cont->print_freq;
	}
	  
      if (alqALC == -1) 
	i_print = 1;
	  
      if (i_print)
	{
	  error = write_ascii_soln(x, resid_vector, numProcUnknowns,
				   x_AC, nAC, path1, file);
	  if (error) {
	    DPRINTF(stdout, "%s:  error writing ASCII soln file\n", yo);
	  }
	  if (Write_Intermediate_Solutions == 0 ) {
	    write_solution(ExoFileOut, resid_vector, x, x_sens_p, 
			   x_old, xdot, xdot_old, tev, tev_post, NULL,
			   rd, gindex, p_gsize, gvec, gvec_elem, &nprint,
			   delta_s, theta, path1, NULL, exo, dpi);
	    nprint++;
	  }
	}
      
      /*
       * Back up the old solutions; previous solutions could be used
       * for prediction one day.
       */
      dcopy1(numProcUnknowns,x_older,x_oldest);
      dcopy1(numProcUnknowns,x_old,x_older);
      dcopy1(numProcUnknowns, x, x_old);
      dcopy1(numProcUnknowns, x_sens_temp, x_sens);

      delta_s_oldest = delta_s_older;
      delta_s_older = delta_s_old;
      delta_s_old = delta_s;
      delta_s = delta_s_new;
  
      if( nAC > 0)
	dcopy1(nAC, x_AC, x_AC_old);

      /*
       * INCREMENT/DECREMENT PARAMETER
       */
      path  = path1;
	  
      switch (aldALC)
	{
	case -1: 
	  path1 = path - delta_s;
	  break;
	case +1: 
	  path1 = path + delta_s;
	  break;
	default:
	  DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC);
          EH(-1,"\t");
	  break;		/* duh */
	}

#ifdef PARALLEL
      check_parallel_error("Bad aldALC");
#endif
      /*
       * ADJUST NATURAL PARAMETER
       */
      update_parameterC(0, path1, x, xdot, x_AC, delta_s, 
			cx, exo, dpi);

      /*
	display_parameterC(path1, x, xdot, delta_s, 
	cx, exo, dpi);
      */		   

      /*
       * GET FIRST ORDER PREDICTION
       */
      switch (Continuation)
	{
	case ALC_ZEROTH:
	  break;
	case  ALC_FIRST:
	  switch (aldALC)
	    {
	    case -1: 
	      v1add(numProcUnknowns, &x[0], -delta_s, &x_sens[0]);
	      break;
	    case +1: 
	      v1add(numProcUnknowns, &x[0], +delta_s, &x_sens[0]);
	      break;
	    default:
	      DPRINTF(stderr, "%s: Bad aldALC, %d\n", yo, aldALC);
              EH(-1,"\t");
	      break;		/* duh */
	    }
	  break;
	}
#ifdef PARALLEL
      check_parallel_error("Bad aldALC");
#endif

      /*
       * CHECK END CONTINUATION
       */
      /*
      if (alqALC == -1)
	alqALC = 0;
      else
	alqALC = 1;
      */

      if (alqALC == -1)
	{
	  DPRINTF(stderr,"\n\n\t I will continue no more!\n\t No more continuation for you!\n");
	  goto free_and_clear;
	}
    } /* for(n = 0; n < MaxPathSteps; n++) */

  if(n == MaxPathSteps &&
     aldALC * (lambdaEnd - path) > 0)
    {
      DPRINTF(stderr, "\n\tFailed to reach end of hunt in maximum number of successful steps (%d).\n\tSorry.\n",
	      MaxPathSteps);
      /*
      EH(-1,"\t");
      */
    }
#ifdef PARALLEL
      check_parallel_error("Continuation error");
#endif


  /*
   * DONE CONTINUATION
   */
 free_and_clear: 

  /*
   * Transform the node point coordinates according to the
   * displacements and write out all the results using the
   * displaced coordinates. Set the displacement field to
   * zero, too.
   */
  if (Anneal_Mesh)
    {
#ifdef DEBUG
      fprintf(stderr, "%s: anneal_mesh()...\n", yo);
#endif
      err = anneal_mesh(x, tev, tev_post, NULL, rd, path1, exo, dpi);
#ifdef DEBUG
      fprintf(stderr, "%s: anneal_mesh()-done\n", yo);
#endif
      EH(err, "anneal_mesh() bad return.");
    }
#ifdef PARALLEL
      check_parallel_error("Trouble annealing mesh");
#endif

  /* 
   * Free a bunch of variables that aren't needed anymore 
   */
  safer_free((void **) &ROT_Types);
  safer_free((void **) &node_to_fill);

  safer_free( (void **) &resid_vector);
  safer_free( (void **) &resid_vector_sens);
  safer_free( (void **) &scale);
  safer_free( (void **) &x);

  if (nAC > 0)
    {
      safer_free( (void **) &x_AC);
      safer_free( (void **) &x_AC_old);
      safer_free( (void **) &x_AC_dot);
    }

  safer_free( (void **) &x_old); 
  safer_free( (void **) &x_older); 
  safer_free( (void **) &x_oldest); 
  safer_free( (void **) &xdot); 
  safer_free( (void **) &xdot_old); 
  safer_free( (void **) &x_update); 

  safer_free( (void **) &x_sens); 
  safer_free( (void **) &x_sens_temp); 

  if ((nn_post_data_sens + nn_post_fluxes_sens) > 0)
    Dmatrix_death(x_sens_p, num_pvector, numProcUnknowns);

  for(i = 0; i < MAX_NUMBER_MATLS; i++) {
    for(n = 0; n < MAX_MODES; n++) {
      safer_free((void **) &(ve_glob[i][n]->gn));
      safer_free((void **) &(ve_glob[i][n]));
    }
    safer_free((void **) &(vn_glob[i]));
  }

  sl_free(matrix_systems_mask, ams);

  for (i = 0; i < NUM_ALSS; i++)
    safer_free((void **) &(ams[i]));

  safer_free( (void **) &gvec);

  i = 0;
  for ( eb_indx = 0; eb_indx < exo->num_elem_blocks; eb_indx++ )
    {
      for ( ev_indx = 0; ev_indx < rd->nev; ev_indx++ ) {
	if (exo->elem_var_tab[i++] == 1) {
	  safer_free((void **) &(gvec_elem [eb_indx][ev_indx]));
	}
      }
      safer_free((void **) &(gvec_elem [eb_indx]));
    }

  safer_free( (void **) &gvec_elem); 
  if (cpcc != NULL) safer_free( (void **) &cpcc);

  safer_free( (void **) &rd); 
  safer_free( (void **) &Local_Offset);
  safer_free( (void **) &Dolphin);

  if (file != NULL) fclose(file);

  return;

} /* END of routine hunt_problem */
int main(int argc, char *argv[])
{
  int num_PDE_eqns=1, N_levels=3, nsmooth=2;

  int leng, level, N_grid_pts, coarsest_level;
  int leng1,leng2;
  /* See Aztec User's Guide for more information on the */
  /* variables that follow.                             */

  int    proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];

  /* data structure for matrix corresponding to the fine grid */

  double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time;
  AZ_MATRIX *Amat;
  AZ_PRECOND *Pmat = NULL;
  ML *ml;
  FILE *fp;
  int i, j, Nrigid, *garbage, nblocks=0, *blocks = NULL, *block_pde=NULL;
  struct AZ_SCALING *scaling;
  ML_Aggregate *ag;
  double *mode, *rigid=NULL, alpha; 
  char filename[80];
  int    one = 1;
  int    proc,nprocs;
  char pathfilename[100];

#ifdef ML_MPI
  MPI_Init(&argc,&argv);
  /* get number of processors and the name of this processor */
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
  proc   = proc_config[AZ_node];
  nprocs = proc_config[AZ_N_procs];
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI);
  proc   = 0;
  nprocs = 1;
#endif

   if (proc_config[AZ_node] == 0) {
      sprintf(pathfilename,"%s/inputfile",argv[1]);
      ML_Reader_ReadInput(pathfilename, &context);
   }
   else context = (struct reader_context *) ML_allocate(sizeof(struct reader_context));
   AZ_broadcast((char *) context,  sizeof(struct reader_context), proc_config,
                AZ_PACK);
   AZ_broadcast((char *) NULL        ,   0          , proc_config, AZ_SEND);

   N_levels = context->N_levels;
   printf("N_levels %d\n",N_levels);
   nsmooth   = context->nsmooth;
   num_PDE_eqns = context->N_dofPerNode;
   printf("num_PDE_eqns %d\n",num_PDE_eqns);

   ML_Set_PrintLevel(context->output_level);

  /* read in the number of matrix equations */
  leng = 0;
  if (proc_config[AZ_node] == 0) {
     sprintf(pathfilename,"%s/data_matrix.txt",argv[1]);
     fp = fopen(pathfilename,"r");
     if (fp == NULL) {
        printf("**ERR** couldn't open file data_matrix.txt\n");
        exit(1);
     }
     fscanf(fp,"%d",&leng);
     fclose(fp);
  }
  leng = AZ_gsum_int(leng, proc_config);

  N_grid_pts=leng/num_PDE_eqns;


  /* initialize the list of global indices. NOTE: the list of global */
  /* indices must be in ascending order so that subsequent calls to  */
  /* AZ_find_index() will function properly. */
#if 0
  if (proc_config[AZ_N_procs] == 1) i = AZ_linear;
  else i = AZ_file;
#endif
  i = AZ_linear;

  /* cannot use AZ_input_update for variable blocks (forgot why, but debugged through it)*/
  /* make a linear distribution of the matrix       */
  /* if the linear distribution does not align with the blocks, */
  /* this is corrected in ML_AZ_Reader_ReadVariableBlocks */
  leng1 = leng/nprocs;
  leng2 = leng-leng1*nprocs;
  if (proc >= leng2)
  {
     leng2 += (proc*leng1);
  }
  else
  {
     leng1++;
     leng2 = proc*leng1;
  }
  N_update = leng1;
  update = (int*)AZ_allocate((N_update+1)*sizeof(int));
  if (update==NULL)
  {
      (void) fprintf (stderr, "Not enough space to allocate 'update'\n");
      fflush(stderr); exit(EXIT_FAILURE);
  }
  for (i=0; i<N_update; i++) update[i] = i+leng2;
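  /* Worked example of the distribution above: with leng = 10 and nprocs = 3,
     leng/nprocs = 3 with remainder 1, so proc 0 owns 4 rows starting at
     global row 0, and procs 1 and 2 own 3 rows starting at rows 4 and 7. */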
  
#if 0 /* debug */
  printf("proc %d N_update %d\n",proc_config[AZ_node],N_update);
  fflush(stdout);                   
#endif
  sprintf(pathfilename,"%s/data_vblocks.txt",argv[1]);
  ML_AZ_Reader_ReadVariableBlocks(pathfilename,&nblocks,&blocks,&block_pde,
                                  &N_update,&update,proc_config);
#if 0 /* debug */
  printf("proc %d N_update %d\n",proc_config[AZ_node],N_update);
  fflush(stdout);                   
#endif

  sprintf(pathfilename,"%s/data_matrix.txt",argv[1]);
  AZ_input_msr_matrix(pathfilename,update, &val, &bindx, N_update, proc_config);

  /* The commented-out call below would fix things up so that all blocks   */
  /* (including the ghost nodes) have the same size. It is not clear       */
  /* whether this is a good idea with variable blocks; the example input   */
  /* files (see top of this file) don't need it anyway.                    */
  /*
  AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update);
  */
  AZ_transform_norowreordering(proc_config, &external, bindx, val,  update, &update_index,
	       &extern_index, &data_org, N_update, 0, 0, 0, &cpntr,
	       AZ_MSR_MATRIX);
	
  Amat = AZ_matrix_create( leng );

  AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL);

  Amat->matrix_type  = data_org[AZ_matrix_type];
	
  data_org[AZ_N_rows]  = data_org[AZ_N_internal] + data_org[AZ_N_border];
			
  start_time = AZ_second();

  options[AZ_scaling] = AZ_none;

  ML_Create(&ml, N_levels);
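  /* ML_Create allocates the multigrid hierarchy object with room for up to
     N_levels levels; the number actually built is returned later by
     ML_Gen_MGHierarchy_UsingAggregation(). */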
			
			
  /* set up discretization matrix and matrix vector function */
  AZ_ML_Set_Amat(ml, 0, N_update, N_update, Amat, proc_config);

  ML_Set_ResidualOutputFrequency(ml, context->output);
  ML_Set_Tolerance(ml, context->tol);
  ML_Aggregate_Create( &ag );
  if (ML_strcmp(context->agg_coarsen_scheme,"Mis") == 0) {
     ML_Aggregate_Set_CoarsenScheme_MIS(ag);
  }
  else if (ML_strcmp(context->agg_coarsen_scheme,"Uncoupled") == 0) {
     ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag);
  }
  else if (ML_strcmp(context->agg_coarsen_scheme,"Coupled") == 0) {
     ML_Aggregate_Set_CoarsenScheme_Coupled(ag);
  }
  else if (ML_strcmp(context->agg_coarsen_scheme,"Metis") == 0) {
     ML_Aggregate_Set_CoarsenScheme_METIS(ag);
     for (i=0; i<N_levels; i++)
        ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9);
  }
  else if (ML_strcmp(context->agg_coarsen_scheme,"VBMetis") == 0) {
     /* when no blocks read, use standard metis assuming constant block sizes */
     if (!blocks) 
        ML_Aggregate_Set_CoarsenScheme_METIS(ag);
     else {
        ML_Aggregate_Set_CoarsenScheme_VBMETIS(ag);
        ML_Aggregate_Set_Vblocks_CoarsenScheme_VBMETIS(ag,0,N_levels,nblocks,
                                                       blocks,block_pde,N_update);
     }
     for (i=0; i<N_levels; i++)
        ML_Aggregate_Set_NodesPerAggr(ml,ag,i,9);
  }
  else {
     printf("**ERR** ML: Unknown aggregation scheme %s\n",context->agg_coarsen_scheme);
     exit(-1);
  }
  ML_Aggregate_Set_DampingFactor(ag, context->agg_damping);
  ML_Aggregate_Set_MaxCoarseSize( ag, context->maxcoarsesize);
  ML_Aggregate_Set_Threshold(ag, context->agg_thresh);

  if (ML_strcmp(context->agg_spectral_norm,"Calc") == 0) {
     ML_Set_SpectralNormScheme_Calc(ml);
  }
  else if (ML_strcmp(context->agg_spectral_norm,"Anorm") == 0) {
     ML_Set_SpectralNormScheme_Anorm(ml);
  }
  else {
     printf("**WRN** ML: Unknown spectral norm scheme %s\n",context->agg_spectral_norm);
  }

  /* read in the rigid body modes */

   Nrigid = 0;
   if (proc_config[AZ_node] == 0) {
      sprintf(filename,"data_nullsp%d.txt",Nrigid);
      sprintf(pathfilename,"%s/%s",argv[1],filename);
      while( (fp = fopen(pathfilename,"r")) != NULL) {
          fclose(fp);
          Nrigid++;
          sprintf(filename,"data_nullsp%d.txt",Nrigid);
          sprintf(pathfilename,"%s/%s",argv[1],filename);
      }
    }
    Nrigid = AZ_gsum_int(Nrigid,proc_config);

    if (Nrigid != 0) {
       rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) );
       if (rigid == NULL) {
          printf("Error: Not enough space for rigid body modes\n");
          exit(EXIT_FAILURE);
       }
    }

   /* Set rhs */
   sprintf(pathfilename,"%s/data_rhs.txt",argv[1]);
   fp = fopen(pathfilename,"r");
   if (fp == NULL) {
      rhs=(double *)ML_allocate(leng*sizeof(double));
      if (proc_config[AZ_node] == 0) printf("taking linear vector for rhs\n");
      for (i = 0; i < N_update; i++) rhs[i] = (double) update[i];
   }
   else {
      fclose(fp);
      if (proc_config[AZ_node] == 0) printf("reading rhs from a file\n");
      AZ_input_msr_matrix(pathfilename, update, &rhs, &garbage, N_update, 
                          proc_config);
   }
   AZ_reorder_vec(rhs, data_org, update_index, NULL);

   for (i = 0; i < Nrigid; i++) {
      sprintf(filename,"data_nullsp%d.txt",i);
      sprintf(pathfilename,"%s/%s",argv[1],filename);
      AZ_input_msr_matrix(pathfilename, update, &mode, &garbage, N_update, 
                          proc_config);
      AZ_reorder_vec(mode, data_org, update_index, NULL);

#if 0 /* test the given rigid body mode, output-vector should be ~0 */
       Amat->matvec(mode, rigid, Amat, proc_config);
       for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]);
#endif
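    /* Classical Gram-Schmidt: orthogonalize this mode against the modes
       already stored in rigid[]. With alpha = -<mode,r_j>/<r_j,r_j>, the
       DAXPY below subtracts the projection of mode onto r_j. */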

    for (j = 0; j < i; j++) {
       alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/
                  AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), 
                               proc_config);
       DAXPY_F77(&N_update, &alpha,  &(rigid[j*N_update]),  &one, mode, &one);
    }
   
    /* rhs orthogonalization */

    alpha = -AZ_gdot(N_update, mode, rhs, proc_config)/
                    AZ_gdot(N_update, mode, mode, proc_config);
    DAXPY_F77(&N_update, &alpha,  mode,  &one, rhs, &one);

    for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j];
    free(mode);
    free(garbage);
  }

  for (j = 0; j < Nrigid; j++) {
     alpha = -AZ_gdot(N_update, rhs, &(rigid[j*N_update]), proc_config)/
              AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), 
                      proc_config);
     DAXPY_F77(&N_update, &alpha,  &(rigid[j*N_update]),  &one, rhs, &one);
  }

#if 0 /* for testing the default nullsp */
  ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, 6, NULL, N_update);
#else
  if (Nrigid != 0) {
     ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update);
  }
#endif
  if (rigid) ML_free(rigid);

  ag->keep_agg_information = 1;
  coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, 0, 
                                            ML_INCREASING, ag);
  coarsest_level--;                                            

  if ( proc_config[AZ_node] == 0 )
	printf("Coarse level = %d \n", coarsest_level);
	
#if 0
  /* set up smoothers */
  if (!blocks)
     blocks = (int *) ML_allocate(sizeof(int)*N_update);
#endif

  for (level = 0; level < coarsest_level; level++) {

      num_PDE_eqns = ml->Amat[level].num_PDEs;
		
     /*  Sparse approximate inverse smoother that actually does both */
     /*  pre and post smoothing.                                     */

     if (ML_strcmp(context->smoother,"Parasails") == 0) {
        ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth, 
                                parasails_sym, parasails_thresh, 
                                parasails_nlevels, parasails_filter,
                                (int) parasails_loadbal, parasails_factorized);
     }

     /* This is the symmetric Gauss-Seidel smoothing that we usually use. */
     /* In parallel, it is not a true Gauss-Seidel in that each processor */
     /* does a Gauss-Seidel on its local submatrix independent of the     */
     /* other processors.                                                 */

     else if (ML_strcmp(context->smoother,"GaussSeidel") == 0) {
       ML_Gen_Smoother_GaussSeidel(ml , level, ML_BOTH, nsmooth,1.);
     }
     else if (ML_strcmp(context->smoother,"SymGaussSeidel") == 0) {
       ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.);
     }
     else if (ML_strcmp(context->smoother,"Poly") == 0) {
       ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth);
     }
     else if (ML_strcmp(context->smoother,"BlockGaussSeidel") == 0) {
       ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_BOTH, nsmooth,1.,
					 num_PDE_eqns);
     }
     else if (ML_strcmp(context->smoother,"VBSymGaussSeidel") == 0) {
         if (blocks)    ML_free(blocks);
         if (block_pde) ML_free(block_pde);
         blocks    = NULL;
         block_pde = NULL;
         nblocks   = 0;
         ML_Aggregate_Get_Vblocks_CoarsenScheme_VBMETIS(ag,level,N_levels,&nblocks,
                                                        &blocks,&block_pde);
         if (blocks==NULL) ML_Gen_Blocks_Aggregates(ag, level, &nblocks, &blocks);
         ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.,
                                              nblocks, blocks);
     }

     /* This is a true Gauss-Seidel in parallel. This seems to work for  */
     /* elasticity problems.  However, I don't believe that this is very */
     /* efficient in parallel.                                           */
     /*
      nblocks = ml->Amat[level].invec_leng;
      for (i =0; i < nblocks; i++) blocks[i] = i;
      ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER,
                                                  nsmooth, 1., nblocks, blocks);
      ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER,
                                                  nsmooth, 1., nblocks, blocks);
     */

     /* Jacobi Smoothing                                                 */

     else if (ML_strcmp(context->smoother,"Jacobi") == 0) {
        ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth,.4);
        ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,.4);
     }

     /*  This does a block Gauss-Seidel (not true GS in parallel)        */
     /*  where each processor has 'nblocks' blocks.                      */
     /* */

     else if (ML_strcmp(context->smoother,"Metis") == 0) {
         if (blocks)    ML_free(blocks);
         if (block_pde) ML_free(block_pde);
         nblocks = 250;
         ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks);
         ML_Gen_Smoother_VBlockSymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.,
                                        nblocks, blocks);
     }
     else {
         printf("unknown smoother %s\n",context->smoother);
         exit(1);
     }
   }
	
   /* set coarse level solver */
   nsmooth   = context->coarse_its;
   /*  Sparse approximate inverse smoother that actually does both */
   /*  pre and post smoothing.                                     */

   if (ML_strcmp(context->coarse_solve,"Parasails") == 0) {
        ML_Gen_Smoother_ParaSails(ml , coarsest_level, ML_PRESMOOTHER, nsmooth, 
                                parasails_sym, parasails_thresh, 
                                parasails_nlevels, parasails_filter,
                                (int) parasails_loadbal, parasails_factorized);
   }

   else if (ML_strcmp(context->coarse_solve,"GaussSeidel") == 0) {
       ML_Gen_Smoother_GaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.);
   }
   else if (ML_strcmp(context->coarse_solve,"Poly") == 0) {
     ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth);
   }
   else if (ML_strcmp(context->coarse_solve,"SymGaussSeidel") == 0) {
       ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.);
   }
   else if (ML_strcmp(context->coarse_solve,"BlockGaussSeidel") == 0) {
       ML_Gen_Smoother_BlockGaussSeidel(ml, coarsest_level, ML_BOTH, nsmooth,1.,
					num_PDE_eqns);
   }
   else if (ML_strcmp(context->coarse_solve,"Aggregate") == 0) {
         if (blocks)    ML_free(blocks);
         if (block_pde) ML_free(block_pde);
         ML_Gen_Blocks_Aggregates(ag, coarsest_level, &nblocks, &blocks);
         ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, 
                                        nsmooth,1., nblocks, blocks);
   }
   else if (ML_strcmp(context->coarse_solve,"Jacobi") == 0) {
        ML_Gen_Smoother_Jacobi(ml , coarsest_level, ML_BOTH, nsmooth,.5);
   }
   else if (ML_strcmp(context->coarse_solve,"Metis") == 0) {
         if (blocks)    ML_free(blocks);
         if (block_pde) ML_free(block_pde);
         nblocks = 250;
         ML_Gen_Blocks_Metis(ml, coarsest_level, &nblocks, &blocks);
         ML_Gen_Smoother_VBlockSymGaussSeidel(ml , coarsest_level, ML_BOTH, 
                                              nsmooth,1., nblocks, blocks);
   }
   else if (ML_strcmp(context->coarse_solve,"SuperLU") == 0) {
      ML_Gen_CoarseSolverSuperLU( ml, coarsest_level);
   }
   else if (ML_strcmp(context->coarse_solve,"Amesos") == 0) {
      ML_Gen_Smoother_Amesos(ml,coarsest_level,ML_AMESOS_KLU,-1, 0.0);
   }
   else {
         printf("unknown coarse grid solver %s\n",context->coarse_solve);
         exit(1);
   }
		
   ML_Gen_Solver(ml, ML_MGV, 0, coarsest_level); 

   AZ_defaults(options, params);
	
   if (ML_strcmp(context->krylov,"Cg") == 0) {
      options[AZ_solver]   = AZ_cg;
   }
   else if (ML_strcmp(context->krylov,"Bicgstab") == 0) {
      options[AZ_solver]   = AZ_bicgstab;
   }
   else if (ML_strcmp(context->krylov,"Tfqmr") == 0) {
      options[AZ_solver]   = AZ_tfqmr;
   }
   else if (ML_strcmp(context->krylov,"Gmres") == 0) {
      options[AZ_solver]   = AZ_gmres;
   }
   else {
      printf("unknown krylov method %s\n",context->krylov);
      exit(1);
   }
   if (blocks)            ML_free(blocks);
   if (block_pde)         ML_free(block_pde);
   options[AZ_scaling]  = AZ_none;
   options[AZ_precond]  = AZ_user_precond;
   options[AZ_conv]     = AZ_r0;
   options[AZ_output]   = 1;
   options[AZ_max_iter] = context->max_outer_its;
   options[AZ_poly_ord] = 5;
   options[AZ_kspace]   = 130;
   params[AZ_tol]       = context->tol;
   options[AZ_output]   = context->output;
   ML_free(context);
	
   AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); 
   setup_time = AZ_second() - start_time;
	
   xxx = (double *) malloc( leng*sizeof(double));

   for (i = 0; i < leng; i++) xxx[i] = 0.0;
	

   /* Set x */
   /*
   there is no initguess supplied with these examples for the moment....
   */
   fp = fopen("initguessfile","r");
   if (fp != NULL) {
      fclose(fp);
      if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n");
      AZ_input_msr_matrix("data_initguess.txt", update, &xxx, &garbage, N_update, 
                          proc_config);

      options[AZ_conv] = AZ_expected_values;
   }
   else if (proc_config[AZ_node]== 0) printf("taking 0 initial guess \n");

   AZ_reorder_vec(xxx, data_org, update_index, NULL);

   /* if Dirichlet BC ... put the answer in */
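   /* (in MSR storage val[i] for a local row i is the diagonal entry; a
      diagonal of 1.0, up to round-off, marks a Dirichlet row, so the
      solution there is just the corresponding rhs entry) */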

   for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) {
      if ( (val[i] > .99999999) && (val[i] < 1.0000001))
         xxx[i] = rhs[i];      
   }

   fp = fopen("AZ_no_multilevel.dat","r");
   scaling = AZ_scaling_create();
   start_time = AZ_second();
   if (fp != NULL) {
      fclose(fp);
      options[AZ_precond] = AZ_none;
      options[AZ_scaling] = AZ_sym_diag;
      options[AZ_ignore_scaling] = AZ_TRUE;

      options[AZ_keep_info] = 1;
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); 

/*
      options[AZ_pre_calc] = AZ_reuse;
      options[AZ_conv] = AZ_expected_values;
      if (proc_config[AZ_node] == 0) 
              printf("\n-------- Second solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); 
      if (proc_config[AZ_node] == 0) 
              printf("\n-------- Third solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); 
*/
   }
   else {
      options[AZ_keep_info] = 1;
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); 
      options[AZ_pre_calc] = AZ_reuse;
      options[AZ_conv] = AZ_expected_values;
/*
      if (proc_config[AZ_node] == 0) 
              printf("\n-------- Second solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); 
      if (proc_config[AZ_node] == 0) 
              printf("\n-------- Third solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); 
*/
   }
   solve_time = AZ_second() - start_time;

   if (proc_config[AZ_node] == 0) 
      printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time);

   if (proc_config[AZ_node] == 0) 
     printf("Printing out a few entries of the solution ...\n");

   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}

   ML_Aggregate_Destroy(&ag);
   ML_Destroy(&ml);
   AZ_free((void *) Amat->data_org);
   AZ_free((void *) Amat->val);
   AZ_free((void *) Amat->bindx);
   AZ_free((void *) update);
   AZ_free((void *) external);
   AZ_free((void *) extern_index);
   AZ_free((void *) update_index);
   AZ_scaling_destroy(&scaling);
   if (Amat  != NULL) AZ_matrix_destroy(&Amat);
   if (Pmat  != NULL) AZ_precond_destroy(&Pmat);
   free(xxx);
   free(rhs);

#ifdef ML_MPI
  MPI_Finalize();
#endif
	
  return 0;
	
}
Beispiel #18
0
int test_azoo_conv_with_scaling(int conv_option, int scaling_option,
                                const Epetra_Comm& comm, bool verbose)
{
  int localN = 20;
  int numprocs = comm.NumProc();
  int globalN = numprocs*localN;
 
  Epetra_Map emap(globalN, 0, comm);
  Epetra_CrsMatrix* Acrs = create_and_fill_crs_matrix(emap);

  Epetra_Vector x_crs(emap), b_crs(emap);
  x_crs.PutScalar(1.0);

  Acrs->Multiply(false, x_crs, b_crs);
  x_crs.PutScalar(0.0);
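  // Manufactured-solution pattern: x was set to 1, b = A*x was computed
  // above, and x is now zeroed, so the solve should recover x ~= 1; the
  // 'old' Aztec solves below are set up the same way for comparison.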

  AztecOO azoo(Acrs, &x_crs, &b_crs);
  azoo.SetAztecOption(AZ_conv, conv_option);
  azoo.SetAztecOption(AZ_solver, AZ_cg);
  azoo.SetAztecOption(AZ_scaling, scaling_option);

  azoo.Iterate(100, 1.e-9);

  // Now, do the same thing with 'old-fashioned' Aztec, and compare
  // the solutions.

  int* proc_config = new int[AZ_PROC_SIZE];

#ifdef EPETRA_MPI
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
  AZ_set_comm(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, 0);
#endif

  int *external, *update_index, *external_index;
  int *external2, *update_index2, *external_index2;
  AZ_MATRIX* Amsr = NULL;
  AZ_MATRIX* Avbr = NULL;
  int err = create_and_transform_simple_matrix(AZ_MSR_MATRIX, localN, 4.0,
                                               proc_config, Amsr,
                                               external, update_index,
                                               external_index);

  int N_update = localN+Amsr->data_org[AZ_N_border];
  double* x_msr = new double[N_update];
  double* b_msr = new double[N_update*2];
  double* b_msr_u = b_msr+N_update;
  double* x_vbr = new double[N_update];
  double* b_vbr = new double[N_update*2];
  double* b_vbr_u = b_vbr+N_update;
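  // Each b_* buffer holds 2*N_update entries: the first half is the rhs in
  // Aztec's reordered layout, and the second half (b_*_u) receives the
  // un-permuted copy produced by AZ_invorder_vec below.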

  err = create_and_transform_simple_matrix(AZ_VBR_MATRIX, localN, 4.0,
                                           proc_config, Avbr,
                                           external2, update_index2,
                                           external_index2);
  for(int i=0; i<N_update; ++i) {
    x_msr[i] = 1.0;
    b_msr[i] = 0.0;
    b_msr_u[i] = 0.0;
    x_vbr[i] = 1.0;
    b_vbr[i] = 0.0;
    b_vbr_u[i] = 0.0;
  }

  Amsr->matvec(x_msr, b_msr, Amsr, proc_config);
  Avbr->matvec(x_vbr, b_vbr, Avbr, proc_config);

  for(int i=0; i<N_update; ++i) {
    x_msr[i] = 0.0;
    x_vbr[i] = 0.0;
  }

  //check that the rhs's are the same.
  double max_rhs_diff1 = 0.0;
  double max_rhs_diff2 = 0.0;
  double* bptr_crs = b_crs.Values();

  AZ_invorder_vec(b_msr, Amsr->data_org, update_index, NULL, b_msr_u);
  AZ_invorder_vec(b_vbr, Avbr->data_org, update_index2, Avbr->rpntr, b_vbr_u);
  for(int i=0; i<localN; ++i) {
    if (std::abs(bptr_crs[i] - b_msr_u[i]) > max_rhs_diff1) {
      max_rhs_diff1 = std::abs(bptr_crs[i] - b_msr_u[i]);
    }
    if (std::abs(bptr_crs[i] - b_vbr_u[i]) > max_rhs_diff2) {
      max_rhs_diff2 = std::abs(bptr_crs[i] - b_vbr_u[i]);
    }
  }

  if (max_rhs_diff1> 1.e-12) {
    cout << "AztecOO rhs not equal to Aztec msr rhs "<<max_rhs_diff1<<endl;
    return(-1);
  }

  if (max_rhs_diff2> 1.e-12) {
    cout << "AztecOO rhs not equal to Aztec vbr rhs "<<max_rhs_diff2<<endl;
    return(-1);
  }

  int* az_options = new int[AZ_OPTIONS_SIZE];
  double* params = new double[AZ_PARAMS_SIZE];
  double* status = new double[AZ_STATUS_SIZE];
  AZ_defaults(az_options, params);
  az_options[AZ_solver] = AZ_cg;
  az_options[AZ_conv] = conv_option;
  az_options[AZ_scaling] = scaling_option;

  az_options[AZ_max_iter] = 100;
  params[AZ_tol] = 1.e-9;

  AZ_iterate(x_msr, b_msr, az_options, params, status, proc_config,
             Amsr, NULL, NULL);
  AZ_iterate(x_vbr, b_vbr, az_options, params, status, proc_config,
             Avbr, NULL, NULL);

  AZ_invorder_vec(x_msr, Amsr->data_org, update_index, NULL, b_msr_u);
  AZ_invorder_vec(x_vbr, Avbr->data_org, update_index2, Avbr->rpntr, b_vbr_u);

  double max_diff1 = 0.0;
  double max_diff2 = 0.0;
  double* xptr_crs = x_crs.Values();

  for(int i=0; i<localN; ++i) {
    if (std::abs(xptr_crs[i] - b_msr_u[i]) > max_diff1) {
      max_diff1 = std::abs(xptr_crs[i] - b_msr_u[i]);
    }
    if (std::abs(xptr_crs[i] - b_vbr_u[i]) > max_diff2) {
      max_diff2 = std::abs(xptr_crs[i] - b_vbr_u[i]);
    }
  }

  if (max_diff1 > 1.e-7) {
    cout << "AztecOO failed to match Aztec msr with scaling and Anorm conv."
      << endl;
    return(-1);
  }

  if (max_diff2 > 1.e-7) {
    cout << "AztecOO failed to match Aztec vbr with scaling and Anorm conv."
      << endl;
    return(-1);
  }

  delete Acrs;
  delete [] x_msr;
  delete [] b_msr;
  delete [] x_vbr;
  delete [] b_vbr;
  destroy_matrix(Amsr);
  destroy_matrix(Avbr);
  delete [] proc_config;
  free(update_index);
  free(external);
  free(external_index);
  free(update_index2);
  free(external2);
  free(external_index2);
  delete [] az_options;
  delete [] params;
  delete [] status;

  return(0);
}
Beispiel #19
0
AZ_MATRIX *user_Kn_build(struct user_partition *Node_Partition)

{
  int *Kn_bindx;
  double *Kn_val;
  int    proc_config[AZ_PROC_SIZE];
  AZ_MATRIX *Kn_mat;
  int    *reordered_glob_nodes = NULL, *cpntr = NULL, *Kn_data_org = NULL;
  int i, ii, jj, nx, global_id, Nlocal_nodes, nz_ptr;


  Nlocal_nodes = Node_Partition->Nlocal;
  Kn_bindx = (int    *) malloc((27*Nlocal_nodes+5)*sizeof(int));
  Kn_val   = (double *) malloc((27*Nlocal_nodes+5)*sizeof(double));
  Kn_bindx[0] = Nlocal_nodes+1;

  nx = (int) sqrt( ((double) Node_Partition->Nglobal) + .00001);
#ifdef periodic
  nx = (int) sqrt( ((double) Node_Partition->Nglobal - 2) + .00001);
  Nlocal_nodes -= 2;
#endif

  for (i = 0; i < Nlocal_nodes; i++) {
    global_id = (Node_Partition->my_global_ids)[i];
    Kn_bindx[i+1] = Kn_bindx[i] + 8;
    Kn_val[i] = 8.;
    inv2dnodeindex(global_id, &ii, &jj, nx);
    nz_ptr = Kn_bindx[i];
    Kn_bindx[nz_ptr] = southeast2d(ii,jj,nx);  Kn_val[nz_ptr++] = -1.;
    Kn_bindx[nz_ptr] = northwest2d(ii,jj,nx);  Kn_val[nz_ptr++] = -1.;
    Kn_bindx[nz_ptr] = northeast2d(ii,jj,nx);  Kn_val[nz_ptr++] = -.00000001;
    if (ii == 0) ii = nx-1;
    else ii--;
    Kn_bindx[nz_ptr] = northwest2d(ii,jj,nx);  Kn_val[nz_ptr++] = -.00000001;
    if (jj == 0) jj = nx-1;
    else jj--;
    Kn_bindx[nz_ptr] = southeast2d(ii,jj,nx);  Kn_val[nz_ptr++] = -1.;
    Kn_bindx[nz_ptr] = northwest2d(ii,jj,nx);  Kn_val[nz_ptr++] = -1.;
    Kn_bindx[nz_ptr] = southwest2d(ii,jj,nx);  Kn_val[nz_ptr++] = -.00000001;
    if (ii == nx-1) ii = 0;
    else ii++;
    Kn_bindx[nz_ptr] = southeast2d(ii,jj,nx);  Kn_val[nz_ptr++] = -.00000001;
  }
#ifdef periodic
  /* Append two extra rows (diagonal-only, value 1) used by the periodic variant. */
  i = Nlocal_nodes;
  Kn_bindx[i+1] = Kn_bindx[i];
  Kn_val[i] = 1.;
  i++;
  Kn_bindx[i+1] = Kn_bindx[i];
  Kn_val[i] = 1.;
  Nlocal_nodes += 2;
#endif

  /* Transform the global Aztec matrix into a local Aztec matrix. That is,   */
  /* replace global column indices by local indices and set up communication */
  /* data structure 'Kn_data_org' that will be used for matvec's.            */

  AZ_set_proc_config(proc_config, COMMUNICATOR);

  AZ_transform_norowreordering(proc_config,&(Node_Partition->needed_external_ids),
			       Kn_bindx, Kn_val, Node_Partition->my_global_ids,
			       &reordered_glob_nodes, &reordered_node_externs, 
			       &Kn_data_org, Nlocal_nodes, 0, 0, 0, 
			       &cpntr, AZ_MSR_MATRIX);

  /* Convert old style Aztec matrix to newer style Aztec matrix */

  Kn_mat = AZ_matrix_create( Nlocal_nodes );
  AZ_set_MSR(Kn_mat, Kn_bindx, Kn_val, Kn_data_org, 0, NULL, AZ_LOCAL);

  return(Kn_mat);
}
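For reference, user_Kn_build assembles the matrix in Aztec's MSR (modified sparse row) format: for an N-row matrix, val[i] holds the diagonal of row i (i < N), bindx[0] = N+1 (as set above), and bindx[i]..bindx[i+1]-1 index the off-diagonal entries of row i, with the column in bindx[k] and the value in val[k]. A minimal sketch (hypothetical helper, not part of the example) that walks one MSR row:

/* Sketch: print row 'row' of an MSR matrix stored in (bindx, val). */
#include <stdio.h>

void print_msr_row(int row, const int *bindx, const double *val)
{
  int k;
  printf("A(%d,%d) = %g\n", row, row, val[row]);      /* diagonal entry */
  for (k = bindx[row]; k < bindx[row+1]; k++)         /* off-diagonals  */
    printf("A(%d,%d) = %g\n", row, bindx[k], val[k]);
}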
Example #20
0
int test_AZ_iterate_AZ_pre_calc_AZ_reuse(Epetra_Comm& Comm,
                                         int* options,
                                         bool verbose)
{
  (void)Comm;
  if (verbose) {
    cout << "testing AZ_keep_info/AZ_reuse with 'old' Aztec (solver "
         <<options[AZ_solver] <<", precond "<<options[AZ_precond]<<"/"
         << options[AZ_subdomain_solve]<<")"<<endl;
  }

  int* proc_config = new int[AZ_PROC_SIZE];

#ifdef EPETRA_MPI
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
  AZ_set_comm(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, 0);
#endif

  //We're going to create 2 Aztec matrices, one MSR and one VBR. We're going
  //to do 2 solves with each, reusing the preconditioner for the second solve.

  int *external, *update_index, *external_index;
  int *external2, *update_index2, *external_index2;

  int i, N = 5;
  AZ_MATRIX* Amsr = NULL;
  AZ_MATRIX* Avbr = NULL;
  int err = create_and_transform_simple_matrix(AZ_MSR_MATRIX, N, 3.0,
                                               proc_config, Amsr,
                                               external, update_index,
                                               external_index);

  err += create_and_transform_simple_matrix(AZ_VBR_MATRIX, N, 3.0,
                                            proc_config, Avbr,
                                            external2, update_index2,
                                            external_index2);


  int* az_options = new int[AZ_OPTIONS_SIZE];
  double* params = new double[AZ_PARAMS_SIZE];
  double* status = new double[AZ_STATUS_SIZE];
  AZ_defaults(az_options, params);
  az_options[AZ_solver] = options[AZ_solver];
  az_options[AZ_precond] = options[AZ_precond];
  az_options[AZ_subdomain_solve] = options[AZ_subdomain_solve];
  az_options[AZ_scaling] = AZ_sym_diag;
  if (verbose) {
    az_options[AZ_output] = AZ_warnings;
  }
  else {
    az_options[AZ_output] = 0;
  }

  int N_update = N+Amsr->data_org[AZ_N_border];
  double* x = new double[N_update];
  double* b = new double[N_update];

  for(i=0; i<N_update; ++i) {
    x[i] = 0.0;
    b[i] = 1.0;
  }

  AZ_PRECOND* Pmsr = AZ_precond_create(Amsr, AZ_precondition, NULL);
  AZ_SCALING* Smsr = AZ_scaling_create();
  AZ_PRECOND* Pvbr = AZ_precond_create(Avbr, AZ_precondition, NULL);
  AZ_SCALING* Svbr = AZ_scaling_create();

 // Amsr->data_org[AZ_name] = 1;
 // Avbr->data_org[AZ_name] = 2;

  //First solve with the first matrix (Amsr).
  if (verbose)
    cout << "solve Amsr, name: "<<Amsr->data_org[AZ_name]<<endl;

  call_AZ_iterate(Amsr, Pmsr, Smsr, x, b, az_options, params, status,
                  proc_config, 1, AZ_calc, verbose);

  //First solve with the second matrix (Avbr).
  if (verbose)
    cout << "solve Avbr, name: " <<Avbr->data_org[AZ_name]<<endl;

  call_AZ_iterate(Avbr, Pvbr, Svbr, x, b, az_options, params, status,
                  proc_config, 0, AZ_calc, verbose);

  //Second solve with Amsr, reusing preconditioner
  if (verbose)
    cout << "solve Amsr (first reuse)"<<endl;

  call_AZ_iterate(Amsr, Pmsr, Smsr, x, b, az_options, params, status,
                  proc_config, 1, AZ_reuse, verbose);

  //Second solve with Avbr, not reusing preconditioner
  if (verbose)
    cout << "solve Avbr (keepinfo==0), name: " <<Avbr->data_org[AZ_name]<<endl;

  call_AZ_iterate(Avbr, Pvbr, Svbr, x, b, az_options, params, status,
                  proc_config, 0, AZ_calc, verbose);

  if (verbose)
    std::cout << "calling AZ_free_memory..."<<std::endl;

  AZ_free_memory(Amsr->data_org[AZ_name]);
  AZ_free_memory(Avbr->data_org[AZ_name]);

  //solve with Amsr again, not reusing preconditioner
  if (verbose)
    cout << "solve Amsr (keepinfo==0)"<<endl;

  call_AZ_iterate(Amsr, Pmsr, Smsr, x, b, az_options, params, status,
                  proc_config, 0, AZ_calc, verbose);

  //Second solve with Avbr, this time with keepinfo==1
  if (verbose)
    cout << "solve Avbr (keepinfo==1), name: " <<Avbr->data_org[AZ_name]<<endl;

  call_AZ_iterate(Avbr, Pvbr, Svbr, x, b, az_options, params, status,
                  proc_config, 1, AZ_calc, verbose);

  //Second solve with Amsr, not reusing preconditioner
  if (verbose)
    cout << "solve Amsr (keepinfo==0, calc)"<<endl;

  call_AZ_iterate(Amsr, Pmsr, Smsr, x, b, az_options, params, status,
                  proc_config, 0, AZ_calc, verbose);

  //Second solve with Avbr, not reusing preconditioner
  if (verbose)
    cout << "solve Avbr (keepinfo==1, reuse), name: "<<Avbr->data_org[AZ_name]<<endl;

  call_AZ_iterate(Avbr, Pvbr, Svbr, x, b, az_options, params, status,
                  proc_config, 1, AZ_reuse, verbose);

  AZ_free_memory(Amsr->data_org[AZ_name]);
  AZ_free_memory(Avbr->data_org[AZ_name]);

  AZ_scaling_destroy(&Smsr);
  AZ_precond_destroy(&Pmsr);
  AZ_scaling_destroy(&Svbr);
  AZ_precond_destroy(&Pvbr);
  destroy_matrix(Amsr);
  destroy_matrix(Avbr);

  delete [] x;
  delete [] b;

  delete [] az_options;
  delete [] params;
  delete [] status;
  delete [] proc_config;
  free(update_index);
  free(external);
  free(external_index);
  free(update_index2);
  free(external2);
  free(external_index2);

  return(0);
}
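The knobs this test exercises: options[AZ_keep_info]=1 asks Aztec to keep the preconditioner/scaling data filed under the matrix's AZ_name (here presumably set through call_AZ_iterate's keepinfo flag), and options[AZ_pre_calc]=AZ_reuse on a later solve picks that data up instead of recomputing it. A condensed sketch of the bare pattern (a hypothetical wrapper, assuming an already-transformed matrix and matching vectors as created above):

/* Sketch: two solves with preconditioner reuse. */
#include "az_aztec.h"

void solve_twice_with_reuse(AZ_MATRIX* Amat, AZ_PRECOND* Prec, AZ_SCALING* Scal,
                            double* x, double* b, int* proc_config)
{
  int    az_options[AZ_OPTIONS_SIZE];
  double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];
  AZ_defaults(az_options, params);

  az_options[AZ_keep_info] = 1;        /* keep preconditioner data around */
  az_options[AZ_pre_calc]  = AZ_calc;  /* build it on the first solve     */
  AZ_iterate(x, b, az_options, params, status, proc_config, Amat, Prec, Scal);

  az_options[AZ_pre_calc]  = AZ_reuse; /* second solve: reuse, no rebuild */
  AZ_iterate(x, b, az_options, params, status, proc_config, Amat, Prec, Scal);

  AZ_free_memory(Amat->data_org[AZ_name]); /* release the kept data */
}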
Example #21
0
int main(int argc, char *argv[])
{
  int    Nnodes=16*16;              /* Total number of nodes in the problem.*/
                                    /* 'Nnodes' must be a perfect square.   */
  int    MaxMgLevels=6;             /* Maximum number of Multigrid Levels   */
  int    Nits_per_presmooth=1;      /* # of pre & post smoothings per level */
  double tolerance = 1.0e-8;        /* At convergence:                      */
                                    /*   ||r_k||_2 < tolerance ||r_0||_2    */
  int smoothPe_flag = ML_YES;       /* ML_YES: smooth tentative prolongator */
                                    /* ML_NO: don't smooth prolongator      */

  /***************************************************************************/
  /* Select Hiptmair relaxation subsmoothers for the nodal and edge problems */
  /* Choices include                                                         */
  /*   1) ML_Gen_Smoother_SymGaussSeidel: this corresponds to a processor    */
  /*      local version of symmetric Gauss-Seidel/SOR. The number of sweeps  */
  /*      can be set via either 'edge_its' or 'nodal_its'. The damping can   */
  /*      be set via 'edge_omega' or 'nodal_omega'. When set to ML_DDEFAULT, */
  /*      the damping is set to '1' on one processor. On multiple processors */
  /*      a lower damping value is set. This is needed to converge processor */
  /*      local SOR.                                                         */
  /*   2) ML_Gen_Smoother_Cheby: this corresponds to polynomial relaxation. */
  /*      The degree of the polynomial is set via 'edge_its' or 'nodal_its'. */
  /*      If the degree is '-1', Marian Brezina's MLS polynomial is chosen.  */
  /*      Otherwise, a Chebyshev polynomial is used over high frequencies    */
  /*      [ lambda_max/alpha , lambda_max]. Lambda_max is computed. 'alpha'  */
  /*      is hardwired in this example to correspond to twice the ratio of   */
  /*      unknowns in the fine and coarse meshes.                            */
  /*                                                                         */
  /* Using 'hiptmair_type' (see comments below) it is also possible to choose*/
  /* when edge and nodal problems are relaxed within the Hiptmair smoother.  */
  /***************************************************************************/

  void  *edge_smoother=(void *)     /* Edge relaxation:                     */
               ML_Gen_Smoother_Cheby; /*   ML_Gen_Smoother_Cheby            */
                                    /*     ML_Gen_Smoother_SymGaussSeidel   */
  void *nodal_smoother=(void *)     /* Nodal relaxation                     */
               ML_Gen_Smoother_Cheby;/*     ML_Gen_Smoother_Cheby           */
                                    /*     ML_Gen_Smoother_SymGaussSeidel   */

  int  edge_its = 3;                /* Iterations or polynomial degree for  */
  int  nodal_its = 3;               /* edge/nodal subsmoothers.             */
  double nodal_omega = ML_DDEFAULT, /* SOR damping parameter for nodal/edge */
         edge_omega  = ML_DDEFAULT; /* subsmoothers (see comments above).   */
  int   hiptmair_type=HALF_HIPTMAIR;/* FULL_HIPTMAIR: each invocation       */
                                    /*     smoothes on edges, then nodes,   */
                                    /*     and then once again on edges.    */
                                    /* HALF_HIPTMAIR: each pre-invocation   */
                                    /*     smoothes on edges, then nodes.   */
                                    /*     Each post-invocation smoothes    */
                                    /*     on nodes, then edges.            */


  ML_Operator  *Tmat, *Tmat_trans, **Tmat_array, **Tmat_trans_array;
  ML           *ml_edges, *ml_nodes;
  ML_Aggregate *ag;
  int          Nfine_edge, Ncoarse_edge, Nfine_node, Ncoarse_node, Nlevels;
  int          level, coarsest_level, itmp;
  double       edge_coarsening_rate, node_coarsening_rate, *rhs, *xxx;
  void         **edge_args, **nodal_args;
  struct       user_partition Edge_Partition = {NULL, NULL,0,0},
                              Node_Partition = {NULL, NULL,0,0};
  struct Tmat_data Tmat_data;
  int i, Ntotal;
  ML_Comm *comm;

  /* See Aztec User's Guide for information on these variables */

#ifdef AZTEC
  AZ_MATRIX    *Ke_mat, *Kn_mat;
  AZ_PRECOND   *Pmat = NULL;
  int          proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double       params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];
#endif


  /* get processor information (proc id & # of procs) and set ML's printlevel. */

#ifdef ML_MPI
  MPI_Init(&argc,&argv);
#endif
#ifdef AZTEC
  AZ_set_proc_config(proc_config, COMMUNICATOR);
#endif
  ML_Set_PrintLevel(10);   /* set ML's output level: 0 gives least output */

  /* Set the # of global nodes/edges and partition both the edges and the */
  /* nodes over the processors. NOTE: I believe we assume that if an edge */
  /* is assigned to a processor at least one of its nodes must also be    */
  /* assigned to that processor.                                          */

  Node_Partition.Nglobal = Nnodes;
  Edge_Partition.Nglobal = Node_Partition.Nglobal*2;
  Node_Partition.type = NODE;
  Edge_Partition.type = EDGE;
#define perxodic /* does not match 'periodic', so the #ifdef periodic blocks stay disabled */
#ifdef periodic
  Node_Partition.Nglobal += 2;
#endif
  partition_edges(&Edge_Partition);
  partition_nodes(&Node_Partition);
  /* Quick check of the ghost-edge exchange: fill owned entries with their */
  /* global ids and exchange. Note these xxx/rhs buffers are never freed   */
  /* before being reallocated further below.                               */
  xxx = (double *) ML_allocate((Edge_Partition.Nlocal+100)*sizeof(double));
  rhs = (double *) ML_allocate((Edge_Partition.Nlocal+100)*sizeof(double));
  for (i = 0; i < Edge_Partition.Nlocal + 100; i++) xxx[i] = -1.;
  for (i = 0; i < Edge_Partition.Nlocal; i++)
    xxx[i] = (double) Edge_Partition.my_global_ids[i];

  update_ghost_edges(xxx, (void *) &Edge_Partition);


  /* Create an empty multigrid hierarchy and set the 'MaxMGLevels-1'th   */
  /* level discretization within this hierarchy to the ML matrix         */
  /* representing Ke (Maxwell edge discretization).                      */

  ML_Create(&ml_edges, MaxMgLevels);
#ifdef AZTEC
  /* Build Ke as an Aztec matrix. Use built-in function AZ_ML_Set_Amat() */
  /* to convert to an ML matrix and put in hierarchy.                    */

  Ke_mat = user_Ke_build(&Edge_Partition);
  AZ_ML_Set_Amat(ml_edges, MaxMgLevels-1, Edge_Partition.Nlocal,
      		 Edge_Partition.Nlocal, Ke_mat, proc_config);
#else
  /* Build Ke directly as an ML matrix.                                  */

  ML_Init_Amatrix      (ml_edges, MaxMgLevels-1, Edge_Partition.Nlocal,
			Edge_Partition.Nlocal, &Edge_Partition);

  Ntotal = Edge_Partition.Nlocal;
  if (Edge_Partition.nprocs == 2) Ntotal += Edge_Partition.Nghost;
  ML_Set_Amatrix_Getrow(ml_edges, MaxMgLevels-1,  Ke_getrow, update_ghost_edges, Ntotal);
  ML_Set_Amatrix_Matvec(ml_edges, MaxMgLevels-1,  Ke_matvec);

#endif



  /* Build an Aztec matrix representing an auxiliary nodal PDE problem.  */
  /* This should be a variable coefficient Poisson problem (with unknowns*/
  /* at the nodes). The coefficients should be chosen to reflect the     */
  /* conductivity of the original edge problems.                         */
  /* Create an empty multigrid hierarchy. Convert the Aztec matrix to an */
  /* ML matrix and put it in the 'MaxMGLevels-1' level of the hierarchy. */
  /* Note it is possible to multiply T'*T to get this matrix though this  */
  /* will not incorporate material properties.                            */

  ML_Create(&ml_nodes, MaxMgLevels);

#ifdef AZTEC
  Kn_mat = user_Kn_build( &Node_Partition);
  AZ_ML_Set_Amat(ml_nodes, MaxMgLevels-1, Node_Partition.Nlocal, 
		 Node_Partition.Nlocal, Kn_mat, proc_config);
#else
  ML_Init_Amatrix      (ml_nodes, MaxMgLevels-1 , Node_Partition.Nlocal,
			Node_Partition.Nlocal, &Node_Partition);
  Ntotal = Node_Partition.Nlocal;
  if (Node_Partition.nprocs == 2) Ntotal += Node_Partition.Nghost;
  ML_Set_Amatrix_Getrow(ml_nodes, MaxMgLevels-1,  Kn_getrow, update_ghost_nodes, Ntotal);
#endif

  /* Build an ML matrix representing the null space of the PDE problem. */
  /* This should be a discrete gradient (nodes to edges).               */

#ifdef AZTEC
    Tmat = user_T_build (&Edge_Partition, &Node_Partition, 
  		   &(ml_nodes->Amat[MaxMgLevels-1]));
#else
    Tmat = ML_Operator_Create(ml_nodes->comm);
    Tmat_data.edge = &Edge_Partition;
    Tmat_data.node = &Node_Partition;
    Tmat_data.Kn   = &(ml_nodes->Amat[MaxMgLevels-1]);

    ML_Operator_Set_ApplyFuncData( Tmat,	Node_Partition.Nlocal,
				   Edge_Partition.Nlocal, ML_EMPTY, (void *) &Tmat_data, 
				   Edge_Partition.Nlocal, NULL, 0);
    ML_Operator_Set_Getrow( Tmat, ML_INTERNAL, Edge_Partition.Nlocal, Tmat_getrow);
    ML_Operator_Set_ApplyFunc(Tmat, ML_INTERNAL, Tmat_matvec);
    ML_Comm_Create( &comm);

  ML_CommInfoOP_Generate( &(Tmat->getrow->pre_comm), update_ghost_nodes, 
			  &Node_Partition,comm, Tmat->invec_leng, 
			  Node_Partition.Nghost);
#endif


  /********************************************************************/
  /* Set some ML parameters.                                          */
  /*------------------------------------------------------------------*/
	
  ML_Set_ResidualOutputFrequency(ml_edges, 1);
  ML_Set_Tolerance(ml_edges, 1.0e-8);
  ML_Aggregate_Create( &ag );
  ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag);
  ML_Aggregate_Set_DampingFactor(ag, 0.0); /* must use 0 for maxwell */
  ML_Aggregate_Set_MaxCoarseSize(ag, 30);
  ML_Aggregate_Set_Threshold(ag, 0.0);


  /********************************************************************/
  /*                      Set up Tmat_trans                           */
  /*------------------------------------------------------------------*/

  Tmat_trans = ML_Operator_Create(ml_edges->comm);
  ML_Operator_Transpose_byrow(Tmat, Tmat_trans);


  Nlevels=ML_Gen_MGHierarchy_UsingReitzinger(ml_edges, &ml_nodes,MaxMgLevels-1,
					     ML_DECREASING,ag,Tmat,Tmat_trans, 
					     &Tmat_array,&Tmat_trans_array, 
					     smoothPe_flag, 1.5);

  /* Set the Hiptmair subsmoothers */

  if (nodal_smoother == (void *) ML_Gen_Smoother_SymGaussSeidel) {
    nodal_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(nodal_args, 0, &nodal_its);
    ML_Smoother_Arglist_Set(nodal_args, 1, &nodal_omega);
  }
  if (edge_smoother == (void *) ML_Gen_Smoother_SymGaussSeidel) {
    edge_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(edge_args, 0, &edge_its);
    ML_Smoother_Arglist_Set(edge_args, 1, &edge_omega);
  }
  if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) {
    nodal_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(nodal_args, 0, &nodal_its);
    Nfine_node = Tmat_array[MaxMgLevels-1]->invec_leng;
    Nfine_node = ML_gsum_int(Nfine_node, ml_edges->comm);
  }
  if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) {
    edge_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(edge_args, 0, &edge_its);
    Nfine_edge = Tmat_array[MaxMgLevels-1]->outvec_leng;
    Nfine_edge = ML_gsum_int(Nfine_edge, ml_edges->comm);
  }

  /****************************************************
  * Set up smoothers for all levels but the coarsest. *
  ****************************************************/
  coarsest_level = MaxMgLevels - Nlevels;

  for (level = MaxMgLevels-1; level > coarsest_level; level--)
    {
      if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) {
	Ncoarse_edge = Tmat_array[level-1]->outvec_leng;
	Ncoarse_edge = ML_gsum_int(Ncoarse_edge, ml_edges->comm);
	edge_coarsening_rate =  2.*((double) Nfine_edge)/ ((double) Ncoarse_edge);
	ML_Smoother_Arglist_Set(edge_args, 1, &edge_coarsening_rate);
	Nfine_edge = Ncoarse_edge;
      }
      if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) {
	Ncoarse_node = Tmat_array[level-1]->invec_leng;
	Ncoarse_node = ML_gsum_int(Ncoarse_node, ml_edges->comm);
	node_coarsening_rate =  2.*((double) Nfine_node)/ ((double) Ncoarse_node);
	ML_Smoother_Arglist_Set(nodal_args, 1, &node_coarsening_rate);
	Nfine_node = Ncoarse_node;
      }
      ML_Gen_Smoother_Hiptmair(ml_edges, level, ML_BOTH, Nits_per_presmooth,
			       Tmat_array, Tmat_trans_array, NULL, edge_smoother,
			       edge_args, nodal_smoother,nodal_args, hiptmair_type);
    }

  /*******************************************
  * Set up coarsest level smoother
  *******************************************/

  if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) {
    edge_coarsening_rate = (double) Nfine_edge;
    ML_Smoother_Arglist_Set(edge_args, 1, &edge_coarsening_rate);
  }
  if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) {
    node_coarsening_rate = (double) Nfine_node;
    ML_Smoother_Arglist_Set(nodal_args,1,&node_coarsening_rate);
  }
  ML_Gen_CoarseSolverSuperLU( ml_edges, coarsest_level);
  

  /* Must be called before invoking the preconditioner */
  ML_Gen_Solver(ml_edges, ML_MGV, MaxMgLevels-1, coarsest_level); 



  /* Set the initial guess and the right hand side. Invoke solver */	

  xxx = (double *) ML_allocate(Edge_Partition.Nlocal*sizeof(double)); 
  ML_random_vec(xxx, Edge_Partition.Nlocal, ml_edges->comm);
  rhs = (double *) ML_allocate(Edge_Partition.Nlocal*sizeof(double)); 
  ML_random_vec(rhs, Edge_Partition.Nlocal, ml_edges->comm);

#ifdef AZTEC
  /* Choose the Aztec solver and criteria. Also tell Aztec that */
  /* ML will be supplying the preconditioner.                   */

  AZ_defaults(options, params);
  options[AZ_solver]   = AZ_fixed_pt; /* overridden by the next line */
  options[AZ_solver]   = AZ_gmres;
  options[AZ_kspace]   = 80;
  params[AZ_tol]       = tolerance;
  AZ_set_ML_preconditioner(&Pmat, Ke_mat, ml_edges, options); 
  options[AZ_conv] = AZ_noscaled;
  AZ_iterate(xxx, rhs, options, params, status, proc_config, Ke_mat, Pmat, NULL);
#else
  ML_Iterate(ml_edges, xxx, rhs);
#endif


  /* clean up. */

  ML_Smoother_Arglist_Delete(&nodal_args);
  ML_Smoother_Arglist_Delete(&edge_args);
  ML_Aggregate_Destroy(&ag);
  ML_Destroy(&ml_edges);
  ML_Destroy(&ml_nodes);
#ifdef AZTEC
  AZ_free((void *) Ke_mat->data_org);
  AZ_free((void *) Ke_mat->val);
  AZ_free((void *) Ke_mat->bindx);
  if (Ke_mat  != NULL) AZ_matrix_destroy(&Ke_mat);
  if (Pmat  != NULL) AZ_precond_destroy(&Pmat);
  if (Kn_mat != NULL) AZ_matrix_destroy(&Kn_mat);
#endif
  free(xxx);
  free(rhs);
  ML_Operator_Destroy(&Tmat);
  ML_Operator_Destroy(&Tmat_trans);
  ML_MGHierarchy_ReitzingerDestroy(MaxMgLevels-2, &Tmat_array, &Tmat_trans_array);

#ifdef ML_MPI
  MPI_Finalize();
#endif
		
  return 0;
		
}
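In outline, the Maxwell setup above reduces to the following call order (a condensed sketch reusing the example's own declarations and routines, not a standalone program):

/* Sketch: core ML Maxwell (edge-element) setup sequence. */
ML_Create(&ml_edges, MaxMgLevels);              /* edge hierarchy            */
ML_Create(&ml_nodes, MaxMgLevels);              /* auxiliary nodal hierarchy */
/* ... install Ke on ml_edges and Kn on ml_nodes (e.g. AZ_ML_Set_Amat) ...  */

Tmat_trans = ML_Operator_Create(ml_edges->comm);
ML_Operator_Transpose_byrow(Tmat, Tmat_trans);  /* discrete gradient T, T^T  */

Nlevels = ML_Gen_MGHierarchy_UsingReitzinger(ml_edges, &ml_nodes, MaxMgLevels-1,
                                             ML_DECREASING, ag, Tmat, Tmat_trans,
                                             &Tmat_array, &Tmat_trans_array,
                                             smoothPe_flag, 1.5);

/* ... ML_Gen_Smoother_Hiptmair per level, coarse solve, then: */
ML_Gen_Solver(ml_edges, ML_MGV, MaxMgLevels-1, coarsest_level);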
Example #22
0
int main(int argc, char *argv[])
{
  int num_PDE_eqns=6, N_levels=4, nsmooth=2;

  int    leng, level, N_grid_pts, coarsest_level;

  /* See Aztec User's Guide for more information on the */
  /* variables that follow.                             */

  int    proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];

  /* data structure for matrix corresponding to the fine grid */

  double *val = NULL, *xxx, *rhs, solve_time, setup_time, start_time;
  AZ_MATRIX *Amat;
  AZ_PRECOND *Pmat = NULL;
  ML *ml;
  FILE *fp;
  int i, j, Nrigid, *garbage = NULL;
#ifdef ML_partition
  int nblocks;
  int *block_list = NULL;
  int k;
#endif
  struct AZ_SCALING *scaling;
  ML_Aggregate *ag;
  double *mode, *rigid;
  char filename[80];
  double alpha;
  int allocated = 0;
  int old_prec, old_sol;
  double old_tol;
/*
double *Amode, beta, biggest;
int big_ind = -1, ii;
*/
  ML_Operator *Amatrix;
  int *rowi_col = NULL, rowi_N, count2, ccc;
  double *rowi_val = NULL;
  double max_diag, min_diag, max_sum, sum;
  int nBlocks, *blockIndices, Ndof;
  /* N_update, update, bindx, external, extern_index, update_index,      */
  /* data_org, cpntr, iii, which_filename and scaling_vect are used      */
  /* below; they are presumably file-scope in the full example source.   */
#ifdef ML_partition
   FILE *fp2;
   int count;

   if (argc != 2) {
     printf("Usage: ml_read_elas num_processors\n");
     exit(1);
   }
   else sscanf(argv[1],"%d",&nblocks);
#endif

#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  /* get number of processors and the name of this processor */

  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI);
#endif

  /* read in the number of matrix equations */
  leng = 0;
  if (proc_config[AZ_node] == 0) {
#    ifdef binary
	fp=fopen(".data","rb");
#    else
	fp=fopen(".data","r");
#    endif
     if (fp==NULL) {
        printf("couldn't open file .data\n");
        exit(1);
     }
#    ifdef binary
        fread(&leng, sizeof(int), 1, fp);
#    else
        fscanf(fp,"%d",&leng);
#    endif
     fclose(fp);
  }
  leng = AZ_gsum_int(leng, proc_config);

  N_grid_pts=leng/num_PDE_eqns;

  /* initialize the list of global indices. NOTE: the list of global */
  /* indices must be in ascending order so that subsequent calls to  */
  /* AZ_find_index() will function properly. */
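  /* (AZ_find_index() does a binary search on the sorted list.) */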

  if (proc_config[AZ_N_procs] == 1) i = AZ_linear;
  else i = AZ_file;
  AZ_read_update(&N_update, &update, proc_config, N_grid_pts, num_PDE_eqns,i);

  AZ_read_msr_matrix(update, &val, &bindx, N_update, proc_config);


  /* This code is to fix things up so that we are sure we have all */
  /* blocks (including the ghost nodes) the same size.             */

  AZ_block_MSR(&bindx, &val, N_update, num_PDE_eqns, update);

  AZ_transform_norowreordering(proc_config, &external, bindx, val,  update, &update_index,
	       &extern_index, &data_org, N_update, 0, 0, 0, &cpntr,
	       AZ_MSR_MATRIX);

  Amat = AZ_matrix_create( leng );
  AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL);

  Amat->matrix_type  = data_org[AZ_matrix_type];

  data_org[AZ_N_rows]  = data_org[AZ_N_internal] + data_org[AZ_N_border];

#ifdef SCALE_ME
  ML_MSR_sym_diagonal_scaling(Amat, proc_config, &scaling_vect);
#endif

  start_time = AZ_second();

  options[AZ_scaling] = AZ_none;
  ML_Create(&ml, N_levels);
  ML_Set_PrintLevel(10);


  /* set up discretization matrix and matrix vector function */

  AZ_ML_Set_Amat(ml, N_levels-1, N_update, N_update, Amat, proc_config);

#ifdef ML_partition

  /* this code is meant to partition the matrices so that things can be */
  /* run in parallel later.                                             */
  /* It is meant to be run on only one processor.                       */
#ifdef	MB_MODIF
  fp2 = fopen(".update","w");
#else
  fp2 = fopen("partition_file","w");
#endif

  ML_Operator_AmalgamateAndDropWeak(&(ml->Amat[N_levels-1]), num_PDE_eqns, 0.0);
  ML_Gen_Blocks_Metis(ml, N_levels-1, &nblocks, &block_list);

  for (i = 0; i < nblocks; i++) {
     count = 0;
     for (j = 0; j < ml->Amat[N_levels-1].outvec_leng; j++) {
        if (block_list[j] == i) count++;
     }
     fprintf(fp2,"   %d\n",count*num_PDE_eqns);
     for (j = 0; j < ml->Amat[N_levels-1].outvec_leng; j++) {
        if (block_list[j] == i) {
           for (k = 0; k < num_PDE_eqns; k++)  fprintf(fp2,"%d\n",j*num_PDE_eqns+k);
        }
     }
  }
  fclose(fp2);
  ML_Operator_UnAmalgamateAndDropWeak(&(ml->Amat[N_levels-1]),num_PDE_eqns,0.0);
#ifdef	MB_MODIF
  printf(" partition file dumped in .update\n");
#endif
  exit(1);
#endif

  ML_Aggregate_Create( &ag );
/*
  ML_Aggregate_Set_CoarsenScheme_MIS(ag);
*/
#ifdef MB_MODIF
  ML_Aggregate_Set_DampingFactor(ag,1.50);
#else
  ML_Aggregate_Set_DampingFactor(ag,1.5);
#endif
  ML_Aggregate_Set_CoarsenScheme_METIS(ag);
  ML_Aggregate_Set_NodesPerAggr( ml, ag, -1, 35);
  /*
  ML_Aggregate_Set_Phase3AggregateCreationAggressiveness(ag, 10.001);
  */


  ML_Aggregate_Set_Threshold(ag, 0.0);
  ML_Aggregate_Set_MaxCoarseSize( ag, 300);


  /* read in the rigid body modes */

   Nrigid = 0;

  /* to ensure compatibility with RBM dumping software */
   if (proc_config[AZ_node] == 0) {

      sprintf(filename,"rigid_body_mode%02d",Nrigid+1);
      while( (fp = fopen(filename,"r")) != NULL) {
          which_filename = 1;
          fclose(fp);
          Nrigid++;
          sprintf(filename,"rigid_body_mode%02d",Nrigid+1);
      }
      sprintf(filename,"rigid_body_mode%d",Nrigid+1);
      while( (fp = fopen(filename,"r")) != NULL) {
          fclose(fp);
          Nrigid++;
          sprintf(filename,"rigid_body_mode%d",Nrigid+1);
      }
    }

    Nrigid = AZ_gsum_int(Nrigid,proc_config);

    if (Nrigid != 0) {
       rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) );
       if (rigid == NULL) {
          printf("Error: Not enough space for rigid body modes\n");
          exit(1);
       }
    }

    rhs   = (double *) malloc(leng*sizeof(double));
    xxx   = (double *) malloc(leng*sizeof(double));

    for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0;



    for (i = 0; i < Nrigid; i++) {
       if (which_filename == 1) sprintf(filename,"rigid_body_mode%02d",i+1);
       else sprintf(filename,"rigid_body_mode%d",i+1);
       AZ_input_msr_matrix(filename,update,&mode,&garbage,N_update,proc_config);
       AZ_reorder_vec(mode, data_org, update_index, NULL);
       /* here is something to stick a rigid body mode in as the initial  */
       /* guess. The idea is to solve A x = 0 without smoothing with a    */
       /* two level method. If everything is done properly, we should     */
       /* converge in 2 iterations.                                       */
       /* Note: we must also zero out components of the rigid body        */
       /* mode that correspond to Dirichlet bcs.                          */

       if (i == -4) {
          for (iii = 0; iii < leng; iii++) xxx[iii] = mode[iii];

          ccc = 0;
          Amatrix = &(ml->Amat[N_levels-1]);
          for (iii = 0; iii < Amatrix->outvec_leng; iii++) {
             ML_get_matrix_row(Amatrix,1,&iii,&allocated,&rowi_col,&rowi_val,
                               &rowi_N, 0);
             count2 = 0;
             for (j = 0; j < rowi_N; j++) if (rowi_val[j] != 0.) count2++;
             if (count2 <= 1) { xxx[iii] = 0.; ccc++; }
          }
          free(rowi_col); free(rowi_val);
          allocated = 0; rowi_col = NULL; rowi_val = NULL;
       }

       /*
        *  Rescale matrix/rigid body modes and checking
        *
        AZ_sym_rescale_sl(mode, Amat->data_org, options, proc_config, scaling);
        Amat->matvec(mode, rigid, Amat, proc_config);
        for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]);
        */

        /* Here is some code to check that the rigid body modes are  */
        /* really rigid body modes. The idea is to multiply by A and */
        /* then to zero out things that we "think" are boundaries.   */
        /* In this hardwired example, things near boundaries         */
        /* correspond to matrix rows that do not have 81 nonzeros.   */
        /*

        Amode = (double *) malloc(leng*sizeof(double));
        Amat->matvec(mode, Amode, Amat, proc_config);
        j = 0;
        biggest = 0.0;
        for (ii = 0; ii < N_update; ii++) {
           if ( Amat->bindx[ii+1] - Amat->bindx[ii] != 80) {
              Amode[ii] = 0.; j++;
           }
           else {
              if ( fabs(Amode[ii]) > biggest) {
                 biggest=fabs(Amode[ii]); big_ind = ii;
              }
           }
        }
        printf("%d entries zeroed out of %d elements\n",j,N_update);
        alpha = AZ_gdot(N_update, Amode, Amode, proc_config);
        beta  = AZ_gdot(N_update,  mode,  mode, proc_config);
        printf("||A r||^2 =%e, ||r||^2 = %e, ratio = %e\n",
               alpha,beta,alpha/beta);
        printf("the biggest is %e at row %d\n",biggest,big_ind);
        free(Amode);

        */

        /* orthogonalize mode with respect to previous modes. */

        for (j = 0; j < i; j++) {
           alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/
                    AZ_gdot(N_update, &(rigid[j*N_update]),
                               &(rigid[j*N_update]), proc_config);
	   /*           daxpy_(&N_update,&alpha,&(rigid[j*N_update]),  &one, mode, &one); */
        }
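        /* Note: as written, only the projection coefficient alpha is       */
        /* computed each pass; the commented-out daxpy_ above is what would */
        /* actually subtract the projection inside the loop, e.g.:          */
        /*                                                                  */
        /*   int one = 1;                                                   */
        /*   daxpy_(&N_update, &alpha, &(rigid[j*N_update]), &one,          */
        /*          mode, &one);                                            */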
#ifndef	MB_MODIF
       printf(" after mb %e %e %e\n",mode[0],mode[1],mode[2]);
#endif

        for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j];
        free(mode);
        free(garbage); garbage = NULL;

    }

    if (Nrigid != 0) {
       ML_Aggregate_Set_BlockDiagScaling(ag);
       ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update);
       free(rigid);
    }
#ifdef SCALE_ME
    ML_Aggregate_Scale_NullSpace(ag, scaling_vect, N_update);
#endif

    coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, N_levels-1,
				ML_DECREASING, ag);
   AZ_defaults(options, params);
   coarsest_level = N_levels - coarsest_level;
   if ( proc_config[AZ_node] == 0 )
	printf("Coarse level = %d \n", coarsest_level);

   /* set up smoothers */

   for (level = N_levels-1; level > coarsest_level; level--) {

/*
      ML_Gen_Smoother_BlockGaussSeidel(ml, level,ML_BOTH, 1, 1., num_PDE_eqns);
*/

    /*  Sparse approximate inverse smoother that actually does both */
    /*  pre and post smoothing.                                     */
    /*
      ML_Gen_Smoother_ParaSails(ml , level, ML_PRESMOOTHER, nsmooth,
                                parasails_sym, parasails_thresh,
                                parasails_nlevels, parasails_filter,
                                parasails_loadbal, parasails_factorized);
     */

     /* This is the symmetric Gauss-Seidel smoothing that we usually use. */
     /* In parallel, it is not a true Gauss-Seidel in that each processor */
     /* does a Gauss-Seidel on its local submatrix independent of the     */
     /* other processors.                                                 */

     /* ML_Gen_Smoother_Cheby(ml, level, ML_BOTH, 30., nsmooth); */
     Ndof = ml->Amat[level].invec_leng;

     ML_Gen_Blocks_Aggregates(ag, level, &nBlocks, &blockIndices);

     ML_Gen_Smoother_BlockDiagScaledCheby(ml, level, ML_BOTH, 30.,nsmooth,
					  nBlocks, blockIndices);

     /*
      ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_BOTH, nsmooth,1.);
     */


      /* This is a true Gauss Seidel in parallel. This seems to work for  */
      /* elasticity problems.  However, I don't believe that this is very */
      /* efficient in parallel.                                           */
     /*
      nblocks = ml->Amat[level].invec_leng/num_PDE_eqns;
      blocks = (int *) ML_allocate(sizeof(int)*N_update);
      for (i =0; i < ml->Amat[level].invec_leng; i++)
         blocks[i] = i/num_PDE_eqns;

      ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml , level, ML_PRESMOOTHER,
                                                  nsmooth, 1., nblocks, blocks);
      ML_Gen_Smoother_VBlockSymGaussSeidelSequential(ml, level, ML_POSTSMOOTHER,
                                                  nsmooth, 1., nblocks, blocks);
      free(blocks);
*/

      /* Block Jacobi Smoothing */
      /*
      nblocks = ml->Amat[level].invec_leng/num_PDE_eqns;
      blocks = (int *) ML_allocate(sizeof(int)*N_update);
      for (i =0; i < ml->Amat[level].invec_leng; i++)
         blocks[i] = i/num_PDE_eqns;

      ML_Gen_Smoother_VBlockJacobi(ml , level, ML_BOTH, nsmooth,
                                   ML_ONE_STEP_CG, nblocks, blocks);
      free(blocks);
      */

      /* Jacobi Smoothing                                                 */
     /*

      ML_Gen_Smoother_Jacobi(ml , level, ML_PRESMOOTHER, nsmooth, ML_ONE_STEP_CG);
      ML_Gen_Smoother_Jacobi(ml , level, ML_POSTSMOOTHER, nsmooth,ML_ONE_STEP_CG);
     */



      /*  This does a block Gauss-Seidel (not true GS in parallel)        */
      /*  where each processor has 'nblocks' blocks.                      */
      /*
      nblocks = 250;
      ML_Gen_Blocks_Metis(ml, level, &nblocks, &blocks);
      ML_Gen_Smoother_VBlockJacobi(ml , level, ML_BOTH, nsmooth,ML_ONE_STEP_CG,
                                        nblocks, blocks);
      free(blocks);
      */
      num_PDE_eqns = 6;
   }
   /* Choose coarse grid solver: mls, superlu, symGS, or Aztec */

   /*
   ML_Gen_Smoother_Cheby(ml, coarsest_level, ML_BOTH, 30., nsmooth);
   ML_Gen_CoarseSolverSuperLU( ml, coarsest_level);
   */
   /*
   ML_Gen_Smoother_SymGaussSeidel(ml , coarsest_level, ML_BOTH, nsmooth,1.);
   */

   old_prec = options[AZ_precond];
   old_sol  = options[AZ_solver];
   old_tol  = params[AZ_tol];
   params[AZ_tol] = 1.0e-9;  /* overridden by the next line */
   params[AZ_tol] = 1.0e-5;
   options[AZ_precond] = AZ_Jacobi;
   options[AZ_solver]  = AZ_cg;
   options[AZ_poly_ord] = 1;
   options[AZ_conv] = AZ_r0;
   options[AZ_orth_kvecs] = AZ_TRUE;

   j = AZ_gsum_int(ml->Amat[coarsest_level].outvec_leng, proc_config);

   options[AZ_keep_kvecs] = j - 6;
   options[AZ_max_iter] =  options[AZ_keep_kvecs];
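   /* Coarse-level "smoother": CG with stored, explicitly orthogonalized  */
   /* Krylov vectors (AZ_orth_kvecs / AZ_keep_kvecs). keep_kvecs is the   */
   /* global coarse size minus 6 -- presumably discounting the six rigid  */
   /* body modes -- so the stored vectors span (nearly) the whole coarse  */
   /* space and repeated applications act almost like a direct solve.     */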

   ML_Gen_SmootherAztec(ml, coarsest_level, options, params,
            proc_config, status, options[AZ_keep_kvecs], ML_PRESMOOTHER, NULL);

   options[AZ_conv] = AZ_noscaled;
   options[AZ_keep_kvecs] = 0;
   options[AZ_orth_kvecs] = 0;
   options[AZ_precond] = old_prec;
   options[AZ_solver] = old_sol;
   params[AZ_tol] = old_tol;



#ifdef RST_MODIF
   ML_Gen_Solver(ml, ML_MGV, N_levels-1, coarsest_level);
#else
#ifdef	MB_MODIF
   ML_Gen_Solver(ml, ML_SAAMG,   N_levels-1, coarsest_level);
#else
   ML_Gen_Solver(ml, ML_MGFULLV, N_levels-1, coarsest_level);
#endif
#endif

   options[AZ_solver]   = AZ_GMRESR;  /* overridden by the next line */
   options[AZ_solver]   = AZ_cg;
   options[AZ_scaling]  = AZ_none;
   options[AZ_precond]  = AZ_user_precond;
   options[AZ_conv]     = AZ_r0;      /* overridden by the next line */
   options[AZ_conv]     = AZ_noscaled;
   options[AZ_output]   = 1;
   options[AZ_max_iter] = 500;
   options[AZ_poly_ord] = 5;
   options[AZ_kspace]   = 40;
   params[AZ_tol]       = 4.8e-6;

   AZ_set_ML_preconditioner(&Pmat, Amat, ml, options);
   setup_time = AZ_second() - start_time;

   /* Set rhs */

   fp = fopen("AZ_capture_rhs.dat","r");
   if (fp == NULL) {
      AZ_random_vector(rhs, data_org, proc_config);
      if (proc_config[AZ_node] == 0) printf("taking random vector for rhs\n");
      for (i = 0; i < -N_update; i++) {  /* negative bound: loop disabled */
        rhs[i] = (double) update[i]; rhs[i] = 7.;
      }
   }
   else {
      if (proc_config[AZ_node]== 0) printf("reading rhs guess from file\n");
      AZ_input_msr_matrix("AZ_capture_rhs.dat", update, &rhs, &garbage,
			  N_update, proc_config);
      free(garbage);
   }
   AZ_reorder_vec(rhs, data_org, update_index, NULL);

   printf("changing rhs by multiplying with A\n");
  Amat->matvec(rhs, xxx, Amat, proc_config);
  for (i = 0; i < N_update; i++) rhs[i] = xxx[i];

   fp = fopen("AZ_capture_init_guess.dat","r");
   if (fp != NULL) {
      fclose(fp);
      if (proc_config[AZ_node]== 0) printf("reading initial guess from file\n");
      AZ_input_msr_matrix("AZ_capture_init_guess.dat", update, &xxx, &garbage,
      			  N_update, proc_config);
      free(garbage);


      xxx = (double *) realloc(xxx, sizeof(double)*(
					 Amat->data_org[AZ_N_internal]+
					 Amat->data_org[AZ_N_border] +
					 Amat->data_org[AZ_N_external]));
   }
   AZ_reorder_vec(xxx, data_org, update_index, NULL);

   /* if Dirichlet BC ... put the answer in */

/*
   for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) {
      if ( (val[i] > .99999999) && (val[i] < 1.0000001))
         xxx[i] = rhs[i];
   }
*/

   fp = fopen("AZ_no_multilevel.dat","r");
   scaling = AZ_scaling_create();
   start_time = AZ_second();


   if (fp != NULL) {
      fclose(fp);
      options[AZ_precond] = AZ_none;
      options[AZ_scaling] = AZ_sym_diag;
      options[AZ_ignore_scaling] = AZ_TRUE;

      options[AZ_keep_info] = 1;
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);

/*
      options[AZ_pre_calc] = AZ_reuse;
      options[AZ_conv] = AZ_expected_values;
      if (proc_config[AZ_node] == 0)
              printf("\n-------- Second solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);
      if (proc_config[AZ_node] == 0)
              printf("\n-------- Third solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);
*/
   }
   else {
      options[AZ_keep_info] = 1;
      options[AZ_conv] = AZ_noscaled;
      options[AZ_conv] = AZ_r0;
      params[AZ_tol] = 1.0e-7;
      /* ML_Iterate(ml, xxx, rhs); */
      alpha = sqrt(AZ_gdot(N_update, xxx, xxx, proc_config));
      printf("init guess = %e\n",alpha);
      alpha = sqrt(AZ_gdot(N_update, rhs, rhs, proc_config));
      printf("rhs = %e\n",alpha);
#ifdef SCALE_ME
	ML_MSR_scalerhs(rhs, scaling_vect, data_org[AZ_N_internal] +
                    data_org[AZ_N_border]);
	ML_MSR_scalesol(xxx, scaling_vect, data_org[AZ_N_internal] +
			data_org[AZ_N_border]);
#endif

      /* Diagnostic scan: in MSR storage val[i] is the diagonal of row i. */
      max_diag = 0.;
      min_diag = 1.e30;
      max_sum  = 0.;
      for (i = 0; i < N_update; i++) {
         if (Amat->val[i] < 0.) printf("woops negative diagonal A(%d,%d) = %e\n",
                                       i,i,Amat->val[i]);
         if (Amat->val[i] > max_diag) max_diag = Amat->val[i];
         if (Amat->val[i] < min_diag) min_diag = Amat->val[i];
         sum = fabs(Amat->val[i]);
         for (j = Amat->bindx[i]; j < Amat->bindx[i+1]; j++) {
            sum += fabs(Amat->val[j]);
         }
         if (sum > max_sum) max_sum = sum;
      }
      printf("Largest diagonal = %e, min diag = %e large abs row sum = %e\n",
             max_diag, min_diag, max_sum);

      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);

      options[AZ_pre_calc] = AZ_reuse;
      options[AZ_conv] = AZ_expected_values;
/*
      if (proc_config[AZ_node] == 0)
              printf("\n-------- Second solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);
      if (proc_config[AZ_node] == 0)
              printf("\n-------- Third solve with improved convergence test -----\n");
      AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling);
*/
   }
   solve_time = AZ_second() - start_time;

   if (proc_config[AZ_node] == 0)
      printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time);
   if (proc_config[AZ_node] == 0)
     printf("Printing out a few entries of the solution ...\n");

   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 7) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 23) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 47) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 101) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}
   j = AZ_gsum_int(7, proc_config); /* sync processors */
   for (j=0;j<Amat->data_org[AZ_N_internal]+ Amat->data_org[AZ_N_border];j++)
     if (update[j] == 171) {printf("solution(gid = %d) = %10.4e\n",
			      update[j],xxx[update_index[j]]); fflush(stdout);}


   ML_Aggregate_Destroy(&ag);
   ML_Destroy(&ml);
   AZ_free((void *) Amat->data_org);
   AZ_free((void *) Amat->val);
   AZ_free((void *) Amat->bindx);
   AZ_free((void *) update);
   AZ_free((void *) external);
   AZ_free((void *) extern_index);
   AZ_free((void *) update_index);
   AZ_scaling_destroy(&scaling);
   if (Amat  != NULL) AZ_matrix_destroy(&Amat);
   if (Pmat  != NULL) AZ_precond_destroy(&Pmat);
   free(xxx);
   free(rhs);


#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return 0;

}
Example #23
0
File: main.c Project: goma/goma
int
main(int argc, char **argv)
     
     /*
      * Initial main driver for GOMA. Derived from a (1/93) release of
      * the rf_salsa program by
      *        
      *        Original Authors: John  Shadid (1421)
      *		                 Scott Hutchinson (1421)
      *        		         Harry Moffat (1421)
      *       
      *        Date:		12/3/92
      * 
      *
      *        Updates and Changes by:
      *                           Randy Schunk (9111)
      *                           P. A. Sackinger (9111)
      *                           R. R. Rao       (9111)
      *                           R. A. Cairncross (Univ. of Delaware)
      *        Dates:           2/93 - 6/96
      *
      *       Modified for continuation
      *                           Ian Gates
      *       Dates:            2/98 - 10/98
      *       Dates:            7/99 - 8/99
      * 
      * Last modified: Wed  June 26 14:21:35 MST 1994 [email protected]
      * 
      * Note: Many modifications from an early 2/93 pre-release
      *	      version of rf_salsa were made by various persons 
      *       in order to test ideas about moving/deforming meshes...
      */ 
{
  /* Local Declarations */

  double time_start, total_time;   /* timing variables */
#ifndef PARALLEL
  /*  struct tm *tm_ptr;               additional serial timing variables */
  time_t now;
#endif

  int error;
  int i;
  int j;

  char	**ptmp;
  char *yo;

  struct Command_line_command **clc=NULL; /* point to command line structure */
  int           nclc = 0;		/* number of command line commands */

/********************** BEGIN EXECUTION ***************************************/

#ifdef FP_EXCEPT
  feenableexcept ((FE_OVERFLOW | FE_DIVBYZERO | FE_INVALID));
#endif

/* assume number of commands is less than or equal to the number of 
 * arguments in the command line minus 1 (1st is program name) */

  /*
  *  Get the name of the executable, yo
  */
  yo = argv[0];

#ifdef PARALLEL
  MPI_Init(&argc, &argv);
  time_start = MPI_Wtime();
#endif /* PARALLEL */
#ifndef PARALLEL
  (void)time(&now);
  time_start = (double)now;
#endif /* PARALLEL */

  time_goma_started = time_start;

  Argv = argv;

  Argc = argc;

#ifdef PARALLEL
  /*
   * Determine the parallel processing status, if any. We need to know
   * pretty early if we're "one of many" or the only process.
   */

  error = MPI_Comm_size(MPI_COMM_WORLD, &Num_Proc);
  error = MPI_Comm_rank(MPI_COMM_WORLD, &ProcID);

  /*
   * Setup a default Proc_config so we can use utility routines 
   * from Aztec
   */

  AZ_set_proc_config(Proc_Config, MPI_COMM_WORLD);

  /* set the output limit flag if need be */

  if( Num_Proc > DP_PROC_PRINT_LIMIT ) Unlimited_Output = FALSE;

#ifdef HAVE_MPE_H
  error = MPE_Init_log();
#endif /* HAVE_MPE_H */

  Dim = 0;			/* for any hypercube legacy code...  */

#endif /* PARALLEL */
  
#ifndef PARALLEL
  Dim        = 0;
  ProcID     = 0;
  Num_Proc   = 1;
#endif /* PARALLEL */


  /*
  *   HKM - Change the ieee exception handling based on the machine and
  *         the level of debugging/speed desired. This call currently causes
  *         core dumps for floating point exceptions.
  */

  handle_ieee();
  
  log_msg("--------------");
  log_msg("GOMA begins...");

  /*
   * Some initial stuff that only the master process does.
   */

  if ( ProcID == 0 )
    {
      if (argc > 1)
	{
	  log_msg("Preprocessing command line options.");
	  clc = (struct Command_line_command **) 
	    smalloc( argc * sizeof(struct Command_line_command *));
	  for (i=0; i<argc; i++)
	    {
	      clc[i] = (struct Command_line_command *) 
		smalloc(sizeof(struct Command_line_command));
	      clc[i]->type   = 0; /* initialize command line structure */
	      clc[i]->i_val  = 0;
	      clc[i]->r_val  = 0.;
	      clc[i]->string = (char *) 
		smalloc(MAX_COMMAND_LINE_LENGTH*sizeof(char));
	      for ( j=0; j<MAX_COMMAND_LINE_LENGTH; j++)
		{
		  clc[i]->string[j] = '\0';
		}
#ifdef DEBUG
	      fprintf(stderr, "clc[%d]->string is at 0x%x\n", i, clc[i]->string);
	      fprintf(stderr, "clc[%d]         is at 0x%x\n", i, clc[i]);
#endif
	    }
	}

      strcpy(Input_File, "input");
      strcpy(Echo_Input_File , "echo_input");

      if (argc > 1) translate_command_line(argc, argv, clc, &nclc);

      ECHO("OPEN", Echo_Input_File);

      echo_command_line( argc, argv, Echo_Input_File );
      print_code_version();
      ptmp = legal_notice;
      while ( strcmp(*ptmp, LAST_LEGAL_STRING) != 0 )
	{
	  fprintf(stderr, "%s", *ptmp++);
	}
    }

  /*
   *  Allocate the uniform problem description structure and
   *  the problem description structures on all processors
   */
  error = pd_alloc();
  EH(error, "pd_alloc problem");

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier after pd_alloc\n", ProcID);
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif

  log_msg("Allocating mp, gn, ...");

  error = mp_alloc();
  EH(error, "mp_alloc problem");

  error = gn_alloc();
  EH(error, "gn_alloc problem");

  error = ve_alloc();
  EH(error, "ve_alloc problem");

  error = elc_alloc();
  EH(error, "elc_alloc problem");

  error = elc_rs_alloc();
  EH(error, "elc_alloc problem");

  error = cr_alloc();
  EH(error, "cr_alloc problem");

  error = evp_alloc();
  EH(error, "evp_alloc problem");

  error = tran_alloc();
  EH(error, "tran_alloc problem");

  error = eigen_alloc();
  EH(error, "eigen_alloc problem");

  error = cont_alloc();
  EH(error, "cont_alloc problem");

  error = loca_alloc();
  EH(error, "loca_alloc problem");

  error = efv_alloc();
  EH(error, "efv_alloc problem");

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before read_input_file()\n", ProcID);
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif

  /*
   * Read ASCII input file, data files, related exodusII FEM databases.
   */	
  if ( ProcID == 0 )
    {
      log_msg("Reading input file ...");
      read_input_file(clc, nclc); /* Read ascii input file get file names */

      /* update the input data to account for command line arguments that
       * might override the input deck...
       */
      log_msg("Overriding any input file specs w/ any command line specs...");
      if (argc > 1) apply_command_line(clc, nclc);

#ifdef DEBUG
      DPRINTF(stderr, "apply_command_line() is done.\n");
#endif
    }

  /*
   * The user-defined material properties, etc. available to goma users
   * mean that some dynamically allocated data needs to be communicated.
   *
   * To handle this, sizing information from the input file scan is
   * broadcast in stages so that the other processors can allocate space
   * accordingly to hold the data.
   *
   * Note: instead of handpacking a data structure, use MPI derived datatypes
   * to gather and scatter. Pray this is done efficiently. Certainly it costs
   * less from a memory standpoint.
   */
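  /*
   * A minimal sketch of that pattern (hypothetical field names, not the
   * actual Noahs_Ark layout): describe the absolute addresses of the
   * scattered allocations in a derived type, then broadcast from MPI_BOTTOM:
   *
   *   MPI_Datatype t;
   *   int          blk[2] = {1, len};
   *   MPI_Aint     disp[2];
   *   MPI_Datatype typ[2] = {MPI_INT, MPI_DOUBLE};
   *   MPI_Get_address(&some_int,    &disp[0]);
   *   MPI_Get_address(some_doubles, &disp[1]);
   *   MPI_Type_create_struct(2, blk, disp, typ, &t);
   *   MPI_Type_commit(&t);
   *   MPI_Bcast(MPI_BOTTOM, 1, t, 0, MPI_COMM_WORLD);
   *   MPI_Type_free(&t);
   */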

#ifdef PARALLEL

  /*
   *  Make sure the input file was successfully processed before moving on
   */
  check_parallel_error("Input file error");


  /*
   * This is some sizing information that helps fit a little bit more
   * onto the ark later on.
   */

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before noahs_raven()\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif

  noahs_raven();

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before MPI_Bcast of Noahs_Raven\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif

  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Raven->new_type, 0, MPI_COMM_WORLD);

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier after Bcast/before raven_landing()\n", 
	  ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif  
  /*
   * Get the other processors ready to handle ark data.
   */

  raven_landing();

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before noahs_ark()\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
  
  
  /*
   * This is the main body of communicated information, including some
   * whose sizes were determined because of advanced legwork by the raven.
   */

  noahs_ark();
  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Ark->new_type, 0, MPI_COMM_WORLD);

  /*
   * Chemkin was initialized on processor zero during the input file
   * process. Now, distribute it to all processors
   */
#ifdef USE_CHEMKIN
  if (Chemkin_Needed) {
    chemkin_initialize_mp();
  }
#endif 

  /*
   * Once the ark has landed, there are additional things that will need to
   * be sent by dove. Example: BC_Types[]->u-BC arrays.
   *
   */

  ark_landing();

  noahs_dove();
  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Dove->new_type, 0, MPI_COMM_WORLD);


#endif          /* End of ifdef PARALLEL */


  /*
   * We sent the packed line to all processors that contained geometry
   * creation commands.  Now we need to step through it and create
   * geometry as we go (including possibly reading an ACIS .sat file).
   *
   */

  /* Check to see if the BRK File option exists and if so check if the file exists */
  if (Brk_Flag == 1) {
    check_for_brkfile(Brk_File);
  }
  check_parallel_error("Error encountered in check for brkfile");

  /* Now break the exodus files */
  if (Num_Proc > 1 && ProcID == 0 && Brk_Flag == 1) {
    call_brk();
  }
  check_parallel_error("Error in brking exodus files");
  MPI_Barrier(MPI_COMM_WORLD);

  /*
   * For parallel execution, assume the following variables will be changed
   * to reflect the multiple file aspect of the problem.
   *
   *	FEM file = file.exoII		--> file_3of15.exoII
   *
   *	Output EXODUS II file = out.exoII --> out_3of15.exoII
   *
   */


  /*
   * Allocate space for structures holding the EXODUS II finite element
   * database information and for the Distributed Processing information.
   *
   * These are mostly skeletons with pointers that get allocated in the
   * rd_exoII and rd_dpi routines. Remember to free up those arrays first
   * before freeing the major pointers.
   */

  EXO_ptr = alloc_struct_1(Exo_DB, 1);
  init_exo_struct(EXO_ptr);
  DPI_ptr = alloc_struct_1(Dpi, 1);
  init_dpi_struct(DPI_ptr);  

  log_msg("Reading mesh from EXODUS II file...");
  error = read_mesh_exoII(EXO_ptr, DPI_ptr);

  /*
   *   Missing files on any processor are detected at a lower level
   *   forcing a return to the higher level
   *         rd_exo -->  rd_mesh  -->  main
   *   Shutdown now, if any of the exodus files weren't found
   */
  if (error < 0) {
#ifdef PARALLEL
    MPI_Finalize();
#endif
    return(-1);
  }

  /*
   * All of the MPI_Type_commit() calls called behind the scenes that build
   * the dove, ark and raven really allocated memory. Let's free it up now that
   * the initial information has been communicated.
   */

#ifdef PARALLEL
  MPI_Type_free(&(Noahs_Raven->new_type));
  MPI_Type_free(&(Noahs_Ark->new_type));
  MPI_Type_free(&(Noahs_Dove->new_type));
#endif   

  /*
   * Setup the rest of the Problem Description structure that depends on
   * the mesh that was read in from the EXODUS II file...
   * 
   * Note that memory allocation and some setup has already been performed
   * in mm_input()...
   */

  error = setup_pd();
  EH( error, "Problem setting up Problem_Description.");
  /*
   * Let's check to see if we need the large elasto-plastic global tensors
   * and allocate them if so 
   */
  error = evp_tensor_alloc(EXO_ptr);
  EH( error, "Problems setting up evp tensors");
  
  /*
   * Now that we know about what kind of problem we're solving and the
   * mesh information, let's allocate space for elemental assembly structures
   *
   */
#ifdef DEBUG
  DPRINTF(stderr, "About to assembly_alloc()...\n");
#endif
  log_msg("Assembly allocation...");

  error = assembly_alloc(EXO_ptr);
  EH( error, "Problem from assembly_alloc");

  if (Debug_Flag)  {
    DPRINTF(stderr, "%s:  setting up EXODUS II output files...\n", yo);
  }

  /*
   * These are not critical - just niceties. Also, they should not overburden
   * your db with too much of this - they're capped verbiage compliant routines.
   */

  add_qa_stamp(EXO_ptr);

  add_info_stamp(EXO_ptr);

#ifdef DEBUG
  fprintf(stderr, "added qa and info stamps\n");
#endif

  /*
   * If the output EXODUS II database file is different from the input
   * file, then we'll need to replicate all the basic mesh information.
   * But, remember that if we're parallel, that the output file names must
   * be multiplexed first...
   */
  if ( Num_Proc > 1 )
    {
      multiname(ExoFileOut,     ProcID, Num_Proc);      
      multiname(Init_GuessFile, ProcID, Num_Proc);

      if ( strcmp( Soln_OutFile, "" ) != 0 )
	{
	  multiname(Soln_OutFile, ProcID, Num_Proc);
	}

      if( strcmp( ExoAuxFile, "" ) != 0 )
        {
          multiname(ExoAuxFile, ProcID, Num_Proc);
        }

      if( efv->Num_external_field != 0 )
        {
          for( i=0; i<efv->Num_external_field; i++ )
            {
              multiname(efv->file_nm[i], ProcID, Num_Proc);
            }
        }
    }


  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *   Preprocess the exodus mesh
   *        -> Allocate pointers to structures containing element
   *           side bc info, First_Elem_Side_BC_Array, and
   *           element edge info, First_Elem_Edge_BC_Array.
   *        -> Determine Unique_Element_Types[] array
   */
#ifdef DEBUG
  fprintf(stderr, "pre_process()...\n");
#endif
  log_msg("Pre processing of mesh...");
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
  pre_process(EXO_ptr);

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   * Load up a few key indices in the bfd prototype basis function structures
   * and make sure that each active eqn/vbl has a bf[v] that points to the
   * right bfd[]...needs pre_process to find out the number of unique
   * element types in the problem.
   */

#ifdef DEBUG
  fprintf(stderr, "bf_init()...\n");
#endif
  log_msg("Basis function initialization...");
  error = bf_init(EXO_ptr);
  EH( error, "Problem from bf_init");

  /*
   * check for parallel errors before continuing
   */
  check_parallel_error("Error encountered in problem setup");

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/  
  /*
   * Allocate space for each communication exchange description.
   */
#ifdef PARALLEL
#ifdef DEBUG
  fprintf(stderr, "P_%d: Parallel cx allocation\n", ProcID);
#endif
  if (DPI_ptr->num_neighbors > 0) {
    cx = alloc_struct_1(Comm_Ex, DPI_ptr->num_neighbors);
    Request = alloc_struct_1(MPI_Request, 
			     Num_Requests * DPI_ptr->num_neighbors);
    Status = alloc_struct_1(MPI_Status, 
			    Num_Requests * DPI_ptr->num_neighbors);
  }
#endif

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *                           SET UP THE PROBLEM
   *
   * Setup node-based structures
   * Finalize how boundary conditions are to be handled
   * Determine what unknowns are at each owned node and then tell
   *  neighboring processors about your nodes
   * Set up communications pattern for fast unknown updates between
   *  processors.
   */
  (void) setup_problem(EXO_ptr, DPI_ptr);

  /*
   * check for parallel errors before continuing
   */
  check_parallel_error("Error encountered in problem setup");

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *               CREATE BRK_FILE IF ONE DOES NOT EXIST
   *
   * If no Brk_File exists but the option was requested in the input deck
   * or on the command line, create one now and exit from goma.
   */
  if ( Brk_Flag == 2 ) {
    write_brk_file(Brk_File, EXO_ptr);
    exit(0);
  }
  
  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *                     WRITE OUT INITIAL INFO TO EXODUS FILE
   */

  /*
   *  Only have to initialize the exodus file if we are using different
   *  files for the output versus the input mesh
   */
  if (strcmp(ExoFile, ExoFileOut)) {
    /*
     * Temporarily we'll need to renumber the nodes and elements in the
     * mesh to be 1-based. After writing, return to the 0 based indexing
     * that is more convenient in C.
     */
#ifdef DEBUG
    fprintf(stderr, "1-base; wr_mesh; 0-base\n");
#endif
    one_base(EXO_ptr);
    wr_mesh_exo(EXO_ptr, ExoFileOut, 0);
    zero_base(EXO_ptr);
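    /*
     * A sketch of the idea (assuming one_base()/zero_base() are plain
     * offset shifts): EXODUS II numbers nodes and elements from 1 while C
     * uses 0, so roughly:
     *
     *   for (i = 0; i < num_entries; i++) conn[i] += 1;    -- one_base
     *   for (i = 0; i < num_entries; i++) conn[i] -= 1;    -- zero_base
     */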

    /*
     * If running on a distributed computer, augment the plain finite
     * element information of EXODUS with the description of how this
     * piece fits into the global problem.
     */
    if (Num_Proc > 1) {
#ifdef PARALLEL
#ifdef DEBUG
      fprintf(stderr, "P_%d at barrier before wr_dpi()\n", ProcID);
      fprintf(stderr, "P_%d ExoFileOut = \"%s\"\n", ProcID, ExoFileOut);
      error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif
      wr_dpi(DPI_ptr, ExoFileOut, 0);
    }
  }

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *                           SOLVE THE PROBLEM
   */

  if (Debug_Flag) {
    switch (Continuation) {
    case ALC_ZEROTH:
        P0PRINTF("%s: continue_problem (zeroth order) ...\n", yo);
        break;
    case  ALC_FIRST:
        P0PRINTF("%s: continue_problem (first order) ...\n", yo);
        break;
    case HUN_ZEROTH:
        P0PRINTF("%s: hunt_problem (zeroth order) ...\n", yo);
        break;
    case  HUN_FIRST:
        P0PRINTF("%s: hunt_problem (first order) ...\n", yo);
        break;
    case LOCA:
        P0PRINTF("%s: do_loca ...\n", yo);
        break;
    default:
        P0PRINTF("%s: solve_problem...\n", yo);
        break;
    }
  }  
#ifdef DEBUG
  switch (Continuation) {
  case ALC_ZEROTH:
      DPRINTF(stderr, "%s: continue_problem (zeroth order) ...\n", yo);
      break;
  case  ALC_FIRST:
      DPRINTF(stderr, "%s: continue_problem (first order) ...\n", yo);
      break;
  case HUN_ZEROTH:
      DPRINTF(stderr, "%s: hunt_problem (zeroth order) ...\n", yo);
      break;
  case  HUN_FIRST:
      DPRINTF(stderr, "%s: hunt_problem (first order) ...\n", yo);
      break;
  case LOCA:
      DPRINTF(stderr, "%s: do_loca ...\n", yo);
      break;
  default:
      DPRINTF(stderr, "%s: solve_problem...\n", yo);
      break;
  }
#endif

    
  if (TimeIntegration == TRANSIENT)
    {
      Continuation = ALC_NONE;
      if (Debug_Flag) {
        P0PRINTF("%s: solve_problem...TRANSIENT superseded Continuation...\n", yo);
      }
#ifdef DEBUG
      DPRINTF(stderr, "%s: solve_problem...TRANSIENT superseded Continuation...\n", yo);
#endif
      solve_problem(EXO_ptr, DPI_ptr, NULL);
    }

  switch (Continuation) {
  case ALC_ZEROTH:
  case ALC_FIRST:
    log_msg("Solving continuation problem");
    continue_problem(cx, EXO_ptr, DPI_ptr);
    break;
  case HUN_ZEROTH:
  case HUN_FIRST:
    log_msg("Solving hunt problem");
    hunt_problem(cx, EXO_ptr, DPI_ptr);
    break;
  case LOCA:
    log_msg("Solving continuation problem with LOCA");
    error = do_loca(cx, EXO_ptr, DPI_ptr);
    break;
  default:
    log_msg("Solving problem");
    if (loca_in->Cont_Alg == LOCA_LSA_ONLY)
      {
        error = do_loca(cx, EXO_ptr, DPI_ptr);
      }
    else if(TimeIntegration != TRANSIENT)
      {
        solve_problem(EXO_ptr, DPI_ptr, NULL);
      }
    break;
  }

#ifdef PARALLEL
   MPI_Barrier(MPI_COMM_WORLD);
#endif

  if (ProcID == 0 && Brk_Flag == 1 && Num_Proc > 1) {
    fix_output();
  }
  
  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *  PRINT A MESSAGE TO STDOUT SAYING WE ARE DONE
   */
  P0PRINTF("\n-done\n\n");

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *       FREE MEMORY ALLOCATED BY THE PROGRAM
   */
  /*
   * free the element block / element based structures
   */
  free_element_blocks(EXO_ptr);

  /*
   * free nodal based structures
   */
  free_nodes();
#ifdef FREE_PROBLEM
  free_problem ( EXO_ptr, DPI_ptr );
#endif

  /*
   * Free command line stuff
   */
  if ( ProcID == 0 )
    {
      if ( argc > 1 ) 
	{
	  for (i=0; i<argc; i++)
	    {
#ifdef DEBUG
	      fprintf(stderr, "clc[%d]->string &= 0x%x\n", i, clc[i]->string);
	      fprintf(stderr, "clc[%d]         &= 0x%x\n", i, clc[i]);
#endif
	      safer_free((void **) &(clc[i]->string));
	      safer_free((void **) (clc + i));
	    }
	  safer_free((void **) &clc);
	}
    }

  /*
   * Free exodus database structures
   */
  free_exo(EXO_ptr);
  safer_free((void **) &EXO_ptr);

  if ( Num_Proc > 1 )
  {
    free_dpi(DPI_ptr);
  }
  else
  {
    free_dpi_uni(DPI_ptr);
  }

  safer_free((void **) &DPI_ptr);

  /*
   * Remove front scratch file [/tmp/lu.'pid'.0]
   */
  if (Linear_Solver == FRONT)
    {
      unlerr = unlink(front_scratch_directory);
      WH(unlerr, "Unlink problem with front scratch file");
    }


#ifdef PARALLEL
  total_time = (MPI_Wtime() - time_start) / 60.;
  DPRINTF(stderr, "\nProc 0 runtime: %10.2f Minutes.\n\n", total_time);
  MPI_Finalize();
#endif  
#ifndef PARALLEL
  (void)time(&now);
  total_time = (double)(now) - time_start;
  fprintf(stderr, "\nProc 0 runtime: %10.2f Minutes.\n\n",total_time/60);
#endif  
  fflush(stdout);
  fflush(stderr);
  log_msg("GOMA ends normally.");
  return (0);
}
Beispiel #24
0
int main(int argc, char *argv[])
{
  char  global[]="global";
  char  local[]="local";

  int    proc_config[AZ_PROC_SIZE];/* Processor information.                */
  int    options[AZ_OPTIONS_SIZE]; /* Array used to select solver options.  */
  double params[AZ_PARAMS_SIZE];   /* User selected solver parameters.      */
  int    *data_org;                /* Array to specify data layout          */
  double status[AZ_STATUS_SIZE];   /* Information returned from AZ_solve(). */
  int    *update;                  /* vector elements updated on this node. */
  int    *external;                /* vector elements needed by this node.  */
  int    *update_index;            /* ordering of update[] and external[]   */
  int    *extern_index;            /* locally on this processor.            */
  int    *indx;   /* MSR format of real and imag parts */
  int    *bindx;
  int    *bpntr;
  int    *rpntr;
  int    *cpntr;
  AZ_MATRIX *Amat;
  AZ_PRECOND *Prec;
  double *val;
  double *x, *b, *xexact, *xsolve;
  int    n_nonzeros, n_blk_nonzeros;
  int    N_update;           /* # of block unknowns updated on this node    */
  int    N_local;                  /* Number of scalar equations on this node */
  int    N_global, N_blk_global; /* Total number of equations */
  int    N_external, N_blk_eqns;

  double *val_msr;
  int *bindx_msr;
  
  double norm, d ;

  int matrix_type;

  int has_global_indices, option;
  int i, j, m, mp ;
  int ione = 1;

#ifdef TEST_SINGULAR
  double * xnull; /* will contain difference of given exact solution and computed solution*/
  double * Axnull; /* Product of A time xnull */
  double norm_Axnull;
#endif

#ifdef AZTEC_MPI
  double MPI_Wtime(void) ;
#endif
  double time ;
#ifdef AZTEC_MPI
  MPI_Init(&argc,&argv);
#endif

  /* get number of processors and the name of this processor */
 
#ifdef AZTEC_MPI
  AZ_set_proc_config(proc_config,MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config,0);
#endif

  printf("proc %d of %d is alive\n",
	 proc_config[AZ_node],proc_config[AZ_N_procs]) ;

#ifdef AZTEC_MPI
  MPI_Barrier(MPI_COMM_WORLD) ;
#endif

#ifdef VBRMATRIX
  if (argc != 3) {
    fprintf(stderr, "error: enter name of data and partition file on command line\n");
    exit(1);
  }
#else
  if (argc != 2) {
    fprintf(stderr, "error: enter name of data file on command line\n");
    exit(1);
  }
#endif
  /* Set exact solution to NULL */
  xexact = NULL;

  /* Read matrix file and distribute among processors.  
     Returns with this processor's set of rows */ 

#ifdef VBRMATRIX
  read_hb(argv[1], proc_config, &N_global, &n_nonzeros, 
	  &val_msr,  &bindx_msr, &x, &b, &xexact);
  
  create_vbr(argv[2], proc_config, &N_global, &N_blk_global,
	     &n_nonzeros, &n_blk_nonzeros, &N_update, &update,
	     bindx_msr, val_msr, &val, &indx, 
	     &rpntr, &cpntr, &bpntr, &bindx);

  if(proc_config[AZ_node] == 0) 
    {
      free ((void *) val_msr);
      free ((void *) bindx_msr);
      free ((void *) cpntr);
    }
    matrix_type = AZ_VBR_MATRIX;

#ifdef AZTEC_MPI
  MPI_Barrier(MPI_COMM_WORLD) ;
#endif

  distrib_vbr_matrix( proc_config, N_global, N_blk_global, 
		      &n_nonzeros, &n_blk_nonzeros,
		      &N_update, &update, 
		      &val, &indx, &rpntr, &cpntr, &bpntr, &bindx, 
		      &x, &b, &xexact);

#else
    read_hb(argv[1], proc_config, &N_global, &n_nonzeros,
             &val,  &bindx, &x, &b, &xexact);

#ifdef AZTEC_MPI
  MPI_Barrier(MPI_COMM_WORLD) ;
#endif

  distrib_msr_matrix(proc_config, N_global, &n_nonzeros, &N_update,
		  &update, &val, &bindx, &x, &b, &xexact);

#ifdef DEBUG
  for (i = 0; i<N_update; i++)
    if (val[i] == 0.0 ) printf("Zero diagonal at row %d\n",i);
#endif
    matrix_type = AZ_MSR_MATRIX;
#endif
  /* convert matrix to a local distributed matrix */
  cpntr = NULL;
  AZ_transform(proc_config, &external, bindx, val, update,
	       &update_index, &extern_index, &data_org,
	       N_update, indx, bpntr, rpntr, &cpntr,
	       matrix_type);

  printf("Processor %d: Completed AZ_transform\n", proc_config[AZ_node]);
  has_global_indices = 0;
  option = AZ_LOCAL;

#ifdef VBRMATRIX
  N_local = rpntr[N_update];
#else
  N_local = N_update;
#endif

  Amat = AZ_matrix_create(N_local);

#ifdef VBRMATRIX
  AZ_set_VBR(Amat, rpntr, cpntr, bpntr, indx, bindx, val, data_org,
          N_update, update, option);
#else
  AZ_set_MSR(Amat, bindx, val, data_org, N_update, update, option);
#endif
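
  /*
   * Reference sketch of the MSR layout used above (see the Aztec User's
   * Guide): for an N-row matrix, bindx[0] = N+1; val[i] with i < N holds
   * the diagonal of row i; row i's off-diagonals occupy positions
   * bindx[i] <= k < bindx[i+1], with column index bindx[k] and value
   * val[k].  For example, the 2x2 matrix [[4,-1],[-1,4]]:
   */
#if 0
  {
    int    bindx_ex[] = { 3, 4, 5,    1,   0   };  /* row ptrs | columns   */
    double val_ex[]   = { 4., 4., 0., -1., -1. };  /* diagonals, pad, offd */
  }
#endif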


  printf("proc %d Completed AZ_create_matrix\n",proc_config[AZ_node]) ;

#ifdef AZTEC_MPI
  MPI_Barrier(MPI_COMM_WORLD) ;
#endif

  /* initialize AZTEC options */
 
  AZ_defaults(options, params);
  options[AZ_solver]  = AZ_gmres;
  options[AZ_precond] = AZ_sym_GS; 
  options[AZ_poly_ord] = 1;
  options[AZ_graph_fill] = 1;
  params[AZ_rthresh] = 0.0;
  params[AZ_athresh] = 0.0;
  options[AZ_overlap] = 1;
 /*
  params[AZ_ilut_fill] = 2.0;
  params[AZ_drop] = 0.01;
  options[AZ_overlap] = 0;
  options[AZ_reorder] = 0;
  params[AZ_rthresh] = 1.0E-1;
  params[AZ_athresh] = 1.0E-1;
  options[AZ_precond] = AZ_dom_decomp ;
  options[AZ_subdomain_solve] = AZ_bilu_ifp;
  options[AZ_reorder] = 0;
  options[AZ_graph_fill] = 0;
  params[AZ_rthresh] = 1.0E-7;
  params[AZ_athresh] = 1.0E-7;
 options[AZ_poly_ord] = 1;
 options[AZ_precond] = AZ_Jacobi;
  params[AZ_omega] = 1.0;
  options[AZ_precond] = AZ_none ;

  options[AZ_poly_ord] = 1;
  options[AZ_precond] = AZ_Jacobi ;
  options[AZ_scaling] = AZ_sym_row_sum ;
  options[AZ_scaling] = AZ_sym_diag;


  options[AZ_conv] = AZ_noscaled;
  options[AZ_scaling] = AZ_Jacobi ;

  options[AZ_precond] = AZ_dom_decomp ;
  options[AZ_subdomain_solve] = AZ_icc ;
  options[AZ_subdomain_solve] = AZ_ilut ;
  params[AZ_omega] = 1.2;
  params[AZ_ilut_fill] = 2.0;
  params[AZ_drop] = 0.01;
  options[AZ_reorder] = 0;
  options[AZ_overlap] = 0;
  options[AZ_type_overlap] = AZ_symmetric;

  options[AZ_precond] = AZ_dom_decomp ;
  options[AZ_subdomain_solve] = AZ_bilu ;
  options[AZ_graph_fill] = 0;
  options[AZ_overlap] = 0;

  options[AZ_precond] = AZ_dom_decomp ;
  options[AZ_subdomain_solve] = AZ_bilu_ifp ;
  options[AZ_graph_fill] = 0;
  options[AZ_overlap] = 0;
  params[AZ_rthresh] = 1.0E-3;
  params[AZ_athresh] = 1.0E-3;

 options[AZ_poly_ord] = 1;
 options[AZ_precond] = AZ_Jacobi ; */


  options[AZ_kspace] = 600 ;

  options[AZ_max_iter] = 600 ;
  params[AZ_tol] = 1.0e-14;

#ifdef BGMRES
  options[AZ_gmres_blocksize] = 3;
  options[AZ_gmres_num_rhs] = 1;
#endif

#ifdef DEBUG
  if (proc_config[AZ_N_procs]==1)
    write_vec("rhs.dat", N_local, b);
#endif

  /* xsolve is a little longer vector needed to account for external 
     entries.  Make it and copy x (initial guess) into it. 
  */

  if (has_global_indices)
    {
      N_external = 0;
    }
  else
    {
      N_external = data_org[AZ_N_external];
    }

  xsolve  = (double *) calloc(N_local + N_external, 
			   sizeof(double)) ;

  for (i=0; i<N_local; i++) xsolve[i] = x[i];

  /* Reorder rhs and xsolve to match matrix ordering from AZ_transform */
  if (!has_global_indices)
    {
      AZ_reorder_vec(b, data_org, update_index, rpntr) ;
      AZ_reorder_vec(xsolve, data_org, update_index, rpntr) ;
    }
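
  /*
   * What the reordering amounts to (a sketch based on the Aztec User's
   * Guide): AZ_transform assigns row update[i] the new local index
   * update_index[i], so AZ_reorder_vec() essentially performs
   *     v_new[update_index[i]] = v[i],   i = 0, ..., N_update-1,
   * while AZ_invorder_vec() applies the inverse permutation.
   */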

#ifdef VBRMATRIX
  AZ_check_vbr(N_update, data_org[AZ_N_ext_blk], AZ_LOCAL, 
	       bindx, bpntr, cpntr, rpntr, proc_config);
#else
  AZ_check_msr(bindx, N_update, N_external, AZ_LOCAL, proc_config);
#endif

  printf("Processor %d of %d N_local = %d N_external = %d NNZ = %d\n",
	 proc_config[AZ_node],proc_config[AZ_N_procs],N_local,N_external,
	 n_nonzeros);

  /* solve the system of equations using b  as the right hand side */

  Prec = AZ_precond_create(Amat,AZ_precondition, NULL);

  AZ_iterate(xsolve, b, options, params, status, proc_config,
	     Amat, Prec, NULL);
  /*AZ_ifpack_iterate(xsolve, b, options, params, status, proc_config,
    Amat);*/

  if (proc_config[AZ_node]==0)
    {
      printf("True residual norm = %22.16g\n",status[AZ_r]);
      printf("True scaled res    = %22.16g\n",status[AZ_scaled_r]);
      printf("Computed res norm  = %22.16g\n",status[AZ_rec_r]);
    }

#ifdef TEST_SINGULAR

   xnull  = (double *) calloc(N_local + N_external, sizeof(double)) ;
   Axnull  = (double *) calloc(N_local + N_external, sizeof(double)) ;
   for (i=0; i<N_local; i++) xnull[i] = xexact[i];
   if (!has_global_indices)  AZ_reorder_vec(xnull, data_org, update_index, rpntr);
   for (i=0; i<N_local; i++) xnull[i] -= xsolve[i]; /* fill with nullerence */
   Amat->matvec(xnull, Axnull, Amat, proc_config);

   norm_Axnull = AZ_gvector_norm(N_local, 2, Axnull, proc_config);

   if (proc_config[AZ_node]==0) printf("Norm of A(xexact-xsolve) = %12.4g\n",norm_Axnull);
   free((void *) xnull);
   free((void *) Axnull);
#endif


  /* Get solution back into original ordering */
   if (!has_global_indices) {
     AZ_invorder_vec(xsolve, data_org, update_index, rpntr, x);
     free((void *) xsolve);
   }
  else {
    free((void *) x);
    x = xsolve;
  }

#ifdef DEBUG
  if (proc_config[AZ_N_procs]==1)
      write_vec("solution.dat", N_local, x);
#endif
  if (xexact != NULL)
    {
      double sum = 0.0;
      double largest = 0.0;
      for (i=0; i<N_local; i++) sum += fabs(x[i]-xexact[i]);
 printf("Processor %d:  Difference between exact and computed solution = %12.4g\n",
	     proc_config[AZ_node],sum);
      for (i=0; i<N_local; i++) largest = AZ_MAX(largest,fabs(xexact[i]));
 printf("Processor %d:  Difference divided by max abs value of exact   = %12.4g\n",
	     proc_config[AZ_node],sum/largest);
    }

				       

  free((void *) val);
  free((void *) bindx);
#ifdef VBRMATRIX
  free((void *) rpntr);
  free((void *) bpntr);
  free((void *) indx);
#endif
  free((void *) b);
  free((void *) x);
  if (xexact!=NULL) free((void *) xexact);

  AZ_free((void *) update);
  AZ_free((void *) update_index);
  AZ_free((void *) external); 
  AZ_free((void *) extern_index);
  AZ_free((void *) data_org);
  if (cpntr!=NULL) AZ_free((void *) cpntr);
  AZ_precond_destroy(&Prec);
  AZ_matrix_destroy(&Amat);
  


#ifdef AZTEC_MPI
  MPI_Finalize() ;
#endif

  return 0;
}  /* end main */
Beispiel #25
0
int main(int argc, char *argv[])
{

  /* See Aztec User's Guide for the variables that follow:         */
  int    proc_config[AZ_PROC_SIZE];/* Processor information.                */
  int    N_update;                 /* # of unknowns updated on this node    */
  int    *update;                  /* vector elements updated on this node  */

  int    *data_orgA;               /* Array to specify data layout          */
  int    *externalA;               /* vector elements needed by this node.  */
  int    *update_indexA;           /* ordering of update[] and external[]   */
  int    *extern_indexA;           /* locally on this processor.            */
  int    *bindxA;                  /* Sparse matrix to be solved is stored  */
  double *valA;                    /* in these MSR arrays.                  */
  AZ_MATRIX *mat_curl_edge;        /* curl operator matrix                  */

  int    *data_orgB;                /* Array to specify data layout          */
  int    *externalB;                /* vector elements needed by this node.  */
  int    *update_indexB;            /* ordering of update[] and external[]   */
  int    *extern_indexB;            /* locally on this processor.            */
  int    *bindxB;                   /* Sparse matrix to be solved is stored  */
  double *valB;                     /* in these MSR arrays.                  */
  AZ_MATRIX *mat_curl_face;         /* curl operator matrix                  */

  int *bc_indx;
  int n_bc;

  double *efield;
  double *bfield;
  double *epsilon;
  double *tmp_vec;
  double *tmp_vec2;

  int    i, nrow, x, y, z;
  int k, t;
  long startTime, endTime;
  int myrank;
  int vec_len;


  /* get number of processors and the name of this processor */
#ifdef AZ_MPI
  MPI_Init(&argc,&argv);
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
#else
  myrank = 0;
  AZ_set_proc_config(proc_config, AZ_NOT_MPI);
#endif

  nrow = ncomp * nx * ny * nz;  /* overall number of matrix rows  */

  // Define partitioning:  matrix rows (ascending order) owned by this node
  // Here it is done automatically, but it can also be specified by hand
  AZ_read_update(&N_update, &update, proc_config, nrow, 1, AZ_linear);


  // In the following we set up the matrix for the edge centered curl operator
  // All the steps are described in detail in the AZTEC manual.
  // first: allocate space for the first matrix.
  bindxA = (int    *) malloc((N_update*MAX_NZ_ROW+1)*sizeof(int));
  valA   = (double *) malloc((N_update*MAX_NZ_ROW+1)*sizeof(double));
  if (valA == NULL) { perror("Error: Not enough space to create matrix"); exit(1); }
  // Initialize the index for the first off diagonal element
  bindxA[0] = N_update+1;

  // Create the matrix row by row. Each processor creates only rows appearing
  // in update[] (using global col. numbers).
  for (i = 0; i < N_update; i++)
    create_curl_matrix_row_edge(update[i], i, valA, bindxA);

  // convert matrix to a local distributed matrix
  AZ_transform(proc_config, &externalA, bindxA, valA, update, &update_indexA,
               &extern_indexA, &data_orgA, N_update, NULL, NULL, NULL, NULL,
               AZ_MSR_MATRIX);

  // convert the matrix arrays into a matrix structure, used in the
  // matrix vector multiplication
  mat_curl_edge = AZ_matrix_create(data_orgA[AZ_N_internal] + data_orgA[AZ_N_border]);
  AZ_set_MSR(mat_curl_edge, bindxA, valA, data_orgA, 0, NULL, AZ_LOCAL);
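  // (data_orgA[AZ_N_internal] + data_orgA[AZ_N_border] is the number of
  // rows this node owns after AZ_transform, i.e. N_update here.)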

  // at this point the edge centered curl matrix is completed.

  // In the following we set up the matrix for the face centered curl operator
  // All the steps are described in detail in the AZTEC manual.
  // first: allocate space for the first matrix.
  bindxB = (int    *) malloc((N_update*MAX_NZ_ROW+1)*sizeof(int));
  valB   = (double *) malloc((N_update*MAX_NZ_ROW+1)*sizeof(double));
  if (valB == NULL) { perror("Error: Not enough space to create matrix"); exit(1); }

  // Initialize the index for the first off diagonal element
  bindxB[0] = N_update+1;

  // Create the matrix row by row. Each processor creates only rows appearing
  // in update[] (using global col. numbers).
  for (i = 0; i < N_update; i++)
      create_curl_matrix_row_face(update[i], i, valB, bindxB);

  // convert matrix to a local distributed matrix
  AZ_transform(proc_config, &externalB, bindxB, valB, update, &update_indexB,
			                   &extern_indexB, &data_orgB,
					   N_update, NULL, NULL, NULL, NULL,
					                  AZ_MSR_MATRIX);
  // convert the matrix arrays into a matrix structure, used in the
  // matrix vector multiplication
  mat_curl_face = AZ_matrix_create(data_orgB[AZ_N_internal] + data_orgB[AZ_N_border]);
  AZ_set_MSR(mat_curl_face, bindxB, valB, data_orgB, 0, NULL, AZ_LOCAL);

  // at this point the face centered curl matrix is completed.


  //  allocate memory for the fields and a temporary vector
  vec_len = N_update + data_orgA[AZ_N_external];
  efield = (double *) malloc(vec_len*sizeof(double));
  bfield = (double *) malloc(vec_len*sizeof(double));
  epsilon = (double *) malloc(vec_len*sizeof(double));
  tmp_vec = (double *) malloc(vec_len*sizeof(double));
  tmp_vec2 = (double *) malloc(vec_len*sizeof(double));

  // setup the boundary condition. We will get an array that tells us
  // which positions need to be updated and where the result needs
  // to be stored in the E field.
  setup_bc(update, update_indexB, N_update, &bc_indx, &n_bc);
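  // Layout of bc_indx[] as inferred from the update loop below: for each
  // boundary point k, bc_indx[4k..4k+2] hold its (x,y,z) grid coordinates
  // and bc_indx[4k+3] the local index into efield[] that gets overwritten.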

  // initialize the field vectors
  for(k = 0; k < vec_len; k++){
     efield[k] = 0.;
     bfield[k] = 0.;
     epsilon[k] = 1.;
     tmp_vec[k] = 0.;
  }

  // initialize the dielectric structure. Ugly hard-coded stuff,
  // needs to be cleaned out...
  for(y=45; y<55; y++){
   for(x = y; x<100; x++)
      epsilon[compZ + pos_to_row(x, y, 0)] = 0.95;
  }	
  // reorder the dielectric vector in order to align with the B field
  AZ_reorder_vec(epsilon, data_orgA, update_indexA, NULL);


  printf("Begin iteration \n");

  // just some timing ...
  startTime = currentTimeMillis();

  // *******************
  // begin of the time stepping loop
  // *******************

  for( t = 0; t < nsteps; t++){

    // first we do the e field update

    // convert the B field to the H field

    for(k = 0 ; k < vec_len; k++)
      bfield[k] *= epsilon[k];

    // setup the initial condition
    for( k = 0; k < n_bc; k++){
      x = bc_indx[4*k];
      y = bc_indx[4*k+1];
      z = bc_indx[4*k+2];
      efield[bc_indx[4*k+3]] =
           sin((double) y * 5. * 3.14159 / (double) ny) *
           sin(omega * dt * (double) (t + 1));
    }

    //  E field update:
    //  tmp_vec = Curl_Op * bfield
    //  efield = efield +  c^2 * dt * tmp_vec
    AZ_MSR_matvec_mult( bfield, tmp_vec, mat_curl_edge, proc_config);

    // reorder the result in tmp_vec so that it aligns with the
    // decomposition of the E field
    AZ_invorder_vec(tmp_vec, data_orgA, update_indexA, NULL, tmp_vec2);
    AZ_reorder_vec(tmp_vec2, data_orgB, update_indexB, NULL);

    // update the efield
    for(k = 0 ; k < N_update; k++)
      efield[k] = efield[k] + c2 * tmp_vec2[k] * dt;

    // bfield update :
    // tmp_vec = DualCurl_Op * efield
    // bfield = bfield - tmp_vec * dt
    AZ_MSR_matvec_mult( efield, tmp_vec, mat_curl_face, proc_config);

    // reorder the result so that it fits the decomposition of the bfield
    AZ_invorder_vec(tmp_vec, data_orgB, update_indexB, NULL, tmp_vec2);
    AZ_reorder_vec(tmp_vec2, data_orgA, update_indexA, NULL);

    // update the b field
    for(k = 0;  k < N_update; k++)
	  bfield[k] = bfield[k] - tmp_vec2[k] * dt;

    if(myrank == 0)
      printf("Taking step %d at time %g\n", t,
		       (double) (currentTimeMillis() - startTime) / 1000.);
  }
  // ******************
  // end of timestepping loop
  // *****************

  endTime = currentTimeMillis();
  printf("After iteration: %g\n", (double)(endTime - startTime) / 1000. );

#if 1
  system("rm efield.txt bfield.txt");

  // dump field data: efield
  AZ_invorder_vec(efield, data_orgB, update_indexB, NULL, tmp_vec);
  write_file("efield.txt", tmp_vec, N_update);

  // dump field data: bfield
  AZ_invorder_vec(bfield, data_orgA, update_indexA, NULL, tmp_vec);
  write_file("bfield.txt", tmp_vec, N_update);
#endif

  /* Free allocated memory */
  AZ_matrix_destroy( &mat_curl_edge);
  free((void *) update);   free((void *) update_indexA);
  free((void *) externalA); free((void *) extern_indexA);
  free((void *) bindxA);    free((void *) valA);  free((void *) data_orgA);

  AZ_matrix_destroy( &mat_curl_face);
  free((void *) externalB); free((void *) extern_indexB);
  free((void *) bindxB);    free((void *) valB);  free((void *) data_orgB);

  free((void *) efield);    free((void *) bfield);
  free((void *) tmp_vec);   free((void *) tmp_vec2);


#ifdef AZ_MPI
  MPI_Finalize();
#endif
  return 0;

}
Beispiel #26
0
int main(int argc, char *argv[])
{
	int num_PDE_eqns=3, N_levels=3, nsmooth=1;

	int    leng, level, N_grid_pts, coarsest_level;

  /* See Aztec User's Guide for more information on the */
  /* variables that follow.                             */

  int    proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];

  /* data structure for matrix corresponding to the fine grid */

  int    *data_org = NULL, *update = NULL, *external = NULL;
  int    *update_index = NULL, *extern_index = NULL;
  int    *cpntr = NULL;
  int    *bindx = NULL, N_update, iii;
  double *val = NULL;
  double *xxx, *rhs;

  AZ_MATRIX *Amat;
  AZ_PRECOND *Pmat = NULL;
  ML *ml;
  FILE *fp;
  int    ch, i, j, Nrigid, *garbage;
  struct AZ_SCALING *scaling;
  double solve_time, setup_time, start_time, *mode, *rigid;
  ML_Aggregate *ag;
  int    nblocks, *blocks;
  char   filename[80];
  double alpha;
  int    one = 1;


#ifdef ML_MPI
  MPI_Init(&argc,&argv);

  /* get number of processors and the name of this processor */

  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI);
#endif

  leng = 0;
  if (proc_config[AZ_node] == 0) {
#ifdef binary
    fp = fopen(".data", "rb");
#else
    fp = fopen(".data", "r");
#endif
    if (fp == NULL) {
      printf("couldn't open file .data\n");
      exit(1);
    }
#ifdef binary
    fread(&leng, sizeof(int), 1, fp);
#else
    fscanf(fp, "%d", &leng);
#endif
    fclose(fp);
  }
  leng = AZ_gsum_int(leng, proc_config);

  N_grid_pts = leng / num_PDE_eqns;



  /* initialize the list of global indices. NOTE: the list of global */
  /* indices must be in ascending order so that subsequent calls to  */
  /* AZ_find_index() will function properly. */
	
  AZ_read_update(&N_update, &update, proc_config, N_grid_pts, num_PDE_eqns,
                 AZ_linear);
	
	
  AZ_read_msr_matrix(update, &val, &bindx, N_update, proc_config);

  AZ_transform(proc_config, &external, bindx, val,  update, &update_index,
	       &extern_index, &data_org, N_update, 0, 0, 0, &cpntr, 
               AZ_MSR_MATRIX);
	
  Amat = AZ_matrix_create( leng );
  AZ_set_MSR(Amat, bindx, val, data_org, 0, NULL, AZ_LOCAL);

  Amat->matrix_type  = data_org[AZ_matrix_type];
	
  data_org[AZ_N_rows]  = data_org[AZ_N_internal] + data_org[AZ_N_border];
			
  start_time = AZ_second();

  AZ_defaults(options, params);
/*
  scaling = AZ_scaling_create();
  xxx = (double *) calloc(leng, sizeof(double));
  rhs = (double *) calloc(leng, sizeof(double));
  options[AZ_scaling] = AZ_sym_diag;
  options[AZ_precond] = AZ_none;
  options[AZ_max_iter] = 30;
  options[AZ_keep_info] = 1;
  AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling);
  don't forget vector rescaling ...
  free(xxx);
  free(rhs);
*/
  options[AZ_scaling] = AZ_none;




  ML_Create(&ml, N_levels);
			
			
  /* set up discretization matrix and matrix vector function */
	
  AZ_ML_Set_Amat(ml, N_levels-1, N_update, N_update, Amat, proc_config);
	
  ML_Aggregate_Create( &ag );

  Nrigid = 0;
if (proc_config[AZ_node] == 0) {
  sprintf(filename,"rigid_body_mode%d",Nrigid+1);
  while( (fp = fopen(filename,"r")) != NULL) {
     fclose(fp);
     Nrigid++;
     sprintf(filename,"rigid_body_mode%d",Nrigid+1);
  }
}
Nrigid = AZ_gsum_int(Nrigid,proc_config);

  if (Nrigid != 0) {
     rigid = (double *) ML_allocate( sizeof(double)*Nrigid*(N_update+1) );
     if (rigid == NULL) {
        printf("Error: Not enough space for rigid body modes\n");
        exit(1);
     }
  }

  rhs = (double *) malloc(leng*sizeof(double));
  AZ_random_vector(rhs, data_org, proc_config);
  
  for (i = 0; i < Nrigid; i++) {
     sprintf(filename,"rigid_body_mode%d",i+1);
     AZ_input_msr_matrix(filename, update, &mode, &garbage, 
                         N_update, proc_config);


/*
AZ_sym_rescale_sl(mode, Amat->data_org, options, proc_config, scaling);
*/
/*
Amat->matvec(mode, rigid, Amat, proc_config);
for (j = 0; j < N_update; j++) printf("this is %d %e\n",j,rigid[j]);
*/
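/* Classical Gram-Schmidt: orthogonalize the new mode against the modes
   already stored in rigid[] (alpha = -<mode,r_j>/<r_j,r_j>), then project
   the finished mode out of the random right hand side as well. */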
for (j = 0; j < i; j++) {
alpha = -AZ_gdot(N_update, mode, &(rigid[j*N_update]), proc_config)/AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config);
daxpy_(&N_update, &alpha,  &(rigid[j*N_update]),  &one, mode, &one);
printf("alpha1 is %e\n",alpha);
}
alpha = -AZ_gdot(N_update, mode, rhs, proc_config)/AZ_gdot(N_update, mode, mode, proc_config);
printf("alpha2 is %e\n",alpha);
daxpy_(&N_update, &alpha,  mode,  &one, rhs, &one);

  
     for (j = 0; j < N_update; j++) rigid[i*N_update+j] = mode[j];
     free(mode);
     free(garbage);
  }
for (j = 0; j < Nrigid; j++) {
alpha = -AZ_gdot(N_update, rhs, &(rigid[j*N_update]), proc_config)/AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config);
daxpy_(&N_update, &alpha,  &(rigid[j*N_update]),  &one, rhs, &one);
printf("alpha4 is %e\n",alpha);
}


for (i = 0; i < Nrigid; i++) {
  alpha = -AZ_gdot(N_update, &(rigid[i*N_update]), rhs, proc_config);
  printf("alpha is %e\n",alpha);
}
  if (Nrigid != 0) {
     ML_Aggregate_Set_NullSpace(ag, num_PDE_eqns, Nrigid, rigid, N_update);
/*
     free(rigid);
*/
  }

	coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, N_levels-1, ML_DECREASING, ag);
	coarsest_level = N_levels - coarsest_level;
/*
ML_Operator_Print(&(ml->Pmat[N_levels-2]), "Pmat");
exit(1);
*/

	if ( proc_config[AZ_node] == 0 )
		printf("Coarse level = %d \n", coarsest_level);
	
	/* set up smoothers */
	
	for (level = N_levels-1; level > coarsest_level; level--) {
j = 10;
if (level == N_levels-1) j = 10;
options[AZ_solver] = AZ_cg;
options[AZ_precond]=AZ_sym_GS; options[AZ_subdomain_solve]=AZ_icc;
/*
options[AZ_precond] = AZ_none;
*/
options[AZ_poly_ord] = 5;
ML_Gen_SmootherAztec(ml, level, options, params, proc_config, status,
j, ML_PRESMOOTHER,NULL);
ML_Gen_SmootherAztec(ml, level, options, params, proc_config, status,
j, ML_POSTSMOOTHER,NULL);
/*
		ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_PRESMOOTHER, nsmooth,1.0);
		ML_Gen_Smoother_SymGaussSeidel(ml , level, ML_POSTSMOOTHER, nsmooth,1.0);
*/
/*
                nblocks = ML_Aggregate_Get_AggrCount( ag, level );
                ML_Aggregate_Get_AggrMap( ag, level, &blocks);
                ML_Gen_Smoother_VBlockSymGaussSeidel( ml , level, ML_BOTH, nsmooth, 1.0,
                                                 nblocks, blocks);
                ML_Gen_Smoother_VBlockSymGaussSeidel( ml , level, ML_POSTSMOOTHER, nsmooth, 1.0, 
                                                 nblocks, blocks);
*/
/*
                ML_Gen_Smoother_VBlockJacobi( ml , level, ML_PRESMOOTHER, nsmooth, .5,
                                                 nblocks, blocks);
                ML_Gen_Smoother_VBlockJacobi( ml , level, ML_POSTSMOOTHER, nsmooth,.5,
                                                 nblocks, blocks);
*/
/*
		ML_Gen_Smoother_GaussSeidel(ml , level, ML_PRESMOOTHER, nsmooth);
		ML_Gen_Smoother_GaussSeidel(ml , level, ML_POSTSMOOTHER, nsmooth);    
*/
/* 
need to change this when num_pdes is different on different levels
*/
/*
if (level == N_levels-1) {
		ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_PRESMOOTHER, nsmooth, 0.5, num_PDE_eqns);
		ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_POSTSMOOTHER, nsmooth, 0.5, num_PDE_eqns);
}
else {
		ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_PRESMOOTHER, nsmooth, 0.5, 2*num_PDE_eqns);
		ML_Gen_Smoother_BlockGaussSeidel(ml , level, ML_POSTSMOOTHER, nsmooth, 0.5, 2*num_PDE_eqns);
}
*/
/*
*/

/*
			ML_Gen_SmootherJacobi(ml , level, ML_PRESMOOTHER, nsmooth, .67);
			ML_Gen_SmootherJacobi(ml , level, ML_POSTSMOOTHER, nsmooth, .67 );
*/
		
		
	}
	
/*
	ML_Gen_CoarseSolverSuperLU( ml, coarsest_level);
*/
/*
ML_Gen_SmootherSymGaussSeidel(ml , coarsest_level, ML_PRESMOOTHER, 2*nsmooth,1.);
*/
/*
ML_Gen_SmootherBlockGaussSeidel(ml , level, ML_PRESMOOTHER, 50*nsmooth, 1.0, 2*num_PDE_eqns);
*/
	/* after the loop, level == coarsest_level: smooth the coarsest grid */
	ML_Gen_Smoother_BlockGaussSeidel(ml, coarsest_level, ML_PRESMOOTHER,
	                                 2*nsmooth, 1.0, num_PDE_eqns);
		
	
	ML_Gen_Solver(ml, ML_MGV, N_levels-1, coarsest_level); 
	AZ_defaults(options, params);
	
        options[AZ_solver]   = AZ_GMRESR;
        options[AZ_scaling]  = AZ_none;
        options[AZ_precond]  = AZ_user_precond;
        options[AZ_conv]     = AZ_rhs;
        options[AZ_output]   = 1;
        options[AZ_max_iter] = 1500;
        options[AZ_poly_ord] = 5;
        options[AZ_kspace]   = 130;
        params[AZ_tol]       = 1.0e-8;
	
	AZ_set_ML_preconditioner(&Pmat, Amat, ml, options); 
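	/* from here on, Aztec applies the ML V-cycle built above (ML_MGV)
	   as the user-defined preconditioner on each outer GMRESR iteration */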
setup_time = AZ_second() - start_time;
	
	xxx = (double *) malloc( leng*sizeof(double));

	
        /* Set rhs */
 
        fp = fopen("AZ_capture_rhs.dat","r");
        if (fp == NULL) {
           if (proc_config[AZ_node] == 0) printf("taking random vector for rhs\n");
/*
           AZ_random_vector(rhs, data_org, proc_config);
           AZ_reorder_vec(rhs, data_org, update_index, NULL);
           AZ_random_vector(xxx, data_org, proc_config);
           AZ_reorder_vec(xxx, data_org, update_index, NULL);
           Amat->matvec(xxx, rhs, Amat, proc_config);
*/
        }
        else {
           ch = getc(fp);
           if (ch == 'S') {
              while ( (ch = getc(fp)) != '\n') ;
           }
           else ungetc(ch,fp);
           for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) 
              fscanf(fp,"%lf",&(rhs[i]));
           fclose(fp);
        }
	for (iii = 0; iii < leng; iii++) xxx[iii] = 0.0; 

        /* Set x */

        fp = fopen("AZ_capture_init_guess.dat","r");
        if (fp != NULL) {
           ch = getc(fp);
           if (ch == 'S') {
              while ( (ch = getc(fp)) != '\n') ;
           }
           else ungetc(ch,fp);
           for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++)
              fscanf(fp,"%lf",&(xxx[i]));
           fclose(fp);
           options[AZ_conv] = AZ_expected_values;
        }

        /* if Dirichlet BC ... put the answer in */

        for (i = 0; i < data_org[AZ_N_internal]+data_org[AZ_N_border]; i++) {
           if ( (val[i] > .99999999) && (val[i] < 1.0000001))
              xxx[i] = rhs[i];      
        }

        fp = fopen("AZ_no_multilevel.dat","r");
        scaling = AZ_scaling_create();
start_time = AZ_second();
        if (fp != NULL) {
           fclose(fp);
           options[AZ_precond] = AZ_none;
           options[AZ_scaling] = AZ_sym_diag;
           options[AZ_ignore_scaling] = AZ_TRUE;

           options[AZ_keep_info] = 1;
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); 

/*
           options[AZ_pre_calc] = AZ_reuse;
           options[AZ_conv] = AZ_expected_values;
           if (proc_config[AZ_node] == 0) 
              printf("\n-------- Second solve with improved convergence test -----\n");
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); 
           if (proc_config[AZ_node] == 0) 
              printf("\n-------- Third solve with improved convergence test -----\n");
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, NULL, scaling); 
*/
        }
        else {
           options[AZ_keep_info] = 1;
/*
options[AZ_max_iter] = 40;
*/
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); 
for (j = 0; j < Nrigid; j++) {
alpha = -AZ_gdot(N_update, xxx, &(rigid[j*N_update]), proc_config)/AZ_gdot(N_update, &(rigid[j*N_update]), &(rigid[j*N_update]), proc_config);
daxpy_(&N_update, &alpha,  &(rigid[j*N_update]),  &one, xxx, &one);
printf("alpha5 is %e\n",alpha);
}
AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); 
           options[AZ_pre_calc] = AZ_reuse;
           options[AZ_conv] = AZ_expected_values;
/*
           if (proc_config[AZ_node] == 0) 
              printf("\n-------- Second solve with improved convergence test -----\n");
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); 
           if (proc_config[AZ_node] == 0) 
              printf("\n-------- Third solve with improved convergence test -----\n");
           AZ_iterate(xxx, rhs, options, params, status, proc_config, Amat, Pmat, scaling); 
*/
        }
   solve_time = AZ_second() - start_time;

   if (proc_config[AZ_node] == 0) 
      printf("Solve time = %e, MG Setup time = %e\n", solve_time, setup_time);

   ML_Aggregate_Destroy(&ag);
   ML_Destroy(&ml);
   AZ_free((void *) Amat->data_org);
   AZ_free((void *) Amat->val);
   AZ_free((void *) Amat->bindx);
   AZ_free((void *) update);
   AZ_free((void *) external);
   AZ_free((void *) extern_index);
   AZ_free((void *) update_index);
   if (Amat  != NULL) AZ_matrix_destroy(&Amat);
   if (Pmat  != NULL) AZ_precond_destroy(&Pmat);
   free(xxx);
   free(rhs);


#ifdef ML_MPI
  MPI_Finalize();
#endif
	
  return 0;
	
}