Ejemplo n.º 1
0
// ====================================================================== 
// Builds an Operator whose rows are served by ML_AGG_JacobiSmoother_Getrows,
// using Amat as the underlying matrix and Damping as the omega stored in the
// getrow context.
//
// \param Amat    operator supplying the ML_Operator, the domain/range spaces
//                and the RCP operator box forwarded to the result.
// \param Damping value stored as `omega` in the getrow context.
// \return an Operator wrapping the newly created ML_Operator.
Operator GetJacobiIterationOperator(const Operator& Amat, double Damping)
{
  // Private data block for the new ML operator. widget_destroy is installed
  // below as the data_destroy hook (presumably it releases this allocation --
  // its body is not visible here).
  struct ML_AGG_Matrix_Context* context = new struct ML_AGG_Matrix_Context;
  context->Amat      = Amat.GetML_Operator();
  context->omega     = Damping;
  context->drop_tol  = 0.0;
  context->near_bdry = 0;
  context->aggr_info = 0;

  ML_Operator* const source_op = context->Amat;

  // Create the wrapper operator with the same vector lengths as the source
  // matrix; no matvec function is attached (NULL), only a getrow below.
  ML_Operator* jacobi_op = ML_Operator_Create(GetML_Comm());
  ML_Operator_Set_ApplyFuncData(jacobi_op,
                                source_op->invec_leng,
                                source_op->outvec_leng,
                                context,
                                source_op->matvec->Nrows,
                                NULL,
                                0);
  jacobi_op->data_destroy = widget_destroy;

  ML_Operator_Set_Getrow(jacobi_op,
                         source_op->getrow->Nrows,
                         ML_AGG_JacobiSmoother_Getrows);

  // Clone pre_comm rather than sharing it, so that the source operator's
  // pre_comm can be destroyed independently of the new operator.
  ML_CommInfoOP_Clone(&(jacobi_op->getrow->pre_comm),
                      source_op->getrow->pre_comm);

  // The RCP operator box of Amat is forwarded to the result (presumably so
  // the source ML_Operator stays alive as long as the wrapper -- TODO confirm
  // against the Operator constructor).
  Operator result(Amat.GetDomainSpace(),
                  Amat.GetRangeSpace(),
                  jacobi_op,
                  true,
                  Amat.GetRCPOperatorBox());
  return result;
}
Ejemplo n.º 2
0
// ====================================================================== 
// Builds an identity Operator sized by the number of local elements of
// DomainSpace, backed by the eye_matvec / eye_getrows callbacks.
//
// \param DomainSpace space used for BOTH the domain and the range of the
//                    returned operator.
// \param RangeSpace  NOTE(review): accepted but never read in this function;
//                    the result is constructed on (DomainSpace, DomainSpace).
//                    Confirm whether callers rely on this.
// \return the identity Operator.
Operator GetIdentity(const Space& DomainSpace, const Space& RangeSpace)
{
  const int num_local = DomainSpace.GetNumMyElements();

  // Square operator: same local length for input and output vectors,
  // no private data attached.
  ML_Operator* identity_op = ML_Operator_Create(GetML_Comm());
  ML_Operator_Set_ApplyFuncData(identity_op,
                                num_local,
                                num_local,
                                NULL,
                                num_local,
                                eye_matvec,
                                0);
  ML_Operator_Set_Getrow(identity_op, num_local, eye_getrows);

  Operator identity(DomainSpace, DomainSpace, identity_op, true);
  return identity;
}
// ================================================ ====== ==== ==== == =
//! Build the face-to-node prolongator described by Bochev, Siefert, Tuminaro, Xu and Zhu (2007).
int ML_Epetra::FaceMatrixFreePreconditioner::PBuildSparsity(ML_Operator *P, Epetra_CrsMatrix *&Psparse){
  // Overview:
  //   1. wrap the face-node matrix as an ML operator whose getrow is
  //      CSR_getrow_ones (the "abs(T)" wrapper),
  //   2. multiply that wrapper by P,
  //   3. copy the product into a new Epetra_CrsMatrix returned via Psparse,
  //   4. apply the boundary-condition rows, then release the temporaries.
  // Returns 0 on completion; ML_CHK_ERR may return early if the wrap fails.

  // Step 1: abs(T) wrapper.
  // NTS: Assume D0 has already been reindexed by now.
  Epetra_CrsMatrix* face_node = const_cast<Epetra_CrsMatrix*>(&*FaceNode_Matrix_);
  ML_Operator* abs_face_node = ML_Operator_Create(ml_comm_);
  ML_CHK_ERR(ML_Operator_WrapEpetraCrsMatrix(face_node, abs_face_node, verbose_));
  ML_Operator_Set_Getrow(abs_face_node, abs_face_node->outvec_leng, CSR_getrow_ones);

  // Step 2: abs(T) * P_n, stored in CSR form.
  ML_Operator* abs_face_node_times_P = ML_Operator_Create(ml_comm_);
  ML_2matmult(abs_face_node, P, abs_face_node_times_P, ML_CSR_MATRIX);

  // Step 3: bring the product into Epetra-land as a copy.
  Epetra_CrsMatrix_Wrap_ML_Operator(abs_face_node_times_P, *Comm_, *FaceRangeMap_,
                                    &Psparse, Copy, 0);

  // Step 4: nuke the boundary rows of Psparse, if any boundary faces exist.
  if (BCfaces_.size() > 0) {
    Apply_BCsToMatrixRows(BCfaces_.get(), BCfaces_.size(), *Psparse);
  }

  // Cleanup of the two temporary ML operators.
  ML_Operator_Destroy(&abs_face_node);
  ML_Operator_Destroy(&abs_face_node_times_P);

  return 0;
}
Ejemplo n.º 4
0
/*
 * Matrix-vector product computed through the getrow interface of `matrix`.
 *
 * The input vector is wrapped as an ML_Operator (getrow = VECTOR_getrows),
 * multiplied by `matrix` with ML_matmat_mult, and the rows of the product
 * are then read back into `ovec` one at a time.
 *
 *   matrix : operator to apply; its getrow function must be set, otherwise
 *            the routine prints a message and calls exit(1).
 *   vec    : input vector, Nvec entries.
 *   Nvec   : length of vec.
 *   ovec   : output vector, filled row by row.
 *   Novec  : on entry the caller's expected output length (a warning is
 *            printed if it differs from the computed row count); on exit
 *            the number of rows of the result.
 */
void ML_getrow_matvec(ML_Operator *matrix, double *vec, int Nvec, 
                      double *ovec, int *Novec)
{
   ML_Operator *vec_op, *vec_ghosted, *product, *result, *link;
   int *col_scratch, row;
   int scratch_len, row_len;

   if (matrix->getrow->func_ptr == NULL) {
      printf("ML_getrow_matvec: empty object? \n");
      exit(1);
   }

   /* Wrap the input vector as an operator with one nonzero per row. */
   vec_op = ML_Operator_Create(matrix->comm);
   ML_Operator_Set_1Levels(vec_op, matrix->from, matrix->from);
   ML_Operator_Set_ApplyFuncData(vec_op, 1, Nvec, vec, Nvec, NULL, 0);
   ML_Operator_Set_Getrow(vec_op, Nvec, VECTOR_getrows);
   vec_op->max_nz_per_row = 1;
   vec_op->N_nonzeros     = Nvec;

   /* Exchange rows before the product only if the matrix has a pre_comm. */
   if (matrix->getrow->pre_comm == NULL) {
      vec_ghosted = vec_op;
   }
   else {
      ML_exchange_rows(vec_op, &vec_ghosted, matrix->getrow->pre_comm);
   }

   ML_matmat_mult(matrix, vec_ghosted, &product);

   /* Likewise, exchange rows after the product only if there is a post_comm. */
   if (matrix->getrow->post_comm == NULL) {
      result = product;
   }
   else {
      ML_exchange_rows(product, &result, matrix->getrow->post_comm);
   }

   /* Column-index scratch space for the row extraction below. */
   scratch_len = result->getrow->Nrows + 1;
   col_scratch = (int *) ML_allocate(scratch_len*sizeof(int));
   if (col_scratch == NULL) {
      printf("no space in ML_getrow_matvec()\n");
      exit(1);
   }

   /* Pull each row of the result; its value is written into ovec at */
   /* offset `row`. A change in scratch_len means the getrow routine  */
   /* reallocated, which this function cannot handle for ovec.        */
   for (row = 0; row < result->getrow->Nrows; row++) {
      ML_get_matrix_row(result, 1, &row, &scratch_len, &col_scratch, &ovec,
                        &row_len, row);
      if (scratch_len != result->getrow->Nrows + 1) {
         printf("memory problems ... we can't reallocate here\n");
      }
   }

   ML_free(col_scratch);

   if (*Novec != result->getrow->Nrows) {
     printf("Warning: The length of ML's output vector does not agree with\n");
     printf("         the user's length for the output vector (%d vs. %d).\n",
            *Novec, result->getrow->Nrows);
     printf("         indicate a problem.\n");
   }
   *Novec = result->getrow->Nrows;

   /* Before destroying an exchanged operator, detach the operator it was */
   /* built from (found by walking the sub_matrix chain) so that the      */
   /* recursive destroy does not reach it; it is destroyed separately.    */
   if (matrix->getrow->pre_comm != NULL) {
      for (link = vec_ghosted;
           (link != NULL) && (link->sub_matrix != vec_op);
           link = link->sub_matrix) {
         /* walk only */
      }
      if (link != NULL) link->sub_matrix = NULL;
      ML_RECUR_CSR_MSRdata_Destroy(vec_ghosted);
      ML_Operator_Destroy(&vec_ghosted);
   }
   if (matrix->getrow->post_comm != NULL) {
      for (link = result;
           (link != NULL) && (link->sub_matrix != product);
           link = link->sub_matrix) {
         /* walk only */
      }
      if (link != NULL) link->sub_matrix = NULL;
      ML_RECUR_CSR_MSRdata_Destroy(result);
      ML_Operator_Destroy(&result);
   }

   ML_Operator_Destroy(&vec_op);
   ML_RECUR_CSR_MSRdata_Destroy(product);
   ML_Operator_Destroy(&product);
}
Ejemplo n.º 5
0
/*
 * Example driver: sets up an edge (Maxwell) problem and an auxiliary nodal
 * problem, builds a multigrid hierarchy with
 * ML_Gen_MGHierarchy_UsingReitzinger, installs Hiptmair smoothers on every
 * level but the coarsest, and solves with either Aztec (when AZTEC is
 * defined) or ML_Iterate.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
  int    Nnodes=16*16;              /* Total number of nodes in the problem.*/
                                    /* 'Nnodes' must be a perfect square.   */
  int    MaxMgLevels=6;             /* Maximum number of Multigrid Levels   */
  int    Nits_per_presmooth=1;      /* # of pre & post smoothings per level */
  double tolerance = 1.0e-8;        /* At convergence:                      */
                                    /*   ||r_k||_2 < tolerance ||r_0||_2    */
  int smoothPe_flag = ML_YES;       /* ML_YES: smooth tentative prolongator */
                                    /* ML_NO: don't smooth prolongator      */

  /***************************************************************************/
  /* Select Hiptmair relaxation subsmoothers for the nodal and edge problems */
  /* Choices include                                                         */
  /*   1) ML_Gen_Smoother_SymGaussSeidel: this corresponds to a processor    */
  /*      local version of symmetric Gauss-Seidel/SOR. The number of sweeps  */
  /*      can be set via either 'edge_its' or 'nodal_its'. The damping can   */
  /*      be set via 'edge_omega' or 'nodal_omega'. When set to ML_DDEFAULT, */
  /*      the damping is set to '1' on one processor. On multiple processors */
  /*      a lower damping value is set. This is needed to converge processor */
  /*      local SOR.                                                         */
  /*   2) ML_Gen_Smoother_Cheby: this corresponds to polynomial relaxation.  */
  /*      The degree of the polynomial is set via 'edge_its' or 'nodal_its'. */
  /*      If the degree is '-1', Marian Brezina's MLS polynomial is chosen.  */
  /*      Otherwise, a Chebyshev polynomial is used over high frequencies    */
  /*      [ lambda_max/alpha , lambda_max]. Lambda_max is computed. 'alpha'  */
  /*      is hardwired in this example to correspond to twice the ratio of   */
  /*      unknowns in the fine and coarse meshes.                            */
  /*                                                                         */
  /* Using 'hiptmair_type' (see comments below) it is also possible to choose*/
  /* when edge and nodal problems are relaxed within the Hiptmair smoother.  */
  /***************************************************************************/

  void  *edge_smoother=(void *)     /* Edge relaxation:                     */
               ML_Gen_Smoother_Cheby; /*   ML_Gen_Smoother_Cheby            */
                                    /*     ML_Gen_Smoother_SymGaussSeidel   */
  void *nodal_smoother=(void *)     /* Nodal relaxation                     */
               ML_Gen_Smoother_Cheby;/*     ML_Gen_Smoother_Cheby           */
                                    /*     ML_Gen_Smoother_SymGaussSeidel   */

  int  edge_its = 3;                /* Iterations or polynomial degree for  */
  int  nodal_its = 3;               /* edge/nodal subsmoothers.             */
  double nodal_omega = ML_DDEFAULT, /* SOR damping parameter for nodal/edge */
         edge_omega  = ML_DDEFAULT; /* subsmoothers (see comments above).   */
  int   hiptmair_type=HALF_HIPTMAIR;/* FULL_HIPTMAIR: each invocation       */
                                    /*     smoothes on edges, then nodes,   */
                                    /*     and then once again on edges.    */
                                    /* HALF_HIPTMAIR: each pre-invocation   */
                                    /*     smoothes on edges, then nodes.   */
                                    /*     Each post-invocation smoothes    */
                                    /*     on nodes then edges.             */


  ML_Operator  *Tmat, *Tmat_trans, **Tmat_array, **Tmat_trans_array;
  ML           *ml_edges, *ml_nodes;
  ML_Aggregate *ag;
  int          Nfine_edge, Ncoarse_edge, Nfine_node, Ncoarse_node, Nlevels;
  int          level, coarsest_level;
  double       edge_coarsening_rate, node_coarsening_rate, *rhs, *xxx;
  /* Start as NULL so the cleanup can tell whether a list was ever created. */
  void         **edge_args = NULL, **nodal_args = NULL;
  struct       user_partition Edge_Partition = {NULL, NULL,0,0}, 
                              Node_Partition = {NULL, NULL,0,0};
  struct Tmat_data Tmat_data;
  int i, Ntotal;
  ML_Comm *comm;

  /* See Aztec User's Guide for information on these variables */

#ifdef AZTEC
  AZ_MATRIX    *Ke_mat, *Kn_mat;
  AZ_PRECOND   *Pmat = NULL;
  int          proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];
  double       params[AZ_PARAMS_SIZE], status[AZ_STATUS_SIZE];
#endif


  /* get processor information (proc id & # of procs) and set ML's printlevel. */

#ifdef ML_MPI
  MPI_Init(&argc,&argv);
#endif
#ifdef AZTEC
  AZ_set_proc_config(proc_config, COMMUNICATOR);
#endif
  ML_Set_PrintLevel(10);   /* set ML's output level: 0 gives least output */

  /* Set the # of global nodes/edges and partition both the edges and the */
  /* nodes over the processors. NOTE: I believe we assume that if an edge */
  /* is assigned to a processor at least one of its nodes must be also    */
  /* assigned to that processor.                                          */

  Node_Partition.Nglobal = Nnodes;
  Edge_Partition.Nglobal = Node_Partition.Nglobal*2;
  Node_Partition.type = NODE;
  Edge_Partition.type = EDGE;
  /* NOTE(review): the macro defined here is 'perxodic', not 'periodic', so */
  /* the #ifdef block below is compiled out unless 'periodic' is defined    */
  /* elsewhere. This looks like an ad-hoc on/off toggle -- confirm.         */
#define perxodic
#ifdef periodic
  Node_Partition.Nglobal += 2; 
#endif
  partition_edges(&Edge_Partition);
  partition_nodes(&Node_Partition);

  /* Scratch vectors: xxx is filled with the local global ids (padding set */
  /* to -1) and pushed through update_ghost_edges. Both buffers are        */
  /* released again before the solver vectors are allocated further down.  */
  xxx = (double *) ML_allocate((Edge_Partition.Nlocal+100)*sizeof(double)); 
  rhs = (double *) ML_allocate((Edge_Partition.Nlocal+100)*sizeof(double)); 
  for (i = 0; i < Edge_Partition.Nlocal + 100; i++) xxx[i] = -1.;
  for (i = 0; i < Edge_Partition.Nlocal; i++) xxx[i] = (double) 
        Edge_Partition.my_global_ids[i];

  update_ghost_edges(xxx, (void *) &Edge_Partition);


  /* Create an empty multigrid hierarchy and set the 'MaxMGLevels-1'th   */
  /* level discretization within this hierarchy to the ML matrix         */
  /* representing Ke (Maxwell edge discretization).                      */

  ML_Create(&ml_edges, MaxMgLevels);
#ifdef AZTEC
  /* Build Ke as an Aztec matrix. Use built-in function AZ_ML_Set_Amat() */
  /* to convert to an ML matrix and put in hierarchy.                    */

  Ke_mat = user_Ke_build(&Edge_Partition);
  AZ_ML_Set_Amat(ml_edges, MaxMgLevels-1, Edge_Partition.Nlocal,
                 Edge_Partition.Nlocal, Ke_mat, proc_config);
#else
  /* Build Ke directly as an ML matrix.                                  */

  ML_Init_Amatrix      (ml_edges, MaxMgLevels-1, Edge_Partition.Nlocal,
                        Edge_Partition.Nlocal, &Edge_Partition);

  Ntotal = Edge_Partition.Nlocal;
  if (Edge_Partition.nprocs == 2) Ntotal += Edge_Partition.Nghost;
  ML_Set_Amatrix_Getrow(ml_edges, MaxMgLevels-1,  Ke_getrow, update_ghost_edges, Ntotal);
  ML_Set_Amatrix_Matvec(ml_edges, MaxMgLevels-1,  Ke_matvec);

#endif



  /* Build an Aztec matrix representing an auxiliary nodal PDE problem.  */
  /* This should be a variable coefficient Poisson problem (with unknowns*/
  /* at the nodes). The coefficients should be chosen to reflect the     */
  /* conductivity of the original edge problems.                         */
  /* Create an empty multigrid hierarchy. Convert the Aztec matrix to an */
  /* ML matrix and put it in the 'MaxMGLevels-1' level of the hierarchy. */
  /* Note it is possible to multiply T'*T to get this matrix though this */
  /* will not incorporate material properties.                           */

  ML_Create(&ml_nodes, MaxMgLevels);

#ifdef AZTEC
  Kn_mat = user_Kn_build( &Node_Partition);
  AZ_ML_Set_Amat(ml_nodes, MaxMgLevels-1, Node_Partition.Nlocal, 
                 Node_Partition.Nlocal, Kn_mat, proc_config);
#else
  ML_Init_Amatrix      (ml_nodes, MaxMgLevels-1 , Node_Partition.Nlocal,
                        Node_Partition.Nlocal, &Node_Partition);
  Ntotal = Node_Partition.Nlocal;
  if (Node_Partition.nprocs == 2) Ntotal += Node_Partition.Nghost;
  ML_Set_Amatrix_Getrow(ml_nodes, MaxMgLevels-1,  Kn_getrow, update_ghost_nodes, Ntotal);
#endif

  /* Build an ML matrix representing the null space of the PDE problem. */
  /* This should be a discrete gradient (nodes to edges).               */

#ifdef AZTEC
  Tmat = user_T_build (&Edge_Partition, &Node_Partition, 
                       &(ml_nodes->Amat[MaxMgLevels-1]));
#else
  Tmat = ML_Operator_Create(ml_nodes->comm);
  Tmat_data.edge = &Edge_Partition;
  Tmat_data.node = &Node_Partition;
  Tmat_data.Kn   = &(ml_nodes->Amat[MaxMgLevels-1]);

  ML_Operator_Set_ApplyFuncData( Tmat, Node_Partition.Nlocal,
                                 Edge_Partition.Nlocal, ML_EMPTY, (void *) &Tmat_data, 
                                 Edge_Partition.Nlocal, NULL, 0);
  ML_Operator_Set_Getrow( Tmat, ML_INTERNAL, Edge_Partition.Nlocal,Tmat_getrow);
  ML_Operator_Set_ApplyFunc(Tmat, ML_INTERNAL, Tmat_matvec);

  /* NOTE(review): 'comm' is created here and is not destroyed anywhere in */
  /* this function.                                                        */
  ML_Comm_Create( &comm);

  ML_CommInfoOP_Generate( &(Tmat->getrow->pre_comm), update_ghost_nodes, 
                          &Node_Partition,comm, Tmat->invec_leng, 
                          Node_Partition.Nghost);
#endif


  /********************************************************************/
  /* Set some ML parameters.                                          */
  /*------------------------------------------------------------------*/

  ML_Set_ResidualOutputFrequency(ml_edges, 1);
  ML_Set_Tolerance(ml_edges, tolerance);
  ML_Aggregate_Create( &ag );
  ML_Aggregate_Set_CoarsenScheme_Uncoupled(ag);
  ML_Aggregate_Set_DampingFactor(ag, 0.0); /* must use 0 for maxwell */
  ML_Aggregate_Set_MaxCoarseSize(ag, 30);
  ML_Aggregate_Set_Threshold(ag, 0.0);


  /********************************************************************/
  /*                      Set up Tmat_trans                           */
  /*------------------------------------------------------------------*/

  Tmat_trans = ML_Operator_Create(ml_edges->comm);
  ML_Operator_Transpose_byrow(Tmat, Tmat_trans);


  Nlevels=ML_Gen_MGHierarchy_UsingReitzinger(ml_edges, &ml_nodes,MaxMgLevels-1,
                                             ML_DECREASING,ag,Tmat,Tmat_trans, 
                                             &Tmat_array,&Tmat_trans_array, 
                                             smoothPe_flag, 1.5);

  /* Set the Hiptmair subsmoothers */

  if (nodal_smoother == (void *) ML_Gen_Smoother_SymGaussSeidel) {
    nodal_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(nodal_args, 0, &nodal_its);
    ML_Smoother_Arglist_Set(nodal_args, 1, &nodal_omega);
  }
  if (edge_smoother == (void *) ML_Gen_Smoother_SymGaussSeidel) {
    edge_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(edge_args, 0, &edge_its);
    ML_Smoother_Arglist_Set(edge_args, 1, &edge_omega);
  }
  if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) {
    nodal_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(nodal_args, 0, &nodal_its);
    Nfine_node = Tmat_array[MaxMgLevels-1]->invec_leng;
    Nfine_node = ML_gsum_int(Nfine_node, ml_edges->comm);
  }
  if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) {
    edge_args = ML_Smoother_Arglist_Create(2);
    ML_Smoother_Arglist_Set(edge_args, 0, &edge_its);
    Nfine_edge = Tmat_array[MaxMgLevels-1]->outvec_leng;
    Nfine_edge = ML_gsum_int(Nfine_edge, ml_edges->comm);
  }

  /****************************************************
  * Set up smoothers for all levels but the coarsest. *
  ****************************************************/
  coarsest_level = MaxMgLevels - Nlevels;

  for (level = MaxMgLevels-1; level > coarsest_level; level--)
    {
      /* For Chebyshev, argument 1 is twice the global fine/coarse size */
      /* ratio between this level and the next coarser one.             */
      if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) {
        Ncoarse_edge = Tmat_array[level-1]->outvec_leng;
        Ncoarse_edge = ML_gsum_int(Ncoarse_edge, ml_edges->comm);
        edge_coarsening_rate =  2.*((double) Nfine_edge)/ ((double) Ncoarse_edge);
        ML_Smoother_Arglist_Set(edge_args, 1, &edge_coarsening_rate);
        Nfine_edge = Ncoarse_edge;
      }
      if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) {
        Ncoarse_node = Tmat_array[level-1]->invec_leng;
        Ncoarse_node = ML_gsum_int(Ncoarse_node, ml_edges->comm);
        node_coarsening_rate =  2.*((double) Nfine_node)/ ((double) Ncoarse_node);
        ML_Smoother_Arglist_Set(nodal_args, 1, &node_coarsening_rate);
        Nfine_node = Ncoarse_node;
      }
      ML_Gen_Smoother_Hiptmair(ml_edges, level, ML_BOTH, Nits_per_presmooth,
                               Tmat_array, Tmat_trans_array, NULL, edge_smoother,
                               edge_args, nodal_smoother,nodal_args, hiptmair_type);
    }

  /*******************************************
  * Set up coarsest level smoother
  *******************************************/

  if (edge_smoother == (void *) ML_Gen_Smoother_Cheby) {
    edge_coarsening_rate = (double) Nfine_edge;
    ML_Smoother_Arglist_Set(edge_args, 1, &edge_coarsening_rate);
  }
  if (nodal_smoother == (void *) ML_Gen_Smoother_Cheby) {
    node_coarsening_rate = (double) Nfine_node;
    ML_Smoother_Arglist_Set(nodal_args,1,&node_coarsening_rate);
  }
  ML_Gen_CoarseSolverSuperLU( ml_edges, coarsest_level);


  /* Must be called before invoking the preconditioner */
  ML_Gen_Solver(ml_edges, ML_MGV, MaxMgLevels-1, coarsest_level); 



  /* Set the initial guess and the right hand side. Invoke solver */

  /* Release the scratch buffers allocated near the top first; without */
  /* this the pointers would be overwritten and the buffers leaked.    */
  ML_free(xxx);
  ML_free(rhs);

  xxx = (double *) ML_allocate(Edge_Partition.Nlocal*sizeof(double)); 
  ML_random_vec(xxx, Edge_Partition.Nlocal, ml_edges->comm);
  rhs = (double *) ML_allocate(Edge_Partition.Nlocal*sizeof(double)); 
  ML_random_vec(rhs, Edge_Partition.Nlocal, ml_edges->comm);

#ifdef AZTEC
  /* Choose the Aztec solver and criteria. Also tell Aztec that */
  /* ML will be supplying the preconditioner.                   */

  AZ_defaults(options, params);
  options[AZ_solver]   = AZ_gmres;
  options[AZ_kspace]   = 80;
  params[AZ_tol]       = tolerance;
  AZ_set_ML_preconditioner(&Pmat, Ke_mat, ml_edges, options); 
  options[AZ_conv] = AZ_noscaled;
  AZ_iterate(xxx, rhs, options, params, status, proc_config, Ke_mat, Pmat, NULL);
#else
  ML_Iterate(ml_edges, xxx, rhs);
#endif


  /* clean up. */

  /* Only delete argument lists that were actually created. */
  if (nodal_args != NULL) ML_Smoother_Arglist_Delete(&nodal_args);
  if (edge_args  != NULL) ML_Smoother_Arglist_Delete(&edge_args);
  ML_Aggregate_Destroy(&ag);
  ML_Destroy(&ml_edges);
  ML_Destroy(&ml_nodes);
#ifdef AZTEC
  /* Check Ke_mat before touching its members. */
  if (Ke_mat  != NULL) {
    AZ_free((void *) Ke_mat->data_org);
    AZ_free((void *) Ke_mat->val);
    AZ_free((void *) Ke_mat->bindx);
    AZ_matrix_destroy(&Ke_mat);
  }
  if (Pmat  != NULL) AZ_precond_destroy(&Pmat);
  if (Kn_mat != NULL) AZ_matrix_destroy(&Kn_mat);
#endif
  /* Buffers came from ML_allocate, so release them with ML_free. */
  ML_free(xxx);
  ML_free(rhs);
  ML_Operator_Destroy(&Tmat);
  ML_Operator_Destroy(&Tmat_trans);
  ML_MGHierarchy_ReitzingerDestroy(MaxMgLevels-2, &Tmat_array, &Tmat_trans_array);

#ifdef ML_MPI
  MPI_Finalize();
#endif

  return 0;

}
Ejemplo n.º 6
0
int ML_Aggregate_CoarsenUser(ML_Aggregate *ml_ag, ML_Operator *Amatrix, 
			      ML_Operator **Pmatrix, ML_Comm *comm)
{
  unsigned int nbytes, length;
  int     i, j,  k, Nrows, exp_Nrows;
  int     diff_level;
  int     aggr_count, index, mypid, num_PDE_eqns;
  int     *aggr_index = NULL, nullspace_dim;
  int     Ncoarse, count;
  int     *new_ia = NULL, *new_ja = NULL, new_Nrows;
  int     exp_Ncoarse;
  int     *aggr_cnt_array = NULL;
  int     level, index3, max_agg_size;
  int     **rows_in_aggs = NULL, lwork, info;
  double  *new_val = NULL, epsilon;
  double  *nullspace_vect = NULL, *qr_tmp = NULL;
  double  *tmp_vect = NULL, *work = NULL, *new_null = NULL;
  ML_SuperNode          *aggr_head = NULL, *aggr_curr, *supernode;
  struct ML_CSR_MSRdata *csr_data;
  int                   total_nz = 0;
  char str[80];

  int * graph_decomposition = NULL;
  ML_Aggregate_Viz_Stats * aggr_viz_and_stats;
  ML_Aggregate_Viz_Stats * grid_info;
  int Nprocs;
  char * unamalg_bdry = NULL;
  char* label;
  int N_dimensions;
  double* x_coord = NULL;
  double* y_coord = NULL;
  double* z_coord = NULL;

  /* ------------------- execution begins --------------------------------- */

  label =  ML_GetUserLabel();
  sprintf(str, "%s (level %d) :", label, ml_ag->cur_level);

  /* ============================================================= */
  /* get the machine information and matrix references             */
  /* ============================================================= */

  mypid                   = comm->ML_mypid;
  Nprocs                  = comm->ML_nprocs;
  epsilon                 = ml_ag->threshold;
  num_PDE_eqns            = ml_ag->num_PDE_eqns;
  nullspace_dim           = ml_ag->nullspace_dim;
  nullspace_vect          = ml_ag->nullspace_vect;
  Nrows                   = Amatrix->outvec_leng;

  if (mypid == 0 && 5 < ML_Get_PrintLevel()) {
    printf("%s num PDE eqns = %d\n",
           str,
           num_PDE_eqns);
  }

  /* ============================================================= */
  /* check the system size versus null dimension size              */
  /* ============================================================= */

  if ( Nrows % num_PDE_eqns != 0 )
  {
    printf("ML_Aggregate_CoarsenUser ERROR : Nrows must be multiples");
    printf(" of num_PDE_eqns.\n");
    exit(EXIT_FAILURE);
  }
  diff_level = ml_ag->max_levels - ml_ag->cur_level - 1;
  if ( diff_level > 0 ) num_PDE_eqns = nullspace_dim; /* ## 12/20/99 */

  /* ============================================================= */
  /* set up the threshold for weight-based coarsening              */
  /* ============================================================= */

  diff_level = ml_ag->begin_level - ml_ag->cur_level;
  if (diff_level == 0) 
    ml_ag->curr_threshold = ml_ag->threshold;
  epsilon = ml_ag->curr_threshold;
  ml_ag->curr_threshold *= 0.5;

  if (mypid == 0 && 7 < ML_Get_PrintLevel())
    printf("%s current eps = %e\n", str, epsilon);

  epsilon = epsilon * epsilon;

  ML_Operator_AmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);
  Nrows /= num_PDE_eqns;

  exp_Nrows = Nrows;

  /* ********************************************************************** */
  /* allocate memory for aggr_index, which will contain the decomposition   */
  /* ********************************************************************** */

  nbytes = (Nrows*num_PDE_eqns) * sizeof(int);

  if ( nbytes > 0 ) {
    ML_memory_alloc((void**) &aggr_index, nbytes, "ACJ");
    if( aggr_index == NULL ) {
      fprintf( stderr,
              "*ML*ERR* not enough memory for %d bytes\n"
              "*ML*ERR* (file %s, line %d)\n",
              nbytes,
              __FILE__,
              __LINE__ );
      exit( EXIT_FAILURE );
    }
  }
  else              aggr_index = NULL;

  for( i=0 ; i<Nrows*num_PDE_eqns ; i++ ) aggr_index[i] = -1;

  unamalg_bdry = (char *) ML_allocate( sizeof(char) * (Nrows+1) );

  if( unamalg_bdry == NULL ) {
    fprintf( stderr,
            "*ML*ERR* on proc %d, not enough space for %d bytes\n"
            "*ML*ERR* (file %s, line %d)\n",
            mypid,
            (int)sizeof(char) * Nrows,
            __FILE__,
            __LINE__ );
    exit( EXIT_FAILURE );
  }

  N_dimensions = ml_ag->N_dimensions;
  grid_info = (ML_Aggregate_Viz_Stats*) Amatrix->to->Grid->Grid;
  x_coord = grid_info->x;

  if (N_dimensions > 1 && x_coord)
    y_coord = grid_info->y;
  else
    y_coord = 0;
  if (N_dimensions > 2 && x_coord)
    z_coord = grid_info->z;
  else
    z_coord = 0;

  aggr_count = ML_GetUserPartitions(Amatrix,unamalg_bdry,
                                    epsilon,
                                    x_coord,y_coord,z_coord,
                                    aggr_index,&total_nz);

#ifdef ML_MPI
  MPI_Allreduce( &Nrows, &i, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
  MPI_Allreduce( &aggr_count, &j, 1, MPI_INT, MPI_SUM, Amatrix->comm->USR_comm );
#else
  i = Nrows;
  j = aggr_count;
#endif

  if( mypid == 0 && 7 < ML_Get_PrintLevel() ) {
    printf("%s Using %d (block) aggregates (globally)\n",
           str,
           j );
    printf("%s # (block) aggre/ # (block) rows = %8.5f %% ( = %d / %d)\n",
           str,
           100.0*j/i,
           j, i);
  }

  j = ML_gsum_int( aggr_count, comm );
  if (mypid == 0 && 7 < ML_Get_PrintLevel())  {
    printf("%s %d (block) aggregates (globally)\n",
           str, j );
  }   

  /* ********************************************************************** */
  /* I allocate room to copy aggr_index and pass this value to the user,    */
  /* who will be able to analyze and visualize this after the construction  */
  /* of the levels. This way, the only price we have to pay for stats and   */
  /* viz is essentially a little bit of memory.                             */
  /* this memory will be cleaned with the object ML_Aggregate ml_ag.        */
  /* I set the pointers using the ML_Aggregate_Info structure. This is      */
  /* allocated using ML_Aggregate_Info_Setup(ml,MaxNumLevels)               */
  /* ********************************************************************** */

  if (Amatrix->to->Grid->Grid != NULL) {

    graph_decomposition = (int *)ML_allocate(sizeof(int)*(Nrows+1));
    if( graph_decomposition == NULL ) {
      fprintf( stderr,
              "*ML*ERR* Not enough memory for %d bytes\n"
              "*ML*ERR* (file %s, line %d)\n",
              (int)sizeof(int)*Nrows,
              __FILE__,
              __LINE__ );
      exit( EXIT_FAILURE );
    }

    for( i=0 ; i<Nrows ; i++ ) graph_decomposition[i] = aggr_index[i];

    aggr_viz_and_stats = (ML_Aggregate_Viz_Stats *) (Amatrix->to->Grid->Grid);
    aggr_viz_and_stats->graph_decomposition = graph_decomposition;
    aggr_viz_and_stats->Nlocal = Nrows;
    aggr_viz_and_stats->Naggregates = aggr_count;
    aggr_viz_and_stats->local_or_global = ML_LOCAL_INDICES;
    aggr_viz_and_stats->is_filled = ML_YES;
    aggr_viz_and_stats->Amatrix = Amatrix;
  }

  /* ********************************************************************** */
  /* take the decomposition as created by METIS and form the aggregates     */
  /* ********************************************************************** */

  total_nz = ML_Comm_GsumInt( comm, total_nz);
  i = ML_Comm_GsumInt( comm, Nrows);

  if ( mypid == 0 && 7 < ML_Get_PrintLevel())
    printf("%s Total (block) nnz = %d ( = %5.2f/(block)row)\n",
           str,
           total_nz,1.0*total_nz/i);

  if ( ml_ag->operator_complexity == 0.0 ) {
    ml_ag->fine_complexity = total_nz;
    ml_ag->operator_complexity = total_nz;
  }
  else ml_ag->operator_complexity += total_nz;

  /* fix aggr_index for num_PDE_eqns > 1 */

  for (i = Nrows - 1; i >= 0; i-- ) {
    for (j = num_PDE_eqns-1; j >= 0; j--) {
      aggr_index[i*num_PDE_eqns+j] = aggr_index[i];
    }
  }

  if ( mypid == 0 && 8 < ML_Get_PrintLevel())
  {
    printf("Calling ML_Operator_UnAmalgamateAndDropWeak\n");
    fflush(stdout);
  }

  ML_Operator_UnAmalgamateAndDropWeak(Amatrix, num_PDE_eqns, epsilon);

  Nrows      *= num_PDE_eqns;
  exp_Nrows  *= num_PDE_eqns;

  /* count the size of each aggregate */

  aggr_cnt_array = (int *) ML_allocate(sizeof(int)*(aggr_count+1));
  for (i = 0; i < aggr_count ; i++) aggr_cnt_array[i] = 0;
  for (i = 0; i < exp_Nrows; i++) {
    if (aggr_index[i] >= 0) {
      if( aggr_index[i] >= aggr_count ) {
        fprintf( stderr,
                "*ML*WRN* on process %d, something weird happened...\n"
                "*ML*WRN* node %d belong to aggregate %d (#aggr = %d)\n"
                "*ML*WRN* (file %s, line %d)\n",
                comm->ML_mypid,
                i,
                aggr_index[i],
                aggr_count,
                __FILE__,
                __LINE__ );
      } else {
        aggr_cnt_array[aggr_index[i]]++;
      }
    }
  }

  /* ============================================================= */
  /* Form tentative prolongator                                    */
  /* ============================================================= */

  Ncoarse = aggr_count;

  /* ============================================================= */
  /* check and copy aggr_index                                     */
  /* ------------------------------------------------------------- */

  level = ml_ag->cur_level;
  nbytes = (Nrows+1) * sizeof( int );
  ML_memory_alloc((void**) &(ml_ag->aggr_info[level]), nbytes, "AGl");
  count = aggr_count;
  for ( i = 0; i < Nrows; i+=num_PDE_eqns ) 
  {
    if ( aggr_index[i] >= 0 )
    {
      for ( j = 0; j < num_PDE_eqns; j++ ) 
        ml_ag->aggr_info[level][i+j] = aggr_index[i];
      if (aggr_index[i] >= count) count = aggr_index[i] + 1;
    }
    /*else
     *{
     *   printf("%d : CoarsenMIS error : aggr_index[%d] < 0\n",
     *          mypid,i);
     *   exit(1);
     *}*/
  }
  ml_ag->aggr_count[level] = count; /* for relaxing boundary points */ 

  /* ============================================================= */
  /* set up the new operator                                       */
  /* ------------------------------------------------------------- */

  new_Nrows = Nrows;
  exp_Ncoarse = Nrows;

  for ( i = 0; i < new_Nrows; i++ ) 
  {
    if ( aggr_index[i] >= exp_Ncoarse ) 
    {
      printf("*ML*WRN* index out of bound %d = %d(%d)\n",
             i, aggr_index[i], 
             exp_Ncoarse);
    }
  }
  nbytes = ( new_Nrows+1 ) * sizeof(int); 
  ML_memory_alloc((void**)&(new_ia), nbytes, "AIA");
  nbytes = ( new_Nrows+1)  * nullspace_dim * sizeof(int); 
  ML_memory_alloc((void**)&(new_ja), nbytes, "AJA");
  nbytes = ( new_Nrows+1)  * nullspace_dim * sizeof(double); 
  ML_memory_alloc((void**)&(new_val), nbytes, "AVA");
  for ( i = 0; i < new_Nrows*nullspace_dim; i++ ) new_val[i] = 0.0;

  /* ------------------------------------------------------------- */
  /* set up the space for storing the new null space               */
  /* ------------------------------------------------------------- */

  nbytes = (Ncoarse+1) * nullspace_dim * nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&(new_null),nbytes,"AGr");
  if( new_null == NULL ) {
    fprintf( stderr,
            "*ML*ERR* on process %d, not enough memory for %d bytes\n"
            "*ML*ERR* (file %s, line %d)\n",
            mypid,
            nbytes,
            __FILE__,
            __LINE__ );
    exit( EXIT_FAILURE );
  }

  for (i = 0; i < Ncoarse*nullspace_dim*nullspace_dim; i++) 
    new_null[i] = 0.0;

  /* ------------------------------------------------------------- */
  /* initialize the row pointer for the CSR prolongation operator  */
  /* (each row will have at most nullspace_dim nonzero entries)    */
  /* ------------------------------------------------------------- */

  for (i = 0; i <= Nrows; i++) new_ia[i] = i * nullspace_dim;

  /* trying this when a Dirichlet row is taken out */
  j = 0;
  new_ia[0] = 0;
  for (i = 0; i < Nrows; i++) {
    if (aggr_index[i] != -1) j += nullspace_dim;
    new_ia[i+1] = j;
  }

  /* ------------------------------------------------------------- */
  /* generate an array to store which aggregate has which rows.Then*/
  /* loop through the rows of A checking which aggregate each row  */
  /* is in, and adding it to the appropriate spot in rows_in_aggs  */
  /* ------------------------------------------------------------- */

  ML_memory_alloc((void**)&rows_in_aggs,aggr_count*sizeof(int*),"MLs");
  for (i = 0; i < aggr_count; i++) {
    nbytes = aggr_cnt_array[i]+1;
    rows_in_aggs[i] = (int *) ML_allocate(nbytes*sizeof(int));
    aggr_cnt_array[i] = 0;
    if (rows_in_aggs[i] == NULL)  {
      printf("*ML*ERR* couldn't allocate memory in CoarsenMETIS\n");
      exit(1);
    }
  }
  for (i = 0; i < exp_Nrows; i+=num_PDE_eqns) {
    if ( aggr_index[i] >= 0 && aggr_index[i] < aggr_count)
    {
      for (j = 0; j < num_PDE_eqns; j++)
      {
        index = aggr_cnt_array[aggr_index[i]]++; 
        rows_in_aggs[aggr_index[i]][index] = i + j;
      }
    }
  }

  /* ------------------------------------------------------------- */
  /* allocate work arrays for QR factorization                     */
  /* work and lwork are needed for lapack's QR routine.  These     */
  /* settings seemed easiest since I don't quite understand        */
  /* what they do, but may want to do something better here later  */
  /* ------------------------------------------------------------- */

  max_agg_size = 0;
  for (i = 0; i < aggr_count; i++) 
  {
    if (aggr_cnt_array[i] > max_agg_size) max_agg_size = aggr_cnt_array[i];
  }
  nbytes = max_agg_size * nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&qr_tmp, nbytes, "AGu");
  nbytes = nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&tmp_vect, nbytes, "AGv");

  lwork  = nullspace_dim;
  nbytes = nullspace_dim * sizeof(double);
  ML_memory_alloc((void**)&work, nbytes, "AGw");

  /* ------------------------------------------------------------- */
  /* perform block QR decomposition                                */
  /* ------------------------------------------------------------- */

  for (i = 0; i < aggr_count; i++) 
  {
    /* ---------------------------------------------------------- */
    /* set up the matrix we want to decompose into Q and R:       */
    /* ---------------------------------------------------------- */

    length = aggr_cnt_array[i];
    if (nullspace_vect == NULL) 
    {
      for (j = 0; j < (int) length; j++)
      {
        index = rows_in_aggs[i][j];

        for (k = 0; k < nullspace_dim; k++)
        {
          if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
            qr_tmp[k*length+j] = 0.;
          else
          {
            if (index % num_PDE_eqns == k) qr_tmp[k*length+j] = 1.0;
            else                           qr_tmp[k*length+j] = 0.0;
          }
        }
      }
    }
    else 
    {
      for (k = 0; k < nullspace_dim; k++)
      {
        for (j = 0; j < (int) length; j++)
        {
          index = rows_in_aggs[i][j];
          if ( unamalg_bdry[index/num_PDE_eqns] == 'T')
            qr_tmp[k*length+j] = 0.;
          else {
            if (index < Nrows) {
              qr_tmp[k*length+j] = nullspace_vect[k*Nrows+index];
            }
            else {
              fprintf( stderr,
                      "*ML*ERR* in QR\n"
                      "*ML*ERR* (file %s, line %d)\n",
                      __FILE__,
                      __LINE__ );
              exit( EXIT_FAILURE );
            }
          }
        }
      }
    }

    /* ---------------------------------------------------------- */
    /* now calculate QR using an LAPACK routine                   */
    /* ---------------------------------------------------------- */

    if (aggr_cnt_array[i] >= nullspace_dim) {

      DGEQRF_F77(&(aggr_cnt_array[i]), &nullspace_dim, qr_tmp, 
                 &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
      if (info != 0)
        pr_error("ErrOr in CoarsenMIS : dgeqrf returned a non-zero %d %d\n",
                 aggr_cnt_array[i],i);

      if (work[0] > lwork) 
      {
        lwork=(int) work[0]; 
        ML_memory_free((void**) &work);
        ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGx");
      }
      else lwork=(int) work[0];

      /* ---------------------------------------------------------- */
      /* the upper triangle of qr_tmp is now R, so copy that into   */
      /* the new nullspace                                          */
      /* ---------------------------------------------------------- */

      for (j = 0; j < nullspace_dim; j++)
        for (k = j; k < nullspace_dim; k++)
          new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] = 
            qr_tmp[j+aggr_cnt_array[i]*k];

      /* ---------------------------------------------------------- */
      /* to get this block of P, need to run qr_tmp through another */
      /* LAPACK function:                                           */
      /* ---------------------------------------------------------- */

      if ( aggr_cnt_array[i] < nullspace_dim ){
        printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
               nullspace_dim);
        printf("ERROR : performing QR on a MxN matrix where M<N.\n");
      }
      DORGQR_F77(&(aggr_cnt_array[i]), &nullspace_dim, &nullspace_dim, 
                 qr_tmp, &(aggr_cnt_array[i]), tmp_vect, work, &lwork, &info);
      if (info != 0) {
        printf("Error in dorgqr on %d row (dims are %d, %d)\n",i,aggr_cnt_array[i],
               nullspace_dim);
        pr_error("Error in CoarsenMIS: dorgqr returned a non-zero\n");
      }

      if (work[0] > lwork) 
      {
        lwork=(int) work[0]; 
        ML_memory_free((void**) &work);
        ML_memory_alloc((void**) &work, sizeof(double)*lwork, "AGy");
      }
      else lwork=(int) work[0];

      /* ---------------------------------------------------------- */
      /* now copy Q over into the appropriate part of P:            */
      /* The rows of P get calculated out of order, so I assume the */
      /* Q is totally dense and use what I know of how big each Q   */
      /* will be to determine where in ia, ja, etc each nonzero in  */
      /* Q belongs.  If I did not assume this, I would have to keep */
      /* all of P in memory in order to determine where each entry  */
      /* should go                                                  */
      /* ---------------------------------------------------------- */

      for (j = 0; j < aggr_cnt_array[i]; j++)
      {
        index = rows_in_aggs[i][j];

        if ( index < Nrows )
        {
          index3 = new_ia[index];
          for (k = 0; k < nullspace_dim; k++) 
          {
            new_ja [index3+k] = i * nullspace_dim + k;
            new_val[index3+k] = qr_tmp[ k*aggr_cnt_array[i]+j];
          }
        }
        else 
        {
          fprintf( stderr,
                  "*ML*ERR* in QR: index out of bounds (%d - %d)\n",
                  index,
                  Nrows );
        }
      }
    }
    else {
      /* We have a small aggregate such that the QR factorization can not */
      /* be performed. Instead let us copy the null space from the fine   */
      /* into the coarse grid nullspace and put the identity for the      */
      /* prolongator????                                                  */
      for (j = 0; j < nullspace_dim; j++)
        for (k = 0; k < nullspace_dim; k++)
          new_null[i*nullspace_dim+j+k*Ncoarse*nullspace_dim] = 
            qr_tmp[j+aggr_cnt_array[i]*k];
      for (j = 0; j < aggr_cnt_array[i]; j++) {
        index = rows_in_aggs[i][j];
        index3 = new_ia[index];
        for (k = 0; k < nullspace_dim; k++) {
          new_ja [index3+k] = i * nullspace_dim + k;
          if (k == j) new_val[index3+k] = 1.;
          else new_val[index3+k] = 0.;
        }
      }
    }


  }

  ML_Aggregate_Set_NullSpace(ml_ag, num_PDE_eqns, nullspace_dim, 
                             new_null, Ncoarse*nullspace_dim);
  ML_memory_free( (void **) &new_null);

  /* ------------------------------------------------------------- */
  /* set up the csr_data data structure                            */
  /* ------------------------------------------------------------- */

  ML_memory_alloc((void**) &csr_data, sizeof(struct ML_CSR_MSRdata),"CSR");
  csr_data->rowptr  = new_ia;
  csr_data->columns = new_ja;
  csr_data->values  = new_val;

  ML_Operator_Set_ApplyFuncData( *Pmatrix, nullspace_dim*Ncoarse, Nrows, 
                                csr_data, Nrows, NULL, 0);
  (*Pmatrix)->data_destroy = ML_CSR_MSR_ML_memorydata_Destroy;
  (*Pmatrix)->getrow->pre_comm = ML_CommInfoOP_Create();
  (*Pmatrix)->max_nz_per_row = 1;

  ML_Operator_Set_Getrow((*Pmatrix), Nrows, CSR_getrow);
  ML_Operator_Set_ApplyFunc((*Pmatrix), CSR_matvec);
  (*Pmatrix)->max_nz_per_row = 1;
  /* this must be set so that the hierarchy generation does not abort early
     in adaptive SA */
  (*Pmatrix)->num_PDEs = nullspace_dim;

  /* ------------------------------------------------------------- */
  /* clean up                                                      */
  /* ------------------------------------------------------------- */

  ML_free(unamalg_bdry);
  ML_memory_free((void**)&aggr_index);
  ML_free(aggr_cnt_array);
  for (i = 0; i < aggr_count; i++) ML_free(rows_in_aggs[i]);
  ML_memory_free((void**)&rows_in_aggs);
  ML_memory_free((void**)&qr_tmp);
  ML_memory_free((void**)&tmp_vect);
  ML_memory_free((void**)&work);

  aggr_curr = aggr_head;
  while ( aggr_curr != NULL ) 
  {
    supernode = aggr_curr;
    aggr_curr = aggr_curr->next;
    if ( supernode->length > 0 ) ML_free( supernode->list );
    ML_free( supernode );
  }

  return Ncoarse*nullspace_dim;

} /* ML_Aggregate_CoarsenUser */
Ejemplo n.º 7
0
// ====================================================================== 
/*
 * Form C = scalarA * A + scalarB * B row by row.
 *
 * A, B        operators to combine; both must provide a getrow function,
 *             agree on getrow->Nrows and on invec_leng.
 * C           operator that receives the result.
 * matrix_type ML_CSR_MATRIX: the sum is stored in freshly allocated CSR
 *             arrays that are attached to C (released later through
 *             C->data_destroy).  ML_EpetraCRS_MATRIX (only when compiled
 *             with ML_WITH_EPETRA): every summed row is handed to
 *             ML_Epetra_CRSinsert and the scratch arrays are freed here.
 *             Any other value is reported through pr_error.
 * scalarA/B   weights applied to the entries of A and B.
 *
 * Always returns 1; every failure path goes through pr_error or exit(1).
 *
 * Method: the local column indices of each row are mapped to the ids
 * produced by ML_create_unique_col_id and merged in a hash table
 * (ML_hash_it supplies the slot).  The rows are traversed twice: the first
 * pass only counts the entries of the sum, the second pass writes them.
 *
 * NOTE(review): max_per_proc is written by both ML_create_unique_col_id
 * calls, so the value passed to ML_globalcsr2localcsr is the one computed
 * for B -- confirm that this is intended.
 */
int ML_Operator_Add2(ML_Operator *A, ML_Operator *B, ML_Operator *C,
		    int matrix_type, double scalarA, double scalarB)
{
  int A_allocated = 0, *A_bindx = NULL, B_allocated = 0, *B_bindx = NULL;
  double *A_val = NULL, *B_val = NULL, *hashed_vals;
  int i, A_length, B_length, *hashed_inds;
  int max_nz_per_row = 0, min_nz_per_row = 1000000, j;
  int hash_val, index_length;
  int *columns, *rowptr, nz_ptr, hash_used, global_col;
  double *values;
  struct ML_CSR_MSRdata *temp;
  int *A_gids, *B_gids;
  int max_per_proc;
#ifdef ML_WITH_EPETRA
  int count;
#endif

  if (A->getrow == NULL)
    pr_error("ML_Operator_Add: A does not have a getrow function.\n");

  if (B->getrow == NULL)
    pr_error("ML_Operator_Add: B does not have a getrow function.\n");

  if (A->getrow->Nrows != B->getrow->Nrows) {
    printf("ML_Operator_Add: Can not add, two matrices do not have the same");
    printf(" number of rows %d vs %d",A->getrow->Nrows,B->getrow->Nrows);
    exit(1);
  }

  if (A->invec_leng != B->invec_leng) {
    printf("ML_Operator_Add: Can not add, two matrices do not have the same");
    /* report the quantities that were actually compared (the column */
    /* counts), not the row counts                                   */
    printf(" number of columns %d vs %d",A->invec_leng,B->invec_leng);
    exit(1);
  }

  /* let's just count some things:                                     */
  /* the hash table gets one slot per local column plus one per column */
  /* received from other processors by either operand, plus one spare  */
  index_length = A->invec_leng + 1;
  if (A->getrow->pre_comm != NULL) {
    ML_CommInfoOP_Compute_TotalRcvLength(A->getrow->pre_comm);
    index_length += A->getrow->pre_comm->total_rcv_length;
  }
  if (B->getrow->pre_comm != NULL) {
    ML_CommInfoOP_Compute_TotalRcvLength(B->getrow->pre_comm);
    index_length += B->getrow->pre_comm->total_rcv_length;
  }

  /* A_gids/B_gids translate a local column index of A/B into the id */
  /* that is used as hash key below                                  */
  ML_create_unique_col_id(A->invec_leng, &A_gids, A->getrow->pre_comm,
			  &max_per_proc,A->comm);
  ML_create_unique_col_id(B->invec_leng, &B_gids, B->getrow->pre_comm,
			  &max_per_proc,B->comm);

  hashed_inds = (int *) ML_allocate(sizeof(int)*index_length);
  hashed_vals = (double *) ML_allocate(sizeof(double)*index_length);
  if (hashed_inds == NULL || hashed_vals == NULL)
    pr_error("ML_Operator_Add: no space for hash tables\n");

  /* -1 marks an unused hash slot */
  for (i = 0; i < index_length; i++) hashed_inds[i] = -1;
  for (i = 0; i < index_length; i++) hashed_vals[i] = 0.;

  /* ------------------------------------------------------------- */
  /* pass 1: count the entries of the sum so that the CSR arrays   */
  /* can be sized                                                  */
  /* ------------------------------------------------------------- */

  nz_ptr = 0;
  for (i = 0 ; i < A->getrow->Nrows; i++) {
    hash_used = 0;

    /* hash row i of A; A_bindx[j] is overwritten with the slot used */
    ML_get_matrix_row(A, 1, &i, &A_allocated, &A_bindx, &A_val,
                      &A_length, 0);
    for (j = 0; j < A_length; j++) {
      global_col = A_gids[A_bindx[j]];
      ML_hash_it(global_col, hashed_inds, index_length,&hash_used,&hash_val);
      hashed_inds[hash_val] = global_col;
      hashed_vals[hash_val] += scalarA * A_val[j];
      A_bindx[j] = hash_val;
    }

    /* hash row i of B into the same table */
    ML_get_matrix_row(B, 1, &i, &B_allocated, &B_bindx, &B_val,
                      &B_length, 0);
    for (j = 0; j < B_length; j++) {
      global_col = B_gids[B_bindx[j]];
      ML_hash_it(global_col, hashed_inds, index_length,&hash_used, &hash_val);
      hashed_inds[hash_val] = global_col;
      hashed_vals[hash_val] += scalarB*B_val[j];
      B_bindx[j] = hash_val;
    }

    /* every entry of A counts; clearing its slot resets the table */
    /* for the next row                                            */
    for (j = 0; j < A_length; j++) {
      nz_ptr++;
      hashed_inds[A_bindx[j]] = -1;
      hashed_vals[A_bindx[j]] = 0.;
    }
    /* an entry of B counts only if its slot was not already cleared */
    for (j = 0; j < B_length; j++) {
      if (hashed_inds[B_bindx[j]] != -1) {
        nz_ptr++;
        hashed_inds[B_bindx[j]] = -1;
        hashed_vals[B_bindx[j]] = 0.;
      }
    }
  }
  nz_ptr++;   /* one spare entry, keeps the allocations below non-empty */

  columns = 0;
  values = 0;

  rowptr = (int    *) ML_allocate(sizeof(int)*(A->outvec_leng+1));
  if (matrix_type == ML_CSR_MATRIX) {
    columns= (int    *) ML_allocate(sizeof(int)*nz_ptr);
    values = (double *) ML_allocate(sizeof(double)*nz_ptr);
  }
#ifdef ML_WITH_EPETRA
  else if (matrix_type == ML_EpetraCRS_MATRIX) {
    /* only one row at a time is staged before it is inserted into C */
    columns= (int    *) ML_allocate(sizeof(int)*(index_length+1));
    values = (double *) ML_allocate(sizeof(double)*(index_length+1));
  }
#endif
  else {
    pr_error("ML_Operator_Add: Unknown matrix type\n");
  }
  if (rowptr == NULL || columns == NULL || values == NULL)
    pr_error("ML_Operator_Add: no space for the result\n");

  /* ------------------------------------------------------------- */
  /* pass 2: redo the hashing and this time store the merged rows  */
  /* ------------------------------------------------------------- */

  nz_ptr = 0;
  rowptr[0] = 0;
  for (i = 0 ; i < A->getrow->Nrows; i++) {
    hash_used = 0;

    ML_get_matrix_row(A, 1, &i, &A_allocated, &A_bindx, &A_val,
                      &A_length, 0);
    for (j = 0; j < A_length; j++) {
      global_col = A_gids[A_bindx[j]];
      ML_hash_it(global_col, hashed_inds, index_length,&hash_used, &hash_val);
      hashed_inds[hash_val] = global_col;
      hashed_vals[hash_val] += scalarA * A_val[j];
      A_bindx[j] = hash_val;
    }

    ML_get_matrix_row(B, 1, &i, &B_allocated, &B_bindx, &B_val,
                      &B_length, 0);
    for (j = 0; j < B_length; j++) {
      global_col = B_gids[B_bindx[j]];
      ML_hash_it(global_col, hashed_inds, index_length,&hash_used, &hash_val);
      hashed_inds[hash_val] = global_col;
      hashed_vals[hash_val] += scalarB*B_val[j];
      B_bindx[j] = hash_val;
    }
#ifdef ML_WITH_EPETRA
    if (matrix_type == ML_EpetraCRS_MATRIX) {
      /* stage the row at the start of columns/values, then insert it */
      for (j = 0; j < A_length; j++) {
        columns[j] = hashed_inds[A_bindx[j]];
        values[j]  = hashed_vals[A_bindx[j]];
        nz_ptr++;
        hashed_inds[A_bindx[j]] = -1;
        hashed_vals[A_bindx[j]] = 0.;
      }
      count = A_length;
      for (j = 0; j < B_length; j++) {
        if (hashed_inds[B_bindx[j]] != -1) {
          columns[count] = hashed_inds[B_bindx[j]];
          values[count++]  = hashed_vals[B_bindx[j]];
          nz_ptr++;
          hashed_inds[B_bindx[j]] = -1;
          hashed_vals[B_bindx[j]] = 0.;
        }
      }
      ML_Epetra_CRSinsert(C,i,columns,values,count);
    }
    else {
#endif
      /* CSR: append the row to the global columns/values arrays */
      for (j = 0; j < A_length; j++) {
        columns[nz_ptr] = hashed_inds[A_bindx[j]];
        values[nz_ptr]  = hashed_vals[A_bindx[j]];
        nz_ptr++;
        hashed_inds[A_bindx[j]] = -1;
        hashed_vals[A_bindx[j]] = 0.;
      }
      for (j = 0; j < B_length; j++) {
        if (hashed_inds[B_bindx[j]] != -1) {
          columns[nz_ptr] = hashed_inds[B_bindx[j]];
          values[nz_ptr]  = hashed_vals[B_bindx[j]];
          nz_ptr++;
          hashed_inds[B_bindx[j]] = -1;
          hashed_vals[B_bindx[j]] = 0.;
        }
      }
#ifdef ML_WITH_EPETRA
    }
#endif
    rowptr[i+1] = nz_ptr;

    /* track the longest row and the shortest non-empty row */
    j = rowptr[i+1] - rowptr[i];
    if (j > max_nz_per_row)
      max_nz_per_row = j;
    if (j < min_nz_per_row && j>0)
      min_nz_per_row = j;
  }

  if (matrix_type == ML_CSR_MATRIX) {
    /* hand the CSR arrays over to C */
    temp = (struct ML_CSR_MSRdata *) ML_allocate(sizeof(struct ML_CSR_MSRdata));
    if (temp == NULL) pr_error("ML_Operator_Add: no space for temp\n");
    temp->columns = columns;
    temp->values  = values;
    temp->rowptr   = rowptr;

    ML_Operator_Set_ApplyFuncData(C, B->invec_leng, A->outvec_leng,
				  temp,A->outvec_leng, NULL,0);
    ML_Operator_Set_Getrow(C, A->outvec_leng, CSR_getrow);
    ML_Operator_Set_ApplyFunc (C, CSR_matvec);
    /* presumably turns the hashed column ids back into local indices */
    /* -- verify against ML_globalcsr2localcsr                        */
    ML_globalcsr2localcsr(C, max_per_proc);
    C->data_destroy = ML_CSR_MSRdata_Destroy;

    C->max_nz_per_row = max_nz_per_row;
    C->min_nz_per_row = min_nz_per_row;
    C->N_nonzeros     = nz_ptr;
  }
#ifdef ML_WITH_EPETRA
  else {
    /* the rows were already inserted into C; these were scratch space */
    ML_free(rowptr);
    ML_free(columns);
    ML_free(values);
  }
#endif

  ML_free(A_gids);
  ML_free(B_gids);
  ML_free(hashed_vals);
  ML_free(hashed_inds);
  ML_free(A_val);
  ML_free(A_bindx);
  ML_free(B_val);
  ML_free(B_bindx);

  return 1;

}