Beispiel #1
0
void ML_rap(ML_Operator *Rmat, ML_Operator *Amat,
            ML_Operator *Pmat, ML_Operator *Result, int matrix_type)
{
  int         max_per_proc, i, j, N_input_vector;
   ML_Operator *APmat, *RAPmat, *Pcomm, *RAPcomm, *APcomm, *AP2comm, *tptr;
   ML_CommInfoOP *getrow_comm;
   double      *scales = NULL;
#  ifdef ML_TIMING
   double tpre,tmult,tpost,ttotal;
#  endif

   /* Check that N_input_vector is reasonable */

#  ifdef ML_TIMING
   tpre = GetClock();
   ttotal = GetClock();
#  endif

   N_input_vector = Pmat->invec_leng;
   getrow_comm = Pmat->getrow->pre_comm;
   if ( getrow_comm != NULL) {
      for (i = 0; i < getrow_comm->N_neighbors; i++) {
         for (j = 0; j < getrow_comm->neighbors[i].N_send; j++) {
            if (getrow_comm->neighbors[i].send_list[j] >= N_input_vector) {
              printf("(%d) Error: N_input_vector (%d) argument to rap() is not \n", Amat->comm->ML_mypid,N_input_vector);
              printf("(%d) Error: larger than %dth element (%d) sent to node %d\n", Amat->comm->ML_mypid,j+1,
                     getrow_comm->neighbors[i].send_list[j],
                     getrow_comm->neighbors[i].ML_id);
printf("(%d) Error: Amat(%d,%d)  Rmat(%d,%d)  Pmat(%d,%d)\n",
                Amat->comm->ML_mypid,
                Amat->outvec_leng,Amat->invec_leng,
                Rmat->outvec_leng,Rmat->invec_leng,
                Pmat->outvec_leng,Pmat->invec_leng);
fflush(stdout);
              exit(1);
            }
         }
      }
   }


   ML_create_unique_col_id(N_input_vector, &(Pmat->getrow->loc_glob_map),
                           getrow_comm, &max_per_proc, Pmat->comm);
   Pmat->getrow->use_loc_glob_map = ML_YES;



   if (Amat->getrow->pre_comm != NULL)
      ML_exchange_rows( Pmat, &Pcomm, Amat->getrow->pre_comm);
   else Pcomm = Pmat;

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : A * P begins...\n");
#endif

#  ifdef ML_TIMING
   tpre = GetClock() - tpre;
   tmult = GetClock();
#  endif

   ML_matmat_mult(Amat, Pcomm , &APmat);

#  ifdef ML_TIMING
   tmult = GetClock() - tmult;
   tpost = GetClock();
#  endif

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : A * P ends.\n");
#endif

   ML_free(Pmat->getrow->loc_glob_map); Pmat->getrow->loc_glob_map = NULL;
   Pmat->getrow->use_loc_glob_map = ML_NO;
   if (Amat->getrow->pre_comm != NULL) {
      tptr = Pcomm;
      while ( (tptr!= NULL) && (tptr->sub_matrix != Pmat))
         tptr = tptr->sub_matrix;
      if (tptr != NULL) tptr->sub_matrix = NULL;
      ML_RECUR_CSR_MSRdata_Destroy(Pcomm);
      ML_Operator_Destroy(&Pcomm);
   }

   if (Amat->getrow->post_comm != NULL) {
      ML_exchange_rows(APmat, &APcomm, Amat->getrow->post_comm);
   }
   else APcomm = APmat;

   /* Take into account any scaling in Amat */

   if (Rmat->from != NULL)
      ML_DVector_GetDataPtr(Rmat->from->Amat_Normalization,&scales);
   if (scales != NULL)
      ML_Scale_CSR(APcomm, scales, 0);

   if (Rmat->getrow->pre_comm != NULL)
      ML_exchange_rows( APcomm, &AP2comm, Rmat->getrow->pre_comm);
   else AP2comm = APcomm;

#  ifdef ML_TIMING
   tpost = GetClock() - tpost;
   if ( Pmat->comm->ML_mypid == 0 && ML_Get_PrintLevel() > 5) {
     int level=-1;
     if (Amat->from != NULL)
       level = Amat->from->levelnum-1;
     printf("Timing summary (in seconds) for product RAP on level %d\n", level);
     printf("     (level %d) RAP right: pre-multiply communication time    = %3.2e\n", level, tpre);
     printf("     (level %d) RAP right: multiply time                      = %3.2e\n", level, tmult);
     printf("     (level %d) RAP right: post-multiply communication time   = %3.2e\n", level, tpost);
   }
#  endif

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : R * AP begins...\n");
#endif

#  ifdef ML_TIMING
   tmult = GetClock();
#  endif
   ML_matmat_mult(Rmat,AP2comm, &RAPmat);

#ifdef DEBUG
   if ( Pmat->comm->ML_mypid == 0 )
      printf("ML_rap : R * AP ends.\n");
#endif

   ML_RECUR_CSR_MSRdata_Destroy(AP2comm);
   ML_Operator_Destroy(&AP2comm);

#  ifdef ML_TIMING
   tmult = GetClock()-tmult;
   tpost = GetClock();
#  endif

   if (Rmat->getrow->post_comm != NULL)
      ML_exchange_rows( RAPmat, &RAPcomm, Rmat->getrow->post_comm);
   else RAPcomm = RAPmat;

   scales = NULL;
   if (Rmat->to != NULL)
      ML_DVector_GetDataPtr(Rmat->to->Amat_Normalization,&scales);
   if (scales != NULL)
      ML_Scale_CSR(RAPcomm, scales, 1);

   RAPcomm->num_PDEs = Amat->num_PDEs;
   RAPcomm->num_rigid = Amat->num_rigid;
   if (matrix_type == ML_MSR_MATRIX)
     ML_back_to_local(RAPcomm, Result, max_per_proc);
   else if (matrix_type == ML_CSR_MATRIX)
     ML_back_to_csrlocal(RAPcomm, Result, max_per_proc);
   else if (matrix_type == ML_EpetraCRS_MATRIX)
#ifdef ML_WITH_EPETRA
     ML_back_to_epetraCrs(RAPcomm, Result, Rmat, Pmat);
#else
     pr_error("ML_RAP: ML_EpetraCRS_MATRIX requires epetra to be compiled in.\n");
#endif
   else pr_error("ML_RAP: Unknown matrix type\n");
// ====================================================================== 
int ML_Operator_Add2(ML_Operator *A, ML_Operator *B, ML_Operator *C,
		    int matrix_type, double scalarA, double scalarB)
{
  int A_allocated = 0, *A_bindx = NULL, B_allocated = 0, *B_bindx = NULL;
  double *A_val = NULL, *B_val = NULL, *hashed_vals;
  int i, A_length, B_length, *hashed_inds;
  int max_nz_per_row = 0, min_nz_per_row=1e6, j;
  int hash_val, index_length;
  int *columns, *rowptr, nz_ptr, hash_used, global_col;
  double *values;
  struct ML_CSR_MSRdata *temp;
  int *A_gids, *B_gids;
  int max_per_proc;
#ifdef ML_WITH_EPETRA
  int count;
#endif

  if (A->getrow == NULL) 
    pr_error("ML_Operator_Add: A does not have a getrow function.\n");

  if (B->getrow == NULL) 
    pr_error("ML_Operator_Add: B does not have a getrow function.\n");

  if (A->getrow->Nrows != B->getrow->Nrows) {
    printf("ML_Operator_Add: Can not add, two matrices do not have the same");
    printf(" number of rows %d vs %d",A->getrow->Nrows,B->getrow->Nrows);
    exit(1);
  }

  if (A->invec_leng != B->invec_leng) {
    printf("ML_Operator_Add: Can not add, two matrices do not have the same");
    printf(" number of columns %d vs %d",A->getrow->Nrows,B->getrow->Nrows);
    exit(1);
  }

  /* let's just count some things */
  index_length = A->invec_leng + 1;
  if (A->getrow->pre_comm != NULL) {
    ML_CommInfoOP_Compute_TotalRcvLength(A->getrow->pre_comm);
    index_length += A->getrow->pre_comm->total_rcv_length;
  }
  if (B->getrow->pre_comm != NULL) {
    ML_CommInfoOP_Compute_TotalRcvLength(B->getrow->pre_comm);
    index_length += B->getrow->pre_comm->total_rcv_length;
  }

  ML_create_unique_col_id(A->invec_leng, &A_gids, A->getrow->pre_comm,
			  &max_per_proc,A->comm);
  ML_create_unique_col_id(B->invec_leng, &B_gids, B->getrow->pre_comm,
			  &max_per_proc,B->comm);


  hashed_inds = (int *) ML_allocate(sizeof(int)*index_length);
  hashed_vals = (double *) ML_allocate(sizeof(double)*index_length);

  for (i = 0; i < index_length; i++) hashed_inds[i] = -1;
  for (i = 0; i < index_length; i++) hashed_vals[i] = 0.;

  nz_ptr = 0;
  for (i = 0 ; i < A->getrow->Nrows; i++) {
    hash_used = 0;
      ML_get_matrix_row(A, 1, &i, &A_allocated, &A_bindx, &A_val,
                        &A_length, 0);
      for (j = 0; j < A_length; j++) {
	global_col = A_gids[A_bindx[j]];
	ML_hash_it(global_col, hashed_inds, index_length,&hash_used,&hash_val);
        hashed_inds[hash_val] = global_col;
        hashed_vals[hash_val] += scalarA * A_val[j];
	A_bindx[j] = hash_val;
      }

      ML_get_matrix_row(B, 1, &i, &B_allocated, &B_bindx, &B_val,
                        &B_length, 0);
      for (j = 0; j < B_length; j++) {
	global_col = B_gids[B_bindx[j]];
	ML_hash_it(global_col, hashed_inds, index_length,&hash_used, &hash_val);
        hashed_inds[hash_val] = global_col;
        hashed_vals[hash_val] += scalarB*B_val[j];
        B_bindx[j] = hash_val;
      }

      for (j = 0; j < A_length; j++) {
        nz_ptr++;
	hashed_inds[A_bindx[j]] = -1;
	hashed_vals[A_bindx[j]] = 0.;
      }
      for (j = 0; j < B_length; j++) {
        if (hashed_inds[B_bindx[j]] != -1) {
	  nz_ptr++;
	  hashed_inds[B_bindx[j]] = -1;
	  hashed_vals[B_bindx[j]] = 0.;
	}
      }
  }
  nz_ptr++;

  columns = 0;
  values = 0;

  rowptr = (int    *) ML_allocate(sizeof(int)*(A->outvec_leng+1));
  if (matrix_type == ML_CSR_MATRIX) {
    columns= (int    *) ML_allocate(sizeof(int)*nz_ptr);
    values = (double *) ML_allocate(sizeof(double)*nz_ptr);
  }
#ifdef ML_WITH_EPETRA
  else if (matrix_type == ML_EpetraCRS_MATRIX) {
    columns= (int    *) ML_allocate(sizeof(int)*(index_length+1));
    values = (double *) ML_allocate(sizeof(double)*(index_length+1));
  }
#endif
  else {
    pr_error("ML_Operator_Add: Unknown matrix type\n");
  }

  nz_ptr = 0;
  rowptr[0] = 0;
  for (i = 0 ; i < A->getrow->Nrows; i++) {
    hash_used = 0;
      ML_get_matrix_row(A, 1, &i, &A_allocated, &A_bindx, &A_val,
                        &A_length, 0);
      for (j = 0; j < A_length; j++) {
	global_col = A_gids[A_bindx[j]];
	ML_hash_it(global_col, hashed_inds, index_length,&hash_used, &hash_val);
        hashed_inds[hash_val] = global_col;
        hashed_vals[hash_val] += scalarA * A_val[j];
	A_bindx[j] = hash_val;
      }

      ML_get_matrix_row(B, 1, &i, &B_allocated, &B_bindx, &B_val,
                        &B_length, 0);
      for (j = 0; j < B_length; j++) {
	global_col = B_gids[B_bindx[j]];
	ML_hash_it(global_col, hashed_inds, index_length,&hash_used, &hash_val);
        hashed_inds[hash_val] = global_col;
        hashed_vals[hash_val] += scalarB*B_val[j];
        B_bindx[j] = hash_val;
      }
#ifdef ML_WITH_EPETRA
      if (matrix_type == ML_EpetraCRS_MATRIX) {
	for (j = 0; j < A_length; j++) {
	  columns[j] = hashed_inds[A_bindx[j]];
	  values[j]  = hashed_vals[A_bindx[j]];
	  nz_ptr++;
	  hashed_inds[A_bindx[j]] = -1;
	  hashed_vals[A_bindx[j]] = 0.;
	}
	count = A_length;
	for (j = 0; j < B_length; j++) {
	  if (hashed_inds[B_bindx[j]] != -1) {
	    columns[count] = hashed_inds[B_bindx[j]];
	    values[count++]  = hashed_vals[B_bindx[j]];
	    nz_ptr++;
	    hashed_inds[B_bindx[j]] = -1;
	    hashed_vals[B_bindx[j]] = 0.;
	  }
	}
	ML_Epetra_CRSinsert(C,i,columns,values,count);
      }
      else {
#endif
	for (j = 0; j < A_length; j++) {
	  columns[nz_ptr] = hashed_inds[A_bindx[j]];
	  values[nz_ptr]  = hashed_vals[A_bindx[j]];
	  nz_ptr++;
	  hashed_inds[A_bindx[j]] = -1;
	  hashed_vals[A_bindx[j]] = 0.;
	}
	for (j = 0; j < B_length; j++) {
	  if (hashed_inds[B_bindx[j]] != -1) {
	    columns[nz_ptr] = hashed_inds[B_bindx[j]];
	    values[nz_ptr]  = hashed_vals[B_bindx[j]];
	    nz_ptr++;
	    hashed_inds[B_bindx[j]] = -1;
	    hashed_vals[B_bindx[j]] = 0.;
	  }
	}
#ifdef ML_WITH_EPETRA
      }
#endif
      rowptr[i+1] = nz_ptr;
      j = rowptr[i+1] - rowptr[i];
      if (j > max_nz_per_row)
        max_nz_per_row = j;
      if (j < min_nz_per_row && j>0)
        min_nz_per_row = j;
  }
  if (matrix_type == ML_CSR_MATRIX) {
    temp = (struct ML_CSR_MSRdata *) ML_allocate(sizeof(struct ML_CSR_MSRdata));
    if (temp == NULL) pr_error("ML_Operator_Add: no space for temp\n");
    temp->columns = columns;
    temp->values  = values;
    temp->rowptr   = rowptr;

    ML_Operator_Set_ApplyFuncData(C, B->invec_leng, A->outvec_leng, 
				  temp,A->outvec_leng, NULL,0);
    ML_Operator_Set_Getrow(C, A->outvec_leng, CSR_getrow);
    ML_Operator_Set_ApplyFunc (C, CSR_matvec);
    ML_globalcsr2localcsr(C, max_per_proc);
    C->data_destroy = ML_CSR_MSRdata_Destroy;

    C->max_nz_per_row = max_nz_per_row;
    C->min_nz_per_row = min_nz_per_row;
    C->N_nonzeros     = nz_ptr;
  }
#ifdef ML_WITH_EPETRA
  else {
    ML_free(rowptr); 
    ML_free(columns);
    ML_free(values);
  }
#endif

  ML_free(A_gids);
  ML_free(B_gids);
  ML_free(hashed_vals);
  ML_free(hashed_inds);
  ML_free(A_val);
  ML_free(A_bindx);
  ML_free(B_val);
  ML_free(B_bindx);

  return 1;

}