Beispiel #1
0
//=============================================================================
// Fills the compressed-row arrays (Ap, Ai, Aval) that are handed to Umfpack
// from SerialMatrix_. The conversion itself runs only where MyPID_ == 0; the
// two timers are reset on entry and charged after the conversion.
// Returns 0 on success; a failed row extraction is reported through
// AMESOS_CHK_ERR(-1).
int Amesos_Umfpack::ConvertToUmfpackCRS()
{
  ResetTimer(0);
  ResetTimer(1);

  // The matrix has already been assembled in SerialMatrix_; if only one
  // processor is used, SerialMatrix_ points to the problem's matrix.
  if (MyPID_ == 0)
  {
    // One row offset per row plus the closing offset.
    Ap.resize( NumGlobalElements_+1 );
    Ai.resize( EPETRA_MAX( NumGlobalElements_, numentries_) );
    Aval.resize( EPETRA_MAX( NumGlobalElements_, numentries_) );

    // Capacity announced to every row extraction.
    int rowCapacity = SerialMatrix_->MaxNumEntries();

    int rowLength;
    int writePos = 0;   // next free slot in Ai / Aval
    int row = 0;
    while (row < NumGlobalElements_)
    {
      Ap[row] = writePos;
      int status = SerialMatrix_->ExtractMyRowCopy(row, rowCapacity,
                                                   rowLength,
                                                   &Aval[writePos], &Ai[writePos]);
      if (status != 0) {
        AMESOS_CHK_ERR(-1);
      }

      // MS // added on 15-Mar-05 and KSS restored 8-Feb-06
      // Shift the first entry of this row whose column index equals the row
      // index by AddToDiag_ (rows without such an entry are left alone).
      if (AddToDiag_ != 0.0) {
        int k = 0;
        while (k < rowLength && Ai[writePos+k] != row) ++k;
        if (k < rowLength) Aval[writePos+k] += AddToDiag_;
      }

      writePos += rowLength;
      ++row;
    }

    // Closing offset: total number of entries copied.
    Ap[row] = writePos;
  }

  MtxConvTime_ = AddTime("Total matrix conversion time", MtxConvTime_, 0);
  OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);

  return 0;
}
//==============================================================================
int Ifpack_CrsRiluk::BlockGraph2PointGraph(const Epetra_CrsGraph & BG, Epetra_CrsGraph & PG, bool Upper) {

  if (!BG.IndicesAreLocal()) {EPETRA_CHK_ERR(-1);} // Must have done FillComplete on BG

  int * ColFirstPointInElementList = BG.RowMap().FirstPointInElementList();
  int * ColElementSizeList = BG.RowMap().ElementSizeList();
  if (BG.Importer()!=0) {
    ColFirstPointInElementList = BG.ImportMap().FirstPointInElementList();
    ColElementSizeList = BG.ImportMap().ElementSizeList();
  }

  int Length = (BG.MaxNumIndices()+1) * BG.ImportMap().MaxMyElementSize();
  vector<int> tmpIndices(Length);

  int BlockRow, BlockOffset, NumEntries;
  int NumBlockEntries;
  int * BlockIndices;

  int NumMyRows_tmp = PG.NumMyRows();

  for (int i=0; i<NumMyRows_tmp; i++) {
    EPETRA_CHK_ERR(BG.RowMap().FindLocalElementID(i, BlockRow, BlockOffset));
    EPETRA_CHK_ERR(BG.ExtractMyRowView(BlockRow, NumBlockEntries, BlockIndices));

    int * ptr = &tmpIndices[0]; // Set pointer to beginning of buffer

    int RowDim = BG.RowMap().ElementSize(BlockRow);
    NumEntries = 0;

    // This next line make sure that the off-diagonal entries in the block diagonal of the 
    // original block entry matrix are included in the nonzero pattern of the point graph
    if (Upper) {
      int jstart = i+1;
      int jstop = EPETRA_MIN(NumMyRows_tmp,i+RowDim-BlockOffset);
      for (int j= jstart; j< jstop; j++) {*ptr++ = j; NumEntries++;}
    }

    for (int j=0; j<NumBlockEntries; j++) {
      int ColDim = ColElementSizeList[BlockIndices[j]];
      NumEntries += ColDim;
      assert(NumEntries<=Length); // Sanity test
      int Index = ColFirstPointInElementList[BlockIndices[j]];
      for (int k=0; k < ColDim; k++) *ptr++ = Index++;
    }

    // This next line make sure that the off-diagonal entries in the block diagonal of the 
    // original block entry matrix are included in the nonzero pattern of the point graph
    if (!Upper) {
      int jstart = EPETRA_MAX(0,i-RowDim+1);
      int jstop = i;
      for (int j = jstart; j < jstop; j++) {*ptr++ = j; NumEntries++;}
    }

    EPETRA_CHK_ERR(PG.InsertMyIndices(i, NumEntries, &tmpIndices[0]));
  }

  SetAllocated(true);

  return(0);
}
//=========================================================================
// Combines the colors of Source into this object for IDs that live on the
// calling process in both objects.
//
//   Source          - must be an Epetra_MapColoring (checked by dynamic_cast
//                     to a reference).
//   NumSameIDs      - number of leading entries that sit at the same position
//                     in both objects.
//   NumPermuteIDs   - number of entries that move; entry j is read from
//                     PermuteFromLIDs[j] and combined into PermuteToLIDs[j].
//   Indexor         - unused.
//   CombineMode     - Add:    target += source
//                     Insert: target  = source
//                     AbsMax: target  = max(target, |source|)
//                     Zero:   target is left untouched
//                     Any other mode is reported through EPETRA_CHK_ERR(-1).
//
// Zero is handled the same way in both phases: neither the same-ID copy nor
// the permutation modifies the target, matching the other combine routines.
// Returns 0 on success.
int Epetra_MapColoring::CopyAndPermute(const Epetra_SrcDistObject& Source,
               int NumSameIDs,
               int NumPermuteIDs,
               int * PermuteToLIDs,
               int *PermuteFromLIDs,
               const Epetra_OffsetIndex * Indexor,
               Epetra_CombineMode CombineMode)
{
  (void)Indexor;

  if(    CombineMode != Add
      && CombineMode != Zero
      && CombineMode != Insert
      && CombineMode != AbsMax )
    EPETRA_CHK_ERR(-1); //Unsupported CombinedMode

  const Epetra_MapColoring & A = dynamic_cast<const Epetra_MapColoring &>(Source);

  int * From = A.ElementColors();
  int *To = ElementColors_;

  // Do copy first (nothing to do when source and target share storage)
  if (NumSameIDs>0 && To!=From) {
    if (CombineMode==Add) {
      for (int j=0; j<NumSameIDs; j++) To[j] += From[j]; // Add to existing value
    }
    else if (CombineMode==Insert) {
      for (int j=0; j<NumSameIDs; j++) To[j] = From[j];
    }
    else if (CombineMode==AbsMax) {
      for (int j=0; j<NumSameIDs; j++) To[j] = EPETRA_MAX( To[j],std::abs(From[j]));
    }
  }

  // Do local permutation next
  if (NumPermuteIDs>0) {
    if (CombineMode==Add) {
      for (int j=0; j<NumPermuteIDs; j++) To[PermuteToLIDs[j]] += From[PermuteFromLIDs[j]]; // Add to existing value
    }
    else if (CombineMode==Insert) {
      // Zero is deliberately not handled here: it must not overwrite the target.
      for (int j=0; j<NumPermuteIDs; j++) To[PermuteToLIDs[j]] = From[PermuteFromLIDs[j]];
    }
    else if (CombineMode==AbsMax) {
      for (int j=0; j<NumPermuteIDs; j++) To[PermuteToLIDs[j]] = EPETRA_MAX( To[PermuteToLIDs[j]],std::abs(From[PermuteFromLIDs[j]]));
    }
  }

  return(0);
}
//=============================================================================
// Returns the maximum entry: the local maximum over Values_ is combined
// through Comm().MaxAll.
long long Epetra_LongLongVector::MaxValue() {
  const int localCount = MyLength();
  // With no local entries the smallest 64 bit value is contributed.
  long long localMax = std::numeric_limits<long long>::min();
  if (localCount > 0) localMax = Values_[0];
  for (int k = 1; k < localCount; ++k) {
    if (Values_[k] > localMax) localMax = Values_[k];
  }
  long long globalMax;
  this->Comm().MaxAll(&localMax, &globalMax, 1);
  return(globalMax);
}
//=============================================================================
// Returns the maximum entry: the local maximum over Values_ is combined
// through Comm().MaxAll.
int Epetra_IntVector::MaxValue() {

  const int localCount = MyLength();
  // With no local entries, negative 2 billion (close to the smallest 32 bit
  // int) is contributed.
  int localMax = -2000000000;
  if (localCount > 0) localMax = Values_[0];
  for (int k = 1; k < localCount; ++k) {
    if (Values_[k] > localMax) localMax = Values_[k];
  }
  int globalMax;
  this->Comm().MaxAll(&localMax, &globalMax, 1);
  return(globalMax);
}
//=============================================================================
// Returns the largest column sum of absolute values. Column j starts at
// A_ + j*LDA_ and holds M_ consecutive entries; N_ columns are visited.
int Epetra_IntSerialDenseMatrix::OneNorm() {
	int largest = 0;
	for(int col = 0; col < N_; ++col) {
		const int* column = A_ + col*LDA_;
		int columnSum = 0;
		for(int row = 0; row < M_; ++row)
			columnSum += std::abs(column[row]);
		if(columnSum > largest)
			largest = columnSum;
	}
	return(largest);
}
//=============================================================================
// Returns the largest column sum of absolute values. Column j starts at
// A_ + j*LDA_ and holds M_ consecutive entries; N_ columns are visited.
long long Epetra_LongLongSerialDenseMatrix::OneNorm() {
  long long largest = 0;
  for(int col = 0; col < N_; ++col) {
    const long long* column = A_ + col*LDA_;
    long long columnSum = 0;
    for(int row = 0; row < M_; ++row) {
      const long long entry = column[row];
      // No std::abs(long long) on VS2005.
      if(entry > 0)
        columnSum += entry;
      else
        columnSum += -entry;
    }
    if(columnSum > largest)
      largest = columnSum;
  }
  return(largest);
}
//=============================================================================
// Returns the maximum absolute row sum of the N_ x N_ symmetric matrix.
// Only one triangle of A_ is read (selected by Upper()); the part of row j
// that lies in the other triangle is taken from column j of the stored
// triangle instead. Entry (r,c) is addressed as A_[r + c*LDA_].
// The flop counter is charged N_*N_, computed in double so the product
// cannot overflow int for large N_.
double Epetra_SerialSymDenseMatrix::NormInf(void) const {

  int i, j;

  double anorm = 0.0;
  double * ptr;

  if (!Upper()) {
    // Lower triangle is read.
    for (j=0; j<N_; j++) {
      double sum = 0.0;
      // Column j from the diagonal downwards ...
      ptr = A_ + j + j*LDA_;
      for (i=j; i<N_; i++) sum += std::abs(*ptr++);
      // ... plus row j left of the diagonal.
      ptr = A_ + j;
      for (i=0; i<j; i++) {
        sum += std::abs(*ptr);
        ptr += LDA_;
      }
      anorm = EPETRA_MAX(anorm, sum);
    }
  }
  else {
    // Upper triangle is read.
    for (j=0; j<N_; j++) {
      double sum = 0.0;
      // Column j above the diagonal ...
      ptr = A_ + j*LDA_;
      for (i=0; i<j; i++) sum += std::abs(*ptr++);
      // ... plus row j from the diagonal rightwards.
      ptr = A_ + j + j*LDA_;
      for (i=j; i<N_; i++) {
        sum += std::abs(*ptr);
        ptr += LDA_;
      }
      anorm = EPETRA_MAX(anorm, sum);
    }
  }
  // N_*N_ in int arithmetic overflows once N_ exceeds 46340.
  UpdateFlops(static_cast<double>(N_)*static_cast<double>(N_));
  return(anorm);
}
//=============================================================================
// Returns the largest row sum of absolute values. Row i starts at A_ + i and
// its entries are LDA_ apart; M_ rows of N_ entries are visited.
int Epetra_IntSerialDenseMatrix::InfNorm() {
	int largest = 0;
	// Walking along a row strides through memory: the most expensive access
	// pattern, but it needs no extra storage.
	for(int row = 0; row < M_; ++row) {
		const int* entry = A_ + row;
		int rowSum = 0;
		for(int col = 0; col < N_; ++col, entry += LDA_)
			rowSum += std::abs(*entry);
		if(rowSum > largest)
			largest = rowSum;
	}
	return(largest);
}
//=============================================================================
// Returns the maximum absolute row sum. A cached NormInf_ above -1.0 is
// returned directly; otherwise the local maximum over NumMyRows_ rows is
// combined through Comm().MaxAll, stored in NormInf_, and the flop counter
// is charged NumGlobalNonzeros().
double Epetra_MsrMatrix::NormInf() const {

  if (NormInf_>-1.0) return(NormInf_);

  double localMax = 0.0;
  int row = 0;
  while (row < NumMyRows_) {
    // GetRow returns the entry count; the entries are then read from Values_
    // (presumably filled by GetRow -- confirm against its definition).
    const int rowLength = GetRow(row);
    double absRowSum = 0.0;
    for (int k = 0; k < rowLength; ++k) absRowSum += fabs(Values_[k]);

    localMax = EPETRA_MAX(localMax, absRowSum);
    ++row;
  }
  Comm().MaxAll(&localMax, &NormInf_, 1);
  UpdateFlops(NumGlobalNonzeros());
  return(NormInf_);
}
//=============================================================================
// Returns the largest row sum of absolute values. Row i starts at A_ + i and
// its entries are LDA_ apart; M_ rows of N_ entries are visited.
long long Epetra_LongLongSerialDenseMatrix::InfNorm() {
  long long largest = 0;
  // Walking along a row strides through memory: the most expensive access
  // pattern, but it needs no extra storage.
  for(int row = 0; row < M_; ++row) {
    const long long* entry = A_ + row;
    long long rowSum = 0;
    for(int col = 0; col < N_; ++col, entry += LDA_) {
      const long long value = *entry;
      // No std::abs(long long) on VS2005.
      if(value > 0)
        rowSum += value;
      else
        rowSum += -value;
    }
    if(rowSum > largest)
      largest = rowSum;
  }
  return(largest);
}
//=========================================================================
// Combines received colors into ElementColors_. Imports is read as an array
// of int; value j is combined into the entry ImportLIDs[j].
//   Add:    target += value
//   Insert: target  = value
//   AbsMax: every listed target is first set to 0, then raised to
//           max(target, |value|)
//   Zero:   nothing is modified
// Any other mode is reported through EPETRA_CHK_ERR(-1). Source, LenImports,
// SizeOfPacket, Distor and Indexor are not used. Returns 0 on success.
int Epetra_MapColoring::UnpackAndCombine(const Epetra_SrcDistObject & Source,
           int NumImportIDs,
                                         int * ImportLIDs, 
                                         int LenImports,
           char * Imports,
                                         int & SizeOfPacket, 
           Epetra_Distributor & Distor, 
           Epetra_CombineMode CombineMode,
                                         const Epetra_OffsetIndex * Indexor )
{
  (void)Source;
  (void)LenImports;
  (void)Imports;
  (void)SizeOfPacket;
  (void)Distor;
  (void)Indexor;

  const bool supportedMode =    CombineMode == Add
                             || CombineMode == Zero
                             || CombineMode == Insert
                             || CombineMode == AbsMax;
  if (!supportedMode)
    EPETRA_CHK_ERR(-1); //Unsupported CombinedMode

  if (NumImportIDs<=0) return(0);

  int * colors = ElementColors_;

  // Unpack it...
  const int * incoming = reinterpret_cast<int *>(Imports);
  int n;

  switch (CombineMode) {
  case Add:
    for (n=0; n<NumImportIDs; n++) colors[ImportLIDs[n]] += incoming[n]; // Add to existing value
    break;
  case Insert:
    for (n=0; n<NumImportIDs; n++) colors[ImportLIDs[n]] = incoming[n];
    break;
  case AbsMax:
    // Two separate passes: all listed targets are cleared before any maximum
    // is taken.
    for (n=0; n<NumImportIDs; n++) colors[ImportLIDs[n]] = 0;
    for (n=0; n<NumImportIDs; n++) colors[ImportLIDs[n]] = EPETRA_MAX( colors[ImportLIDs[n]],std::abs(incoming[n]));
    break;
  default:
    break;
  }

  return(0);
}
Beispiel #13
0
// Resolves the symbolic values of MaxProcs_ and clamps it to the number of
// processes in Comm():
//   -1 -> 1 + max(rows/10000, nonzeros/100000), at most Comm().NumProc()
//   -2 -> integer part of sqrt(Comm().NumProc()), at least 1
//   -3 -> Comm().NumProc()
// When HAVE_AMESOS_MPI_C2F is not defined, MaxProcs_ is forced to -3 first.
void Amesos_Mumps::CheckParameters() 
{
#ifndef HAVE_AMESOS_MPI_C2F
  MaxProcs_ = -3;
#endif

  const int globalNonzeros = Matrix().NumGlobalNonzeros();
  const int globalRows = Matrix().NumGlobalRows();
  const int availableProcs = Comm().NumProc();

  // candidate for MaxProcs_ == -1
  int sizeBasedProcs = 1 + EPETRA_MAX(globalRows/10000, globalNonzeros/100000);
  sizeBasedProcs = EPETRA_MIN(availableProcs,sizeBasedProcs);

  // candidate for MaxProcs_ == -2
  int sqrtProcs = static_cast<int>(sqrt(1.0 * availableProcs));
  if (sqrtProcs < 1) sqrtProcs = 1;

  // replace the symbolic value, if any
  if (MaxProcs_ == -1) {
    MaxProcs_ = sizeBasedProcs;
  } else if (MaxProcs_ == -2) {
    MaxProcs_ = sqrtProcs;
  } else if (MaxProcs_ == -3) {
    MaxProcs_ = availableProcs;
  }

  // never ask for more processes than there are
  if (MaxProcs_ > availableProcs) MaxProcs_ = availableProcs;
//  if ( MaxProcs_ > 1 ) MaxProcs_ = Comm().NumProc();     // Bug - bogus kludge here  - didn't work anyway
}
Beispiel #14
0
//=========================================================================
// Combines received values into Values_. Imports is read as an array of int
// in which the packet for import j starts at offset j*MaxElementSize. The
// packet is combined into the entries of element ImportLIDs[j]:
//   - MaxElementSize == 1:    one entry at index ImportLIDs[j]
//   - constant element size:  MaxElementSize entries starting at
//                             MaxElementSize*ImportLIDs[j]
//   - variable element size:  ElementSizeList[lid] entries starting at
//                             FirstPointInElementList[lid]
// Modes: Add (+=), Insert (=), AbsMax (max(target, |value|)), Average
// (target = (target + value) / 2, integer division); Zero modifies nothing.
// Any other mode is reported through EPETRA_CHK_ERR(-1). Source, LenImports,
// SizeOfPacket, Distor and Indexor are not used. Returns 0 on success.
int Epetra_IntVector::UnpackAndCombine(const Epetra_SrcDistObject & Source,
                                       int NumImportIDs,
                                       int * ImportLIDs,
                                       int LenImports,
                                       char * Imports,
                                       int & SizeOfPacket,
                                       Epetra_Distributor & Distor,
                                       Epetra_CombineMode CombineMode,
                                       const Epetra_OffsetIndex * Indexor)
{
  (void)Source;
  (void)LenImports;
  (void)SizeOfPacket;
  (void)Distor;
  (void)Indexor;

  const bool supportedMode =    CombineMode == Add
                             || CombineMode == Zero
                             || CombineMode == Insert
                             || CombineMode == Average
                             || CombineMode == AbsMax;
  if (!supportedMode)
    EPETRA_CHK_ERR(-1); //Unsupported CombinedMode

  if (NumImportIDs<=0) return(0);

  int * target = Values_;
  const int maxElementSize = Map().MaxElementSize();
  const bool constantElementSize = Map().ConstantElementSize();

  int * firstPointOfElement = 0;
  int * sizeOfElement = 0;
  if (!constantElementSize) {
    firstPointOfElement = Map().FirstPointInElementList();
    sizeOfElement = Map().ElementSizeList();
  }

  // Only these four modes touch the target; everything else is done here.
  const bool modifiesTarget =    CombineMode == Add
                              || CombineMode == Insert
                              || CombineMode == AbsMax
                              || CombineMode == Average;
  if (!modifiesTarget) return(0);

  const bool pointEntries = (maxElementSize==1);

  // Unpack it...
  int * packet = (int *) Imports;

  for (int id = 0; id < NumImportIDs; ++id, packet += maxElementSize) {
    const int lid = ImportLIDs[id];

    // Locate the run of target entries that belongs to this element.
    int first;
    int count;
    if (pointEntries) {
      first = lid;
      count = 1;
    }
    else if (constantElementSize) {
      first = maxElementSize*lid;
      count = maxElementSize;
    }
    else {
      first = firstPointOfElement[lid];
      count = sizeOfElement[lid];
    }

    int * dst = target + first;
    for (int k = 0; k < count; ++k) {
      const int value = packet[k];
      switch (CombineMode) {
      case Add:
        dst[k] += value; // Add to existing value
        break;
      case Insert:
        dst[k] = value;
        break;
      case AbsMax:
        dst[k] = EPETRA_MAX( dst[k], std::abs(value));
        break;
      case Average:
        // Note:  This form of averaging is not a true average if more than one
        //        value is combined.  This might be an issue in the future, but
        //        we leave it this way for now.
        dst[k] += value;
        dst[k] /= 2;
        break;
      default:
        break;
      }
    }
  }

  return(0);
}
// Scans a coordinate-format matrix file on process 0 and counts its entries.
//
//   data_file - path of the file; opened for reading on process 0 only.
//   non_zeros - must be empty on entry; on process 0 it receives, at index
//               r-1, the number of entries counted for row r (indices in the
//               file are treated as 1-based). It is grown in steps of at
//               least 1000 and may be longer than N_rows.
//   N_rows    - largest row index seen (for a symmetric file, the largest row
//               or column index); broadcast from process 0.
//   nnz       - number of entries counted; broadcast from process 0.
//   comm      - communicator used for MyPID() and Broadcast().
//
// The first line is searched for the word "symmetric"; the second line is
// skipped without being interpreted (presumably the size line -- confirm the
// files in use carry no additional comment lines). In a symmetric file every
// off-diagonal entry is counted for both its row and its column, and all
// off-diagonal entries must lie in the same triangle.
//
// Lines from which fewer than two indices can be parsed (for example blank
// lines) are skipped. An index smaller than 1 that would be used to address
// non_zeros terminates the program, as do an unopenable file and entries in
// both triangles of a symmetric file.
void TTrilinos_Util_CountMatrixMarket( const char *data_file, 
				      std::vector<int> &non_zeros,
				      int_type &N_rows, int_type &nnz, 
				      const Epetra_Comm  &comm) { 

  N_rows = 0 ; 
  nnz = 0 ; 
  int_type vecsize = non_zeros.size(); 
  assert( vecsize == 0 ) ; 
  const int BUFSIZE = 800 ; 
  char buffer[BUFSIZE] ; 
  bool first_off_diag = true ; 
  bool upper = false ;   // set by the first off-diagonal entry of a symmetric file

  if(comm.MyPID() == 0)  { 
    /* Get information about the array stored in the file specified in the  */
    /* argument list:                                                       */
    
    FILE *in_file = fopen( data_file, "r");
    if (in_file == NULL)
      {
	printf("Error: Cannot open file: %s\n",data_file);
	exit(1);
      }
    
    // Header line: only inspected when it could actually be read.
    bool symmetric = false ; 
    if ( fgets( buffer, BUFSIZE, in_file ) ) {
      std::string headerline1 = buffer;
      if ( headerline1.find("symmetric") != std::string::npos) symmetric = true; 
    }

    // Second line is skipped; if it is missing the loop below ends at once.
    char *skipped_line = fgets( buffer, BUFSIZE, in_file ) ;
    (void)skipped_line;

    while ( fgets( buffer, BUFSIZE, in_file ) ) { 
      int_type i = 0, j = 0; 
      float val ; 
      int fields_read = 0 ; 
      if(sizeof(int) == sizeof(int_type))
        fields_read = sscanf( buffer, "%d %d %f", &i, &j, &val ) ; 
      else if(sizeof(long long) == sizeof(int_type))
        fields_read = sscanf( buffer, "%lld %lld %f", &i, &j, &val ) ; 
      else
        assert(false);

      // Without both indices there is nothing to count on this line.
      if ( fields_read < 2 ) continue ; 

      // i (and j for symmetric files) is used as a 1-based array position.
      if ( i < 1 || ( symmetric && j < 1 ) ) { 
	std::cout << "invalid row or column index in file" << std::endl ; 
	exit(1) ; 
      }

      int_type needvecsize = i;
      if (symmetric) needvecsize = EPETRA_MAX(i,j) ;
      if ( needvecsize >= vecsize ) {
	// resize() value-initializes the new counters to zero.
	vecsize += EPETRA_MAX((int_type) 1000,needvecsize-vecsize) ; 
	non_zeros.resize(vecsize) ; 
      }
      N_rows = EPETRA_MAX( N_rows, i ) ; 
      if (symmetric) N_rows = EPETRA_MAX( N_rows, j ) ; 
      non_zeros[i-1]++ ; 
      nnz++; 
      if ( symmetric && i != j ) {
	if ( first_off_diag ) { 
	  upper = j > i ; 
	  first_off_diag = false ; 
	}
	if ( ( j > i && ! upper ) || ( i > j && upper ) ) { 
	  std::cout << "file not symmetric" << std::endl ; 
	  exit(1) ; 
	}
	// The mirrored entry is counted for row j.
	non_zeros[j-1]++ ; 
	nnz++; 
      } 
    } 
    fclose(in_file);
  }
  comm.Broadcast( &N_rows, 1, 0 );
  comm.Broadcast( &nnz, 1, 0 );
  return;
}
Beispiel #16
0
//=========================================================================
// Combines the values of Source into this vector for IDs that live on the
// calling process in both vectors.
//
//   Source          - must be an Epetra_IntVector (checked by dynamic_cast to
//                     a reference).
//   NumSameIDs      - number of leading elements that sit at the same
//                     position in both vectors.
//   NumPermuteIDs   - number of elements that move; element j is read from
//                     PermuteFromLIDs[j] and combined into PermuteToLIDs[j].
//   Indexor         - unused.
//   CombineMode     - Add:     target += source
//                     Insert:  target  = source
//                     AbsMax:  target  = max(target, |source|)
//                     Average: target  = (target + source) / 2 (integer
//                              division; not a true average if more than one
//                              value is combined)
//                     Zero:    target is left untouched
//                     Any other mode is reported through EPETRA_CHK_ERR(-1).
//
// AbsMax takes the absolute value of the source entry, the same way
// Epetra_IntVector::UnpackAndCombine does, so the result does not depend on
// whether an entry is combined locally or received from another process.
//
// Element layout: with MaxElementSize == 1 every element is one entry; with
// a constant element size element e covers MaxElementSize entries starting
// at MaxElementSize*e; otherwise start and length come from the maps'
// FirstPointInElementList / ElementSizeList. Returns 0 on success.
int Epetra_IntVector::CopyAndPermute(const Epetra_SrcDistObject& Source,
                                     int NumSameIDs,
                                     int NumPermuteIDs,
                                     int * PermuteToLIDs,
                                     int *PermuteFromLIDs,
                                     const Epetra_OffsetIndex * Indexor,
                                     Epetra_CombineMode CombineMode)
{
  (void)Indexor;
  const Epetra_IntVector & A = dynamic_cast<const Epetra_IntVector &>(Source);

  if(    CombineMode != Add
      && CombineMode != Zero
      && CombineMode != Insert
      && CombineMode != Average
      && CombineMode != AbsMax )
    EPETRA_CHK_ERR(-1); //Unsupported CombinedMode

  int * From;
  A.ExtractView(&From);
  int *To = Values_;

  const int MaxElementSize = Map().MaxElementSize();
  const bool ConstantElementSize = Map().ConstantElementSize();
  const bool PointEntries = (MaxElementSize==1);

  int * ToFirstPointInElementList = 0;
  int * FromFirstPointInElementList = 0;
  int * FromElementSizeList = 0;
  if (!ConstantElementSize) {
    ToFirstPointInElementList =   Map().FirstPointInElementList();
    FromFirstPointInElementList = A.Map().FirstPointInElementList();
    FromElementSizeList = A.Map().ElementSizeList();
  }

  // Zero (and anything else not listed) never modifies the target.
  const bool ModifiesTarget =    CombineMode == Add
                              || CombineMode == Insert
                              || CombineMode == AbsMax
                              || CombineMode == Average;
  if (!ModifiesTarget) return(0);

  // Do copy first. Nothing to do when source and target share storage.
  if (NumSameIDs>0 && To!=From) {
    int NumSameEntries;
    if (PointEntries)
      NumSameEntries = NumSameIDs;
    else if (ConstantElementSize)
      NumSameEntries = NumSameIDs * MaxElementSize;
    else
      NumSameEntries = FromFirstPointInElementList[NumSameIDs];

    for (int j=0; j<NumSameEntries; j++) {
      switch (CombineMode) {
      case Add:
        To[j] += From[j]; // Add to existing value
        break;
      case Insert:
        To[j] = From[j];
        break;
      case AbsMax:
        To[j] = EPETRA_MAX( To[j], std::abs(From[j]));
        break;
      case Average:
        To[j] += From[j];
        To[j] /= 2;
        break;
      default:
        break;
      }
    }
  }

  // Do local permutation next
  for (int j=0; j<NumPermuteIDs; j++) {
    // Locate the runs of entries of the target and source element.
    int ToFirst;
    int FromFirst;
    int ElementSize;
    if (PointEntries) {
      ToFirst = PermuteToLIDs[j];
      FromFirst = PermuteFromLIDs[j];
      ElementSize = 1;
    }
    else if (ConstantElementSize) {
      ToFirst = MaxElementSize*PermuteToLIDs[j];
      FromFirst = MaxElementSize*PermuteFromLIDs[j];
      ElementSize = MaxElementSize;
    }
    else {
      ToFirst = ToFirstPointInElementList[PermuteToLIDs[j]];
      FromFirst = FromFirstPointInElementList[PermuteFromLIDs[j]];
      ElementSize = FromElementSizeList[PermuteFromLIDs[j]];
    }

    int * Dst = To + ToFirst;
    const int * Src = From + FromFirst;
    for (int k=0; k<ElementSize; k++) {
      switch (CombineMode) {
      case Add:
        Dst[k] += Src[k];
        break;
      case Insert:
        Dst[k] = Src[k];
        break;
      case AbsMax:
        Dst[k] = EPETRA_MAX( Dst[k], std::abs(Src[k]));
        break;
      case Average:
        Dst[k] += Src[k];
        Dst[k] /= 2;
        break;
      default:
        break;
      }
    }
  }
  return(0);
}
  // Reads the same problem from three files (Harwell-Boeing HBname, Matrix
  // Market MMname, triplet TRIname) and checks that the three matrices act
  // alike: for 10 random vectors x the products of the triplet and Matrix
  // Market matrices are compared against the Harwell-Boeing product, and a
  // 1-norm difference above 1e-11 * NormOne(HbA) counts as one error.
  // The triplet reader flags are chosen from the last five characters of
  // TRIname (".TimD", ".triU" or ".triS"). Messages are printed only on
  // process 0 and only when verbose is set.
  // Returns the number of failed comparisons (reader errors are passed on
  // through EPETRA_CHK_ERR).
  int TestOneMatrix( std::string HBname, std::string MMname, std::string TRIname, Epetra_Comm &Comm, bool verbose ) { 

    if ( Comm.MyPID() != 0 ) verbose = false ; 

    Epetra_Map * readMap = 0;

    Epetra_CrsMatrix * HbA = 0; 
    Epetra_Vector * Hbx = 0; 
    Epetra_Vector * Hbb = 0; 
    Epetra_Vector * Hbxexact = 0;

    Epetra_CrsMatrix * TriplesA = 0; 
    Epetra_Vector * Triplesx = 0; 
    Epetra_Vector * Triplesb = 0;
    Epetra_Vector * Triplesxexact = 0;

    Epetra_CrsMatrix * MatrixMarketA = 0; 
    Epetra_Vector * MatrixMarketx = 0; 
    Epetra_Vector * MatrixMarketb = 0;
    Epetra_Vector * MatrixMarketxexact = 0;

    const int nameLength = TRIname.size() ; 
    const std::string suffix = TRIname.substr( EPETRA_MAX(0,nameLength-5), nameLength );

    if ( suffix == ".TimD" ) { 
      // File with a Tim Davis header and zero-based indexing
      EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra64( &TRIname[0], false, Comm, 
                                                          readMap, TriplesA, Triplesx, 
                                                          Triplesb, Triplesxexact, false, true, true ) );
    } else if ( suffix == ".triU" ) { 
      // Unsymmetric triplet matrix
      EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra64( &TRIname[0], false, Comm, 
                                                          readMap, TriplesA, Triplesx, 
                                                          Triplesb, Triplesxexact, false, false ) );
    } else if ( suffix == ".triS" ) { 
      // Symmetric triplet matrix
      EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra64( &TRIname[0], true, Comm, 
                                                          readMap, TriplesA, Triplesx, 
                                                          Triplesb, Triplesxexact, false, false ) );
    } else {
      assert( false ) ; 
    }
    // readMap is still null when no reader ran, so this is a no-op then.
    delete readMap;

    EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra64( &MMname[0], Comm, readMap, 
                                                             MatrixMarketA, MatrixMarketx, 
                                                             MatrixMarketb, MatrixMarketxexact) );
    delete readMap;

    // Read the HB problem
    Trilinos_Util_ReadHb2Epetra64( &HBname[0], Comm, readMap, HbA, Hbx, 
                                   Hbb, Hbxexact) ;

#if 0
    std::cout << " HbA " ; 
    HbA->Print( std::cout ) ; 
    std::cout << std::endl ; 

    std::cout << " MatrixMarketA " ; 
    MatrixMarketA->Print( std::cout ) ; 
    std::cout << std::endl ; 

    std::cout << " TriplesA " ; 
    TriplesA->Print( std::cout ) ; 
    std::cout << std::endl ; 
#endif

    int TripleErr = 0 ; 
    int MMerr = 0 ; 
    for ( int trial = 0 ; trial < 10 ; ++trial ) {
      Hbx->Random();

      // Start the three result vectors from different values so a product
      // that does not overwrite its output cannot pass by accident.
      Triplesb->PutScalar(1.1);
      Hbb->PutScalar(1.2);
      MatrixMarketb->PutScalar(1.3);

      HbA->Multiply( false, *Hbx, *Hbb );
      const double norm_A = HbA->NormOne( ) ; 

      // Triplesb = Hbb - TriplesA*x
      TriplesA->Multiply( false, *Hbx, *Triplesb );
      Triplesb->Update( 1.0, *Hbb, -1.0 ) ; 

      // MatrixMarketb = Hbb - MatrixMarketA*x
      MatrixMarketA->Multiply( false, *Hbx, *MatrixMarketb );
      MatrixMarketb->Update( 1.0, *Hbb, -1.0 ) ; 

      double resid_Hb_Triples;
      double resid_Hb_Matrix_Market;
      Triplesb->Norm1( &resid_Hb_Triples ) ; 
      MatrixMarketb->Norm1( &resid_Hb_Matrix_Market ) ; 

      const bool triplesDiffer = resid_Hb_Triples > 1e-11 * norm_A ; 
      const bool matrixMarketDiffers = resid_Hb_Matrix_Market > 1e-11 * norm_A ; 

      if ( triplesDiffer ) ++TripleErr ; 
      if ( matrixMarketDiffers ) ++MMerr ; 

      if ( verbose && triplesDiffer ) 
        std::cout << " resid_Hb_Triples = " <<  resid_Hb_Triples 
                  << " norm_A = " << norm_A << std::endl ; 
      if ( verbose && matrixMarketDiffers ) 
        std::cout << " resid_Hb_Matrix_Market = " <<  resid_Hb_Matrix_Market 
                  << " norm_A = " << norm_A << std::endl ; 
    }

    if ( verbose ) { 
      if ( TripleErr ) std::cout << " Error in reading " << HBname << " or " << TRIname << std::endl ; 
      if ( MMerr ) std::cout << " Error in reading " << HBname << " or " << MMname << std::endl ; 
    }

    delete HbA; 
    delete Hbx; 
    delete Hbb; 
    delete Hbxexact;

    delete TriplesA; 
    delete Triplesx; 
    delete Triplesb;
    delete Triplesxexact;

    delete MatrixMarketA; 
    delete MatrixMarketx; 
    delete MatrixMarketb;
    delete MatrixMarketxexact;

    delete readMap;

    return TripleErr+MMerr ; 
  }
Beispiel #18
0
int Amesos_Scalapack::Solve() { 
  
  if( debug_ == 1 ) std::cout << "Entering `Solve()'" << std::endl;
  
  NumSolve_++;
  
  Epetra_MultiVector   *vecX = Problem_->GetLHS() ; 
  Epetra_MultiVector   *vecB = Problem_->GetRHS() ; 
  
  //
  //  Compute the number of right hands sides 
  //  (and check that X and B have the same shape) 
  //
  int nrhs; 
  if ( vecX == 0 ) { 
    nrhs = 0 ;
    EPETRA_CHK_ERR( vecB != 0 ) ; 
  } else { 
    nrhs = vecX->NumVectors() ; 
    EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ; 
  }
  
  Epetra_MultiVector *ScalapackB =0;
  Epetra_MultiVector *ScalapackX =0;
  //
  //  Extract Scalapack versions of X and B 
  //
  double *ScalapackXvalues ;
  
  Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
  Time_->ResetStartTime(); // track time to broadcast vectors
  //
  //  Copy B to the scalapack version of B
  //
  const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap();
  Epetra_MultiVector *ScalapackXextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; 
  Epetra_MultiVector *ScalapackBextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; 
  
  Epetra_Import ImportToScalapack( *VectorMap_, OriginalMap );
  ScalapackBextract->Import( *vecB, ImportToScalapack, Insert ) ;
  ScalapackB = ScalapackBextract ; 
  ScalapackX = ScalapackXextract ; 
  
  VecTime_ += Time_->ElapsedTime();
  
  //
  //  Call SCALAPACKs PDGETRS to perform the solve
  //
  
  int DescX[10];  
  
  ScalapackX->Scale(1.0, *ScalapackB) ;  
  
  int ScalapackXlda ; 
  
  Time_->ResetStartTime(); // tract time to solve
  
  //
  //  Setup DescX 
  //
  
  if( nrhs > nb_ ) {
    EPETRA_CHK_ERR( -2 );  
  }
  
  int Ierr[1] ; 
  Ierr[0] = 0 ; 
  const int zero = 0 ; 
  const int one = 1 ; 
  if ( iam_ < nprow_ * npcol_ ) {
    assert( ScalapackX->ExtractView( &ScalapackXvalues, &ScalapackXlda ) == 0 ) ; 
    
    if ( false ) std::cout << "Amesos_Scalapack.cpp: " << __LINE__ << " ScalapackXlda = "  <<  ScalapackXlda 
		      << " lda_ = "  << lda_ 
		      << " nprow_ = "  << nprow_ 
		      << " npcol_ = "  << npcol_ 
		      << " myprow_ = "  << myprow_ 
		      << " mypcol_ = "  << mypcol_ 
		      << " iam_ = "  << iam_ << std::endl ;
    if (  TwoD_distribution_ )    assert( mypcol_ >0 || EPETRA_MAX(ScalapackXlda,1) == lda_ ) ; 
    
    DESCINIT_F77(DescX, 
		 &NumGlobalElements_, 
		 &nrhs, 
		 &nb_,
		 &nb_,
		 &zero,
		 &zero,
		 &ictxt_,
		 &lda_,
		 Ierr ) ;
    assert( Ierr[0] == 0 ) ; 
    
    //
    //  For the 1D data distribution, we factor the transposed 
    //  matrix, hence we must invert the sense of the transposition
    //
    char trans = 'N';
    if ( TwoD_distribution_ ) {
      if ( UseTranspose() ) trans = 'T' ;
    } else {
      
      if ( ! UseTranspose() ) trans = 'T' ;
    }
    
    if ( nprow_ * npcol_ == 1 ) { 
      DGETRS_F77(&trans,
		 &NumGlobalElements_,  
		 &nrhs, 
		 &DenseA_[0],
		 &lda_,
		 &Ipiv_[0],
		 ScalapackXvalues,
		 &lda_,
		 Ierr ) ;
    } else { 
      PDGETRS_F77(&trans,
		  &NumGlobalElements_,  
		  &nrhs, 
		  &DenseA_[0],
		  &one,
		  &one, 
		  DescA_,
		  &Ipiv_[0],
		  ScalapackXvalues,
		  &one,
		  &one, 
		  DescX,
		  Ierr ) ;
    }
  }
  
  SolTime_ += Time_->ElapsedTime();
  
  Time_->ResetStartTime();  // track time to broadcast vectors
  //
  //  Copy X back to the original vector
  // 
  Epetra_Import ImportFromScalapack( OriginalMap, *VectorMap_ );
  vecX->Import( *ScalapackX, ImportFromScalapack, Insert ) ;
  delete ScalapackBextract ;
  delete ScalapackXextract ;
  
  VecTime_ += Time_->ElapsedTime();
  
  //  All processes should return the same error code
  if ( nprow_ * npcol_ < Comm().NumProc() ) 
    Comm().Broadcast( Ierr, 1, 0 ) ; 
  
  // MS // compute vector norms
  if( ComputeVectorNorms_ == true || verbose_ == 2 ) {
    double NormLHS, NormRHS;
    for( int i=0 ; i<nrhs ; ++i ) {
      assert((*vecX)(i)->Norm2(&NormLHS)==0);
      assert((*vecB)(i)->Norm2(&NormRHS)==0);
      if( verbose_ && Comm().MyPID() == 0 ) {
	std::cout << "Amesos_Scalapack : vector " << i << ", ||x|| = " << NormLHS
	     << ", ||b|| = " << NormRHS << std::endl;
      }
    }
  }
  
  // MS // compute true residual
  if( ComputeTrueResidual_ == true || verbose_ == 2  ) {
    double Norm;
    Epetra_MultiVector Ax(vecB->Map(),nrhs);
    for( int i=0 ; i<nrhs ; ++i ) {
      (Problem_->GetMatrix()->Multiply(UseTranspose(), *((*vecX)(i)), Ax));
      (Ax.Update(1.0, *((*vecB)(i)), -1.0));
      (Ax.Norm2(&Norm));
      
      if( verbose_ && Comm().MyPID() == 0 ) {
	std::cout << "Amesos_Scalapack : vector " << i << ", ||Ax - b|| = " << Norm << std::endl;
      }
    }
  }
  
  return Ierr[0];
  
}
Beispiel #19
0
int Amesos_Scalapack::ConvertToScalapack(){
  
  //
  //  Convert matrix and vector to the form that Scalapack expects
  //  ScaLAPACK accepts the matrix to be in any 2D block-cyclic form
  //
  //  Amesos_ScaLAPACK uses one of two 2D data distributions: 
  //  a simple 1D non-cyclic data distribution with npcol= 1, or a 
  //  full  2D block-cyclic data distribution.
  //
  //  2D data distribvution:
  //    Because the Epetra export operation is oriented toward a 1D 
  //    data distribution in which each row is entirely stored on 
  //    a single process, we create two intermediate matrices: FatIn and
  //    FatOut, both of which have dimension:  
  //      NumGlobalElements * nprow by NumGlobalElements
  //    This allows each row of FatOut to be owned by a single process.
  //    The larger dimension does not significantly increase the 
  //    storage requirements and allows the export operation to be 
  //    efficient.  
  //
  //  1D data distribution:
  //  We have chosen the simplest 2D block-cyclic form, a 1D blocked (not-cyclic)
  //  data distribution, for the matrix A.
  //  We use the same distribution for the multivectors X and B.  However, 
  //  except for very large numbers of right hand sides, this places all of X and B
  //  on process 0, making it effectively a serial matrix.  
  //  
  //  For now, we simply treat X and B as serial matrices (as viewed from epetra)
  //  though ScaLAPACK treats them as distributed matrices. 
  //
  
  if( debug_ == 1 ) std::cout << "Entering `ConvertToScalapack()'" << std::endl;
  
  Time_->ResetStartTime();
  
  if ( iam_ < nprow_ * npcol_ ) { 
    if ( TwoD_distribution_ ) { 
      
      DenseA_.resize( NumOurRows_ * NumOurColumns_ ); 
      for ( int i = 0 ; i < (int)DenseA_.size() ; i++ ) DenseA_[i] = 0 ; 
      assert( lda_ == EPETRA_MAX(1,NumOurRows_) ) ;
      assert( DescA_[8] == lda_ ) ;
      
      int NzThisRow ;
      int MyRow;
      
      double *RowValues;
      int *ColIndices;
      int MaxNumEntries = FatOut_->MaxNumEntries();
      
      std::vector<int>ColIndicesV(MaxNumEntries);
      std::vector<double>RowValuesV(MaxNumEntries);
      
      int NumMyElements = FatOut_->NumMyRows() ; 
      for ( MyRow = 0; MyRow < NumMyElements ; MyRow++ ) {
	EPETRA_CHK_ERR( FatOut_->
			ExtractMyRowView( MyRow, NzThisRow, RowValues, 
					  ColIndices ) != 0 ) ;
	//
	//  The following eight lines are just a sanity check on MyRow:
	//
	int MyGlobalRow =  FatOut_->GRID( MyRow );
	assert( MyGlobalRow%npcol_ == mypcol_ ) ;   // I should only own rows belonging to my processor column
	int MyTrueRow = MyGlobalRow/npcol_ ;  // This is the original row
	int UniformRows =  ( MyTrueRow / ( nprow_ * nb_ ) ) * nb_ ; 
	int AllExcessRows = MyTrueRow - UniformRows * nprow_ ; 
	int OurExcessRows =  AllExcessRows - ( myprow_ * nb_ ) ; 
	
	if (  MyRow != UniformRows + OurExcessRows ) {
	  std::cout << " iam _ = " << iam_ 
	       << " MyGlobalRow = " << MyGlobalRow 
	       << " MyTrueRow = " << MyTrueRow 
	       << " UniformRows = " << UniformRows 
	       << " AllExcessRows = " << AllExcessRows 
	       << " OurExcessRows = " << OurExcessRows 
	       << " MyRow = " << MyRow  << std::endl ;  
	}
	
	assert( OurExcessRows >= 0 &&  OurExcessRows < nb_ );
	assert( MyRow == UniformRows + OurExcessRows ) ; 
	
	for ( int j = 0; j < NzThisRow; j++ ) { 
	  assert(  FatOut_->RowMatrixColMap().GID( ColIndices[j] ) ==
		   FatOut_->GCID( ColIndices[j] ) );
	  
	  int MyGlobalCol =  FatOut_->GCID( ColIndices[j] );
	  assert( (MyGlobalCol/nb_)%npcol_ == mypcol_ ) ; 
	  int UniformCols =  ( MyGlobalCol / ( npcol_ * nb_ ) ) * nb_ ; 
	  int AllExcessCols = MyGlobalCol - UniformCols * npcol_ ; 
	  int OurExcessCols =  AllExcessCols - ( mypcol_ * nb_ ) ; 
	  assert( OurExcessCols >= 0 &&  OurExcessCols < nb_ );
	  int MyCol = UniformCols + OurExcessCols ; 
	  
	  DenseA_[ MyCol * lda_ + MyRow ] = RowValues[j] ; 
	}
      }
      
    } else { 
      
      int NumMyElements = ScaLAPACK1DMatrix_->NumMyRows() ; 
      
      assert( NumGlobalElements_ ==ScaLAPACK1DMatrix_->NumGlobalRows());
      assert( NumGlobalElements_ ==ScaLAPACK1DMatrix_->NumGlobalCols());
      DenseA_.resize( NumGlobalElements_ * NumMyElements ) ;
      for ( int i = 0 ; i < (int)DenseA_.size() ; i++ ) DenseA_[i] = 0 ; 
      
      int NzThisRow ;
      int MyRow;
      
      double *RowValues;
      int *ColIndices;
      int MaxNumEntries = ScaLAPACK1DMatrix_->MaxNumEntries();
      
      assert( DescA_[8] == lda_ ) ; //  Double check Lda
      std::vector<int>ColIndicesV(MaxNumEntries);
      std::vector<double>RowValuesV(MaxNumEntries);
      
      for ( MyRow = 0; MyRow < NumMyElements ; MyRow++ ) {
	EPETRA_CHK_ERR( ScaLAPACK1DMatrix_->
			ExtractMyRowView( MyRow, NzThisRow, RowValues, 
					  ColIndices ) != 0 ) ;
	
	for ( int j = 0; j < NzThisRow; j++ ) { 
	  DenseA_[ ( ScaLAPACK1DMatrix_->RowMatrixColMap().GID( ColIndices[j] ) ) 
		   + MyRow * NumGlobalElements_ ] = RowValues[j] ; 
	}
      }
      //
      //  Create a map to allow us to redistribute the vectors X and B 
      //
      Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
      const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ; 
      assert( NumGlobalElements_ == OriginalMap.NumGlobalElements() ) ;
      int NumMyElements_ = 0 ;
      if (iam_==0) NumMyElements_ = NumGlobalElements_;
      
      if (VectorMap_) { delete VectorMap_ ; VectorMap_ = 0 ; } 
      VectorMap_ = new Epetra_Map( NumGlobalElements_, NumMyElements_, 0, Comm() );
    }
  }
  ConTime_ += Time_->ElapsedTime();
  
  return 0;
}   
Beispiel #20
0
//
//  Redistribute the problem matrix into the layout used for ScaLAPACK and
//  set up the process grid and the array descriptor DescA_.
//
//  Steps, as implemented below:
//    1) Pick the number of processes to use: Comm().NumProc() capped by
//       MaxProcesses_ when that is positive, otherwise capped by the
//       heuristic (1+NumRows_/200)^2.
//    2) TwoD_distribution_ set:  choose npcol_/nprow_, build the
//       intermediate matrix FatIn (each original row split into npcol_
//       rows), export it into FatOut_, and build VectorMap_.
//       TwoD_distribution_ clear: use a 1 x NumberOfProcesses grid and
//       export the matrix into ScaLAPACK1DMatrix_ over ScaLAPACK1DMap_.
//    3) Initialise the grid (SL_INIT_F77) if ictxt_ still holds the value
//       -1313, query it (BLACS_GRIDINFO_F77) and, on processes inside the
//       grid, fill DescA_ (DESCINIT_F77).
//
//  The elapsed time is accumulated into MatTime_.
//
//  Returns 0, or a non-zero value (through EPETRA_CHK_ERR) when the
//  problem's operator is not an Epetra_RowMatrix.
//
//  NOTE(review): several Epetra calls below (ExtractMyRowCopy,
//  InsertGlobalValues, FillComplete, Export) have their return codes
//  ignored - confirm whether that is intentional.
//  NOTE(review): expressions of the form &vec[0] are applied to vectors
//  that look like they can be empty (e.g. when NumMyElements or
//  NumOurRows_ is 0) - verify.
//
int Amesos_Scalapack::RedistributeA( ) {

  if( debug_ == 1 ) std::cout << "Entering `RedistributeA()'" << std::endl;

  Time_->ResetStartTime();
  
  Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
  // Bail out with a non-zero code if the operator is not a row matrix.
  EPETRA_CHK_ERR( RowMatrixA == 0 ) ; 

  const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ; 
  int NumberOfProcesses = Comm().NumProc() ; 

  //
  //  Compute a uniform distribution as ScaLAPACK would want it
  //    MyFirstElement - The first element which this processor would have
  //    NumExpectedElements - The number of elements which this processor would have
  //

  int NumRows_ = RowMatrixA->NumGlobalRows() ; 
  int NumColumns_  = RowMatrixA->NumGlobalCols() ; 
  if ( MaxProcesses_ > 0 ) {
    NumberOfProcesses = EPETRA_MIN( NumberOfProcesses, MaxProcesses_ ) ; 
  }
  else {
    // No explicit limit: cap the process count by a size-based heuristic.
    int ProcessNumHeuristic = (1+NumRows_/200)*(1+NumRows_/200);
    NumberOfProcesses = EPETRA_MIN( NumberOfProcesses,  ProcessNumHeuristic );
  }
  
  if ( debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:171" << std::endl;
  //
  // Create the ScaLAPACK data distribution.
  // The TwoD data distribution is created in a completely different
  // manner and is not transposed (whereas the ScaLAPACK 1D data
  // distribution was transposed) 
  //
  if ( debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:163" << std::endl;
  Comm().Barrier(); 
  if ( TwoD_distribution_ ) { 
    if ( debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:166" << std::endl;
    Comm().Barrier(); 
    // Grid shape: npcol_ = min(P, max(2, floor(sqrt(P/2)))), nprow_ = P / npcol_.
    npcol_ = EPETRA_MIN( NumberOfProcesses, 
			 EPETRA_MAX ( 2, (int) sqrt( NumberOfProcesses * 0.5 ) ) ) ; 
    nprow_ = NumberOfProcesses / npcol_ ;

    //
    //  Create the map for FatA - our first intermediate matrix
    //
    int NumMyElements = RowMatrixA->RowMatrixRowMap().NumMyElements() ;
    std::vector<int> MyGlobalElements( NumMyElements );
    RowMatrixA->RowMatrixRowMap().MyGlobalElements( &MyGlobalElements[0] ) ;

    int NumMyColumns = RowMatrixA->RowMatrixColMap().NumMyElements() ;
    std::vector<int> MyGlobalColumns( NumMyColumns );
    RowMatrixA->RowMatrixColMap().MyGlobalElements( &MyGlobalColumns[0] ) ;

    if ( debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:194" << std::endl;

    // Original global row r becomes the npcol_ fat rows r*npcol_ .. r*npcol_+npcol_-1.
    std::vector<int> MyFatElements( NumMyElements * npcol_ ); 
    
    for( int LocalRow=0; LocalRow<NumMyElements; LocalRow++ ) {
      for (int i = 0 ; i < npcol_; i++ ){
	MyFatElements[LocalRow*npcol_+i] = MyGlobalElements[LocalRow]*npcol_+i;
      } 
    }
    
    Epetra_Map FatInMap( npcol_*NumRows_, NumMyElements*npcol_, 
			 &MyFatElements[0], 0, Comm() ); 
    
    //
    //  Create FatIn, our first intermediate matrix
    //
    Epetra_CrsMatrix FatIn( Copy, FatInMap, 0 );
    
    
    // Per-process-column scratch buffers for the entries of the current row;
    // they start with one slot each and are grown on demand below.
    std::vector<std::vector<int> > FatColumnIndices(npcol_,std::vector<int>(1));
    std::vector<std::vector<double> > FatMatrixValues(npcol_,std::vector<double>(1));
    std::vector<int> FatRowPtrs(npcol_);  // A FatRowPtrs[i] = the number 
    // of entries in local row LocalRow*npcol_ + i 
    
    if ( debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:219" << std::endl;
    //
    // Position of this process in the grid; processes outside the grid get
    // the out-of-range coordinates (nprow_, npcol_).
    mypcol_ = iam_%npcol_;
    myprow_ = (iam_/npcol_)%nprow_;
    if ( iam_ >= nprow_ * npcol_ ) {
      myprow_ = nprow_; 
      mypcol_ = npcol_; 
    }
    //  Each row is split into npcol_ rows, with each of the 
    //  new rows containing only those elements belonging to 
    //  its process column (in the ScaLAPACK 2D process grid)
    //
    int MaxNumIndices = RowMatrixA->MaxNumEntries();
    int NumIndices;
    std::vector<int> ColumnIndices(MaxNumIndices);
    std::vector<double> MatrixValues(MaxNumIndices); 
    
    if ( debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:232 NumMyElements = " 
			    << NumMyElements 
			    << std::endl;
    
    // Block size of the 2D distribution comes from grid_nb_.
    nb_ = grid_nb_;
    
    for( int LocalRow=0; LocalRow<NumMyElements; ++LocalRow ) {
      
      RowMatrixA->ExtractMyRowCopy( LocalRow, 
				    MaxNumIndices,
				    NumIndices, 
				    &MatrixValues[0],
				    &ColumnIndices[0] );
      
      for (int i=0; i<npcol_; i++ )  FatRowPtrs[i] = 0 ; 

      //
      //  Deal the individual matrix entries out to the row owned by
      //  the process to which this matrix entry will belong.
      //
      for( int i=0 ; i<NumIndices ; ++i ) {
	int GlobalCol = MyGlobalColumns[ ColumnIndices[i] ];
	int pcol_i = pcolnum( GlobalCol, nb_, npcol_ ) ;
	// Grow the scratch buffers before they fill up.
	// NOTE(review): this compares a signed int with an unsigned size().
	if ( FatRowPtrs[ pcol_i ]+1 >= FatColumnIndices[ pcol_i ].size() ) {
	  FatColumnIndices[ pcol_i ]. resize( 2 * FatRowPtrs[ pcol_i ]+1 );
	  FatMatrixValues[ pcol_i ]. resize( 2 * FatRowPtrs[ pcol_i ]+1 );
	}
	FatColumnIndices[pcol_i][FatRowPtrs[pcol_i]] = GlobalCol ;
	FatMatrixValues[pcol_i][FatRowPtrs[pcol_i]] = MatrixValues[i];
	
	FatRowPtrs[ pcol_i ]++;
      }
      
      //
      //  Insert each of the npcol_ rows individually
      //
      for ( int pcol_i = 0 ; pcol_i < npcol_ ; pcol_i++ ) { 
	FatIn.InsertGlobalValues( MyGlobalElements[LocalRow]*npcol_ + pcol_i, 
				  FatRowPtrs[ pcol_i ],
				  &FatMatrixValues[ pcol_i ][0], 
				  &FatColumnIndices[ pcol_i ][0] );
      }
    }
    FatIn.FillComplete( false );
    
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:260" << std::endl;
    if (  debug_ == 1) std::cout  << "Amesos_Scalapack.cpp:265B" 
			     << " iam_ = " << iam_ 
			     << " nb_ = " << nb_ 
			     << " nprow_ = " << nprow_ 
			     << " npcol_ = " << npcol_ 
			     << std::endl;
    
    //
    //  Compute the map for our second intermediate matrix, FatOut
    //
    //  Compute directly
    //  UniformRows: rows every process row gets from complete rounds of
    //  nprow_ blocks; the remaining rows are handed out nb_ at a time in
    //  process-row order, clamped to [0, nb_].
    int UniformRows =  ( NumRows_ / ( nprow_ * nb_ ) ) * nb_ ; 
    int AllExcessRows = NumRows_ - UniformRows * nprow_ ; 
    int OurExcessRows = EPETRA_MIN( nb_, AllExcessRows - ( myprow_ * nb_ ) ) ; 
    OurExcessRows = EPETRA_MAX( 0, OurExcessRows );
    NumOurRows_ = UniformRows + OurExcessRows ; 
    
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:277" << std::endl;
    //  Same computation for the columns.
    int UniformColumns =  ( NumColumns_ / ( npcol_ * nb_ ) ) * nb_ ; 
    int AllExcessColumns = NumColumns_ - UniformColumns * npcol_ ; 
    int OurExcessColumns = EPETRA_MIN( nb_, AllExcessColumns - ( mypcol_ * nb_ ) ) ; 
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:281" << std::endl;
    OurExcessColumns = EPETRA_MAX( 0, OurExcessColumns );
    NumOurColumns_ = UniformColumns + OurExcessColumns ; 
    
    //  Processes outside the grid own nothing.
    if ( iam_ >= nprow_ * npcol_ ) {
      UniformRows = 0;
      NumOurRows_ = 0;
      NumOurColumns_ = 0;
    }
    
    Comm().Barrier(); 
    
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:295" << std::endl;
#if 0
    //  Compute using ScaLAPACK's numroc routine, assert agreement  
    int izero = 0; // All matrices start at process 0
    int NumRocSays = numroc_( &NumRows_, &nb_, &myprow_, &izero, &nprow_ );
    assert( NumOurRows_ == NumRocSays );
#endif
    //
    //  Compute the rows which this process row owns in the ScaLAPACK 2D
    //  process grid.
    //
    std::vector<int> AllOurRows(NumOurRows_);
    
    int RowIndex = 0 ; 
    int BlockRow = 0 ;
    for ( ; BlockRow < UniformRows / nb_ ; BlockRow++ ) {
      for ( int RowOffset = 0; RowOffset < nb_ ; RowOffset++ ) {
	AllOurRows[RowIndex++] = BlockRow*nb_*nprow_  + myprow_*nb_ + RowOffset ;
      } 
    }
    Comm().Barrier(); 
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:315" << std::endl;
    assert ( BlockRow == UniformRows / nb_ ) ; 
    //  The trailing, possibly partial, block.
    for ( int RowOffset = 0; RowOffset < OurExcessRows ; RowOffset++ ) {
      AllOurRows[RowIndex++] = BlockRow*nb_*nprow_ + myprow_*nb_ + RowOffset ;
    } 
    assert( RowIndex == NumOurRows_ );
    //
    //  Distribute those rows amongst all the processes in that process row
    //  This is an artificial distribution with the following properties:
    //  1)  It is a 1D data distribution (each row belongs entirely to 
    //      a single process
    //  2)  All data which will eventually belong to a given process row, 
    //      is entirely contained within the processes in that row.
    //
    
    Comm().Barrier(); 
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:312" << std::endl;
    //
    //  Compute MyRows directly
    //
    //  MyRows[i] is the fat-row id AllOurRows[i]*npcol_ + mypcol_ (checked
    //  by the assert loop further down).
    std::vector<int>MyRows(NumOurRows_);
    RowIndex = 0 ; 
    BlockRow = 0 ;
    for ( ; BlockRow < UniformRows / nb_ ; BlockRow++ ) {
      for ( int RowOffset = 0; RowOffset < nb_ ; RowOffset++ ) {
	MyRows[RowIndex++] = BlockRow*nb_*nprow_*npcol_  + 
	  myprow_*nb_*npcol_ + RowOffset*npcol_  + mypcol_ ;
      } 
    }
    
    Comm().Barrier(); 
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:326" << std::endl;
    
    assert ( BlockRow == UniformRows / nb_ ) ; 
    for ( int RowOffset = 0; RowOffset < OurExcessRows ; RowOffset++ ) {
      MyRows[RowIndex++] = BlockRow*nb_*nprow_*npcol_  + 
	myprow_*nb_*npcol_ + RowOffset*npcol_  + mypcol_ ;
    } 
    
    Comm().Barrier(); 
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:334" << std::endl;
    Comm().Barrier(); 
    
    for (int i=0; i < NumOurRows_; i++ ) { 
      assert( MyRows[i] == AllOurRows[i]*npcol_+mypcol_ );
    } 
    
    Comm().Barrier(); 
    if (  debug_ == 1) std::cout  << "Amesos_Scalapack.cpp:340" 
			     << " iam_ = " << iam_ 
			     << " myprow_ = " << myprow_ 
			     << " mypcol_ = " << mypcol_ 
			     << " NumRows_ = " << NumRows_ 
			     << " NumOurRows_ = " << NumOurRows_ 
			     << std::endl;
    
    Comm().Barrier(); 
    Epetra_Map FatOutMap( npcol_*NumRows_, NumOurRows_, &MyRows[0], 0, Comm() ); 
    
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:344" << std::endl;
    Comm().Barrier(); 
    
    //  Replace any FatOut_ left over from a previous call.
    if ( FatOut_ ) delete FatOut_ ; 
    FatOut_ = new Epetra_CrsMatrix( Copy, FatOutMap, 0 ) ;
    
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:348" << std::endl;
    
    Epetra_Export ExportToFatOut( FatInMap, FatOutMap ) ;
    
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:360" << std::endl;
    
    FatOut_->Export( FatIn, ExportToFatOut, Add );
    FatOut_->FillComplete( false );
    
    //
    //  Create a map to allow us to redistribute the vectors X and B 
    //
    //  NOTE(review): RowMatrixA and OriginalMap are redeclared here and
    //  shadow the identically initialised variables at the top of the
    //  function; this inner cast is not null-checked.
    Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
    const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ; 
    assert( NumGlobalElements_ == OriginalMap.NumGlobalElements() ) ;
    //  Only the first process column holds vector entries.
    int NumMyVecElements = 0 ;
    if ( mypcol_ == 0 ) { 
      NumMyVecElements = NumOurRows_;
    }
    
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:385" << std::endl;
    
    if (VectorMap_) { delete VectorMap_ ; VectorMap_ = 0 ; } 
    VectorMap_ = new Epetra_Map( NumGlobalElements_, 
				 NumMyVecElements, 
				 &AllOurRows[0], 
				 0, 
				 Comm() );
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << " Amesos_Scalapack.cpp:393 debug_ = "
			     << debug_ << std::endl;
    
  } else {
    //  1D distribution: a single process row, one process column per process.
    nprow_ = 1 ;
    npcol_ = NumberOfProcesses / nprow_ ;
    assert ( nprow_ * npcol_ == NumberOfProcesses ) ; 
    
    //  m_per_p_ = ceil(NumRows_ / NumberOfProcesses); this process gets the
    //  contiguous range [MyFirstElement, MyFirstNonElement).
    m_per_p_ = ( NumRows_ + NumberOfProcesses - 1 ) / NumberOfProcesses ;
    int MyFirstElement = EPETRA_MIN( iam_ * m_per_p_, NumRows_ ) ;
    int MyFirstNonElement = EPETRA_MIN( (iam_+1) * m_per_p_, NumRows_ ) ;
    int NumExpectedElements = MyFirstNonElement - MyFirstElement ; 
    
    assert( NumRows_ ==  RowMatrixA->NumGlobalRows() ) ; 
    if ( ScaLAPACK1DMap_ ) delete( ScaLAPACK1DMap_ ) ; 
    ScaLAPACK1DMap_ = new Epetra_Map( NumRows_, NumExpectedElements, 0, Comm() );
    if ( ScaLAPACK1DMatrix_ ) delete( ScaLAPACK1DMatrix_ ) ; 
    ScaLAPACK1DMatrix_ = new Epetra_CrsMatrix(Copy, *ScaLAPACK1DMap_, 0);
    Epetra_Export ExportToScaLAPACK1D_( OriginalMap, *ScaLAPACK1DMap_);
    
    ScaLAPACK1DMatrix_->Export( *RowMatrixA, ExportToScaLAPACK1D_, Add ); 
    
    ScaLAPACK1DMatrix_->FillComplete( false ) ; 
  }
  if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << " Amesos_Scalapack.cpp:417 debug_ = "
			   << debug_ << std::endl;
  if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:402"
			   << " nprow_ = " << nprow_
			   << " npcol_ = " << npcol_ << std::endl ; 
  int info; 
  const int zero = 0 ; 
  //  ictxt_ == -1313 is treated as "grid not created yet".
  if ( ictxt_ == -1313 ) {
    ictxt_ = 0 ; 
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:408" << std::endl;
    SL_INIT_F77(&ictxt_, &nprow_, &npcol_) ; 
  }
  if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:410A" << std::endl;
  
  //  Read back the grid shape and this process' coordinates, and check
  //  them against the values computed above.
  int nprow;
  int npcol;
  int myrow;
  int mycol;
  BLACS_GRIDINFO_F77(&ictxt_, &nprow, &npcol, &myrow, &mycol) ; 
  if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "iam_ = " << iam_ << " Amesos_Scalapack.cpp:410" << std::endl;
  if ( iam_ < nprow_ * npcol_ ) { 
    assert( nprow == nprow_ ) ; 
    if ( npcol != npcol_ ) std::cout << "Amesos_Scalapack.cpp:430 npcol = " << 
      npcol << " npcol_ = " << npcol_ << std::endl ; 
    assert( npcol == npcol_ ) ; 
    if ( TwoD_distribution_ ) {
      assert( myrow == myprow_ ) ; 
      assert( mycol == mypcol_ ) ; 
      lda_ = EPETRA_MAX(1,NumOurRows_) ;
    } else { 
      assert( myrow == 0 ) ; 
      assert( mycol == iam_ ) ; 
      //  In the 1D case the block size is the per-process row count.
      nb_ = m_per_p_;
      lda_ = EPETRA_MAX(1,NumGlobalElements_);
    }
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  
			     << "Amesos_Scalapack.cpp: " << __LINE__ 
			     << " TwoD_distribution_ = "  << TwoD_distribution_ 
			     << " NumGlobalElements_ = "  << NumGlobalElements_ 
			     << " debug_ = "  << debug_ 
			     << " nb_ = "  << nb_ 
			     << " lda_ = "  << lda_ 
			     << " nprow_ = "  << nprow_ 
			     << " npcol_ = "  << npcol_ 
			     << " myprow_ = "  << myprow_ 
			     << " mypcol_ = "  << mypcol_ 
			     << " iam_ = "  << iam_ << std::endl ;
    AMESOS_PRINT( myprow_ );
    //  Fill the descriptor of the NumGlobalElements_ x NumGlobalElements_
    //  matrix with nb_ x nb_ blocks and local leading dimension lda_.
    DESCINIT_F77(DescA_, 
		 &NumGlobalElements_, 
		 &NumGlobalElements_, 
		 &nb_,
		 &nb_,
		 &zero,
		 &zero,
		 &ictxt_,
		 &lda_,
		 &info) ;
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:441" << std::endl;
    assert( info == 0 ) ; 
  } else {
    //  Processes outside the grid: mark the descriptor with -13 and expect
    //  the grid query to have reported nprow == -1.
    DescA_[0] = -13;
    if (  debug_ == 1) std::cout  << "iam_ = " << iam_  << "Amesos_Scalapack.cpp:458 nprow = " << nprow << std::endl;
    assert( nprow == -1 ) ; 
  }
  
  if (  debug_ == 1) std::cout  << "Amesos_Scalapack.cpp:446" << std::endl;
  MatTime_ += Time_->ElapsedTime();
  
  return 0;
}
Beispiel #21
0
//=============================================================================
//
//  Symbolic phase of the DSCPACK solver.
//
//  The sparsity graph of the problem matrix is obtained (directly from an
//  Epetra_CrsMatrix, or rebuilt row by row for any other Epetra_RowMatrix),
//  replicated onto every process and flattened into the Ap/Ai arrays that
//  are passed to DSC_Analyze / DSC_Order.  The number of processes given to
//  DSCPACK (DscNumProcs) is the largest power of two not exceeding
//  min(MaxProcs_, DSC_Analyze's result).  Processes with MyDscRank >= 0 then
//  run DSC_Order and DSC_SFactor.
//
//  Returns 0 on success; any failing Epetra or DSCPACK call makes the
//  function return early with a non-zero code through AMESOS_CHK_ERR.
//
int Amesos_Dscpack::PerformSymbolicFactorization()
{
  ResetTimer(0);
  ResetTimer(1);

  MyPID_    = Comm().MyPID();
  NumProcs_ = Comm().NumProc();
  
  Epetra_RowMatrix *RowMatrixA = Problem_->GetMatrix();
  if (RowMatrixA == 0)
    AMESOS_CHK_ERR(-1);

  const Epetra_Map& OriginalMap = RowMatrixA->RowMatrixRowMap() ;
  // Reference cast: throws std::bad_cast if Comm() is not an Epetra_MpiComm.
  const Epetra_MpiComm& comm1   = dynamic_cast<const Epetra_MpiComm &> (Comm());
  int numrows                   = RowMatrixA->NumGlobalRows();
  int numentries                = RowMatrixA->NumGlobalNonzeros();

  Teuchos::RCP<Epetra_CrsGraph> Graph;

  Epetra_CrsMatrix* CastCrsMatrixA = 
    dynamic_cast<Epetra_CrsMatrix*>(RowMatrixA); 

  if (CastCrsMatrixA)
  {
    // Non-owning handle onto the matrix's own graph (second argument false).
    Graph = Teuchos::rcp(const_cast<Epetra_CrsGraph*>(&(CastCrsMatrixA->Graph())), false);
  }
  else
  {
    // Generic row matrix: rebuild the graph from row copies.
    int MaxNumEntries = RowMatrixA->MaxNumEntries();
    Graph = Teuchos::rcp(new Epetra_CrsGraph(Copy, OriginalMap, MaxNumEntries));

    // At least one slot so that &Indices[0] / &Values[0] are always valid,
    // even when every row is empty.
    std::vector<int>    Indices(EPETRA_MAX(1, MaxNumEntries));
    std::vector<double> Values(EPETRA_MAX(1, MaxNumEntries));

    for (int i = 0 ; i < RowMatrixA->NumMyRows() ; ++i)
    {
      int NumEntries = 0;
      AMESOS_CHK_ERR(RowMatrixA->ExtractMyRowCopy(i, MaxNumEntries, NumEntries,
                                                  &Values[0], &Indices[0]));

      // Local column ids -> global column ids.
      for (int j = 0 ; j < NumEntries ; ++j)
        Indices[j] = RowMatrixA->RowMatrixColMap().GID(Indices[j]);

      int GlobalRow = RowMatrixA->RowMatrixRowMap().GID(i);
      // Only negative codes are treated as errors here; positive codes are
      // let through as before.
      const int InsertErr = Graph->InsertGlobalIndices(GlobalRow, NumEntries, &Indices[0]);
      if (InsertErr < 0)
        AMESOS_CHK_ERR(InsertErr);
    }

    AMESOS_CHK_ERR(Graph->FillComplete());
  }

  //
  //  Create a replicated map and graph 
  //
  std::vector<int> AllIDs( numrows ) ; 
  for ( int i = 0; i < numrows ; i++ ) AllIDs[i] = i ; 

  Epetra_Map      ReplicatedMap( -1, numrows, &AllIDs[0], 0, Comm());
  Epetra_Import   ReplicatedImporter(ReplicatedMap, OriginalMap);
  Epetra_CrsGraph ReplicatedGraph( Copy, ReplicatedMap, 0 ); 

  AMESOS_CHK_ERR(ReplicatedGraph.Import(*Graph, ReplicatedImporter, Insert));
  AMESOS_CHK_ERR(ReplicatedGraph.FillComplete());

  //
  //  Convert the matrix to Ap, Ai
  //
  std::vector <int> Replicates(numrows);
  std::vector <int> Ap(numrows + 1);
  std::vector <int> Ai(EPETRA_MAX(numrows, numentries));

  for( int i = 0 ; i < numrows; i++ ) Replicates[i] = 1; 
  
  int NumEntriesPerRow ;
  int *ColIndices = 0 ;
  int Ai_index = 0 ; 
  for ( int MyRow = 0; MyRow <numrows; MyRow++ ) {
    AMESOS_CHK_ERR( ReplicatedGraph.ExtractMyRowView( MyRow, NumEntriesPerRow, ColIndices ) );
    // Never write past the end of Ai, even in builds where the assert
    // below is compiled out.
    if ( Ai_index + NumEntriesPerRow > (int) Ai.size() )
      AMESOS_CHK_ERR(-1);
    Ap[MyRow] = Ai_index ; 
    for ( int j = 0; j < NumEntriesPerRow; j++ ) { 
      Ai[Ai_index] = ColIndices[j] ; 
      Ai_index++;
    }
  }
  assert( Ai_index == numentries ) ; 
  Ap[ numrows ] = Ai_index ; 
  
  MtxConvTime_ = AddTime("Total matrix conversion time", MtxConvTime_, 0);

  ResetTimer(0);

  //
  //  Call Dscpack Symbolic Factorization
  //  
  int OrderCode = 2;
  std::vector<double> MyANonZ;
  
  NumLocalNonz = 0 ; 
  GlobalStructNewColNum = 0 ; 
  GlobalStructNewNum = 0 ;  
  GlobalStructOwner = 0 ; 
  LocalStructOldNum = 0 ; 
  
  NumGlobalCols = 0 ; 
  
  // MS // Have to define the maximum number of processes to be used
  // MS // This is only a suggestion as Dscpack uses a number of processes that is a power of 2  

  int NumGlobalNonzeros = GetProblem()->GetMatrix()->NumGlobalNonzeros();
  int NumRows = GetProblem()->GetMatrix()->NumGlobalRows(); 

  // optimal value for MaxProcs == -1
  
  int OptNumProcs1 = 1+EPETRA_MAX( NumRows/10000, NumGlobalNonzeros/1000000 );
  OptNumProcs1 = EPETRA_MIN(NumProcs_,OptNumProcs1 );

  // optimal value for MaxProcs == -2

  int OptNumProcs2 = (int)sqrt(1.0 * NumProcs_);
  if( OptNumProcs2 < 1 ) OptNumProcs2 = 1;

  // fix the value of MaxProcs
  // (any value other than -1, -2, -3 is left as it is)

  switch (MaxProcs_) 
  {
  case -1:
    MaxProcs_ = EPETRA_MIN(OptNumProcs1, NumProcs_);
    break;
  case -2:
    MaxProcs_ = EPETRA_MIN(OptNumProcs2, NumProcs_);
    break;
  case -3:
    MaxProcs_ = NumProcs_;
    break;
  }

#if 0
  if (MyDscRank>=0 && A_and_LU_built) { 
    DSC_ReFactorInitialize(PrivateDscpackData_->MyDSCObject);
  }
#endif
  //  if ( ! A_and_LU_built ) { 
  //    DSC_End( PrivateDscpackData_->MyDSCObject ) ; 
  //    PrivateDscpackData_->MyDSCObject = DSC_Begin() ;
  //  } 

  // MS // here I continue with the old code...
  
  OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);

  DscNumProcs = 1 ; 
  int DscMax = DSC_Analyze( numrows, &Ap[0], &Ai[0], &Replicates[0] );

  // Largest power of two that does not exceed min(MaxProcs_, DscMax).
  while ( DscNumProcs * 2 <=EPETRA_MIN( MaxProcs_, DscMax ) )  DscNumProcs *= 2 ;
  
  MyDscRank = -1; 
  DSC_Open0( PrivateDscpackData_->MyDSCObject_, DscNumProcs, &MyDscRank, comm1.Comm()) ; 
  
  NumLocalCols = 0 ; // This is for those processes not in the Dsc grid
  if ( MyDscRank >= 0 ) { 
    assert( MyPID_ == MyDscRank ) ; 
    AMESOS_CHK_ERR( DSC_Order ( PrivateDscpackData_->MyDSCObject_, OrderCode, numrows, &Ap[0], &Ai[0], 
				&Replicates[0], &NumGlobalCols, &NumLocalStructs, 
				&NumLocalCols, &NumLocalNonz, 
				&GlobalStructNewColNum, &GlobalStructNewNum, 
				&GlobalStructOwner, &LocalStructOldNum ) ) ; 
    assert( NumGlobalCols == numrows ) ; 
    assert( NumLocalCols == NumLocalStructs ) ; 
  }

  if ( MyDscRank >= 0 ) { 
    int MaxSingleBlock; 
    
    const int Limit = 5000000 ;  //  Memory Limit set to 5 Terabytes 
    AMESOS_CHK_ERR( DSC_SFactor ( PrivateDscpackData_->MyDSCObject_, &TotalMemory_, 
				  &MaxSingleBlock, Limit, DSC_LBLAS3, DSC_DBLAS2 ) ) ; 
    
  }
  
  //  A_and_LU_built = true;   // If you uncomment this, TestOptions fails
  
  SymFactTime_ = AddTime("Total symbolic factorization time", SymFactTime_, 0);

  return(0);
}
//
//  Store the transpose of In into Out.
//
//  All of In must live on process 0 (asserted); every other process must own
//  no rows.  Process 0 converts In to compressed-column form (Ap, Ai, Aval)
//  and inserts each column of In as a row of Out.  Out->FillComplete() is
//  then called on every process.
//
//  Column indices returned by ExtractMyRowView are used directly as global
//  indices (NOTE(review): this relies on local and global column numbering
//  coinciding on process 0 - confirm for the callers' maps).
//
//  Returns 0 on success, otherwise the first non-zero code reported on this
//  process by ExtractMyRowView, InsertGlobalValues (negative codes only) or
//  FillComplete.
//
//  None of the Epetra calls is placed inside assert(): with NDEBUG defined
//  an assert's argument is not evaluated, which would silently skip the
//  row extraction and FillComplete().
//
int CrsMatrixTranspose( Epetra_CrsMatrix *In,  Epetra_CrsMatrix *Out ) { 

  const int iam = In->Comm().MyPID() ;

  const int numentries = In->NumGlobalNonzeros();
  const int numrows = In->NumGlobalRows();
  const int numcols = In->NumGlobalCols();

  int status = 0 ;  // first error seen on this process, 0 if none

  if ( iam == 0 ) { 

    assert( In->NumMyRows() == In->NumGlobalRows() ) ; 

    int NumRowEntries = 0;
    double *RowValues = 0;
    int *ColIndices = 0;

    std::vector <int> Ap( numcols+1, 0 );       // Column i is stored in Aval(Ap[i]..Ap[i+1]-1)
    std::vector <int> nextAp( numcols+1, 0 );   // Where to store next value in Column i
    std::vector <int> Ai( EPETRA_MAX( numcols, numentries) ) ; //  Row indices
    std::vector <double> Aval( EPETRA_MAX( numcols, numentries) ) ; 

    //
    //  Count the number of entries in each column
    //
    std::vector <int> RowsPerCol( numcols, 0 ) ; 
    for ( int MyRow = 0; MyRow < numrows; MyRow++ ) {
      status = In->ExtractMyRowView( MyRow, NumRowEntries, RowValues, ColIndices ) ;
      assert( status == 0 ) ;
      if ( status != 0 ) break ;
      for ( int j = 0; j < NumRowEntries; j++ ) { 
        RowsPerCol[ ColIndices[j] ] ++ ; 
      }
    }

    if ( status == 0 ) {
      //
      //  Set Ap and nextAp based on RowsPerCol
      //
      Ap[0] = 0 ; 
      for ( int i = 0 ; i < numcols ; i++ ) {
        Ap[i+1]= Ap[i] + RowsPerCol[i] ; 
        nextAp[i] = Ap[i];
      }
      //
      //  Populate Ai and Aval 
      //
      for ( int MyRow = 0; MyRow < numrows; MyRow++ ) {
        status = In->ExtractMyRowView( MyRow, NumRowEntries, RowValues, ColIndices ) ;
        assert( status == 0 ) ;
        if ( status != 0 ) break ;
        for ( int j = 0; j < NumRowEntries; j++ ) { 
          const int Slot = nextAp[ ColIndices[j] ] ++ ;
          Ai[ Slot ] = MyRow ; 
          Aval[ Slot ] = RowValues[j] ; 
        }
      }
    }

    if ( status == 0 ) {
      //
      //  Insert values into Out: one row of Out per column of In.
      //  Ap has numcols+1 entries, so the loop runs over numcols.
      //  Base pointers plus an offset are used so that a trailing empty
      //  column never indexes one past the end of the vectors.
      //
      double *ValBase = Aval.empty() ? 0 : &Aval[0] ;
      int *IndBase = Ai.empty() ? 0 : &Ai[0] ;
      for ( int Col = 0; Col < numcols; Col++ ) {
        const int NumInCol = Ap[Col+1] -  Ap[Col] ;
        const int InsertErr = Out->InsertGlobalValues( Col, NumInCol, 
                                                       ValBase + Ap[Col], 
                                                       IndBase + Ap[Col] );
        if ( InsertErr < 0 ) { 
          status = InsertErr ; 
          break ; 
        }
        assert( Out->IndicesAreGlobal() ) ; 
      }
    }
  } else {
    assert( In->NumMyRows() == 0 ) ; 
  }

  //  FillComplete is reached on every process, even after a local error,
  //  so that no process is left waiting in it.
  const int FillErr = Out->FillComplete() ;
  assert( FillErr == 0 ) ;
  if ( status == 0 ) status = FillErr ;
  return status ; 
}
//=========================================================================
// Builds the transpose of orig and returns a reference to it.
//
// The entries of orig are regrouped by local column into per-column arrays
// (TransIndices_ / TransValues_), wrapped in a temporary View-mode matrix
// over orig's column map, and exported (mode Add) into TransposeMatrix_,
// whose rows are distributed according to TransposeRowMap_.  If no
// TransposeRowMap_ was supplied, orig's range map (IgnoreNonLocalCols_ set)
// or domain map is used.
//
// Throws on failure: either the int error code, or whatever
// ReportError(...) returns for the failing Epetra object.
RowMatrix_Transpose::NewTypeRef
RowMatrix_Transpose::
operator()( OriginalTypeRef orig )
{
  origObj_ = &orig;

  int i, j, err;

  if( !TransposeRowMap_ )
  {
    if( IgnoreNonLocalCols_ )
      TransposeRowMap_ = (Epetra_Map *) &(orig.OperatorRangeMap()); // Should be replaced with refcount =
    else
      TransposeRowMap_ = (Epetra_Map *) &(orig.OperatorDomainMap()); // Should be replaced with refcount =
  }

  // This routine will work for any RowMatrix object, but will attempt cast the matrix to a CrsMatrix if
  // possible (because we can then use a View of the matrix and graph, which is much cheaper).

  // First get the local indices to count how many nonzeros will be in the 
  // transpose graph on each processor
  Epetra_CrsMatrix * OrigCrsMatrix = dynamic_cast<Epetra_CrsMatrix*>(&orig);

  OrigMatrixIsCrsMatrix_ = (OrigCrsMatrix!=0); // If this pointer is non-zero, the cast to CrsMatrix worked

  NumMyRows_ = orig.NumMyRows();
  NumMyCols_ = orig.NumMyCols();
  TransNumNz_ = new int[NumMyCols_];
  TransIndices_ = new int*[NumMyCols_];
  TransValues_ = new double*[NumMyCols_];
  TransMyGlobalEquations_ = new int[NumMyCols_];

  int NumIndices;

  if (OrigMatrixIsCrsMatrix_)
  {
    const Epetra_CrsGraph & OrigGraph = OrigCrsMatrix->Graph(); // Get matrix graph

    for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0;
    for (i=0; i<NumMyRows_; i++)
    {
      err = OrigGraph.ExtractMyRowView(i, NumIndices, Indices_); // Get view of ith row
      if (err != 0) throw OrigGraph.ReportError("ExtractMyRowView failed",err);
      for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]];
    }
  }
  else // Original is not a CrsMatrix
  {
    // Row copies are needed, so size the scratch buffers for the longest row.
    MaxNumEntries_ = 0;
    for (i=0; i<NumMyRows_; i++)
    {
      int NumEntries = 0;
      err = orig.NumMyRowEntries(i, NumEntries);
      if (err != 0) {
        std::cerr << "NumMyRowEntries failed."<<std::endl;
        throw err;
      }
      MaxNumEntries_ = EPETRA_MAX(MaxNumEntries_, NumEntries);
    }
    Indices_ = new int[MaxNumEntries_];
    Values_ = new double[MaxNumEntries_];

    for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0;
    for (i=0; i<NumMyRows_; i++)
    {
      err = orig.ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_); 
      if (err != 0) {
        std::cerr << "ExtractMyRowCopy failed."<<std::endl;
        throw err;
      }
      for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]];
    }
  }

  // Most of remaining code is common to both cases
  for(i=0; i<NumMyCols_; i++)
  {
    NumIndices = TransNumNz_[i];
    if (NumIndices>0)
    {
      TransIndices_[i] = new int[NumIndices];
      TransValues_[i] = new double[NumIndices];
    }
    else
    {
      // Empty transpose row: keep the slots well-defined instead of leaving
      // indeterminate pointers that are later handed to InsertGlobalValues.
      TransIndices_[i] = 0;
      TransValues_[i] = 0;
    }
  }

  // Now copy values and global indices into newly create transpose storage

  for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; // Reset transpose NumNz counter
  for (i=0; i<NumMyRows_; i++)
  {
    if (OrigMatrixIsCrsMatrix_)
      err = OrigCrsMatrix->ExtractMyRowView(i, NumIndices, Values_, Indices_);
    else
      err = orig.ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_);
    if (err != 0) {
      std::cerr << "ExtractMyRowCopy failed."<<std::endl;
      throw err;
    }

    // Global id of original row i; it becomes a column index of the transpose.
    int ii = orig.RowMatrixRowMap().GID(i);
    for (j=0; j<NumIndices; j++)
    {
      int TransRow = Indices_[j];
      int loc = TransNumNz_[TransRow];
      TransIndices_[TransRow][loc] = ii;
      TransValues_[TransRow][loc] = Values_[j];
      ++TransNumNz_[TransRow]; // increment counter into current transpose row
    }
  }

  //  Build Transpose matrix with some rows being shared across processors.
  //  We will use a view here since the matrix will not be used for anything else

  const Epetra_Map & TransMap = orig.RowMatrixColMap();

  Epetra_CrsMatrix TempTransA1(View, TransMap, TransNumNz_);
  TransMap.MyGlobalElements(TransMyGlobalEquations_);
  
  for (i=0; i<NumMyCols_; i++) {
    err = TempTransA1.InsertGlobalValues(TransMyGlobalEquations_[i], 
                     TransNumNz_[i], TransValues_[i], TransIndices_[i]);
    if (err < 0) throw TempTransA1.ReportError("InsertGlobalValues failed.",err);
  }
 
  // Note: The following call to FillComplete is currently necessary because
  //      some global constants that are needed by the Export () are computed in this routine
  err = TempTransA1.FillComplete(orig.OperatorRangeMap(),*TransposeRowMap_, false);
  if (err != 0) {
    throw TempTransA1.ReportError("FillComplete failed.",err);
  }

  // Now that transpose matrix with shared rows is entered, create a new matrix that will
  // get the transpose with uniquely owned rows (using the same row distribution as A).
  if( IgnoreNonLocalCols_ )
    TransposeMatrix_ = new Epetra_CrsMatrix(Copy, *TransposeRowMap_, *TransposeRowMap_, 0);
  else
    TransposeMatrix_ = new Epetra_CrsMatrix(Copy, *TransposeRowMap_,0);

  // Create an Export object that will move TempTransA around
  TransposeExporter_ = new Epetra_Export(TransMap, *TransposeRowMap_);

  err = TransposeMatrix_->Export(TempTransA1, *TransposeExporter_, Add);
  if (err != 0) throw TransposeMatrix_->ReportError("Export failed.",err);
  
  err = TransposeMatrix_->FillComplete(orig.OperatorRangeMap(),*TransposeRowMap_);
  if (err != 0) throw TransposeMatrix_->ReportError("FillComplete failed.",err);

  if (MakeDataContiguous_) {
    err = TransposeMatrix_->MakeDataContiguous();
    if (err != 0) throw TransposeMatrix_->ReportError("MakeDataContiguous failed.",err);
  }

  newObj_ = TransposeMatrix_;

  return *newObj_;
}
Beispiel #24
0
//
//  TestOtherClasses runs PerformOneSolveAndTest() for the solver named by
//  AmesosClass under several different parameter lists and accumulates the
//  outcome of each run.
//
//  Parameters:
//    AmesosClass        - name of the Amesos solver class; compared below against
//                         "Amesos_Taucs", "Amesos_Klu", "Amesos_Paraklete",
//                         "Amesos_Mumps", "Amesos_Scalapack" and "Amesos_Lapack"
//    EpetraMatrixType, transpose, Levels, Rcond, ExpectedError
//                       - forwarded unchanged to every PerformOneSolveAndTest() call
//    Amat               - matrix handed to PerformOneSolveAndTest(); its communicator
//                         supplies the process id used to gate error printing
//    verbose            - copied into MyVerbose (see note below), which gates the
//                         diagnostic output and is forwarded to PerformOneSolveAndTest()
//    RowMapEqualsColMap - the two AddToDiag tests only run when this is true
//    TestAddZeroToDiag  - additionally required for the AddZeroToDiag==true test
//    maxrelerror, maxrelresidual
//                       - outputs; reset to 0.0 on entry and then raised to the largest
//                         relerror / relresidual reported by a run whose result was >= 0
//    NumTests           - in/out; incremented once for every run that is counted
//
//  Returns NumErrors, the accumulated error count.  Note the asymmetry: a negative
//  result from the first three runs counts as one error, whereas a negative result
//  from the "Refactorize" or "MaxProcs" run makes the function return 0 immediately,
//  discarding whatever had been accumulated in NumErrors up to that point.
//
int TestOtherClasses( const char* AmesosClass,
		      int EpetraMatrixType,
		      Epetra_CrsMatrix *& Amat, 
		      const bool transpose, 
		      const bool verbose, 
		      const int Levels,
		      const double Rcond,
		      bool RowMapEqualsColMap, 
		      bool TestAddZeroToDiag,
		      int ExpectedError,
		      double &maxrelerror, 
		      double &maxrelresidual,
		      int &NumTests ) {


  int iam = Amat->Comm().MyPID() ;  
  int NumErrors = 0 ;
  maxrelerror = 0.0;
  maxrelresidual = 0.0;
  const Epetra_Comm& Comm = Amat->Comm();

  bool MyVerbose = false ; // if set equal to verbose, we exceed the test harness 1 Megabyte limit
  // NOTE(review): StringAmesosClass is not referenced again in this function.
  std::string StringAmesosClass = AmesosClass ; 
  // NOTE(review): this sets MyVerbose = verbose for every non-null AmesosClass, which
  // contradicts the comment on the MyVerbose declaration above - confirm which is intended.
  if ( AmesosClass ) MyVerbose = verbose ;    // Turn this on temporarily to debug Mumps on atlantis
  //
  //     1)  Baseline run: Redistribute = true, AddZeroToDiag = false, MaxProcs = 100000
  {
    Teuchos::ParameterList ParamList ;

    ParamList.set( "NoDestroy", true );    // Only affects Amesos_Mumps
    ParamList.set( "Redistribute", true );
    ParamList.set( "AddZeroToDiag", false );
    ParamList.set( "MaxProcs", 100000 );
    //  ParamList.print( std::cerr, 10 ) ; 

    // Filled in by PerformOneSolveAndTest(); only read below when Errors >= 0.
    double relerror;
    double relresidual;
   
    if (MyVerbose) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass 
		      << " ParamList = " << ParamList 
		      << " transpose = " << transpose 
		      << " Levels = " << Levels 
		      << std::endl ; 

    int Errors = PerformOneSolveAndTest(AmesosClass,
					EpetraMatrixType,
					Comm, 
					transpose, 
					MyVerbose,
					ParamList, 
					Amat, 
					Levels,
					Rcond, 
					relerror, 
					relresidual, ExpectedError ) ; 

    // Report a non-zero result on process 0 even when not verbose.
    if (MyVerbose  || ( Errors && iam==0 )  ) std::cout << __FILE__ << "::" << __LINE__ 
				  << " AmesosClass= " << AmesosClass 
				  << " Errors = " << Errors 
				  << std::endl ; 

    // A negative result is counted as a single error (and still as one test).
    if ( Errors < 0 ) {
      NumErrors++;
      NumTests++ ; 
      if ( MyVerbose ) {
	std::cout << AmesosClass << " failed with error code " << Errors << " " << __FILE__ << "::" << __LINE__ << std::endl ; 
      }
    } else { 
      NumErrors += Errors ; 

      maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; 
      maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; 
      NumTests++ ; 

    }
    if (MyVerbose)  std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; 
    if ( MyVerbose && Errors ) {
      std::cout << AmesosClass << " failed with transpose = " << 
	(transpose?"true":"false") << std::endl ;  
    }
  }

  std::string AC = AmesosClass ; 

  // Identify the solver by name so the AddToDiag tests below can be gated per solver.
  bool taucs = ( AC ==  "Amesos_Taucs" );
  bool klu = ( AC ==  "Amesos_Klu" );
  bool paraklete = ( AC ==  "Amesos_Paraklete" );
  bool mumps = ( AC ==  "Amesos_Mumps" );
  bool scalapack = ( AC ==  "Amesos_Scalapack" ) ;
  bool lapack = ( AC ==  "Amesos_Lapack" );


  //
  //  Testing AddZeroToDiag and AddToDiag 
  //  When AddZeroToDiag is true, the value of AddToDiag is added to every diagonal element whether or not 
  //    that element exists in the structure of the matrix.
  //  When AddZeroToDiag is false, the value of AddToDiag is added only to those diagonal elements 
  //    which are structurally non-zero.
  //  Support for these two flags varies
  //
  //
  //  klu, superludist and paraklete support AddToDiag with or without AddZeroToDiag 
  //  scalapack and lapack, being dense codes, support AddToDiag, but only when AddZeroToDiag is set 
  //
  //  pardiso does not support AddToDiag - bug #1993 
  //
  //  NOTE(review): lapack appears in both lists below although the comment above says it
  //  supports AddToDiag only when AddZeroToDiag is set - confirm which is correct.
  bool supports_AddToDiag_with_AddZeroToDiag = ( klu || paraklete || scalapack || lapack ) ; 
  bool supports_AddToDiag_with_when_AddZeroTo_Diag_is_false = ( klu  || paraklete  || mumps || taucs || lapack ) ; 


  //
  //     AddZeroToDiag = true, AddToDiag = 1.3e2, Redistribute = false
  //     (only for solvers in the first list above, and only when requested by the caller)
  if ( RowMapEqualsColMap && supports_AddToDiag_with_AddZeroToDiag && TestAddZeroToDiag ) {
    Teuchos::ParameterList ParamList ;
    ParamList.set( "NoDestroy", true );    // Only affects Amesos_Mumps
    ParamList.set( "Redistribute", false );
    ParamList.set( "AddZeroToDiag", true );
    ParamList.set( "AddToDiag", 1.3e2 );

    //  ParamList.print( std::cerr, 10 ) ; 

    double relerror;
    double relresidual;
   
    if (MyVerbose) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass 
		      << " ParamList = " << ParamList 
		      << " transpose = " << transpose 
		      << " Levels = " << Levels 
		      << std::endl ; 

    int Errors = PerformOneSolveAndTest(AmesosClass,
					EpetraMatrixType,
					Comm, 
					transpose, 
					MyVerbose,
					ParamList, 
					Amat, 
					Levels,
					Rcond, 
					relerror, 
					relresidual, ExpectedError ) ; 


    if (MyVerbose  || ( Errors && iam==0 )  ) std::cout << __FILE__ << "::" << __LINE__ 
				  << " AmesosClass= " << AmesosClass 
				  << " Errors = " << Errors 
				  << std::endl ; 

    // Same accounting as the baseline run: a negative result is one error, one test.
    if ( Errors < 0 ) {
      NumErrors++;
      NumTests++ ; 
      if ( MyVerbose ) {
	std::cout  << __FILE__ << "::" << __LINE__ 
	  << AmesosClass << " failed with error code " << Errors << " " << __FILE__ << "::" << __LINE__ << std::endl ; 
      }
    } else { 
      NumErrors += Errors ; 

      maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; 
      maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; 
      NumTests++ ; 

    }
    if (MyVerbose)  std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; 
    if ( MyVerbose && Errors ) {
      std::cout << AmesosClass << " failed with transpose = " << 
	(transpose?"true":"false") << std::endl ;  
    }



  }
  //
  //     AddToDiag = 1e2 with AddZeroToDiag left unset, Redistribute = false
  //     (only for solvers in the second list above)
  if ( RowMapEqualsColMap && supports_AddToDiag_with_when_AddZeroTo_Diag_is_false ) {
    Teuchos::ParameterList ParamList ;
    ParamList.set( "NoDestroy", true );    // Only affects Amesos_Mumps
    ParamList.set( "Redistribute", false );
    ParamList.set( "AddToDiag", 1e2 );

    //  ParamList.print( std::cerr, 10 ) ; 

    double relerror;
    double relresidual;
   
    if (MyVerbose) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass 
		      << " ParamList = " << ParamList 
		      << " transpose = " << transpose 
		      << " Levels = " << Levels 
		      << std::endl ; 

    int Errors = PerformOneSolveAndTest(AmesosClass,
					EpetraMatrixType,
					Comm, 
					transpose, 
					MyVerbose,
					ParamList, 
					Amat, 
					Levels,
					Rcond, 
					relerror, 
					relresidual, ExpectedError ) ; 


    if (MyVerbose  || ( Errors && iam==0 )  ) std::cout << __FILE__ << "::" << __LINE__ 
				  << " AmesosClass= " << AmesosClass 
				  << " Errors = " << Errors 
				  << std::endl ; 

    // Same accounting as the baseline run: a negative result is one error, one test.
    if ( Errors < 0 ) {
      NumErrors++;
      NumTests++ ; 
      if ( MyVerbose ) {
	std::cout  << __FILE__ << "::" << __LINE__ 
	  << AmesosClass << " failed with error code " << Errors << " " << __FILE__ << "::" << __LINE__ << std::endl ; 
      }
    } else { 
      NumErrors += Errors ; 

      maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; 
      maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; 
      NumTests++ ; 

    }
    if (MyVerbose)  std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; 
    if ( MyVerbose && Errors ) {
      std::cout << AmesosClass << " failed with transpose = " << 
	(transpose?"true":"false") << std::endl ;  
    }



  }

  //
  //     2)  Refactorize = true 
  {
    Teuchos::ParameterList ParamList ;
    ParamList.set( "NoDestroy", true );    // Only affects Amesos_Mumps
    ParamList.set( "Refactorize", true );
      
    double relerror;
    double relresidual;
      
    if (MyVerbose) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass 
		      << " ParamList = " << ParamList 
		      << " transpose = " << transpose 
		      << " Levels = " << Levels 
		      << std::endl ; 

    int Errors = PerformOneSolveAndTest(AmesosClass,
					EpetraMatrixType,
					Comm, 
					transpose, 
					MyVerbose,
					ParamList, 
					Amat, 
					Levels,
					Rcond, 
					relerror, 
					relresidual, ExpectedError ) ; 
      
    if (MyVerbose  || ( Errors && iam==0 )  ) std::cout << __FILE__ << "::" << __LINE__ 
				  << " AmesosClass= " << AmesosClass 
				  << " Errors = " << Errors 
				  << std::endl ; 

    // Unlike the runs above, a negative result here is treated as "solver not built":
    // the function returns 0 and the errors accumulated so far are not reported.
    if (Errors < 0 ) {
      if (MyVerbose ) std::cout << AmesosClass << " not built in this executable " << std::endl ; 
      return 0 ; 
    } else { 
      NumErrors += Errors ; 
	
      maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; 
      maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; 
      NumTests++ ; 

    }
    if (MyVerbose)  std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; 
    if ( MyVerbose && Errors ) {
      std::cout << AmesosClass << " failed with transpose = " << 
	(transpose?"true":"false") << std::endl ;  
    }
  }

  //
  //     5)  MaxProcs = 2 
  {
    Teuchos::ParameterList ParamList ;
    ParamList.set( "NoDestroy", true );    // Only affects Amesos_Mumps
    ParamList.set( "MaxProcs", 2 );   // Only affects Paraklete (maybe Mumps) also Superludist but that isn't tested here 
      
    double relerror;
    double relresidual;
      
    if (MyVerbose) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass 
		      << " ParamList = " << ParamList 
		      << " transpose = " << transpose 
		      << " Levels = " << Levels 
		      << std::endl ; 

    int Errors = PerformOneSolveAndTest(AmesosClass,
					EpetraMatrixType,
					Comm, 
					transpose, 
					MyVerbose,
					ParamList, 
					Amat, 
					Levels,
					Rcond, 
					relerror, 
					relresidual, ExpectedError ) ; 
      
    if (MyVerbose  || ( Errors && iam==0 )  ) std::cout << __FILE__ << "::" << __LINE__ 
				  << " AmesosClass= " << AmesosClass 
				  << " Errors = " << Errors 
				  << std::endl ; 

    // As for the Refactorize run: a negative result returns 0 immediately.
    if (Errors < 0 ) {
      if (MyVerbose ) std::cout << AmesosClass << " not built in this executable " << std::endl ; 
      return 0 ; 
    } else { 
      NumErrors += Errors ; 
	
      maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; 
      maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; 
      NumTests++ ; 

    }
    if (MyVerbose)  std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; 
    if ( MyVerbose && Errors ) {
      std::cout << AmesosClass << " failed with transpose = " << 
	(transpose?"true":"false") << std::endl ;  
    }
  }
  //
  //  ComputeTrueResidual is, by design, not quiet - it prints out the residual 
  //  (which is why the test below is compiled out)
  //
#if 0
  //
  //     4)  ComputeTrueResidual==true
  {
    Teuchos::ParameterList ParamList ;
    ParamList.set( "NoDestroy", true );    // Only affects Amesos_Mumps
    ParamList.set( "ComputeTrueResidual", true );
      
    double relerror;
    double relresidual;
      
    int Errors = PerformOneSolveAndTest(AmesosClass,
					EpetraMatrixType,
					Comm, 
					transpose, 
					MyVerbose,
					ParamList, 
					Amat, 
					Levels,
					Rcond, 
					relerror, 
					relresidual, ExpectedError ) ;

    if (Errors < 0 ) {
      if (MyVerbose ) std::cout << AmesosClass << " not built in this executable " << std::endl ; 
      return 0 ; 
    } else { 
      NumErrors += Errors ; 
	
      maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; 
      maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; 
      NumTests++ ; 

    }
    if (MyVerbose)  std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; 
    if ( MyVerbose && Errors > 0 ) {
      std::cout << AmesosClass << " failed with transpose = " << 
	(transpose?"true":"false") << std::endl ;  
    }
  }
#endif


  return NumErrors; 
  }
Beispiel #25
0
// Reports that an expected item could not be parsed from the HPC data file and
// terminates the process, mirroring how Trilinos_Util_ReadHpc2Epetra already
// handles a file that cannot be opened.
static void Trilinos_Util_ReadHpc2Epetra_ReadError(const char *data_file,
                                                   const char *what) {
  printf("Error: Cannot read %s from file: %s\n", what, data_file);
  exit(1);
}

// Reads a linear system from a text file and builds the corresponding Epetra
// objects.
//
// File layout, as consumed by this routine:
//   1. two integers: the number of global equations and the total number of
//      nonzeros (the latter is read but not otherwise used);
//   2. one integer per row, used only to size the per-row scratch buffers;
//   3. for each row: an integer count followed by that many (value, column)
//      pairs; entries whose value is exactly 0.0 are dropped;
//   4. for each row: three doubles stored into x, b and xexact respectively.
//
// Every process reads the whole file and keeps only the rows for which
// map->MyGID(row) is true.
//
// Outputs (all allocated here with new and handed back through the
// reference-to-pointer parameters; this routine does not delete them):
//   map    - Epetra_Map constructed from (numGlobalEquations, 0, comm)
//   A      - matrix on that map, FillComplete() already called
//   x, b, xexact - vectors on that map filled from section 4
//
// After assembly the routine computes A*xexact and prints, on process 0, the
// norm of that product, the norm of b, and the norm of their difference.
//
// The process exits with status 1 if the file cannot be opened or if any
// expected item cannot be parsed (previously parse failures were ignored and
// uninitialized values were used).
void Trilinos_Util_ReadHpc2Epetra(char *data_file,
				 const Epetra_Comm  &comm, 
				 Epetra_Map *& map, 
				 Epetra_CrsMatrix *& A, 
				 Epetra_Vector *& x, 
				 Epetra_Vector *& b,
				 Epetra_Vector *&xexact) {


  FILE *in_file ;

  // Scratch targets for fscanf; also used to read-and-discard data that
  // belongs to rows owned by other processes.
  int l;
  int * lp = &l;
  double v;
  double * vp = &v;
#ifdef DEBUG
  bool debug = true;
#else
  bool debug = false;
#endif

  int size = comm.NumProc();
  int rank = comm.MyPID();
  printf("Reading matrix info from %s...\n",data_file);
  
  in_file = fopen( data_file, "r");
  if (in_file == NULL)
    {
      printf("Error: Cannot open file: %s\n",data_file);
      exit(1);
    }
  int numGlobalEquations, total_nnz;
  if (fscanf(in_file,"%d",&numGlobalEquations) != 1)
    Trilinos_Util_ReadHpc2Epetra_ReadError(data_file, "the number of equations");
  if (fscanf(in_file,"%d",&total_nnz) != 1)
    Trilinos_Util_ReadHpc2Epetra_ReadError(data_file, "the total number of nonzeros");
  map = new Epetra_Map(numGlobalEquations, 0, comm); // Create map with uniform distribution
  
  A = new Epetra_CrsMatrix(Copy, *map, 0); // Construct matrix

  x = new Epetra_Vector(*map);
  b = new Epetra_Vector(*map);
  xexact = new Epetra_Vector(*map);
  int numMyEquations = map->NumMyPoints();

  // Find max nnz per row for this processor

  int max_nnz = 0;

  for (int i=0; i<numGlobalEquations; i++) {
      if (fscanf(in_file, "%d",lp) != 1) /* row #, nnz in row */
	Trilinos_Util_ReadHpc2Epetra_ReadError(data_file, "a per-row nonzero count");
      if (map->MyGID(i)) max_nnz = EPETRA_MAX(max_nnz,l);
    }


  // Scratch buffers holding one row's values/indices.  They start at the
  // size announced by the first pass and are grown below if a row turns out
  // to be longer, so an inconsistent file can no longer overrun them.
  int buf_len = max_nnz;
  double * list_of_vals = new double[buf_len];
  int *list_of_inds = new int   [buf_len];

  {for (int i=0; i<numGlobalEquations; i++)
    {
      int cur_nnz;
      if (fscanf(in_file, "%d",&cur_nnz) != 1)
	Trilinos_Util_ReadHpc2Epetra_ReadError(data_file, "a row length");
      if (map->MyGID(i)) // See if nnz for row should be added
	{
	  if (debug) cout << "Process "<<rank
			  <<" of "<<size<<" getting row "<<i<<endl;
	  if (cur_nnz > buf_len)
	    {
	      // Nothing from the previous row needs to be preserved.
	      delete [] list_of_vals;
	      delete [] list_of_inds;
	      buf_len = cur_nnz;
	      list_of_vals = new double[buf_len];
	      list_of_inds = new int   [buf_len];
	    }
	  int nnz_kept = 0;
	  for (int j=0; j<cur_nnz; j++) 
	    {
	      if (fscanf(in_file, "%lf %d",vp,lp) != 2)
		Trilinos_Util_ReadHpc2Epetra_ReadError(data_file, "a matrix entry");
	      if (v!=0.0) {
		list_of_vals[nnz_kept] = v;
		list_of_inds[nnz_kept] = l;
		nnz_kept++;
	      }
	    }
	  A->InsertGlobalValues(i, nnz_kept, list_of_vals, list_of_inds);
	}
      else
	for (int j=0; j<cur_nnz; j++) // otherwise read and discard
	  if (fscanf(in_file, "%lf %d",vp,lp) != 2)
	    Trilinos_Util_ReadHpc2Epetra_ReadError(data_file, "a matrix entry");
    }}

  double xt, bt, xxt;
  {for (int i=0; i<numGlobalEquations; i++) 
    {
      if (map->MyGID(i)) // See if entry should be added
	{
	  if (debug) cout << "Process "<<rank<<" of "
                       <<size<<" getting RHS "<<i<<endl;
	  if (fscanf(in_file, "%lf %lf %lf",&xt, &bt, &xxt) != 3)
	    Trilinos_Util_ReadHpc2Epetra_ReadError(data_file, "an x/b/xexact triple");
	  int cur_local_row = map->LID(i);
	  (*x)[cur_local_row] = xt;
	  (*b)[cur_local_row] = bt;
	  (*xexact)[cur_local_row] = xxt;
	}
      else
	{
	  if (fscanf(in_file, "%lf %lf %lf",vp, vp, vp) != 3) // or thrown away
	    Trilinos_Util_ReadHpc2Epetra_ReadError(data_file, "an x/b/xexact triple");
	}
    }}

  fclose(in_file);

  
  if (debug)
    cout << "Process "<<rank<<" of "<<size<<" has "<<numMyEquations
	 << " rows. Min global row "<< map->MinMyGID()
	 <<" Max global row "<< map->MaxMyGID() <<endl
	 <<" and "<<A->NumMyNonzeros()<<" nonzeros."<<endl;

  A->FillComplete();
  

  // Sanity check: compare A*xexact with the b read from the file.
  Epetra_Vector bcomp(*map);

  A->Multiply(false, *xexact, bcomp);
  double residual;
  bcomp.Norm2(&residual);
  if (comm.MyPID()==0) cout << "Norm of computed b = " << residual << endl;
  b->Norm2(&residual);
  if (comm.MyPID()==0) cout << "Norm of given b    = " << residual << endl;
  bcomp.Update(-1.0, *b, 1.0);
  bcomp.Norm2(&residual);
  if (comm.MyPID()==0) cout << "Norm of difference between computed b and given b for xexact = " << residual << endl;
  
  delete [] list_of_vals;
  delete []list_of_inds;

  return;
}
  /// Builds the block adjacency graph of B for a given partition of its rows
  /// into nbrr contiguous blocks.
  ///
  /// @param B        Graph to compress.  All of its entries must live on a single
  ///                 process (NumGlobalEntries() == NumMyEntries() somewhere);
  ///                 otherwise a message is printed and abort() is called.
  /// @param nbrr     Number of block rows.
  /// @param r        Block boundaries: block l covers rows/columns r[l] .. r[l+1]-1.
  ///                 Entries r[0] .. r[nbrr] are read.
  /// @param weights  Output; resized to nbrr, weights[l] = r[l+1] - r[l].
  /// @param verbose  When true the owning process prints the matrix size and the
  ///                 edge count.  It no longer influences the computation.
  /// @return         A new, FillComplete'd Epetra_CrsGraph with nbrr rows in which
  ///                 block row l has an entry for block column c whenever some row
  ///                 in block l of B has an index in block c.
  ///
  /// Only the process holding B does the work; on every other process nrr is set
  /// to -1 so the loops below are skipped and that process contributes no rows.
  Teuchos::RCP<Epetra_CrsGraph> BlockAdjacencyGraph::compute( Epetra_CrsGraph& B, int nbrr, std::vector<int>&r, std::vector<double>& weights, bool verbose)
  {
    // Check if the graph is on one processor.
    int myMatProc = -1, matProc = -1;
    int myPID = B.Comm().MyPID();
    if (B.NumGlobalEntries() == B.NumMyEntries())
      myMatProc = myPID;
    B.Comm().MaxAll( &myMatProc, &matProc, 1 );
    
    if( matProc == -1)
      { cout << "FAIL for Global!  All CrsGraph entries must be on one processor!\n"; abort(); }
    
    int i= 0, j = 0, k, l = 0, p, pm, q = -1, ns = 0;
    int tree_height;
    int error = -1;    /* error detected, possibly a problem with the input */
    int nrr;           /* number of rows in B */
    int nzM = 0;       /* number of edges in graph */
    int m = 0;         /* maximum number of nonzeros in any block row of B */
    int* colstack = 0; /* stack used to process each block row */
    int* bstree = 0;   /* binary search tree */
    std::vector<int> Mi, Mj, Mnum(nbrr+1,0);
    nrr = B.NumMyRows();
    if ( matProc == myPID ) {
      // Printing is optional, but the owning process must always keep nrr so
      // that it actually performs the computation.
      if ( verbose )
	std::printf(" Matrix Size = %d      Number of Blocks = %d\n",nrr, nbrr);
    } else {
      nrr = -1;     /* Prevent processor from doing any computations */
    }
    // bstree is used below as an implicit binary search tree over block ids:
    // the children of node p are 2p+1 and 2p+2.  It is released with free().
    bstree = csr_bst(nbrr);  /* 0 : nbrr-1 */
    tree_height = ceil31log2(nbrr) + 1;
    error = -1;

    // Determine m, the largest number of indices in any block row, which
    // bounds the size of the column stack.
    l = 0; j = 0; m = 0;
    for( i = 0; i < nrr; i++ ){
      if( i >= r[l+1] ){
	++l;                 /* new block row */
	m = EPETRA_MAX(m,j) ;   /* nonzeros in block row */
	j = B.NumGlobalIndices(i);
      }else{
	j += B.NumGlobalIndices(i);
      }
    }
    /* one more time for the final block */
     m = EPETRA_MAX(m,j) ;   /* nonzeros in block row */

    colstack = (int*) malloc( EPETRA_MAX(m,1) * sizeof(int) );
    // The compressed graph is actually computed twice,
    // due to concerns about memory limitations.  First, 
    // without memory allocation, just nzM is computed.  
    // Next Mj is allocated. Then, the second time, the
    // arrays are actually populated.
    nzM = 0; q = -1; l = 0;
    int * indices;
    int numEntries;
    for( i = 0; i <= nrr; i++ ){
      if( i >= r[l+1] ){
	// Block row l is complete: count its distinct block columns.
	if( q > 0 ) std::qsort(colstack,q+1,sizeof(int),compare_ints); /* sort stack */
	// An empty block row contributes nothing; previously ns kept a stale
	// value from the tree search in that case.
	ns = ( q >= 0 ) ? 1 : 0; /* l, colstack[0] M */
	for( j=1; j<=q ; j++ ){ /* delete copies */
	  if( colstack[j] > colstack[j-1] ) ++ns;
	}
	nzM += ns; /*M->p[l+1] = M->p[l] + ns;*/
	++l;
	q = -1;
      }
      if( i < nrr ){
	B.ExtractMyRowView( i, numEntries, indices );
	for( k = 0; k < numEntries; k++){
	  // Locate the block b with r[b] <= j < r[b+1] by descending the tree.
	  j = indices[k];  ns = 0; p = 0;
	  while( (r[bstree[p]] > j)  ||  (j >= r[bstree[p]+1])  ){
	    if( r[bstree[p]] > j){
	      p = 2*p+1;
	    }else{
	      if( r[bstree[p]+1] <= j) p = 2*p+2;
	    }
	    ++ns;
	    if( p > nbrr || ns > tree_height ) {
	      error = j;
	      std::printf("error: p %d  nbrr %d  ns %d %d\n",p,nbrr,ns,j); break;
	    }
	  }
	  colstack[++q] = bstree[p];
	}
	//if( error >-1 ){ std::printf("%d\n",error); break; }
        // p > nbrr is a fatal error that is ignored
      }
    }
    
    if ( matProc == myPID && verbose )
      std::printf("nzM =  %d \n", nzM );
    Mi.resize( nzM );
    Mj.resize( nzM );
    // Second pass: same traversal, now recording the (block row, block column)
    // pairs in Mi/Mj and the row offsets in Mnum.
    q = -1; l = 0; pm = -1;
    for( i = 0; i <= nrr; i++ ){
      if( i >= r[l+1] ){
	if( q > 0 ) std::qsort(colstack,q+1,sizeof(colstack[0]),compare_ints); /* sort stack */
	if( q >= 0 ){
	  Mi[++pm] = l;
	  Mj[pm] = colstack[0];
	}
	for( j=1; j<=q ; j++ ){ /* delete copies */
	  if( colstack[j] > colstack[j-1] ){ /* l, colstack[j] */
	    Mi[++pm] = l;
	    Mj[pm] = colstack[j];
	  }
	}
	++l;
	Mnum[l] = pm + 1;
	
	/* sparse row format: M->p[l+1] = M->p[l] + ns; */
	q = -1;
      }
      if( i < nrr ){
	B.ExtractMyRowView( i, numEntries, indices );
	for( k = 0; k < numEntries; k++){
	  j = indices[k]; ns = 0; p = 0;
	  while( (r[bstree[p]] > j)  ||  (j >= r[bstree[p]+1])  ){
	    if( r[bstree[p]] > j){
	      p = 2*p+1;
	    }else{
	      if( r[bstree[p]+1] <= j) p = 2*p+2;
	    }
	    ++ns;
	  }
	  colstack[++q] = bstree[p];
	}
      }
    }
    if ( bstree ) free ( bstree );
    if ( colstack ) free( colstack );
    
    // Compute weights as number of rows in each block.
    weights.resize( nbrr );
    for( l=0; l<nbrr; l++) weights[l] = r[l+1] - r[l];
    
    // Compute Epetra_CrsGraph and return
    // The owning process is given all nbrr rows of the new map; every other
    // process is given none.
    Teuchos::RCP<Epetra_Map> newMap;
    if ( matProc == myPID )
      newMap = Teuchos::rcp( new Epetra_Map(nbrr, nbrr, 0, B.Comm() ) );
    else
      newMap = Teuchos::rcp( new Epetra_Map( nbrr, 0, 0, B.Comm() ) );
    Teuchos::RCP<Epetra_CrsGraph> newGraph = Teuchos::rcp( new Epetra_CrsGraph( Copy, *newMap, 0 ) );
    int noIndex = 0;   /* valid address to pass for rows without entries */
    for( l=0; l<newGraph->NumMyRows(); l++) {
      // Avoid forming &Mj[Mj.size()] (or indexing an empty Mj) for empty rows.
      int rowLength = Mnum[l+1]-Mnum[l];
      int * rowCols = ( rowLength > 0 ) ? &Mj[Mnum[l]] : &noIndex;
      newGraph->InsertGlobalIndices( l, rowLength, rowCols );
    }
    newGraph->FillComplete();
    
    return (newGraph);  
  }
//
//  Amesos_TestMultiSolver.cpp reads in a matrix in Harwell-Boeing format, 
//  calls one of the sparse direct solvers, using blocked right hand sides
//  and computes the error and residual.  
//
//  TestSolver ignores the Harwell-Boeing right hand sides, creating
//  random right hand sides instead.  
//
//  Amesos_TestMultiSolver can test either A x = b or A^T x = b.
//  This can be a bit confusing because sparse direct solvers 
//  use compressed column storage - the transpose of Trilinos'
//  sparse row storage.
//
//  Matrices:
//    readA - Serial.  As read from the file.
//    transposeA - Serial.  The transpose of readA.
//    serialA - if (transpose) then transposeA else readA 
//    distributedA - readA distributed to all processes
//    passA - if ( distributed ) then distributedA else serialA
//
//
int Amesos_TestMultiSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, 
		      SparseSolverType SparseSolver, bool transpose,
		      int special, AMESOS_MatrixType matrix_type ) {


  int iam = Comm.MyPID() ;

  
  //  int hatever;
  //  if ( iam == 0 )  std::cin >> hatever ; 
  Comm.Barrier();


  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
   
  std::string FileName = matrix_file ;
  int FN_Size = FileName.size() ; 
  std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size );
  std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size );
  bool NonContiguousMap = false; 

  if ( LastFiveBytes == ".triU" ) { 
    NonContiguousMap = true; 
    // Call routine to read in unsymmetric Triplet matrix
    EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, 
						      readb, readxexact, NonContiguousMap ) );
  } else {
    if ( LastFiveBytes == ".triS" ) { 
      NonContiguousMap = true; 
      // Call routine to read in symmetric Triplet matrix
      EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, 
							readMap, readA, readx, 
							readb, readxexact, NonContiguousMap ) );
    } else {
      if (  LastFourBytes == ".mtx" ) { 
	EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, 
							       readA, readx, readb, readxexact) );
      } else {
	// Call routine to read in HB problem
	Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, 
						     readb, readxexact) ;
      }
    }
  }

  Epetra_CrsMatrix transposeA(Copy, *readMap, 0);
  Epetra_CrsMatrix *serialA ; 

  if ( transpose ) {
    assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); 
    serialA = &transposeA ; 
  } else {
    serialA = readA ; 
  }

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);
  Epetra_Map* map_;

  if( NonContiguousMap ) {
    //
    //  map gives us NumMyElements and MyFirstElement;
    //
    int NumGlobalElements =  readMap->NumGlobalElements();
    int NumMyElements = map.NumMyElements();
    int MyFirstElement = map.MinMyGID();
    std::vector<int> MapMap_( NumGlobalElements );
    readMap->MyGlobalElements( &MapMap_[0] ) ;
    Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; 
    map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm);
  } else {
    map_ = new Epetra_Map( map ) ; 
  }


  // Create Exporter to distribute read-in matrix and vectors
  Epetra_Export exporter(*readMap, *map_);
  Epetra_CrsMatrix A(Copy, *map_, 0);

  Epetra_RowMatrix * passA = 0; 
  Epetra_MultiVector * passx = 0; 
  Epetra_MultiVector * passb = 0;
  Epetra_MultiVector * passxexact = 0;
  Epetra_MultiVector * passresid = 0;
  Epetra_MultiVector * passtmp = 0;

  Epetra_MultiVector x(*map_,numsolves);
  Epetra_MultiVector b(*map_,numsolves);
  Epetra_MultiVector xexact(*map_,numsolves);
  Epetra_MultiVector resid(*map_,numsolves);
  Epetra_MultiVector tmp(*map_,numsolves);

  Epetra_MultiVector serialx(*readMap,numsolves);
  Epetra_MultiVector serialb(*readMap,numsolves);
  Epetra_MultiVector serialxexact(*readMap,numsolves);
  Epetra_MultiVector serialresid(*readMap,numsolves);
  Epetra_MultiVector serialtmp(*readMap,numsolves);

  bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; 
  if ( distribute_matrix ) { 
    //
    //  Initialize x, b and xexact to the values read in from the file
    //
    
    A.Export(*serialA, exporter, Add);
    Comm.Barrier();

    assert(A.FillComplete()==0);    
    Comm.Barrier();

    passA = &A; 
    passx = &x; 
    passb = &b;
    passxexact = &xexact;
    passresid = &resid;
    passtmp = &tmp;
  } else { 
    passA = serialA; 
    passx = &serialx; 
    passb = &serialb;
    passxexact = &serialxexact;
    passresid = &serialresid;
    passtmp = &serialtmp;
  }

  passxexact->SetSeed(131) ; 
  passxexact->Random();
  passx->SetSeed(11231) ; 
  passx->Random();

  passb->PutScalar( 0.0 );
  passA->Multiply( transpose, *passxexact, *passb ) ; 

  Epetra_MultiVector CopyB( *passb ) ;

  double Anorm = passA->NormInf() ; 
  SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ;

  Epetra_LinearProblem Problem(  (Epetra_RowMatrix *) passA, 
				 (Epetra_MultiVector *) passx, 
				 (Epetra_MultiVector *) passb );

  double max_resid = 0.0;
  for ( int j = 0 ; j < special+1 ; j++ ) { 
    
    Epetra_Time TotalTime( Comm ) ; 
    if ( false ) { 
#ifdef TEST_UMFPACK

      unused code

    } else if ( SparseSolver == UMFPACK ) { 
      UmfpackOO umfpack( (Epetra_RowMatrix *) passA, 
			 (Epetra_MultiVector *) passx, 
			 (Epetra_MultiVector *) passb ) ; 
    
      umfpack.SetTrans( transpose ) ; 
      umfpack.Solve() ; 
#endif
#ifdef TEST_SUPERLU
    } else if ( SparseSolver == SuperLU ) { 
      SuperluserialOO superluserial( (Epetra_RowMatrix *) passA, 
				     (Epetra_MultiVector *) passx, 
				     (Epetra_MultiVector *) passb ) ; 

      superluserial.SetPermc( SuperLU_permc ) ; 
      superluserial.SetTrans( transpose ) ; 
      superluserial.SetUseDGSSV( special == 0 ) ; 
      superluserial.Solve() ; 
#endif
#ifdef HAVE_AMESOS_SLUD
    } else if ( SparseSolver == SuperLUdist ) { 
      SuperludistOO superludist( Problem ) ; 
      superludist.SetTrans( transpose ) ; 
      EPETRA_CHK_ERR( superludist.Solve( true ) ) ;
#endif 
#ifdef HAVE_AMESOS_SLUD2
    } else if ( SparseSolver == SuperLUdist2 ) { 
      Superludist2_OO superludist2( Problem ) ; 
      superludist2.SetTrans( transpose ) ; 
      EPETRA_CHK_ERR( superludist2.Solve( true ) ) ;
#endif 
#ifdef TEST_SPOOLES
    } else if ( SparseSolver == SPOOLES ) { 
      SpoolesOO spooles( (Epetra_RowMatrix *) passA, 
			 (Epetra_MultiVector *) passx, 
			 (Epetra_MultiVector *) passb ) ; 
    
      spooles.SetTrans( transpose ) ; 
      spooles.Solve() ; 
#endif
#ifdef HAVE_AMESOS_DSCPACK
    } else if ( SparseSolver == DSCPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Dscpack dscpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); 
    
      EPETRA_CHK_ERR( dscpack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_UMFPACK
    } else if ( SparseSolver == UMFPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Umfpack umfpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( umfpack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_KLU
    } else if ( SparseSolver == KLU ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Klu klu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( klu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( klu.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( klu.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARAKLETE
    } else if ( SparseSolver == PARAKLETE ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Paraklete paraklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( paraklete.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( paraklete.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( paraklete.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SLUS
    } else if ( SparseSolver == SuperLU ) { 
      Epetra_SLU superluserial( &Problem ) ; 
      EPETRA_CHK_ERR( superluserial.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superluserial.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superluserial.NumericFactorization(  ) ); 

      EPETRA_CHK_ERR( superluserial.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_LAPACK
    } else if ( SparseSolver == LAPACK ) { 
      Teuchos::ParameterList ParamList ;
      ParamList.set( "MaxProcs", -3 );
      Amesos_Lapack lapack( Problem ) ; 
      EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( lapack.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( lapack.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( lapack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_TAUCS
    } else if ( SparseSolver == TAUCS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Taucs taucs( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( taucs.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( taucs.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARDISO
    } else if ( SparseSolver == PARDISO ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Pardiso pardiso( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( pardiso.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( pardiso.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARKLETE
    } else if ( SparseSolver == PARKLETE ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Parklete parklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( parklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( parklete.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( parklete.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( parklete.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( parklete.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_MUMPS
    } else if ( SparseSolver == MUMPS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Mumps mumps( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( mumps.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( mumps.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SCALAPACK
    } else if ( SparseSolver == SCALAPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Scalapack scalapack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( scalapack.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( scalapack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SUPERLUDIST
    } else if ( SparseSolver == SUPERLUDIST ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Superludist superludist( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) ); 

      EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superludist.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superludist.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( superludist.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SUPERLU
    } else if ( SparseSolver == SUPERLU ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Superlu superlu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superlu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superlu.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( superlu.Solve( ) ); 
#endif
#ifdef TEST_SPOOLESSERIAL 
    } else if ( SparseSolver == SPOOLESSERIAL ) { 
      SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA, 
				     (Epetra_MultiVector *) passx, 
				     (Epetra_MultiVector *) passb ) ; 
    
      spoolesserial.Solve() ;
#endif
    } else { 
      SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ;
      std::cerr << "\n\n####################  Requested solver not available (Or not tested with blocked RHS) on this platform #####################\n" << std::endl ;
    }

    SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); 
    //    SparseDirectTimingVars::SS_Result.Set_First_Time( 0.0 ); 
    //    SparseDirectTimingVars::SS_Result.Set_Middle_Time( 0.0 ); 
    //    SparseDirectTimingVars::SS_Result.Set_Last_Time( 0.0 ); 

    //
    //  Compute the error = norm(xcomp - xexact )
    //
    std::vector <double> error(numsolves) ; 
    double max_error = 0.0;
  
    passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0);

    passresid->Norm2(&error[0]);
    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( error[i] > max_error ) max_error = error[i] ; 
    SparseDirectTimingVars::SS_Result.Set_Error(max_error) ;

    //  passxexact->Norm2(&error[0] ) ; 
    //  passx->Norm2(&error ) ; 

    //
    //  Compute the residual = norm(Ax - b)
    //
    std::vector <double> residual(numsolves) ; 
  
    passtmp->PutScalar(0.0);
    passA->Multiply( transpose, *passx, *passtmp);
    passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); 
    //    passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); 
    passresid->Norm2(&residual[0]);

    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( residual[i] > max_resid ) max_resid = residual[i] ; 


    SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ;
    
    std::vector <double> bnorm(numsolves); 
    passb->Norm2( &bnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ;

    std::vector <double> xnorm(numsolves); 
    passx->Norm2( &xnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ;


    if ( false && iam == 0 ) { 

      std::cout << " Amesos_TestMutliSolver.cpp " << std::endl ; 
      for ( int i = 0 ; i< numsolves && i < 10 ; i++ ) {
	std::cout << "i=" << i 
	     << " error = " << error[i] 
	     << " xnorm = " << xnorm[i] 
	     << " residual = " << residual[i] 
	     << " bnorm = " << bnorm[i] 
	     << std::endl ; 
      
      }
    
      std::cout << std::endl << " max_resid = " << max_resid ; 
      std::cout << " max_error = " << max_error << std::endl ; 
      std::cout << " Get_residual() again = " << SparseDirectTimingVars::SS_Result.Get_Residual() << std::endl ;

    }
  }
  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;
  delete map_;
  
  Comm.Barrier();

return 0 ;
}
Beispiel #28
0
// Sets up a variable-block-row (Vbr) test problem using 64-bit global indices.
//
// Active part:
//   - obtains this process's global IDs from GenerateMyGlobalElements()
//     and builds a point map from them (numNodesX*numNodesY local elements);
//   - gives every local element a block size by cycling through
//     sizes[0..nsizes-1] according to its global ID;
//   - allocates the block map with new and returns it through `map`
//     (it is not freed in this function).
//
// Disabled part (#if 0, see FIXME below): would assemble the matrix A from the
// (xoff, yoff) stencil of numPoints entries, overwrite the block diagonal with
// the row sums, and allocate b, bt and xexact with nrhs columns.
//
// While the disabled part is compiled out, A, b, bt and xexact are set to null
// so that callers never receive indeterminate pointers.
//
// verbose and summary are not used in this function.
void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
			int * xoff, int * yoff,
			int nsizes, int * sizes, int nrhs,
			const Epetra_Comm  &comm, bool verbose, bool summary,
			Epetra_BlockMap *& map,
			Epetra_VbrMatrix *& A,
			Epetra_MultiVector *& b,
			Epetra_MultiVector *& bt,
			Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {

  int i;

  // Determine my global IDs
  long long * myGlobalElements;
  GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);

  int numMyElements = numNodesX*numNodesY;

  Epetra_Map ptMap((long long)-1, numMyElements, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
  delete [] myGlobalElements;

  Epetra_IntVector elementSizes(ptMap); // This vector will have the list of element sizes
  for (i=0; i<numMyElements; i++)
    elementSizes[i] = sizes[ptMap.GID64(i)%nsizes]; // cycle through sizes array

  map = new Epetra_BlockMap((long long)-1, numMyElements, ptMap.MyGlobalElements64(), elementSizes.Values(),
			    ptMap.IndexBase64(), ptMap.Comm());

// FIXME: Won't compile until Epetra_VbrMatrix is modified.
#if 0
  // Only the matrix constructors below consume the profile, so it lives in
  // the disabled section to avoid a set-but-unused local.
  int profile = 0; if (StaticProfile) profile = numPoints;

  int j;
  long long numGlobalEquations = ptMap.NumGlobalElements64();

  if (MakeLocalOnly)
    A = new Epetra_VbrMatrix(Copy, *map, *map, profile); // Construct matrix rowmap=colmap
  else
    A = new Epetra_VbrMatrix(Copy, *map, profile); // Construct matrix

  long long * indices = new long long[numPoints];

  // This section of code creates a vector of random values that will be used to create
  // light-weight dense matrices to pass into the VbrMatrix construction process.

  int maxElementSize = 0;
  for (i=0; i< nsizes; i++) maxElementSize = EPETRA_MAX(maxElementSize, sizes[i]);

  Epetra_LocalMap lmap((long long)maxElementSize*maxElementSize, ptMap.IndexBase(), ptMap.Comm());
  Epetra_Vector randvec(lmap);
  randvec.Random();
  randvec.Scale(-1.0); // Make value negative
  int nx = numNodesX*numProcsX;


  for (i=0; i<numMyElements; i++) {
    long long rowID = map->GID64(i);
    int numIndices = 0;
    int rowDim = sizes[rowID%nsizes];
    for (j=0; j<numPoints; j++) {
      long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
      if (colID>-1 && colID<numGlobalEquations)
	indices[numIndices++] = colID;
    }
			
    A->BeginInsertGlobalValues(rowID, numIndices, indices);
		
    for (j=0; j < numIndices; j++) {
      int colDim = sizes[indices[j]%nsizes];
      A->SubmitBlockEntry(&(randvec[0]), rowDim, rowDim, colDim);
    }
    A->EndSubmitEntries();
  }

  delete [] indices;

  A->FillComplete();

  // Compute the InvRowSums of the matrix rows
  Epetra_Vector invRowSums(A->RowMap());
  Epetra_Vector rowSums(A->RowMap());
  A->InvRowSums(invRowSums);
  rowSums.Reciprocal(invRowSums);

  // Jam the row sum values into the diagonal of the Vbr matrix (to make it diag dominant)
  int numBlockDiagonalEntries;
  int * rowColDims;
  int * diagoffsets = map->FirstPointInElementList();
  A->BeginExtractBlockDiagonalView(numBlockDiagonalEntries, rowColDims);
  for (i=0; i< numBlockDiagonalEntries; i++) {
    double * diagVals;
    int diagLDA;
    A->ExtractBlockDiagonalEntryView(diagVals, diagLDA);
    int rowDim = map->ElementSize(i);
    for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j];
  }

  if (nrhs<=1) {
    b = new Epetra_Vector(*map);
    bt = new Epetra_Vector(*map);
    xexact = new Epetra_Vector(*map);
  }
  else {
    b = new Epetra_MultiVector(*map, nrhs);
    bt = new Epetra_MultiVector(*map, nrhs);
    xexact = new Epetra_MultiVector(*map, nrhs);
  }

  xexact->Random(); // Fill xexact with random values

  A->Multiply(false, *xexact, *b);
  A->Multiply(true, *xexact, *bt);

#else
  // Nothing above produced the matrix or the vectors: hand back null pointers
  // rather than leaving the caller's output arguments indeterminate.
  A = 0;
  b = 0;
  bt = 0;
  xexact = 0;
#endif // 0 -- disabled until Epetra_VbrMatrix is modified (see FIXME above)

  return;
}
void Trilinos_Util_GenerateVbrProblem(int nx, int ny, int npoints, int * xoff, int * yoff, 
																			int nsizes, int * sizes, int nrhs,
																			const Epetra_Comm  &comm, 
																			Epetra_BlockMap *& map, 
																			Epetra_VbrMatrix *& A, 
																			Epetra_MultiVector *& x, 
																			Epetra_MultiVector *& b,
																			Epetra_MultiVector *&xexact) {

	int i, j;

	// Number of global equations is nx*ny.  These will be distributed in a linear fashion
	int numGlobalEquations = nx*ny;
  Epetra_Map ptMap(numGlobalEquations, 0, comm); // Create map with equal distribution of equations.

	int numMyElements = ptMap.NumMyElements();

	Epetra_IntVector elementSizes(ptMap); // This vector will have the list of element sizes
	for (i=0; i<numMyElements; i++) 
		elementSizes[i] = sizes[ptMap.GID64(i)%nsizes]; // cycle through sizes array

	map = new Epetra_BlockMap(-1, numMyElements, ptMap.MyGlobalElements(), elementSizes.Values(),
														ptMap.IndexBase(), ptMap.Comm());

  
  A = new Epetra_VbrMatrix(Copy, *map, 0); // Construct matrix

	int * indices = new int[npoints];
//	double * values = new double[npoints];

//	double dnpoints = (double) npoints;

	// This section of code creates a vector of random values that will be used to create
	// light-weight dense matrices to pass into the VbrMatrix construction process.

	int maxElementSize = 0;
	for (i=0; i< nsizes; i++) maxElementSize = EPETRA_MAX(maxElementSize, sizes[i]);

	Epetra_LocalMap lmap(maxElementSize*maxElementSize, ptMap.IndexBase(), ptMap.Comm());
	Epetra_Vector randvec(lmap);
	randvec.Random();
	randvec.Scale(-1.0); // Make value negative


	for (i=0; i<numMyElements; i++) {
		int rowID = map->GID(i);
		int numIndices = 0;
		int rowDim = sizes[rowID%nsizes];
		for (j=0; j<npoints; j++) {
			int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
			if (colID>-1 && colID<numGlobalEquations)
				indices[numIndices++] = colID;
		}
			
		A->BeginInsertGlobalValues(rowID, numIndices, indices);
		
		for (j=0; j < numIndices; j++) {
			int colDim = sizes[indices[j]%nsizes];
			A->SubmitBlockEntry(&(randvec[0]), rowDim, rowDim, colDim);
		}
		A->EndSubmitEntries();
	}

	delete [] indices;

  A->FillComplete();

	// Compute the InvRowSums of the matrix rows
	Epetra_Vector invRowSums(A->RowMap());
	Epetra_Vector rowSums(A->RowMap());
	A->InvRowSums(invRowSums);
	rowSums.Reciprocal(invRowSums);

	// Jam the row sum values into the diagonal of the Vbr matrix (to make it diag dominant)
	int numBlockDiagonalEntries;
	int * rowColDims;
	int * diagoffsets = map->FirstPointInElementList();
	A->BeginExtractBlockDiagonalView(numBlockDiagonalEntries, rowColDims);
	for (i=0; i< numBlockDiagonalEntries; i++) {
		double * diagVals;
		int diagLDA;
		A->ExtractBlockDiagonalEntryView(diagVals, diagLDA);
		int rowDim = map->ElementSize(i);
		for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j];
	}

	if (nrhs<=1) {  
		x = new Epetra_Vector(*map);
		b = new Epetra_Vector(*map);
		xexact = new Epetra_Vector(*map);
	}
	else {
		x = new Epetra_MultiVector(*map, nrhs);
		b = new Epetra_MultiVector(*map, nrhs);
		xexact = new Epetra_MultiVector(*map, nrhs);
	}

	xexact->Random(); // Fill xexact with random values

  A->Multiply(false, *xexact, *b);

  return;
}
Beispiel #30
0
//=============================================================================
//
//  See also pre and post conditions in Amesos_Klu.h
//  Preconditions:
//    firsttime specifies that this is the first time that 
//    ConertToKluCrs has been called, i.e. in symbolic factorization.  
//    No data allocation should happen unless firsttime=true.
//    SerialMatrix_ points to the matrix to be factored and solved
//    NumGlobalElements_ has been set to the dimension of the matrix
//    numentries_ has been set to the number of non-zeros in the matrix
//      (i.e. CreateLocalMatrixAndExporters() has been callded)
//
//  Postconditions:
//    Ap, VecAi, VecAval contain the matrix as Klu needs it
//
//
int Amesos_Klu::ConvertToKluCRS(bool firsttime)
{
  ResetTimer(0);

  //
  //  Convert matrix to the form that Klu expects (Ap, VecAi, VecAval)
  //

  if (MyPID_==0) {
    assert( NumGlobalElements_ == SerialMatrix_->NumGlobalRows());
    assert( NumGlobalElements_ == SerialMatrix_->NumGlobalCols());
    if ( ! AddZeroToDiag_ ) {
      assert( numentries_ == SerialMatrix_->NumGlobalNonzeros()) ;
    } else {
      numentries_ = SerialMatrix_->NumGlobalNonzeros() ;
    }

    Epetra_CrsMatrix *CrsMatrix = dynamic_cast<Epetra_CrsMatrix *>(SerialMatrix_);
    bool StorageOptimized = ( CrsMatrix != 0 && CrsMatrix->StorageOptimized() );

    if ( AddToDiag_ != 0.0 ) StorageOptimized = false ;

    if ( firsttime ) { 
      Ap.resize( NumGlobalElements_+1 );
      if ( ! StorageOptimized ) { 
	VecAi.resize( EPETRA_MAX( NumGlobalElements_, numentries_) ) ;
	VecAval.resize( EPETRA_MAX( NumGlobalElements_, numentries_) ) ;
	Ai = &VecAi[0];
	Aval = &VecAval[0];
      }
    }

    double *RowValues;
    int *ColIndices;
    int NumEntriesThisRow;

    if( StorageOptimized ) {
      if ( firsttime ) {
	Ap[0] = 0;
	for ( int MyRow = 0; MyRow <NumGlobalElements_; MyRow++ ) {
	  if( CrsMatrix->
	      ExtractMyRowView( MyRow, NumEntriesThisRow, RowValues,
				ColIndices ) != 0 ) 
	    AMESOS_CHK_ERR( -10 );
	  if ( MyRow == 0 ) {
	    Ai = ColIndices ;
	    Aval = RowValues ;
	  }
	  Ap[MyRow+1] = Ap[MyRow] + NumEntriesThisRow ;
	}
      }
    } else { 
      
      int Ai_index = 0 ;
      int MyRow;
      
      int MaxNumEntries_ = SerialMatrix_->MaxNumEntries();
      if ( firsttime && CrsMatrix == 0 ) {
	ColIndicesV_.resize(MaxNumEntries_);
	RowValuesV_.resize(MaxNumEntries_);
      }
      
      for ( MyRow = 0; MyRow <NumGlobalElements_; MyRow++ ) {
	if ( CrsMatrix != 0 ) {
	  if( CrsMatrix->
	      ExtractMyRowView( MyRow, NumEntriesThisRow, RowValues,
				ColIndices ) != 0 ) 
	    AMESOS_CHK_ERR( -11 );

	} else {
	  if( SerialMatrix_->
			  ExtractMyRowCopy( MyRow, MaxNumEntries_,
					    NumEntriesThisRow, &RowValuesV_[0],
					    &ColIndicesV_[0] ) != 0 ) 
	    AMESOS_CHK_ERR( -12 );

	  RowValues =  &RowValuesV_[0];
	  ColIndices = &ColIndicesV_[0];
	}
	
	if ( firsttime ) {
	  Ap[MyRow] = Ai_index ;
	  for ( int j = 0; j < NumEntriesThisRow; j++ ) {
	    VecAi[Ai_index] = ColIndices[j] ;
	    //	  assert( VecAi[Ai_index] == Ai[Ai_index] ) ; 
	    VecAval[Ai_index] = RowValues[j] ;      //  We have to do this because of the hacks to get aroun bug #1502 
	    if (ColIndices[j] == MyRow) {
	      VecAval[Ai_index] += AddToDiag_;    
	    }
	    Ai_index++;
	  }
	} else { 
	  for ( int j = 0; j < NumEntriesThisRow; j++ ) {
	    VecAval[Ai_index] = RowValues[j] ;     
	    if (ColIndices[j] == MyRow) {
	      VecAval[Ai_index] += AddToDiag_;   
	    }
	    Ai_index++;
	  }
	}
      }
      Ap[MyRow] = Ai_index ;
    }

  }

  MtxConvTime_ = AddTime("Total matrix conversion time", MtxConvTime_, 0);

  return 0;
}