//============================================================================= int Amesos_Umfpack::ConvertToUmfpackCRS() { ResetTimer(0); ResetTimer(1); // Convert matrix to the form that Umfpack expects (Ap, Ai, Aval), // only on processor 0. The matrix has already been assembled in // SerialMatrix_; if only one processor is used, then SerialMatrix_ // points to the problem's matrix. if (MyPID_ == 0) { Ap.resize( NumGlobalElements_+1 ); Ai.resize( EPETRA_MAX( NumGlobalElements_, numentries_) ) ; Aval.resize( EPETRA_MAX( NumGlobalElements_, numentries_) ) ; int NumEntries = SerialMatrix_->MaxNumEntries(); int NumEntriesThisRow; int Ai_index = 0 ; int MyRow; for (MyRow = 0 ; MyRow < NumGlobalElements_; MyRow++) { int ierr; Ap[MyRow] = Ai_index ; ierr = SerialMatrix_->ExtractMyRowCopy(MyRow, NumEntries, NumEntriesThisRow, &Aval[Ai_index], &Ai[Ai_index]); if (ierr) AMESOS_CHK_ERR(-1); #if 1 // MS // added on 15-Mar-05 and KSS restored 8-Feb-06 if (AddToDiag_ != 0.0) { for (int i = 0 ; i < NumEntriesThisRow ; ++i) { if (Ai[Ai_index+i] == MyRow) { Aval[Ai_index+i] += AddToDiag_; break; } } } #endif Ai_index += NumEntriesThisRow; } Ap[MyRow] = Ai_index ; } MtxConvTime_ = AddTime("Total matrix conversion time", MtxConvTime_, 0); OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1); return 0; }
//============================================================================== int Ifpack_CrsRiluk::BlockGraph2PointGraph(const Epetra_CrsGraph & BG, Epetra_CrsGraph & PG, bool Upper) { if (!BG.IndicesAreLocal()) {EPETRA_CHK_ERR(-1);} // Must have done FillComplete on BG int * ColFirstPointInElementList = BG.RowMap().FirstPointInElementList(); int * ColElementSizeList = BG.RowMap().ElementSizeList(); if (BG.Importer()!=0) { ColFirstPointInElementList = BG.ImportMap().FirstPointInElementList(); ColElementSizeList = BG.ImportMap().ElementSizeList(); } int Length = (BG.MaxNumIndices()+1) * BG.ImportMap().MaxMyElementSize(); vector<int> tmpIndices(Length); int BlockRow, BlockOffset, NumEntries; int NumBlockEntries; int * BlockIndices; int NumMyRows_tmp = PG.NumMyRows(); for (int i=0; i<NumMyRows_tmp; i++) { EPETRA_CHK_ERR(BG.RowMap().FindLocalElementID(i, BlockRow, BlockOffset)); EPETRA_CHK_ERR(BG.ExtractMyRowView(BlockRow, NumBlockEntries, BlockIndices)); int * ptr = &tmpIndices[0]; // Set pointer to beginning of buffer int RowDim = BG.RowMap().ElementSize(BlockRow); NumEntries = 0; // This next line make sure that the off-diagonal entries in the block diagonal of the // original block entry matrix are included in the nonzero pattern of the point graph if (Upper) { int jstart = i+1; int jstop = EPETRA_MIN(NumMyRows_tmp,i+RowDim-BlockOffset); for (int j= jstart; j< jstop; j++) {*ptr++ = j; NumEntries++;} } for (int j=0; j<NumBlockEntries; j++) { int ColDim = ColElementSizeList[BlockIndices[j]]; NumEntries += ColDim; assert(NumEntries<=Length); // Sanity test int Index = ColFirstPointInElementList[BlockIndices[j]]; for (int k=0; k < ColDim; k++) *ptr++ = Index++; } // This next line make sure that the off-diagonal entries in the block diagonal of the // original block entry matrix are included in the nonzero pattern of the point graph if (!Upper) { int jstart = EPETRA_MAX(0,i-RowDim+1); int jstop = i; for (int j = jstart; j < jstop; j++) {*ptr++ = j; NumEntries++;} } 
EPETRA_CHK_ERR(PG.InsertMyIndices(i, NumEntries, &tmpIndices[0])); } SetAllocated(true); return(0); }
//========================================================================= int Epetra_MapColoring::CopyAndPermute(const Epetra_SrcDistObject& Source, int NumSameIDs, int NumPermuteIDs, int * PermuteToLIDs, int *PermuteFromLIDs, const Epetra_OffsetIndex * Indexor, Epetra_CombineMode CombineMode) { (void)Indexor; if( CombineMode != Add && CombineMode != Zero && CombineMode != Insert && CombineMode != AbsMax ) EPETRA_CHK_ERR(-1); //Unsupported CombinedMode, will default to Zero const Epetra_MapColoring & A = dynamic_cast<const Epetra_MapColoring &>(Source); int * From = A.ElementColors(); int *To = ElementColors_; // Do copy first if (NumSameIDs>0) if (To!=From) { if (CombineMode==Add) for (int j=0; j<NumSameIDs; j++) To[j] += From[j]; // Add to existing value else if(CombineMode==Insert) for (int j=0; j<NumSameIDs; j++) To[j] = From[j]; else if(CombineMode==AbsMax) { for (int j=0; j<NumSameIDs; j++) To[j] = EPETRA_MAX( To[j],std::abs(From[j])); } } // Do local permutation next if (NumPermuteIDs>0) { if (CombineMode==Add) for (int j=0; j<NumPermuteIDs; j++) To[PermuteToLIDs[j]] += From[PermuteFromLIDs[j]]; // Add to existing value else if(CombineMode==Insert || CombineMode == Zero) for (int j=0; j<NumPermuteIDs; j++) To[PermuteToLIDs[j]] = From[PermuteFromLIDs[j]]; else if(CombineMode==AbsMax) { for (int j=0; j<NumPermuteIDs; j++) To[PermuteToLIDs[j]] = EPETRA_MAX( To[PermuteToLIDs[j]],std::abs(From[PermuteFromLIDs[j]])); } } return(0); }
//============================================================================= long long Epetra_LongLongVector::MaxValue() { long long result = std::numeric_limits<long long>::min(); // smallest 64 bit int int iend = MyLength(); if (iend>0) result = Values_[0]; for (int i=0; i<iend; i++) result = EPETRA_MAX(result, Values_[i]); long long globalResult; this->Comm().MaxAll(&result, &globalResult, 1); return(globalResult); }
//============================================================================= int Epetra_IntVector::MaxValue() { int result = -2000000000; // Negative 2 billion is close to smallest 32 bit int int iend = MyLength(); if (iend>0) result = Values_[0]; for (int i=0; i<iend; i++) result = EPETRA_MAX(result, Values_[i]); int globalResult; this->Comm().MaxAll(&result, &globalResult, 1); return(globalResult); }
//============================================================================= int Epetra_IntSerialDenseMatrix::OneNorm() { int anorm = 0; int* ptr = 0; for(int j = 0; j < N_; j++) { int sum = 0; ptr = A_ + j*LDA_; for(int i = 0; i < M_; i++) sum += std::abs(*ptr++); anorm = EPETRA_MAX(anorm, sum); } return(anorm); }
//============================================================================= long long Epetra_LongLongSerialDenseMatrix::OneNorm() { long long anorm = 0; long long* ptr = 0; for(int j = 0; j < N_; j++) { long long sum = 0; ptr = A_ + j*LDA_; for(int i = 0; i < M_; i++) { const long long val = *ptr++; sum += (val > 0 ? val : -val); // No std::abs(long long) on VS2005. } anorm = EPETRA_MAX(anorm, sum); } return(anorm); }
//============================================================================= double Epetra_SerialSymDenseMatrix::NormInf(void) const { int i, j; double anorm = 0.0; double * ptr; if (!Upper()) { for (j=0; j<N_; j++) { double sum = 0.0; ptr = A_ + j + j*LDA_; for (i=j; i<N_; i++) sum += std::abs(*ptr++); ptr = A_ + j; for (i=0; i<j; i++) { sum += std::abs(*ptr); ptr += LDA_; } anorm = EPETRA_MAX(anorm, sum); } } else { for (j=0; j<N_; j++) { double sum = 0.0; ptr = A_ + j*LDA_; for (i=0; i<j; i++) sum += std::abs(*ptr++); ptr = A_ + j + j*LDA_; for (i=j; i<N_; i++) { sum += std::abs(*ptr); ptr += LDA_; } anorm = EPETRA_MAX(anorm, sum); } } UpdateFlops(N_*N_); return(anorm); }
//============================================================================= int Epetra_IntSerialDenseMatrix::InfNorm() { int anorm = 0; int* ptr = 0; // Loop across columns in inner loop. Most expensive memory access, but // requires no extra storage. for(int i = 0; i < M_; i++) { int sum = 0; ptr = A_ + i; for(int j = 0; j < N_; j++) { sum += std::abs(*ptr); ptr += LDA_; } anorm = EPETRA_MAX(anorm, sum); } return(anorm); }
//============================================================================= double Epetra_MsrMatrix::NormInf() const { if (NormInf_>-1.0) return(NormInf_); double Local_NormInf = 0.0; for (int i=0; i < NumMyRows_; i++) { int NumEntries = GetRow(i); double sum = 0.0; for (int j=0; j < NumEntries; j++) sum += fabs(Values_[j]); Local_NormInf = EPETRA_MAX(Local_NormInf, sum); } Comm().MaxAll(&Local_NormInf, &NormInf_, 1); UpdateFlops(NumGlobalNonzeros()); return(NormInf_); }
//============================================================================= long long Epetra_LongLongSerialDenseMatrix::InfNorm() { long long anorm = 0; long long* ptr = 0; // Loop across columns in inner loop. Most expensive memory access, but // requires no extra storage. for(int i = 0; i < M_; i++) { long long sum = 0; ptr = A_ + i; for(int j = 0; j < N_; j++) { const long long val = *ptr; sum += (val > 0 ? val : -val); // No std::abs(long long) on VS2005. ptr += LDA_; } anorm = EPETRA_MAX(anorm, sum); } return(anorm); }
//========================================================================= int Epetra_MapColoring::UnpackAndCombine(const Epetra_SrcDistObject & Source, int NumImportIDs, int * ImportLIDs, int LenImports, char * Imports, int & SizeOfPacket, Epetra_Distributor & Distor, Epetra_CombineMode CombineMode, const Epetra_OffsetIndex * Indexor ) { (void)Source; (void)LenImports; (void)Imports; (void)SizeOfPacket; (void)Distor; (void)Indexor; int j; if( CombineMode != Add && CombineMode != Zero && CombineMode != Insert && CombineMode != AbsMax ) EPETRA_CHK_ERR(-1); //Unsupported CombinedMode, will default to Zero if (NumImportIDs<=0) return(0); int * To = ElementColors_; int * ptr; // Unpack it... ptr = (int *) Imports; if (CombineMode==Add) for (j=0; j<NumImportIDs; j++) To[ImportLIDs[j]] += ptr[j]; // Add to existing value else if(CombineMode==Insert) for (j=0; j<NumImportIDs; j++) To[ImportLIDs[j]] = ptr[j]; else if(CombineMode==AbsMax) { for (j=0; j<NumImportIDs; j++) To[ImportLIDs[j]] = 0; for (j=0; j<NumImportIDs; j++) To[ImportLIDs[j]] = EPETRA_MAX( To[ImportLIDs[j]],std::abs(ptr[j])); } return(0); }
void Amesos_Mumps::CheckParameters() { #ifndef HAVE_AMESOS_MPI_C2F MaxProcs_ = -3; #endif // check parameters and fix values of MaxProcs_ int NumGlobalNonzeros, NumRows; NumGlobalNonzeros = Matrix().NumGlobalNonzeros(); NumRows = Matrix().NumGlobalRows(); // optimal value for MaxProcs == -1 int OptNumProcs1 = 1 + EPETRA_MAX(NumRows/10000, NumGlobalNonzeros/100000); OptNumProcs1 = EPETRA_MIN(Comm().NumProc(),OptNumProcs1); // optimal value for MaxProcs == -2 int OptNumProcs2 = (int)sqrt(1.0 * Comm().NumProc()); if (OptNumProcs2 < 1) OptNumProcs2 = 1; // fix the value of MaxProcs switch (MaxProcs_) { case -1: MaxProcs_ = OptNumProcs1; break; case -2: MaxProcs_ = OptNumProcs2; break; case -3: MaxProcs_ = Comm().NumProc(); break; } // few checks if (MaxProcs_ > Comm().NumProc()) MaxProcs_ = Comm().NumProc(); // if ( MaxProcs_ > 1 ) MaxProcs_ = Comm().NumProc(); // Bug - bogus kludge here - didn't work anyway }
//========================================================================= int Epetra_IntVector::UnpackAndCombine(const Epetra_SrcDistObject & Source, int NumImportIDs, int * ImportLIDs, int LenImports, char * Imports, int & SizeOfPacket, Epetra_Distributor & Distor, Epetra_CombineMode CombineMode, const Epetra_OffsetIndex * Indexor) { (void)Source; (void)LenImports; (void)SizeOfPacket; (void)Distor; (void)Indexor; int j, jj, k; if( CombineMode != Add && CombineMode != Zero && CombineMode != Insert && CombineMode != Average && CombineMode != AbsMax ) EPETRA_CHK_ERR(-1); //Unsupported CombinedMode, will default to Zero if (NumImportIDs<=0) return(0); int * To = Values_; int MaxElementSize = Map().MaxElementSize(); bool ConstantElementSize = Map().ConstantElementSize(); int * ToFirstPointInElementList = 0; int * ToElementSizeList = 0; if (!ConstantElementSize) { ToFirstPointInElementList = Map().FirstPointInElementList(); ToElementSizeList = Map().ElementSizeList(); } int * ptr; // Unpack it... ptr = (int *) Imports; // Point entry case if (MaxElementSize==1) { if (CombineMode==Add) for (j=0; j<NumImportIDs; j++) To[ImportLIDs[j]] += *ptr++; // Add to existing value else if(CombineMode==Insert) for (j=0; j<NumImportIDs; j++) To[ImportLIDs[j]] = *ptr++; else if(CombineMode==AbsMax) for (j=0; j<NumImportIDs; j++) { To[ImportLIDs[j]] = EPETRA_MAX( To[ImportLIDs[j]],std::abs(*ptr)); ptr++; } // Note: The following form of averaging is not a true average if more that one value is combined. // This might be an issue in the future, but we leave this way for now. 
else if(CombineMode==Average) for (j=0; j<NumImportIDs; j++) {To[ImportLIDs[j]] += *ptr++; To[ImportLIDs[j]] /= 2;} } // constant element size case else if (ConstantElementSize) { if (CombineMode==Add) { for (j=0; j<NumImportIDs; j++) { jj = MaxElementSize*ImportLIDs[j]; for (k=0; k<MaxElementSize; k++) To[jj+k] += *ptr++; // Add to existing value } } else if(CombineMode==Insert) { for (j=0; j<NumImportIDs; j++) { jj = MaxElementSize*ImportLIDs[j]; for (k=0; k<MaxElementSize; k++) To[jj+k] = *ptr++; } } else if(CombineMode==AbsMax) { for (j=0; j<NumImportIDs; j++) { jj = MaxElementSize*ImportLIDs[j]; for (k=0; k<MaxElementSize; k++) { To[jj+k] = EPETRA_MAX( To[jj+k], std::abs(*ptr)); ptr++; } } } // Note: The following form of averaging is not a true average if more that one value is combined. // This might be an issue in the future, but we leave this way for now. else if(CombineMode==Average) { for (j=0; j<NumImportIDs; j++) { jj = MaxElementSize*ImportLIDs[j]; for (k=0; k<MaxElementSize; k++) { To[jj+k] += *ptr++; To[jj+k] /= 2;} } } } // variable element size case else { int thisSizeOfPacket = MaxElementSize; if (CombineMode==Add) { for (j=0; j<NumImportIDs; j++) { ptr = (int *) Imports + j*thisSizeOfPacket; jj = ToFirstPointInElementList[ImportLIDs[j]]; int ElementSize = ToElementSizeList[ImportLIDs[j]]; for (k=0; k<ElementSize; k++) To[jj+k] += *ptr++; // Add to existing value } } else if(CombineMode==Insert){ for (j=0; j<NumImportIDs; j++) { ptr = (int *) Imports + j*thisSizeOfPacket; jj = ToFirstPointInElementList[ImportLIDs[j]]; int ElementSize = ToElementSizeList[ImportLIDs[j]]; for (k=0; k<ElementSize; k++) To[jj+k] = *ptr++; } } else if(CombineMode==AbsMax){ for (j=0; j<NumImportIDs; j++) { ptr = (int *) Imports + j*thisSizeOfPacket; jj = ToFirstPointInElementList[ImportLIDs[j]]; int ElementSize = ToElementSizeList[ImportLIDs[j]]; for (k=0; k<ElementSize; k++) { To[jj+k] = EPETRA_MAX( To[jj+k], std::abs(*ptr)); ptr++; } } } // Note: The following form of 
averaging is not a true average if more that one value is combined. // This might be an issue in the future, but we leave this way for now. else if(CombineMode==Average) { for (j=0; j<NumImportIDs; j++) { ptr = (int *) Imports + j*thisSizeOfPacket; jj = ToFirstPointInElementList[ImportLIDs[j]]; int ElementSize = ToElementSizeList[ImportLIDs[j]]; for (k=0; k<ElementSize; k++) { To[jj+k] += *ptr++; To[jj+k] /= 2;} } } } return(0); }
void TTrilinos_Util_CountMatrixMarket( const char *data_file, std::vector<int> &non_zeros, int_type &N_rows, int_type &nnz, const Epetra_Comm &comm) { FILE *in_file ; N_rows = 0 ; nnz = 0 ; int_type vecsize = non_zeros.size(); assert( vecsize == 0 ) ; const int BUFSIZE = 800 ; char buffer[BUFSIZE] ; bool first_off_diag = true ; bool upper ; if(comm.MyPID() == 0) { /* Get information about the array stored in the file specified in the */ /* argument list: */ in_file = fopen( data_file, "r"); if (in_file == NULL) { printf("Error: Cannot open file: %s\n",data_file); exit(1); } fgets( buffer, BUFSIZE, in_file ) ; bool symmetric = false ; std::string headerline1 = buffer; if ( headerline1.find("symmetric") != std::string::npos) symmetric = true; fgets( buffer, BUFSIZE, in_file ) ; while ( fgets( buffer, BUFSIZE, in_file ) ) { int_type i, j; float val ; if(sizeof(int) == sizeof(int_type)) sscanf( buffer, "%d %d %f", &i, &j, &val ) ; else if(sizeof(long long) == sizeof(int_type)) sscanf( buffer, "%lld %lld %f", &i, &j, &val ) ; else assert(false); int_type needvecsize = i; if (symmetric) needvecsize = EPETRA_MAX(i,j) ; if ( needvecsize >= vecsize ) { int_type oldvecsize = vecsize; vecsize += EPETRA_MAX((int_type) 1000,needvecsize-vecsize) ; non_zeros.resize(vecsize) ; for ( int_type i= oldvecsize; i < vecsize ; i++ ) non_zeros[i] = 0 ; } N_rows = EPETRA_MAX( N_rows, i ) ; if (symmetric) N_rows = EPETRA_MAX( N_rows, j ) ; non_zeros[i-1]++ ; nnz++; if ( symmetric && i != j ) { if ( first_off_diag ) { upper = j > i ; first_off_diag = false ; } if ( ( j > i && ! upper ) || ( i > j && upper ) ) { std::cout << "file not symmetric" << std::endl ; exit(1) ; } non_zeros[j-1]++ ; nnz++; } } fclose(in_file); } comm.Broadcast( &N_rows, 1, 0 ); comm.Broadcast( &nnz, 1, 0 ); return; }
//========================================================================= int Epetra_IntVector::CopyAndPermute(const Epetra_SrcDistObject& Source, int NumSameIDs, int NumPermuteIDs, int * PermuteToLIDs, int *PermuteFromLIDs, const Epetra_OffsetIndex * Indexor, Epetra_CombineMode CombineMode) { (void)Indexor; const Epetra_IntVector & A = dynamic_cast<const Epetra_IntVector &>(Source); if( CombineMode != Add && CombineMode != Zero && CombineMode != Insert && CombineMode != Average && CombineMode != AbsMax ) EPETRA_CHK_ERR(-1); //Unsupported CombinedMode, will default to Zero int * From; A.ExtractView(&From); int *To = Values_; int * ToFirstPointInElementList = 0; int * FromFirstPointInElementList = 0; int * FromElementSizeList = 0; int MaxElementSize = Map().MaxElementSize(); bool ConstantElementSize = Map().ConstantElementSize(); if (!ConstantElementSize) { ToFirstPointInElementList = Map().FirstPointInElementList(); FromFirstPointInElementList = A.Map().FirstPointInElementList(); FromElementSizeList = A.Map().ElementSizeList(); } int j, jj, jjj, k; int NumSameEntries; bool Case1 = false; bool Case2 = false; // bool Case3 = false; if (MaxElementSize==1) { Case1 = true; NumSameEntries = NumSameIDs; } else if (ConstantElementSize) { Case2 = true; NumSameEntries = NumSameIDs * MaxElementSize; } else { // Case3 = true; NumSameEntries = FromFirstPointInElementList[NumSameIDs]; } // Short circuit for the case where the source and target vector is the same. if (To==From) NumSameEntries = 0; // Do copy first if (NumSameIDs>0) if (To!=From) { if (CombineMode==Add) for (j=0; j<NumSameEntries; j++) To[j] += From[j]; // Add to existing value else if(CombineMode==Insert) for (j=0; j<NumSameEntries; j++) To[j] = From[j]; else if(CombineMode==AbsMax) for (j=0; j<NumSameEntries; j++) { To[j] = EPETRA_MAX( To[j],From[j]); } // Note: The following form of averaging is not a true average if more that one value is combined. 
// This might be an issue in the future, but we leave this way for now. else if(CombineMode==Average) for (j=0; j<NumSameEntries; j++) {To[j] += From[j]; To[j] /= 2;} } // Do local permutation next if (NumPermuteIDs>0) { // Point entry case if (Case1) { if (CombineMode==Add) for (j=0; j<NumPermuteIDs; j++) To[PermuteToLIDs[j]] += From[PermuteFromLIDs[j]]; // Add to existing value else if(CombineMode==Insert) for (j=0; j<NumPermuteIDs; j++) To[PermuteToLIDs[j]] = From[PermuteFromLIDs[j]]; else if(CombineMode==AbsMax) for (j=0; j<NumPermuteIDs; j++) { To[PermuteToLIDs[j]] = EPETRA_MAX( To[PermuteToLIDs[j]],From[PermuteFromLIDs[j]]); } // Note: The following form of averaging is not a true average if more that one value is combined. // This might be an issue in the future, but we leave this way for now. else if(CombineMode==Average) for (j=0; j<NumPermuteIDs; j++) {To[PermuteToLIDs[j]] += From[PermuteFromLIDs[j]]; To[PermuteToLIDs[j]] /= 2;} } // constant element size case else if (Case2) { if (CombineMode==Add) for (j=0; j<NumPermuteIDs; j++) { jj = MaxElementSize*PermuteToLIDs[j]; jjj = MaxElementSize*PermuteFromLIDs[j]; for (k=0; k<MaxElementSize; k++) To[jj+k] += From[jjj+k]; } else if(CombineMode==Insert) for (j=0; j<NumPermuteIDs; j++) { jj = MaxElementSize*PermuteToLIDs[j]; jjj = MaxElementSize*PermuteFromLIDs[j]; for (k=0; k<MaxElementSize; k++) To[jj+k] = From[jjj+k]; } else if(CombineMode==AbsMax) for (j=0; j<NumPermuteIDs; j++) { jj = MaxElementSize*PermuteToLIDs[j]; jjj = MaxElementSize*PermuteFromLIDs[j]; for (k=0; k<MaxElementSize; k++) To[jj+k] = EPETRA_MAX( To[jj+k],From[jjj+k]); } // Note: The following form of averaging is not a true average if more that one value is combined. // This might be an issue in the future, but we leave this way for now. 
else if(CombineMode==Average) for (j=0; j<NumPermuteIDs; j++) { jj = MaxElementSize*PermuteToLIDs[j]; jjj = MaxElementSize*PermuteFromLIDs[j]; for (k=0; k<MaxElementSize; k++) {To[jj+k] += From[jjj+k]; To[jj+k] /= 2;} } } // variable element size case else { if (CombineMode==Add) for (j=0; j<NumPermuteIDs; j++) { jj = ToFirstPointInElementList[PermuteToLIDs[j]]; jjj = FromFirstPointInElementList[PermuteFromLIDs[j]]; int ElementSize = FromElementSizeList[PermuteFromLIDs[j]]; for (k=0; k<ElementSize; k++) To[jj+k] += From[jjj+k]; } else if(CombineMode==Insert) for (j=0; j<NumPermuteIDs; j++) { jj = ToFirstPointInElementList[PermuteToLIDs[j]]; jjj = FromFirstPointInElementList[PermuteFromLIDs[j]]; int ElementSize = FromElementSizeList[PermuteFromLIDs[j]]; for (k=0; k<ElementSize; k++) To[jj+k] = From[jjj+k]; } else if(CombineMode==AbsMax) for (j=0; j<NumPermuteIDs; j++) { jj = ToFirstPointInElementList[PermuteToLIDs[j]]; jjj = FromFirstPointInElementList[PermuteFromLIDs[j]]; int ElementSize = FromElementSizeList[PermuteFromLIDs[j]]; for (k=0; k<ElementSize; k++) To[jj+k] = EPETRA_MAX( To[jj+k],From[jjj+k]); } else if(CombineMode==Average) for (j=0; j<NumPermuteIDs; j++) { jj = ToFirstPointInElementList[PermuteToLIDs[j]]; jjj = FromFirstPointInElementList[PermuteFromLIDs[j]]; int ElementSize = FromElementSizeList[PermuteFromLIDs[j]]; for (k=0; k<ElementSize; k++) {To[jj+k] += From[jjj+k]; To[jj+k] /= 2;} } } } return(0); }
int TestOneMatrix( std::string HBname, std::string MMname, std::string TRIname, Epetra_Comm &Comm, bool verbose ) { if ( Comm.MyPID() != 0 ) verbose = false ; Epetra_Map * readMap = 0; Epetra_CrsMatrix * HbA = 0; Epetra_Vector * Hbx = 0; Epetra_Vector * Hbb = 0; Epetra_Vector * Hbxexact = 0; Epetra_CrsMatrix * TriplesA = 0; Epetra_Vector * Triplesx = 0; Epetra_Vector * Triplesb = 0; Epetra_Vector * Triplesxexact = 0; Epetra_CrsMatrix * MatrixMarketA = 0; Epetra_Vector * MatrixMarketx = 0; Epetra_Vector * MatrixMarketb = 0; Epetra_Vector * MatrixMarketxexact = 0; int TRI_Size = TRIname.size() ; std::string LastFiveBytes = TRIname.substr( EPETRA_MAX(0,TRI_Size-5), TRI_Size ); if ( LastFiveBytes == ".TimD" ) { // Call routine to read in a file with a Tim Davis header and zero-based indexing EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra64( &TRIname[0], false, Comm, readMap, TriplesA, Triplesx, Triplesb, Triplesxexact, false, true, true ) ); delete readMap; } else { if ( LastFiveBytes == ".triU" ) { // Call routine to read in unsymmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra64( &TRIname[0], false, Comm, readMap, TriplesA, Triplesx, Triplesb, Triplesxexact, false, false ) ); delete readMap; } else { if ( LastFiveBytes == ".triS" ) { // Call routine to read in symmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra64( &TRIname[0], true, Comm, readMap, TriplesA, Triplesx, Triplesb, Triplesxexact, false, false ) ); delete readMap; } else { assert( false ) ; } } } EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra64( &MMname[0], Comm, readMap, MatrixMarketA, MatrixMarketx, MatrixMarketb, MatrixMarketxexact) ); delete readMap; // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra64( &HBname[0], Comm, readMap, HbA, Hbx, Hbb, Hbxexact) ; #if 0 std::cout << " HbA " ; HbA->Print( std::cout ) ; std::cout << std::endl ; std::cout << " MatrixMarketA " ; MatrixMarketA->Print( std::cout ) ; std::cout << std::endl ; 
std::cout << " TriplesA " ; TriplesA->Print( std::cout ) ; std::cout << std::endl ; #endif int TripleErr = 0 ; int MMerr = 0 ; for ( int i = 0 ; i < 10 ; i++ ) { double resid_Hb_Triples; double resid_Hb_Matrix_Market; double norm_A ; Hbx->Random(); // // Set the output vectors to different values: // Triplesb->PutScalar(1.1); Hbb->PutScalar(1.2); MatrixMarketb->PutScalar(1.3); HbA->Multiply( false, *Hbx, *Hbb ); norm_A = HbA->NormOne( ) ; TriplesA->Multiply( false, *Hbx, *Triplesb ); Triplesb->Update( 1.0, *Hbb, -1.0 ) ; MatrixMarketA->Multiply( false, *Hbx, *MatrixMarketb ); MatrixMarketb->Update( 1.0, *Hbb, -1.0 ) ; Triplesb->Norm1( &resid_Hb_Triples ) ; MatrixMarketb->Norm1( &resid_Hb_Matrix_Market ) ; TripleErr += ( resid_Hb_Triples > 1e-11 * norm_A ) ; MMerr += ( resid_Hb_Matrix_Market > 1e-11 * norm_A ) ; if ( verbose && resid_Hb_Triples > 1e-11 * norm_A ) std::cout << " resid_Hb_Triples = " << resid_Hb_Triples << " norm_A = " << norm_A << std::endl ; if ( verbose && resid_Hb_Matrix_Market > 1e-11 * norm_A ) std::cout << " resid_Hb_Matrix_Market = " << resid_Hb_Matrix_Market << " norm_A = " << norm_A << std::endl ; } if ( verbose ) { if ( TripleErr ) std::cout << " Error in reading " << HBname << " or " << TRIname << std::endl ; if ( MMerr ) std::cout << " Error in reading " << HBname << " or " << MMname << std::endl ; } delete HbA; delete Hbx; delete Hbb; delete Hbxexact; delete TriplesA; delete Triplesx; delete Triplesb; delete Triplesxexact; delete MatrixMarketA; delete MatrixMarketx; delete MatrixMarketb; delete MatrixMarketxexact; delete readMap; return TripleErr+MMerr ; }
int Amesos_Scalapack::Solve() { if( debug_ == 1 ) std::cout << "Entering `Solve()'" << std::endl; NumSolve_++; Epetra_MultiVector *vecX = Problem_->GetLHS() ; Epetra_MultiVector *vecB = Problem_->GetRHS() ; // // Compute the number of right hands sides // (and check that X and B have the same shape) // int nrhs; if ( vecX == 0 ) { nrhs = 0 ; EPETRA_CHK_ERR( vecB != 0 ) ; } else { nrhs = vecX->NumVectors() ; EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ; } Epetra_MultiVector *ScalapackB =0; Epetra_MultiVector *ScalapackX =0; // // Extract Scalapack versions of X and B // double *ScalapackXvalues ; Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator()); Time_->ResetStartTime(); // track time to broadcast vectors // // Copy B to the scalapack version of B // const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap(); Epetra_MultiVector *ScalapackXextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; Epetra_MultiVector *ScalapackBextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; Epetra_Import ImportToScalapack( *VectorMap_, OriginalMap ); ScalapackBextract->Import( *vecB, ImportToScalapack, Insert ) ; ScalapackB = ScalapackBextract ; ScalapackX = ScalapackXextract ; VecTime_ += Time_->ElapsedTime(); // // Call SCALAPACKs PDGETRS to perform the solve // int DescX[10]; ScalapackX->Scale(1.0, *ScalapackB) ; int ScalapackXlda ; Time_->ResetStartTime(); // tract time to solve // // Setup DescX // if( nrhs > nb_ ) { EPETRA_CHK_ERR( -2 ); } int Ierr[1] ; Ierr[0] = 0 ; const int zero = 0 ; const int one = 1 ; if ( iam_ < nprow_ * npcol_ ) { assert( ScalapackX->ExtractView( &ScalapackXvalues, &ScalapackXlda ) == 0 ) ; if ( false ) std::cout << "Amesos_Scalapack.cpp: " << __LINE__ << " ScalapackXlda = " << ScalapackXlda << " lda_ = " << lda_ << " nprow_ = " << nprow_ << " npcol_ = " << npcol_ << " myprow_ = " << myprow_ << " mypcol_ = " << mypcol_ << " iam_ = " << iam_ << std::endl ; if ( TwoD_distribution_ ) assert( mypcol_ >0 || 
EPETRA_MAX(ScalapackXlda,1) == lda_ ) ; DESCINIT_F77(DescX, &NumGlobalElements_, &nrhs, &nb_, &nb_, &zero, &zero, &ictxt_, &lda_, Ierr ) ; assert( Ierr[0] == 0 ) ; // // For the 1D data distribution, we factor the transposed // matrix, hence we must invert the sense of the transposition // char trans = 'N'; if ( TwoD_distribution_ ) { if ( UseTranspose() ) trans = 'T' ; } else { if ( ! UseTranspose() ) trans = 'T' ; } if ( nprow_ * npcol_ == 1 ) { DGETRS_F77(&trans, &NumGlobalElements_, &nrhs, &DenseA_[0], &lda_, &Ipiv_[0], ScalapackXvalues, &lda_, Ierr ) ; } else { PDGETRS_F77(&trans, &NumGlobalElements_, &nrhs, &DenseA_[0], &one, &one, DescA_, &Ipiv_[0], ScalapackXvalues, &one, &one, DescX, Ierr ) ; } } SolTime_ += Time_->ElapsedTime(); Time_->ResetStartTime(); // track time to broadcast vectors // // Copy X back to the original vector // Epetra_Import ImportFromScalapack( OriginalMap, *VectorMap_ ); vecX->Import( *ScalapackX, ImportFromScalapack, Insert ) ; delete ScalapackBextract ; delete ScalapackXextract ; VecTime_ += Time_->ElapsedTime(); // All processes should return the same error code if ( nprow_ * npcol_ < Comm().NumProc() ) Comm().Broadcast( Ierr, 1, 0 ) ; // MS // compute vector norms if( ComputeVectorNorms_ == true || verbose_ == 2 ) { double NormLHS, NormRHS; for( int i=0 ; i<nrhs ; ++i ) { assert((*vecX)(i)->Norm2(&NormLHS)==0); assert((*vecB)(i)->Norm2(&NormRHS)==0); if( verbose_ && Comm().MyPID() == 0 ) { std::cout << "Amesos_Scalapack : vector " << i << ", ||x|| = " << NormLHS << ", ||b|| = " << NormRHS << std::endl; } } } // MS // compute true residual if( ComputeTrueResidual_ == true || verbose_ == 2 ) { double Norm; Epetra_MultiVector Ax(vecB->Map(),nrhs); for( int i=0 ; i<nrhs ; ++i ) { (Problem_->GetMatrix()->Multiply(UseTranspose(), *((*vecX)(i)), Ax)); (Ax.Update(1.0, *((*vecB)(i)), -1.0)); (Ax.Norm2(&Norm)); if( verbose_ && Comm().MyPID() == 0 ) { std::cout << "Amesos_Scalapack : vector " << i << ", ||Ax - b|| = " << Norm << std::endl; 
} } } return Ierr[0]; }
int Amesos_Scalapack::ConvertToScalapack(){ // // Convert matrix and vector to the form that Scalapack expects // ScaLAPACK accepts the matrix to be in any 2D block-cyclic form // // Amesos_ScaLAPACK uses one of two 2D data distributions: // a simple 1D non-cyclic data distribution with npcol= 1, or a // full 2D block-cyclic data distribution. // // 2D data distribvution: // Because the Epetra export operation is oriented toward a 1D // data distribution in which each row is entirely stored on // a single process, we create two intermediate matrices: FatIn and // FatOut, both of which have dimension: // NumGlobalElements * nprow by NumGlobalElements // This allows each row of FatOut to be owned by a single process. // The larger dimension does not significantly increase the // storage requirements and allows the export operation to be // efficient. // // 1D data distribution: // We have chosen the simplest 2D block-cyclic form, a 1D blocked (not-cyclic) // data distribution, for the matrix A. // We use the same distribution for the multivectors X and B. However, // except for very large numbers of right hand sides, this places all of X and B // on process 0, making it effectively a serial matrix. // // For now, we simply treat X and B as serial matrices (as viewed from epetra) // though ScaLAPACK treats them as distributed matrices. 
// if( debug_ == 1 ) std::cout << "Entering `ConvertToScalapack()'" << std::endl; Time_->ResetStartTime(); if ( iam_ < nprow_ * npcol_ ) { if ( TwoD_distribution_ ) { DenseA_.resize( NumOurRows_ * NumOurColumns_ ); for ( int i = 0 ; i < (int)DenseA_.size() ; i++ ) DenseA_[i] = 0 ; assert( lda_ == EPETRA_MAX(1,NumOurRows_) ) ; assert( DescA_[8] == lda_ ) ; int NzThisRow ; int MyRow; double *RowValues; int *ColIndices; int MaxNumEntries = FatOut_->MaxNumEntries(); std::vector<int>ColIndicesV(MaxNumEntries); std::vector<double>RowValuesV(MaxNumEntries); int NumMyElements = FatOut_->NumMyRows() ; for ( MyRow = 0; MyRow < NumMyElements ; MyRow++ ) { EPETRA_CHK_ERR( FatOut_-> ExtractMyRowView( MyRow, NzThisRow, RowValues, ColIndices ) != 0 ) ; // // The following eight lines are just a sanity check on MyRow: // int MyGlobalRow = FatOut_->GRID( MyRow ); assert( MyGlobalRow%npcol_ == mypcol_ ) ; // I should only own rows belonging to my processor column int MyTrueRow = MyGlobalRow/npcol_ ; // This is the original row int UniformRows = ( MyTrueRow / ( nprow_ * nb_ ) ) * nb_ ; int AllExcessRows = MyTrueRow - UniformRows * nprow_ ; int OurExcessRows = AllExcessRows - ( myprow_ * nb_ ) ; if ( MyRow != UniformRows + OurExcessRows ) { std::cout << " iam _ = " << iam_ << " MyGlobalRow = " << MyGlobalRow << " MyTrueRow = " << MyTrueRow << " UniformRows = " << UniformRows << " AllExcessRows = " << AllExcessRows << " OurExcessRows = " << OurExcessRows << " MyRow = " << MyRow << std::endl ; } assert( OurExcessRows >= 0 && OurExcessRows < nb_ ); assert( MyRow == UniformRows + OurExcessRows ) ; for ( int j = 0; j < NzThisRow; j++ ) { assert( FatOut_->RowMatrixColMap().GID( ColIndices[j] ) == FatOut_->GCID( ColIndices[j] ) ); int MyGlobalCol = FatOut_->GCID( ColIndices[j] ); assert( (MyGlobalCol/nb_)%npcol_ == mypcol_ ) ; int UniformCols = ( MyGlobalCol / ( npcol_ * nb_ ) ) * nb_ ; int AllExcessCols = MyGlobalCol - UniformCols * npcol_ ; int OurExcessCols = AllExcessCols - ( mypcol_ * 
nb_ ) ; assert( OurExcessCols >= 0 && OurExcessCols < nb_ ); int MyCol = UniformCols + OurExcessCols ; DenseA_[ MyCol * lda_ + MyRow ] = RowValues[j] ; } } } else { int NumMyElements = ScaLAPACK1DMatrix_->NumMyRows() ; assert( NumGlobalElements_ ==ScaLAPACK1DMatrix_->NumGlobalRows()); assert( NumGlobalElements_ ==ScaLAPACK1DMatrix_->NumGlobalCols()); DenseA_.resize( NumGlobalElements_ * NumMyElements ) ; for ( int i = 0 ; i < (int)DenseA_.size() ; i++ ) DenseA_[i] = 0 ; int NzThisRow ; int MyRow; double *RowValues; int *ColIndices; int MaxNumEntries = ScaLAPACK1DMatrix_->MaxNumEntries(); assert( DescA_[8] == lda_ ) ; // Double check Lda std::vector<int>ColIndicesV(MaxNumEntries); std::vector<double>RowValuesV(MaxNumEntries); for ( MyRow = 0; MyRow < NumMyElements ; MyRow++ ) { EPETRA_CHK_ERR( ScaLAPACK1DMatrix_-> ExtractMyRowView( MyRow, NzThisRow, RowValues, ColIndices ) != 0 ) ; for ( int j = 0; j < NzThisRow; j++ ) { DenseA_[ ( ScaLAPACK1DMatrix_->RowMatrixColMap().GID( ColIndices[j] ) ) + MyRow * NumGlobalElements_ ] = RowValues[j] ; } } // // Create a map to allow us to redistribute the vectors X and B // Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator()); const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ; assert( NumGlobalElements_ == OriginalMap.NumGlobalElements() ) ; int NumMyElements_ = 0 ; if (iam_==0) NumMyElements_ = NumGlobalElements_; if (VectorMap_) { delete VectorMap_ ; VectorMap_ = 0 ; } VectorMap_ = new Epetra_Map( NumGlobalElements_, NumMyElements_, 0, Comm() ); } } ConTime_ += Time_->ElapsedTime(); return 0; }
int Amesos_Scalapack::RedistributeA( ) { if( debug_ == 1 ) std::cout << "Entering `RedistributeA()'" << std::endl; Time_->ResetStartTime(); Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator()); EPETRA_CHK_ERR( RowMatrixA == 0 ) ; const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ; int NumberOfProcesses = Comm().NumProc() ; // // Compute a uniform distribution as ScaLAPACK would want it // MyFirstElement - The first element which this processor would have // NumExpectedElemetns - The number of elements which this processor would have // int NumRows_ = RowMatrixA->NumGlobalRows() ; int NumColumns_ = RowMatrixA->NumGlobalCols() ; if ( MaxProcesses_ > 0 ) { NumberOfProcesses = EPETRA_MIN( NumberOfProcesses, MaxProcesses_ ) ; } else { int ProcessNumHeuristic = (1+NumRows_/200)*(1+NumRows_/200); NumberOfProcesses = EPETRA_MIN( NumberOfProcesses, ProcessNumHeuristic ); } if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:171" << std::endl; // // Create the ScaLAPACK data distribution. 
// The TwoD data distribution is created in a completely different // manner and is not transposed (whereas the SaLAPACK 1D data // distribution was transposed) // if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:163" << std::endl; Comm().Barrier(); if ( TwoD_distribution_ ) { if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:166" << std::endl; Comm().Barrier(); npcol_ = EPETRA_MIN( NumberOfProcesses, EPETRA_MAX ( 2, (int) sqrt( NumberOfProcesses * 0.5 ) ) ) ; nprow_ = NumberOfProcesses / npcol_ ; // // Create the map for FatA - our first intermediate matrix // int NumMyElements = RowMatrixA->RowMatrixRowMap().NumMyElements() ; std::vector<int> MyGlobalElements( NumMyElements ); RowMatrixA->RowMatrixRowMap().MyGlobalElements( &MyGlobalElements[0] ) ; int NumMyColumns = RowMatrixA->RowMatrixColMap().NumMyElements() ; std::vector<int> MyGlobalColumns( NumMyColumns ); RowMatrixA->RowMatrixColMap().MyGlobalElements( &MyGlobalColumns[0] ) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:194" << std::endl; std::vector<int> MyFatElements( NumMyElements * npcol_ ); for( int LocalRow=0; LocalRow<NumMyElements; LocalRow++ ) { for (int i = 0 ; i < npcol_; i++ ){ MyFatElements[LocalRow*npcol_+i] = MyGlobalElements[LocalRow]*npcol_+i; } } Epetra_Map FatInMap( npcol_*NumRows_, NumMyElements*npcol_, &MyFatElements[0], 0, Comm() ); // // Create FatIn, our first intermediate matrix // Epetra_CrsMatrix FatIn( Copy, FatInMap, 0 ); std::vector<std::vector<int> > FatColumnIndices(npcol_,std::vector<int>(1)); std::vector<std::vector<double> > FatMatrixValues(npcol_,std::vector<double>(1)); std::vector<int> FatRowPtrs(npcol_); // A FatRowPtrs[i] = the number // of entries in local row LocalRow*npcol_ + i if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:219" << std::endl; // mypcol_ = iam_%npcol_; myprow_ = (iam_/npcol_)%nprow_; if ( iam_ >= nprow_ * npcol_ ) { myprow_ = nprow_; mypcol_ = 
npcol_; } // Each row is split into npcol_ rows, with each of the // new rows containing only those elements belonging to // its process column (in the ScaLAPACK 2D process grid) // int MaxNumIndices = RowMatrixA->MaxNumEntries(); int NumIndices; std::vector<int> ColumnIndices(MaxNumIndices); std::vector<double> MatrixValues(MaxNumIndices); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:232 NumMyElements = " << NumMyElements << std::endl; nb_ = grid_nb_; for( int LocalRow=0; LocalRow<NumMyElements; ++LocalRow ) { RowMatrixA->ExtractMyRowCopy( LocalRow, MaxNumIndices, NumIndices, &MatrixValues[0], &ColumnIndices[0] ); for (int i=0; i<npcol_; i++ ) FatRowPtrs[i] = 0 ; // // Deal the individual matrix entries out to the row owned by // the process to which this matrix entry will belong. // for( int i=0 ; i<NumIndices ; ++i ) { int GlobalCol = MyGlobalColumns[ ColumnIndices[i] ]; int pcol_i = pcolnum( GlobalCol, nb_, npcol_ ) ; if ( FatRowPtrs[ pcol_i ]+1 >= FatColumnIndices[ pcol_i ].size() ) { FatColumnIndices[ pcol_i ]. resize( 2 * FatRowPtrs[ pcol_i ]+1 ); FatMatrixValues[ pcol_i ]. 
resize( 2 * FatRowPtrs[ pcol_i ]+1 ); } FatColumnIndices[pcol_i][FatRowPtrs[pcol_i]] = GlobalCol ; FatMatrixValues[pcol_i][FatRowPtrs[pcol_i]] = MatrixValues[i]; FatRowPtrs[ pcol_i ]++; } // // Insert each of the npcol_ rows individually // for ( int pcol_i = 0 ; pcol_i < npcol_ ; pcol_i++ ) { FatIn.InsertGlobalValues( MyGlobalElements[LocalRow]*npcol_ + pcol_i, FatRowPtrs[ pcol_i ], &FatMatrixValues[ pcol_i ][0], &FatColumnIndices[ pcol_i ][0] ); } } FatIn.FillComplete( false ); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:260" << std::endl; if ( debug_ == 1) std::cout << "Amesos_Scalapack.cpp:265B" << " iam_ = " << iam_ << " nb_ = " << nb_ << " nprow_ = " << nprow_ << " npcol_ = " << npcol_ << std::endl; // // Compute the map for our second intermediate matrix, FatOut // // Compute directly int UniformRows = ( NumRows_ / ( nprow_ * nb_ ) ) * nb_ ; int AllExcessRows = NumRows_ - UniformRows * nprow_ ; int OurExcessRows = EPETRA_MIN( nb_, AllExcessRows - ( myprow_ * nb_ ) ) ; OurExcessRows = EPETRA_MAX( 0, OurExcessRows ); NumOurRows_ = UniformRows + OurExcessRows ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:277" << std::endl; int UniformColumns = ( NumColumns_ / ( npcol_ * nb_ ) ) * nb_ ; int AllExcessColumns = NumColumns_ - UniformColumns * npcol_ ; int OurExcessColumns = EPETRA_MIN( nb_, AllExcessColumns - ( mypcol_ * nb_ ) ) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:281" << std::endl; OurExcessColumns = EPETRA_MAX( 0, OurExcessColumns ); NumOurColumns_ = UniformColumns + OurExcessColumns ; if ( iam_ >= nprow_ * npcol_ ) { UniformRows = 0; NumOurRows_ = 0; NumOurColumns_ = 0; } Comm().Barrier(); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:295" << std::endl; #if 0 // Compute using ScaLAPACK's numroc routine, assert agreement int izero = 0; // All matrices start at process 0 int NumRocSays = numroc_( &NumRows_, &nb_, &myprow_, &izero, &nprow_ 
); assert( NumOurRows_ == NumRocSays ); #endif // // Compute the rows which this process row owns in the ScaLAPACK 2D // process grid. // std::vector<int> AllOurRows(NumOurRows_); int RowIndex = 0 ; int BlockRow = 0 ; for ( ; BlockRow < UniformRows / nb_ ; BlockRow++ ) { for ( int RowOffset = 0; RowOffset < nb_ ; RowOffset++ ) { AllOurRows[RowIndex++] = BlockRow*nb_*nprow_ + myprow_*nb_ + RowOffset ; } } Comm().Barrier(); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:315" << std::endl; assert ( BlockRow == UniformRows / nb_ ) ; for ( int RowOffset = 0; RowOffset < OurExcessRows ; RowOffset++ ) { AllOurRows[RowIndex++] = BlockRow*nb_*nprow_ + myprow_*nb_ + RowOffset ; } assert( RowIndex == NumOurRows_ ); // // Distribute those rows amongst all the processes in that process row // This is an artificial distribution with the following properties: // 1) It is a 1D data distribution (each row belogs entirely to // a single process // 2) All data which will eventually belong to a given process row, // is entirely contained within the processes in that row. 
// Comm().Barrier(); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:312" << std::endl; // // Compute MyRows directly // std::vector<int>MyRows(NumOurRows_); RowIndex = 0 ; BlockRow = 0 ; for ( ; BlockRow < UniformRows / nb_ ; BlockRow++ ) { for ( int RowOffset = 0; RowOffset < nb_ ; RowOffset++ ) { MyRows[RowIndex++] = BlockRow*nb_*nprow_*npcol_ + myprow_*nb_*npcol_ + RowOffset*npcol_ + mypcol_ ; } } Comm().Barrier(); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:326" << std::endl; assert ( BlockRow == UniformRows / nb_ ) ; for ( int RowOffset = 0; RowOffset < OurExcessRows ; RowOffset++ ) { MyRows[RowIndex++] = BlockRow*nb_*nprow_*npcol_ + myprow_*nb_*npcol_ + RowOffset*npcol_ + mypcol_ ; } Comm().Barrier(); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:334" << std::endl; Comm().Barrier(); for (int i=0; i < NumOurRows_; i++ ) { assert( MyRows[i] == AllOurRows[i]*npcol_+mypcol_ ); } Comm().Barrier(); if ( debug_ == 1) std::cout << "Amesos_Scalapack.cpp:340" << " iam_ = " << iam_ << " myprow_ = " << myprow_ << " mypcol_ = " << mypcol_ << " NumRows_ = " << NumRows_ << " NumOurRows_ = " << NumOurRows_ << std::endl; Comm().Barrier(); Epetra_Map FatOutMap( npcol_*NumRows_, NumOurRows_, &MyRows[0], 0, Comm() ); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:344" << std::endl; Comm().Barrier(); if ( FatOut_ ) delete FatOut_ ; FatOut_ = new Epetra_CrsMatrix( Copy, FatOutMap, 0 ) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:348" << std::endl; Epetra_Export ExportToFatOut( FatInMap, FatOutMap ) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:360" << std::endl; FatOut_->Export( FatIn, ExportToFatOut, Add ); FatOut_->FillComplete( false ); // // Create a map to allow us to redistribute the vectors X and B // Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator()); const 
Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ; assert( NumGlobalElements_ == OriginalMap.NumGlobalElements() ) ; int NumMyVecElements = 0 ; if ( mypcol_ == 0 ) { NumMyVecElements = NumOurRows_; } if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:385" << std::endl; if (VectorMap_) { delete VectorMap_ ; VectorMap_ = 0 ; } VectorMap_ = new Epetra_Map( NumGlobalElements_, NumMyVecElements, &AllOurRows[0], 0, Comm() ); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << " Amesos_Scalapack.cpp:393 debug_ = " << debug_ << std::endl; } else { nprow_ = 1 ; npcol_ = NumberOfProcesses / nprow_ ; assert ( nprow_ * npcol_ == NumberOfProcesses ) ; m_per_p_ = ( NumRows_ + NumberOfProcesses - 1 ) / NumberOfProcesses ; int MyFirstElement = EPETRA_MIN( iam_ * m_per_p_, NumRows_ ) ; int MyFirstNonElement = EPETRA_MIN( (iam_+1) * m_per_p_, NumRows_ ) ; int NumExpectedElements = MyFirstNonElement - MyFirstElement ; assert( NumRows_ == RowMatrixA->NumGlobalRows() ) ; if ( ScaLAPACK1DMap_ ) delete( ScaLAPACK1DMap_ ) ; ScaLAPACK1DMap_ = new Epetra_Map( NumRows_, NumExpectedElements, 0, Comm() ); if ( ScaLAPACK1DMatrix_ ) delete( ScaLAPACK1DMatrix_ ) ; ScaLAPACK1DMatrix_ = new Epetra_CrsMatrix(Copy, *ScaLAPACK1DMap_, 0); Epetra_Export ExportToScaLAPACK1D_( OriginalMap, *ScaLAPACK1DMap_); ScaLAPACK1DMatrix_->Export( *RowMatrixA, ExportToScaLAPACK1D_, Add ); ScaLAPACK1DMatrix_->FillComplete( false ) ; } if ( debug_ == 1) std::cout << "iam_ = " << iam_ << " Amesos_Scalapack.cpp:417 debug_ = " << debug_ << std::endl; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:402" << " nprow_ = " << nprow_ << " npcol_ = " << npcol_ << std::endl ; int info; const int zero = 0 ; if ( ictxt_ == -1313 ) { ictxt_ = 0 ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:408" << std::endl; SL_INIT_F77(&ictxt_, &nprow_, &npcol_) ; } if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:410A" << std::endl; int 
nprow; int npcol; int myrow; int mycol; BLACS_GRIDINFO_F77(&ictxt_, &nprow, &npcol, &myrow, &mycol) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "iam_ = " << iam_ << " Amesos_Scalapack.cpp:410" << std::endl; if ( iam_ < nprow_ * npcol_ ) { assert( nprow == nprow_ ) ; if ( npcol != npcol_ ) std::cout << "Amesos_Scalapack.cpp:430 npcol = " << npcol << " npcol_ = " << npcol_ << std::endl ; assert( npcol == npcol_ ) ; if ( TwoD_distribution_ ) { assert( myrow == myprow_ ) ; assert( mycol == mypcol_ ) ; lda_ = EPETRA_MAX(1,NumOurRows_) ; } else { assert( myrow == 0 ) ; assert( mycol == iam_ ) ; nb_ = m_per_p_; lda_ = EPETRA_MAX(1,NumGlobalElements_); } if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp: " << __LINE__ << " TwoD_distribution_ = " << TwoD_distribution_ << " NumGlobalElements_ = " << NumGlobalElements_ << " debug_ = " << debug_ << " nb_ = " << nb_ << " lda_ = " << lda_ << " nprow_ = " << nprow_ << " npcol_ = " << npcol_ << " myprow_ = " << myprow_ << " mypcol_ = " << mypcol_ << " iam_ = " << iam_ << std::endl ; AMESOS_PRINT( myprow_ ); DESCINIT_F77(DescA_, &NumGlobalElements_, &NumGlobalElements_, &nb_, &nb_, &zero, &zero, &ictxt_, &lda_, &info) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:441" << std::endl; assert( info == 0 ) ; } else { DescA_[0] = -13; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:458 nprow = " << nprow << std::endl; assert( nprow == -1 ) ; } if ( debug_ == 1) std::cout << "Amesos_Scalapack.cpp:446" << std::endl; MatTime_ += Time_->ElapsedTime(); return 0; }
//============================================================================= int Amesos_Dscpack::PerformSymbolicFactorization() { ResetTimer(0); ResetTimer(1); MyPID_ = Comm().MyPID(); NumProcs_ = Comm().NumProc(); Epetra_RowMatrix *RowMatrixA = Problem_->GetMatrix(); if (RowMatrixA == 0) AMESOS_CHK_ERR(-1); const Epetra_Map& OriginalMap = RowMatrixA->RowMatrixRowMap() ; const Epetra_MpiComm& comm1 = dynamic_cast<const Epetra_MpiComm &> (Comm()); int numrows = RowMatrixA->NumGlobalRows(); int numentries = RowMatrixA->NumGlobalNonzeros(); Teuchos::RCP<Epetra_CrsGraph> Graph; Epetra_CrsMatrix* CastCrsMatrixA = dynamic_cast<Epetra_CrsMatrix*>(RowMatrixA); if (CastCrsMatrixA) { Graph = Teuchos::rcp(const_cast<Epetra_CrsGraph*>(&(CastCrsMatrixA->Graph())), false); } else { int MaxNumEntries = RowMatrixA->MaxNumEntries(); Graph = Teuchos::rcp(new Epetra_CrsGraph(Copy, OriginalMap, MaxNumEntries)); std::vector<int> Indices(MaxNumEntries); std::vector<double> Values(MaxNumEntries); for (int i = 0 ; i < RowMatrixA->NumMyRows() ; ++i) { int NumEntries; RowMatrixA->ExtractMyRowCopy(i, MaxNumEntries, NumEntries, &Values[0], &Indices[0]); for (int j = 0 ; j < NumEntries ; ++j) Indices[j] = RowMatrixA->RowMatrixColMap().GID(Indices[j]); int GlobalRow = RowMatrixA->RowMatrixRowMap().GID(i); Graph->InsertGlobalIndices(GlobalRow, NumEntries, &Indices[0]); } Graph->FillComplete(); } // // Create a replicated map and graph // std::vector<int> AllIDs( numrows ) ; for ( int i = 0; i < numrows ; i++ ) AllIDs[i] = i ; Epetra_Map ReplicatedMap( -1, numrows, &AllIDs[0], 0, Comm()); Epetra_Import ReplicatedImporter(ReplicatedMap, OriginalMap); Epetra_CrsGraph ReplicatedGraph( Copy, ReplicatedMap, 0 ); AMESOS_CHK_ERR(ReplicatedGraph.Import(*Graph, ReplicatedImporter, Insert)); AMESOS_CHK_ERR(ReplicatedGraph.FillComplete()); // // Convert the matrix to Ap, Ai // std::vector <int> Replicates(numrows); std::vector <int> Ap(numrows + 1); std::vector <int> Ai(EPETRA_MAX(numrows, numentries)); 
for( int i = 0 ; i < numrows; i++ ) Replicates[i] = 1; int NumEntriesPerRow ; int *ColIndices = 0 ; int Ai_index = 0 ; for ( int MyRow = 0; MyRow <numrows; MyRow++ ) { AMESOS_CHK_ERR( ReplicatedGraph.ExtractMyRowView( MyRow, NumEntriesPerRow, ColIndices ) ); Ap[MyRow] = Ai_index ; for ( int j = 0; j < NumEntriesPerRow; j++ ) { Ai[Ai_index] = ColIndices[j] ; Ai_index++; } } assert( Ai_index == numentries ) ; Ap[ numrows ] = Ai_index ; MtxConvTime_ = AddTime("Total matrix conversion time", MtxConvTime_, 0); ResetTimer(0); // // Call Dscpack Symbolic Factorization // int OrderCode = 2; std::vector<double> MyANonZ; NumLocalNonz = 0 ; GlobalStructNewColNum = 0 ; GlobalStructNewNum = 0 ; GlobalStructOwner = 0 ; LocalStructOldNum = 0 ; NumGlobalCols = 0 ; // MS // Have to define the maximum number of processes to be used // MS // This is only a suggestion as Dscpack uses a number of processes that is a power of 2 int NumGlobalNonzeros = GetProblem()->GetMatrix()->NumGlobalNonzeros(); int NumRows = GetProblem()->GetMatrix()->NumGlobalRows(); // optimal value for MaxProcs == -1 int OptNumProcs1 = 1+EPETRA_MAX( NumRows/10000, NumGlobalNonzeros/1000000 ); OptNumProcs1 = EPETRA_MIN(NumProcs_,OptNumProcs1 ); // optimal value for MaxProcs == -2 int OptNumProcs2 = (int)sqrt(1.0 * NumProcs_); if( OptNumProcs2 < 1 ) OptNumProcs2 = 1; // fix the value of MaxProcs switch (MaxProcs_) { case -1: MaxProcs_ = EPETRA_MIN(OptNumProcs1, NumProcs_); break; case -2: MaxProcs_ = EPETRA_MIN(OptNumProcs2, NumProcs_); break; case -3: MaxProcs_ = NumProcs_; break; } #if 0 if (MyDscRank>=0 && A_and_LU_built) { DSC_ReFactorInitialize(PrivateDscpackData_->MyDSCObject); } #endif // if ( ! A_and_LU_built ) { // DSC_End( PrivateDscpackData_->MyDSCObject ) ; // PrivateDscpackData_->MyDSCObject = DSC_Begin() ; // } // MS // here I continue with the old code... 
OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1); DscNumProcs = 1 ; int DscMax = DSC_Analyze( numrows, &Ap[0], &Ai[0], &Replicates[0] ); while ( DscNumProcs * 2 <=EPETRA_MIN( MaxProcs_, DscMax ) ) DscNumProcs *= 2 ; MyDscRank = -1; DSC_Open0( PrivateDscpackData_->MyDSCObject_, DscNumProcs, &MyDscRank, comm1.Comm()) ; NumLocalCols = 0 ; // This is for those processes not in the Dsc grid if ( MyDscRank >= 0 ) { assert( MyPID_ == MyDscRank ) ; AMESOS_CHK_ERR( DSC_Order ( PrivateDscpackData_->MyDSCObject_, OrderCode, numrows, &Ap[0], &Ai[0], &Replicates[0], &NumGlobalCols, &NumLocalStructs, &NumLocalCols, &NumLocalNonz, &GlobalStructNewColNum, &GlobalStructNewNum, &GlobalStructOwner, &LocalStructOldNum ) ) ; assert( NumGlobalCols == numrows ) ; assert( NumLocalCols == NumLocalStructs ) ; } if ( MyDscRank >= 0 ) { int MaxSingleBlock; const int Limit = 5000000 ; // Memory Limit set to 5 Terabytes AMESOS_CHK_ERR( DSC_SFactor ( PrivateDscpackData_->MyDSCObject_, &TotalMemory_, &MaxSingleBlock, Limit, DSC_LBLAS3, DSC_DBLAS2 ) ) ; } // A_and_LU_built = true; // If you uncomment this, TestOptions fails SymFactTime_ = AddTime("Total symbolic factorization time", SymFactTime_, 0); return(0); }
int CrsMatrixTranspose( Epetra_CrsMatrix *In, Epetra_CrsMatrix *Out ) { int iam = In->Comm().MyPID() ; int numentries = In->NumGlobalNonzeros(); int NumRowEntries = 0; double *RowValues = 0; int *ColIndices = 0; int numrows = In->NumGlobalRows(); int numcols = In->NumGlobalCols(); std::vector <int> Ap( numcols+1 ); // Column i is stored in Aval(Ap[i]..Ap[i+1]-1) std::vector <int> nextAp( numcols+1 ); // Where to store next value in Column i std::vector <int> Ai( EPETRA_MAX( numcols, numentries) ) ; // Row indices std::vector <double> Aval( EPETRA_MAX( numcols, numentries) ) ; if ( iam == 0 ) { assert( In->NumMyRows() == In->NumGlobalRows() ) ; // // Count the number of entries in each column // std::vector <int>RowsPerCol( numcols ) ; for ( int i = 0 ; i < numcols ; i++ ) RowsPerCol[i] = 0 ; for ( int MyRow = 0; MyRow <numrows; MyRow++ ) { assert( In->ExtractMyRowView( MyRow, NumRowEntries, RowValues, ColIndices ) == 0 ) ; for ( int j = 0; j < NumRowEntries; j++ ) { RowsPerCol[ ColIndices[j] ] ++ ; } } // // Set Ap and nextAp based on RowsPerCol // Ap[0] = 0 ; for ( int i = 0 ; i < numcols ; i++ ) { Ap[i+1]= Ap[i] + RowsPerCol[i] ; nextAp[i] = Ap[i]; } // // Populate Ai and Aval // for ( int MyRow = 0; MyRow <numrows; MyRow++ ) { assert( In->ExtractMyRowView( MyRow, NumRowEntries, RowValues, ColIndices ) == 0 ) ; for ( int j = 0; j < NumRowEntries; j++ ) { Ai[ nextAp[ ColIndices[j] ] ] = MyRow ; Aval[ nextAp[ ColIndices[j] ] ] = RowValues[j] ; nextAp[ ColIndices[j] ] ++ ; } } // // Insert values into Out // for ( int MyRow = 0; MyRow <numrows; MyRow++ ) { int NumInCol = Ap[MyRow+1] - Ap[MyRow] ; Out->InsertGlobalValues( MyRow, NumInCol, &Aval[Ap[MyRow]], &Ai[Ap[MyRow]] ); assert( Out->IndicesAreGlobal() ) ; } } else { assert( In->NumMyRows() == 0 ) ; } assert( Out->FillComplete()==0 ) ; return 0 ; }
//========================================================================= RowMatrix_Transpose::NewTypeRef RowMatrix_Transpose:: operator()( OriginalTypeRef orig ) { origObj_ = &orig; int i, j, err; if( !TransposeRowMap_ ) { if( IgnoreNonLocalCols_ ) TransposeRowMap_ = (Epetra_Map *) &(orig.OperatorRangeMap()); // Should be replaced with refcount = else TransposeRowMap_ = (Epetra_Map *) &(orig.OperatorDomainMap()); // Should be replaced with refcount = } // This routine will work for any RowMatrix object, but will attempt cast the matrix to a CrsMatrix if // possible (because we can then use a View of the matrix and graph, which is much cheaper). // First get the local indices to count how many nonzeros will be in the // transpose graph on each processor Epetra_CrsMatrix * OrigCrsMatrix = dynamic_cast<Epetra_CrsMatrix*>(&orig); OrigMatrixIsCrsMatrix_ = (OrigCrsMatrix!=0); // If this pointer is non-zero, the cast to CrsMatrix worked NumMyRows_ = orig.NumMyRows(); NumMyCols_ = orig.NumMyCols(); TransNumNz_ = new int[NumMyCols_]; TransIndices_ = new int*[NumMyCols_]; TransValues_ = new double*[NumMyCols_]; TransMyGlobalEquations_ = new int[NumMyCols_]; int NumIndices; if (OrigMatrixIsCrsMatrix_) { const Epetra_CrsGraph & OrigGraph = OrigCrsMatrix->Graph(); // Get matrix graph for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; for (i=0; i<NumMyRows_; i++) { err = OrigGraph.ExtractMyRowView(i, NumIndices, Indices_); // Get view of ith row if (err != 0) throw OrigGraph.ReportError("ExtractMyRowView failed",err); for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]]; } } else // Original is not a CrsMatrix { MaxNumEntries_ = 0; int NumEntries; for (i=0; i<NumMyRows_; i++) { orig.NumMyRowEntries(i, NumEntries); MaxNumEntries_ = EPETRA_MAX(MaxNumEntries_, NumEntries); } Indices_ = new int[MaxNumEntries_]; Values_ = new double[MaxNumEntries_]; for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; for (i=0; i<NumMyRows_; i++) { err = orig.ExtractMyRowCopy(i, MaxNumEntries_, 
NumIndices, Values_, Indices_); if (err != 0) { std::cerr << "ExtractMyRowCopy failed."<<std::endl; throw err; } for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]]; } } // Most of remaining code is common to both cases for(i=0; i<NumMyCols_; i++) { NumIndices = TransNumNz_[i]; if (NumIndices>0) { TransIndices_[i] = new int[NumIndices]; TransValues_[i] = new double[NumIndices]; } } // Now copy values and global indices into newly create transpose storage for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; // Reset transpose NumNz counter for (i=0; i<NumMyRows_; i++) { if (OrigMatrixIsCrsMatrix_) err = OrigCrsMatrix->ExtractMyRowView(i, NumIndices, Values_, Indices_); else err = orig.ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_); if (err != 0) { std::cerr << "ExtractMyRowCopy failed."<<std::endl; throw err; } int ii = orig.RowMatrixRowMap().GID(i); for (j=0; j<NumIndices; j++) { int TransRow = Indices_[j]; int loc = TransNumNz_[TransRow]; TransIndices_[TransRow][loc] = ii; TransValues_[TransRow][loc] = Values_[j]; ++TransNumNz_[TransRow]; // increment counter into current transpose row } } // Build Transpose matrix with some rows being shared across processors. 
// We will use a view here since the matrix will not be used for anything else const Epetra_Map & TransMap = orig.RowMatrixColMap(); Epetra_CrsMatrix TempTransA1(View, TransMap, TransNumNz_); TransMap.MyGlobalElements(TransMyGlobalEquations_); for (i=0; i<NumMyCols_; i++) { err = TempTransA1.InsertGlobalValues(TransMyGlobalEquations_[i], TransNumNz_[i], TransValues_[i], TransIndices_[i]); if (err < 0) throw TempTransA1.ReportError("InsertGlobalValues failed.",err); } // Note: The following call to FillComplete is currently necessary because // some global constants that are needed by the Export () are computed in this routine err = TempTransA1.FillComplete(orig.OperatorRangeMap(),*TransposeRowMap_, false); if (err != 0) { throw TempTransA1.ReportError("FillComplete failed.",err); } // Now that transpose matrix with shared rows is entered, create a new matrix that will // get the transpose with uniquely owned rows (using the same row distribution as A). if( IgnoreNonLocalCols_ ) TransposeMatrix_ = new Epetra_CrsMatrix(Copy, *TransposeRowMap_, *TransposeRowMap_, 0); else TransposeMatrix_ = new Epetra_CrsMatrix(Copy, *TransposeRowMap_,0); // Create an Export object that will move TempTransA around TransposeExporter_ = new Epetra_Export(TransMap, *TransposeRowMap_); err = TransposeMatrix_->Export(TempTransA1, *TransposeExporter_, Add); if (err != 0) throw TransposeMatrix_->ReportError("Export failed.",err); err = TransposeMatrix_->FillComplete(orig.OperatorRangeMap(),*TransposeRowMap_); if (err != 0) throw TransposeMatrix_->ReportError("FillComplete failed.",err); if (MakeDataContiguous_) { err = TransposeMatrix_->MakeDataContiguous(); if (err != 0) throw TransposeMatrix_->ReportError("MakeDataContiguous failed.",err); } newObj_ = TransposeMatrix_; return *newObj_; }
int TestOtherClasses( const char* AmesosClass, int EpetraMatrixType, Epetra_CrsMatrix *& Amat, const bool transpose, const bool verbose, const int Levels, const double Rcond, bool RowMapEqualsColMap, bool TestAddZeroToDiag, int ExpectedError, double &maxrelerror, double &maxrelresidual, int &NumTests ) { int iam = Amat->Comm().MyPID() ; int NumErrors = 0 ; maxrelerror = 0.0; maxrelresidual = 0.0; const Epetra_Comm& Comm = Amat->Comm(); bool MyVerbose = false ; // if set equal to verbose, we exceed the test harness 1 Megabyte limit std::string StringAmesosClass = AmesosClass ; if ( AmesosClass ) MyVerbose = verbose ; // Turn this on temporarily to debug Mumps on atlantis { Teuchos::ParameterList ParamList ; ParamList.set( "NoDestroy", true ); // Only affects Amesos_Mumps ParamList.set( "Redistribute", true ); ParamList.set( "AddZeroToDiag", false ); ParamList.set( "MaxProcs", 100000 ); // ParamList.print( std::cerr, 10 ) ; double relerror; double relresidual; if (MyVerbose) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass << " ParamList = " << ParamList << " transpose = " << transpose << " Levels = " << Levels << std::endl ; int Errors = PerformOneSolveAndTest(AmesosClass, EpetraMatrixType, Comm, transpose, MyVerbose, ParamList, Amat, Levels, Rcond, relerror, relresidual, ExpectedError ) ; if (MyVerbose || ( Errors && iam==0 ) ) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass << " Errors = " << Errors << std::endl ; if ( Errors < 0 ) { NumErrors++; NumTests++ ; if ( MyVerbose ) { std::cout << AmesosClass << " failed with error code " << Errors << " " << __FILE__ << "::" << __LINE__ << std::endl ; } } else { NumErrors += Errors ; maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; NumTests++ ; } if (MyVerbose) std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; if ( MyVerbose && 
Errors ) { std::cout << AmesosClass << " failed with transpose = " << (transpose?"true":"false") << std::endl ; } } std::string AC = AmesosClass ; bool taucs = ( AC == "Amesos_Taucs" ); bool klu = ( AC == "Amesos_Klu" ); bool paraklete = ( AC == "Amesos_Paraklete" ); bool mumps = ( AC == "Amesos_Mumps" ); bool scalapack = ( AC == "Amesos_Scalapack" ) ; bool lapack = ( AC == "Amesos_Lapack" ); // // Testing AddZeroToDiag and AddToDiag // When AddZeroToDiag is true, the value of AddToDiag is added to every diagonal element whether or not // that element exists in the structure of the matrix. // When AddZeroToDiag is false, the value of AddToDiag is added only to those diagonal elements // which are structually non-zero. // Support for these two flags varies // // // klu, superludist and parakalete support AddToDiag with or without AddZeroToDiag // scalapack and lapack, being dense codes, support AddToDiag, but only when AddZeroToDiag is set // // pardiso does not support AddToDiag - bug #1993 bool supports_AddToDiag_with_AddZeroToDiag = ( klu || paraklete || scalapack || lapack ) ; bool supports_AddToDiag_with_when_AddZeroTo_Diag_is_false = ( klu || paraklete || mumps || taucs || lapack ) ; if ( RowMapEqualsColMap && supports_AddToDiag_with_AddZeroToDiag && TestAddZeroToDiag ) { Teuchos::ParameterList ParamList ; ParamList.set( "NoDestroy", true ); // Only affects Amesos_Mumps ParamList.set( "Redistribute", false ); ParamList.set( "AddZeroToDiag", true ); ParamList.set( "AddToDiag", 1.3e2 ); // ParamList.print( std::cerr, 10 ) ; double relerror; double relresidual; if (MyVerbose) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass << " ParamList = " << ParamList << " transpose = " << transpose << " Levels = " << Levels << std::endl ; int Errors = PerformOneSolveAndTest(AmesosClass, EpetraMatrixType, Comm, transpose, MyVerbose, ParamList, Amat, Levels, Rcond, relerror, relresidual, ExpectedError ) ; if (MyVerbose || ( Errors && iam==0 ) ) 
std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass << " Errors = " << Errors << std::endl ; if ( Errors < 0 ) { NumErrors++; NumTests++ ; if ( MyVerbose ) { std::cout << __FILE__ << "::" << __LINE__ << AmesosClass << " failed with error code " << Errors << " " << __FILE__ << "::" << __LINE__ << std::endl ; } } else { NumErrors += Errors ; maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; NumTests++ ; } if (MyVerbose) std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; if ( MyVerbose && Errors ) { std::cout << AmesosClass << " failed with transpose = " << (transpose?"true":"false") << std::endl ; } } if ( RowMapEqualsColMap && supports_AddToDiag_with_when_AddZeroTo_Diag_is_false ) { Teuchos::ParameterList ParamList ; ParamList.set( "NoDestroy", true ); // Only affects Amesos_Mumps ParamList.set( "Redistribute", false ); ParamList.set( "AddToDiag", 1e2 ); // ParamList.print( std::cerr, 10 ) ; double relerror; double relresidual; if (MyVerbose) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass << " ParamList = " << ParamList << " transpose = " << transpose << " Levels = " << Levels << std::endl ; int Errors = PerformOneSolveAndTest(AmesosClass, EpetraMatrixType, Comm, transpose, MyVerbose, ParamList, Amat, Levels, Rcond, relerror, relresidual, ExpectedError ) ; if (MyVerbose || ( Errors && iam==0 ) ) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass << " Errors = " << Errors << std::endl ; if ( Errors < 0 ) { NumErrors++; NumTests++ ; if ( MyVerbose ) { std::cout << __FILE__ << "::" << __LINE__ << AmesosClass << " failed with error code " << Errors << " " << __FILE__ << "::" << __LINE__ << std::endl ; } } else { NumErrors += Errors ; maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; NumTests++ ; } 
if (MyVerbose) std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; if ( MyVerbose && Errors ) { std::cout << AmesosClass << " failed with transpose = " << (transpose?"true":"false") << std::endl ; } } // // 2) Refactorize = true { Teuchos::ParameterList ParamList ; ParamList.set( "NoDestroy", true ); // Only affects Amesos_Mumps ParamList.set( "Refactorize", true ); double relerror; double relresidual; if (MyVerbose) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass << " ParamList = " << ParamList << " transpose = " << transpose << " Levels = " << Levels << std::endl ; int Errors = PerformOneSolveAndTest(AmesosClass, EpetraMatrixType, Comm, transpose, MyVerbose, ParamList, Amat, Levels, Rcond, relerror, relresidual, ExpectedError ) ; if (MyVerbose || ( Errors && iam==0 ) ) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass << " Errors = " << Errors << std::endl ; if (Errors < 0 ) { if (MyVerbose ) std::cout << AmesosClass << " not built in this executable " << std::endl ; return 0 ; } else { NumErrors += Errors ; maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; NumTests++ ; } if (MyVerbose) std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; if ( MyVerbose && Errors ) { std::cout << AmesosClass << " failed with transpose = " << (transpose?"true":"false") << std::endl ; } } // // 5) MaxProcs = 2 { Teuchos::ParameterList ParamList ; ParamList.set( "NoDestroy", true ); // Only affects Amesos_Mumps ParamList.set( "MaxProcs", 2 ); // Only affects Paraklete (maybe Mumps) also Superludist byt that isn't tested here double relerror; double relresidual; if (MyVerbose) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass << " ParamList = " << ParamList << " transpose = " << transpose << " Levels = " 
<< Levels << std::endl ; int Errors = PerformOneSolveAndTest(AmesosClass, EpetraMatrixType, Comm, transpose, MyVerbose, ParamList, Amat, Levels, Rcond, relerror, relresidual, ExpectedError ) ; if (MyVerbose || ( Errors && iam==0 ) ) std::cout << __FILE__ << "::" << __LINE__ << " AmesosClass= " << AmesosClass << " Errors = " << Errors << std::endl ; if (Errors < 0 ) { if (MyVerbose ) std::cout << AmesosClass << " not built in this executable " << std::endl ; return 0 ; } else { NumErrors += Errors ; maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; NumTests++ ; } if (MyVerbose) std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; if ( MyVerbose && Errors ) { std::cout << AmesosClass << " failed with transpose = " << (transpose?"true":"false") << std::endl ; } } // // ComputeTrueResidual is, by design, not quiet - it prints out the residual // #if 0 // // 4) ComputeTrueResidual==true { Teuchos::ParameterList ParamList ; ParamList.set( "NoDestroy", true ); // Only affects Amesos_Mumps ParamList.set( "ComputeTrueResidual", true ); double relerror; double relresidual; int Errors = PerformOneSolveAndTest(AmesosClass, EpetraMatrixType, Comm, transpose, MyVerbose, ParamList, Amat, Levels, Rcond, relerror, relresidual, ExpectedError ) ; if (Errors < 0 ) { if (MyVerbose ) std::cout << AmesosClass << " not built in this executable " << std::endl ; return 0 ; } else { NumErrors += Errors ; maxrelerror = EPETRA_MAX( relerror, maxrelerror ) ; maxrelresidual = EPETRA_MAX( relresidual, maxrelresidual ) ; NumTests++ ; } if (MyVerbose) std::cout << " TestOtherClasses " << AmesosClass << "" << "::" << __LINE__ << " NumErrors = " << NumErrors << std::endl ; if ( MyVerbose && Errors > 0 ) { std::cout << AmesosClass << " failed with transpose = " << (transpose?"true":"false") << std::endl ; } } #endif return NumErrors; }
void Trilinos_Util_ReadHpc2Epetra(char *data_file, const Epetra_Comm &comm, Epetra_Map *& map, Epetra_CrsMatrix *& A, Epetra_Vector *& x, Epetra_Vector *& b, Epetra_Vector *&xexact) { FILE *in_file ; int l; int * lp = &l; double v; double * vp = &v; #ifdef DEBUG bool debug = true; #else bool debug = false; #endif int size = comm.NumProc(); int rank = comm.MyPID(); printf("Reading matrix info from %s...\n",data_file); in_file = fopen( data_file, "r"); if (in_file == NULL) { printf("Error: Cannot open file: %s\n",data_file); exit(1); } int numGlobalEquations, total_nnz; fscanf(in_file,"%d",&numGlobalEquations); fscanf(in_file,"%d",&total_nnz); map = new Epetra_Map(numGlobalEquations, 0, comm); // Create map with uniform distribution A = new Epetra_CrsMatrix(Copy, *map, 0); // Construct matrix x = new Epetra_Vector(*map); b = new Epetra_Vector(*map); xexact = new Epetra_Vector(*map); int numMyEquations = map->NumMyPoints(); // Allocate arrays that are of length numMyEquations // Find max nnz per row for this processor int max_nnz = 0; for (int i=0; i<numGlobalEquations; i++) { fscanf(in_file, "%d",lp); /* row #, nnz in row */ if (map->MyGID(i)) max_nnz = EPETRA_MAX(max_nnz,l); } // Allocate arrays that are of length local_nnz double * list_of_vals = new double[max_nnz]; int *list_of_inds = new int [max_nnz]; {for (int i=0; i<numGlobalEquations; i++) { int cur_nnz; fscanf(in_file, "%d",&cur_nnz); if (map->MyGID(i)) // See if nnz for row should be added { if (debug) cout << "Process "<<rank <<" of "<<size<<" getting row "<<i<<endl; int nnz_kept = 0; for (int j=0; j<cur_nnz; j++) { fscanf(in_file, "%lf %d",vp,lp); if (v!=0.0) { list_of_vals[nnz_kept] = v; list_of_inds[nnz_kept] = l; nnz_kept++; } } A->InsertGlobalValues(i, nnz_kept, list_of_vals, list_of_inds); } else for (int j=0; j<cur_nnz; j++) fscanf(in_file, "%lf %d",vp,lp); // otherwise read and discard }} double xt, bt, xxt; {for (int i=0; i<numGlobalEquations; i++) { if (map->MyGID(i)) // See if entry should be 
added { if (debug) cout << "Process "<<rank<<" of " <<size<<" getting RHS "<<i<<endl; fscanf(in_file, "%lf %lf %lf",&xt, &bt, &xxt); int cur_local_row = map->LID(i); (*x)[cur_local_row] = xt; (*b)[cur_local_row] = bt; (*xexact)[cur_local_row] = xxt; } else fscanf(in_file, "%lf %lf %lf",vp, vp, vp); // or thrown away }} fclose(in_file); if (debug) cout << "Process "<<rank<<" of "<<size<<" has "<<numMyEquations << " rows. Min global row "<< map->MinMyGID() <<" Max global row "<< map->MaxMyGID() <<endl <<" and "<<A->NumMyNonzeros()<<" nonzeros."<<endl; A->FillComplete(); Epetra_Vector bcomp(*map); A->Multiply(false, *xexact, bcomp); double residual; bcomp.Norm2(&residual); if (comm.MyPID()==0) cout << "Norm of computed b = " << residual << endl; b->Norm2(&residual); if (comm.MyPID()==0) cout << "Norm of given b = " << residual << endl; bcomp.Update(-1.0, *b, 1.0); bcomp.Norm2(&residual); if (comm.MyPID()==0) cout << "Norm of difference between computed b and given b for xexact = " << residual << endl; delete [] list_of_vals; delete []list_of_inds; return; }
// Compresses the point graph B into a block adjacency graph.
//
// Parameters:
//   B       - input graph; all of its entries must live on a single process,
//             otherwise the function prints a message and calls abort().
//   nbrr    - number of block rows.
//   r       - block boundaries: block l covers point rows/columns
//             r[l] .. r[l+1]-1, so r is read up to index nbrr.
//   weights - output, resized to nbrr; weights[l] = r[l+1] - r[l].
//   verbose - when true, the owning process prints the matrix size, the
//             number of blocks and the number of block-graph entries.
//
// Returns a new Epetra_CrsGraph with nbrr global rows, all owned by the
// process that holds B, where row l lists the distinct blocks touched by the
// point rows of block l.  FillComplete() has been called on it.
Teuchos::RCP<Epetra_CrsGraph> BlockAdjacencyGraph::compute( Epetra_CrsGraph& B, int nbrr, std::vector<int>&r, std::vector<double>& weights, bool verbose)
{
  // Check if the graph is on one processor.
  int myMatProc = -1, matProc = -1;
  int myPID = B.Comm().MyPID();
  if (B.NumGlobalEntries() == B.NumMyEntries())
    myMatProc = myPID;
  B.Comm().MaxAll( &myMatProc, &matProc, 1 );

  if( matProc == -1)
    { cout << "FAIL for Global! All CrsGraph entries must be on one processor!\n"; abort(); }

  int i= 0, j = 0, k, l = 0, p, pm, q = -1, ns = 0;
  int tree_height;
  int error = -1;    /* error detected, possibly a problem with the input */
  int nrr;           /* number of rows in B */
  int nzM = 0;       /* number of edges in graph */
  int m = 0;         /* maximum number of nonzeros in any block row of B */
  int* colstack = 0; /* stack used to process each block row */
  int* bstree = 0;   /* binary search tree */
  std::vector<int> Mi, Mj, Mnum(nbrr+1,0);
  nrr = B.NumMyRows();
  if ( matProc == myPID ) {
    // The owning process must always do the work; only the printout is
    // conditional on verbose.
    if ( verbose )
      std::printf(" Matrix Size = %d Number of Blocks = %d\n",nrr, nbrr);
  } else {
    nrr = -1;     /* Prevent processor from doing any computations */
  }
  bstree = csr_bst(nbrr);  /* 0 : nbrr-1 */
  tree_height = ceil31log2(nbrr) + 1;
  error = -1;

  // Find the largest number of point entries in any block row; this bounds
  // the stack of block column ids collected per block row.
  l = 0; j = 0; m = 0;
  for( i = 0; i < nrr; i++ ){
    if( i >= r[l+1] ){
      ++l;                 /* new block row */
      m = EPETRA_MAX(m,j) ;   /* nonzeros in block row */
      j = B.NumGlobalIndices(i);
    }else{
      j += B.NumGlobalIndices(i);
    }
  }
  /* one more time for the final block */
  m = EPETRA_MAX(m,j) ;   /* nonzeros in block row */

  colstack = (int*) malloc( EPETRA_MAX(m,1) * sizeof(int) );
  // The compressed graph is actually computed twice,
  // due to concerns about memory limitations.  First,
  // without memory allocation, just nzM is computed.
  // Next Mj is allocated. Then, the second time, the
  // arrays are actually populated.
  nzM = 0; q = -1; l = 0;
  int * indices;
  int numEntries;
  for( i = 0; i <= nrr; i++ ){
    if( i >= r[l+1] ){
      if( q > 0 ) std::qsort(colstack,q+1,sizeof(int),compare_ints); /* sort stack */
      // Count distinct block columns; an empty stack contributes nothing.
      // (ns doubles as the search-depth counter below, so it must be reset
      // here rather than left at whatever the last search stored in it.)
      ns = ( q >= 0 ) ? 1 : 0; /* l, colstack[0] M */
      for( j=1; j<=q ; j++ ){ /* delete copies */
        if( colstack[j] > colstack[j-1] ) ++ns;
      }
      nzM += ns; /*M->p[l+1] = M->p[l] + ns;*/
      ++l;
      q = -1;
    }
    if( i < nrr ){
      B.ExtractMyRowView( i, numEntries, indices );
      for( k = 0; k < numEntries; k++){
        j = indices[k];  ns = 0; p = 0;
        // Walk the implicit binary search tree until the block containing
        // column j is found.
        while( (r[bstree[p]] > j)  ||  (j >= r[bstree[p]+1])  ){
          if( r[bstree[p]] > j){
            p = 2*p+1;
          }else{
            if( r[bstree[p]+1] <= j) p = 2*p+2;
          }
          ++ns;
          if( p > nbrr || ns > tree_height ) {
            error = j;
            std::printf("error: p %d nbrr %d ns %d %d\n",p,nbrr,ns,j); break;
          }
        }
        colstack[++q] = bstree[p];
      }
      //if( error >-1 ){ std::printf("%d\n",error); break; }
      // p > nbrr is a fatal error that is ignored
    }
  }

  if ( matProc == myPID && verbose )
    std::printf("nzM = %d \n", nzM );
  Mi.resize( nzM );
  Mj.resize( nzM );

  // Second pass: same traversal, now storing the (block row, block column)
  // pairs and the row offsets in Mnum.
  q = -1; l = 0; pm = -1;
  for( i = 0; i <= nrr; i++ ){
    if( i >= r[l+1] ){
      if( q > 0 ) std::qsort(colstack,q+1,sizeof(colstack[0]),compare_ints); /* sort stack */
      if( q >= 0 ){
        Mi[++pm] = l;
        Mj[pm] = colstack[0];
      }
      for( j=1; j<=q ; j++ ){ /* delete copies */
        if( colstack[j] > colstack[j-1] ){ /* l, colstack[j] */
          Mi[++pm] = l;
          Mj[pm] = colstack[j];
        }
      }
      ++l;
      Mnum[l] = pm + 1;

      /* sparse row format: M->p[l+1] = M->p[l] + ns; */
      q = -1;
    }
    if( i < nrr ){
      B.ExtractMyRowView( i, numEntries, indices );
      for( k = 0; k < numEntries; k++){
        j = indices[k]; ns = 0; p = 0;
        while( (r[bstree[p]] > j)  ||  (j >= r[bstree[p]+1])  ){
          if( r[bstree[p]] > j){
            p = 2*p+1;
          }else{
            if( r[bstree[p]+1] <= j) p = 2*p+2;
          }
          ++ns;
        }
        colstack[++q] = bstree[p];
      }
    }
  }
  if ( bstree ) free ( bstree );
  if ( colstack ) free( colstack );

  // Compute weights as number of rows in each block.
  weights.resize( nbrr );
  for( l=0; l<nbrr; l++) weights[l] = r[l+1] - r[l];

  // Compute Epetra_CrsGraph and return
  Teuchos::RCP<Epetra_Map> newMap;
  if ( matProc == myPID )
    newMap = Teuchos::rcp( new Epetra_Map(nbrr, nbrr, 0, B.Comm() ) );
  else
    newMap = Teuchos::rcp( new Epetra_Map( nbrr, 0, 0, B.Comm() ) );
  Teuchos::RCP<Epetra_CrsGraph> newGraph = Teuchos::rcp( new Epetra_CrsGraph( Copy, *newMap, 0 ) );
  int noIndices = 0; // valid address to hand over for rows without entries
  for( l=0; l<newGraph->NumMyRows(); l++) {
    const int rowLength = Mnum[l+1]-Mnum[l];
    // Do not index Mj at (or past) its end when the row is empty.
    int * rowIndices = ( rowLength > 0 ) ? &Mj[Mnum[l]] : &noIndices;
    newGraph->InsertGlobalIndices( l, rowLength, rowIndices );
  }
  newGraph->FillComplete();

  return (newGraph);
}
// // Amesos_TestMultiSolver.cpp reads in a matrix in Harwell-Boeing format, // calls one of the sparse direct solvers, using blocked right hand sides // and computes the error and residual. // // TestSolver ignores the Harwell-Boeing right hand sides, creating // random right hand sides instead. // // Amesos_TestMultiSolver can test either A x = b or A^T x = b. // This can be a bit confusing because sparse direct solvers // use compressed column storage - the transpose of Trilinos' // sparse row storage. // // Matrices: // readA - Serial. As read from the file. // transposeA - Serial. The transpose of readA. // serialA - if (transpose) then transposeA else readA // distributedA - readA distributed to all processes // passA - if ( distributed ) then distributedA else serialA // // int Amesos_TestMultiSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, SparseSolverType SparseSolver, bool transpose, int special, AMESOS_MatrixType matrix_type ) { int iam = Comm.MyPID() ; // int hatever; // if ( iam == 0 ) std::cin >> hatever ; Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; std::string FileName = matrix_file ; int FN_Size = FileName.size() ; std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size ); std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size ); bool NonContiguousMap = false; if ( LastFiveBytes == ".triU" ) { NonContiguousMap = true; // Call routine to read in unsymmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, readb, readxexact, NonContiguousMap ) ); } else { if ( LastFiveBytes == ".triS" ) { NonContiguousMap = true; // Call routine to read in symmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, readMap, readA, readx, readb, readxexact, NonContiguousMap ) ); } else { if ( LastFourBytes == ".mtx" 
) { EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ); } else { // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ; } } } Epetra_CrsMatrix transposeA(Copy, *readMap, 0); Epetra_CrsMatrix *serialA ; if ( transpose ) { assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); serialA = &transposeA ; } else { serialA = readA ; } // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); Epetra_Map* map_; if( NonContiguousMap ) { // // map gives us NumMyElements and MyFirstElement; // int NumGlobalElements = readMap->NumGlobalElements(); int NumMyElements = map.NumMyElements(); int MyFirstElement = map.MinMyGID(); std::vector<int> MapMap_( NumGlobalElements ); readMap->MyGlobalElements( &MapMap_[0] ) ; Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm); } else { map_ = new Epetra_Map( map ) ; } // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, *map_); Epetra_CrsMatrix A(Copy, *map_, 0); Epetra_RowMatrix * passA = 0; Epetra_MultiVector * passx = 0; Epetra_MultiVector * passb = 0; Epetra_MultiVector * passxexact = 0; Epetra_MultiVector * passresid = 0; Epetra_MultiVector * passtmp = 0; Epetra_MultiVector x(*map_,numsolves); Epetra_MultiVector b(*map_,numsolves); Epetra_MultiVector xexact(*map_,numsolves); Epetra_MultiVector resid(*map_,numsolves); Epetra_MultiVector tmp(*map_,numsolves); Epetra_MultiVector serialx(*readMap,numsolves); Epetra_MultiVector serialb(*readMap,numsolves); Epetra_MultiVector serialxexact(*readMap,numsolves); Epetra_MultiVector serialresid(*readMap,numsolves); Epetra_MultiVector serialtmp(*readMap,numsolves); bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; if ( distribute_matrix ) { // // Initialize x, b and xexact to the 
values read in from the file // A.Export(*serialA, exporter, Add); Comm.Barrier(); assert(A.FillComplete()==0); Comm.Barrier(); passA = &A; passx = &x; passb = &b; passxexact = &xexact; passresid = &resid; passtmp = &tmp; } else { passA = serialA; passx = &serialx; passb = &serialb; passxexact = &serialxexact; passresid = &serialresid; passtmp = &serialtmp; } passxexact->SetSeed(131) ; passxexact->Random(); passx->SetSeed(11231) ; passx->Random(); passb->PutScalar( 0.0 ); passA->Multiply( transpose, *passxexact, *passb ) ; Epetra_MultiVector CopyB( *passb ) ; double Anorm = passA->NormInf() ; SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ; Epetra_LinearProblem Problem( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ); double max_resid = 0.0; for ( int j = 0 ; j < special+1 ; j++ ) { Epetra_Time TotalTime( Comm ) ; if ( false ) { #ifdef TEST_UMFPACK unused code } else if ( SparseSolver == UMFPACK ) { UmfpackOO umfpack( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; umfpack.SetTrans( transpose ) ; umfpack.Solve() ; #endif #ifdef TEST_SUPERLU } else if ( SparseSolver == SuperLU ) { SuperluserialOO superluserial( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; superluserial.SetPermc( SuperLU_permc ) ; superluserial.SetTrans( transpose ) ; superluserial.SetUseDGSSV( special == 0 ) ; superluserial.Solve() ; #endif #ifdef HAVE_AMESOS_SLUD } else if ( SparseSolver == SuperLUdist ) { SuperludistOO superludist( Problem ) ; superludist.SetTrans( transpose ) ; EPETRA_CHK_ERR( superludist.Solve( true ) ) ; #endif #ifdef HAVE_AMESOS_SLUD2 } else if ( SparseSolver == SuperLUdist2 ) { Superludist2_OO superludist2( Problem ) ; superludist2.SetTrans( transpose ) ; EPETRA_CHK_ERR( superludist2.Solve( true ) ) ; #endif #ifdef TEST_SPOOLES } else if ( SparseSolver == SPOOLES ) { SpoolesOO spooles( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, 
(Epetra_MultiVector *) passb ) ; spooles.SetTrans( transpose ) ; spooles.Solve() ; #endif #ifdef HAVE_AMESOS_DSCPACK } else if ( SparseSolver == DSCPACK ) { Teuchos::ParameterList ParamList ; Amesos_Dscpack dscpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( dscpack.Solve( ) ); #endif #ifdef HAVE_AMESOS_UMFPACK } else if ( SparseSolver == UMFPACK ) { Teuchos::ParameterList ParamList ; Amesos_Umfpack umfpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( umfpack.Solve( ) ); #endif #ifdef HAVE_AMESOS_KLU } else if ( SparseSolver == KLU ) { Teuchos::ParameterList ParamList ; Amesos_Klu klu( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( klu.SymbolicFactorization( ) ); EPETRA_CHK_ERR( klu.NumericFactorization( ) ); EPETRA_CHK_ERR( klu.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARAKLETE } else if ( SparseSolver == PARAKLETE ) { Teuchos::ParameterList ParamList ; Amesos_Paraklete paraklete( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) ); EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( paraklete.SymbolicFactorization( ) ); EPETRA_CHK_ERR( paraklete.NumericFactorization( ) ); EPETRA_CHK_ERR( paraklete.Solve( ) ); #endif #ifdef HAVE_AMESOS_SLUS } else if ( SparseSolver == SuperLU ) { Epetra_SLU superluserial( &Problem ) ; EPETRA_CHK_ERR( superluserial.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superluserial.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superluserial.NumericFactorization( ) ); EPETRA_CHK_ERR( superluserial.Solve( ) ); #endif #ifdef HAVE_AMESOS_LAPACK } else if ( SparseSolver == LAPACK ) { Teuchos::ParameterList ParamList ; ParamList.set( "MaxProcs", -3 ); Amesos_Lapack 
lapack( Problem ) ; EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( lapack.SymbolicFactorization( ) ); EPETRA_CHK_ERR( lapack.NumericFactorization( ) ); EPETRA_CHK_ERR( lapack.Solve( ) ); #endif #ifdef HAVE_AMESOS_TAUCS } else if ( SparseSolver == TAUCS ) { Teuchos::ParameterList ParamList ; Amesos_Taucs taucs( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) ); EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) ); EPETRA_CHK_ERR( taucs.NumericFactorization( ) ); EPETRA_CHK_ERR( taucs.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARDISO } else if ( SparseSolver == PARDISO ) { Teuchos::ParameterList ParamList ; Amesos_Pardiso pardiso( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) ); EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) ); EPETRA_CHK_ERR( pardiso.NumericFactorization( ) ); EPETRA_CHK_ERR( pardiso.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARKLETE } else if ( SparseSolver == PARKLETE ) { Teuchos::ParameterList ParamList ; Amesos_Parklete parklete( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( parklete.SetParameters( ParamList ) ); EPETRA_CHK_ERR( parklete.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( parklete.SymbolicFactorization( ) ); EPETRA_CHK_ERR( parklete.NumericFactorization( ) ); EPETRA_CHK_ERR( parklete.Solve( ) ); #endif #ifdef HAVE_AMESOS_MUMPS } else if ( SparseSolver == MUMPS ) { Teuchos::ParameterList ParamList ; Amesos_Mumps mumps( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) ); EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) ); EPETRA_CHK_ERR( mumps.NumericFactorization( ) ); EPETRA_CHK_ERR( mumps.Solve( ) ); #endif #ifdef HAVE_AMESOS_SCALAPACK } else if ( SparseSolver == SCALAPACK ) { Teuchos::ParameterList 
ParamList ; Amesos_Scalapack scalapack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) ); EPETRA_CHK_ERR( scalapack.NumericFactorization( ) ); EPETRA_CHK_ERR( scalapack.Solve( ) ); #endif #ifdef HAVE_AMESOS_SUPERLUDIST } else if ( SparseSolver == SUPERLUDIST ) { Teuchos::ParameterList ParamList ; Amesos_Superludist superludist( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) ); EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superludist.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superludist.NumericFactorization( ) ); EPETRA_CHK_ERR( superludist.Solve( ) ); #endif #ifdef HAVE_AMESOS_SUPERLU } else if ( SparseSolver == SUPERLU ) { Teuchos::ParameterList ParamList ; Amesos_Superlu superlu( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superlu.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superlu.NumericFactorization( ) ); EPETRA_CHK_ERR( superlu.Solve( ) ); #endif #ifdef TEST_SPOOLESSERIAL } else if ( SparseSolver == SPOOLESSERIAL ) { SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; spoolesserial.Solve() ; #endif } else { SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ; std::cerr << "\n\n#################### Requested solver not available (Or not tested with blocked RHS) on this platform #####################\n" << std::endl ; } SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); // SparseDirectTimingVars::SS_Result.Set_First_Time( 0.0 ); // SparseDirectTimingVars::SS_Result.Set_Middle_Time( 0.0 ); // SparseDirectTimingVars::SS_Result.Set_Last_Time( 0.0 ); // // Compute the error = 
norm(xcomp - xexact ) // std::vector <double> error(numsolves) ; double max_error = 0.0; passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0); passresid->Norm2(&error[0]); for ( int i = 0 ; i< numsolves; i++ ) if ( error[i] > max_error ) max_error = error[i] ; SparseDirectTimingVars::SS_Result.Set_Error(max_error) ; // passxexact->Norm2(&error[0] ) ; // passx->Norm2(&error ) ; // // Compute the residual = norm(Ax - b) // std::vector <double> residual(numsolves) ; passtmp->PutScalar(0.0); passA->Multiply( transpose, *passx, *passtmp); passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); // passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); passresid->Norm2(&residual[0]); for ( int i = 0 ; i< numsolves; i++ ) if ( residual[i] > max_resid ) max_resid = residual[i] ; SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ; std::vector <double> bnorm(numsolves); passb->Norm2( &bnorm[0] ) ; SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ; std::vector <double> xnorm(numsolves); passx->Norm2( &xnorm[0] ) ; SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ; if ( false && iam == 0 ) { std::cout << " Amesos_TestMutliSolver.cpp " << std::endl ; for ( int i = 0 ; i< numsolves && i < 10 ; i++ ) { std::cout << "i=" << i << " error = " << error[i] << " xnorm = " << xnorm[i] << " residual = " << residual[i] << " bnorm = " << bnorm[i] << std::endl ; } std::cout << std::endl << " max_resid = " << max_resid ; std::cout << " max_error = " << max_error << std::endl ; std::cout << " Get_residual() again = " << SparseDirectTimingVars::SS_Result.Get_Residual() << std::endl ; } } delete readA; delete readx; delete readb; delete readxexact; delete readMap; delete map_; Comm.Barrier(); return 0 ; }
void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int * xoff, int * yoff, int nsizes, int * sizes, int nrhs, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_BlockMap *& map, Epetra_VbrMatrix *& A, Epetra_MultiVector *& b, Epetra_MultiVector *& bt, Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) { int i; // Determine my global IDs long long * myGlobalElements; GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements); int numMyElements = numNodesX*numNodesY; Epetra_Map ptMap((long long)-1, numMyElements, myGlobalElements, 0, comm); // Create map with 2D block partitioning. delete [] myGlobalElements; Epetra_IntVector elementSizes(ptMap); // This vector will have the list of element sizes for (i=0; i<numMyElements; i++) elementSizes[i] = sizes[ptMap.GID64(i)%nsizes]; // cycle through sizes array map = new Epetra_BlockMap((long long)-1, numMyElements, ptMap.MyGlobalElements64(), elementSizes.Values(), ptMap.IndexBase64(), ptMap.Comm()); int profile = 0; if (StaticProfile) profile = numPoints; // FIXME: Won't compile until Epetra_VbrMatrix is modified. #if 0 int j; long long numGlobalEquations = ptMap.NumGlobalElements64(); if (MakeLocalOnly) A = new Epetra_VbrMatrix(Copy, *map, *map, profile); // Construct matrix rowmap=colmap else A = new Epetra_VbrMatrix(Copy, *map, profile); // Construct matrix long long * indices = new long long[numPoints]; // This section of code creates a vector of random values that will be used to create // light-weight dense matrices to pass into the VbrMatrix construction process. 
int maxElementSize = 0; for (i=0; i< nsizes; i++) maxElementSize = EPETRA_MAX(maxElementSize, sizes[i]); Epetra_LocalMap lmap((long long)maxElementSize*maxElementSize, ptMap.IndexBase(), ptMap.Comm()); Epetra_Vector randvec(lmap); randvec.Random(); randvec.Scale(-1.0); // Make value negative int nx = numNodesX*numProcsX; for (i=0; i<numMyElements; i++) { long long rowID = map->GID64(i); int numIndices = 0; int rowDim = sizes[rowID%nsizes]; for (j=0; j<numPoints; j++) { long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets if (colID>-1 && colID<numGlobalEquations) indices[numIndices++] = colID; } A->BeginInsertGlobalValues(rowID, numIndices, indices); for (j=0; j < numIndices; j++) { int colDim = sizes[indices[j]%nsizes]; A->SubmitBlockEntry(&(randvec[0]), rowDim, rowDim, colDim); } A->EndSubmitEntries(); } delete [] indices; A->FillComplete(); // Compute the InvRowSums of the matrix rows Epetra_Vector invRowSums(A->RowMap()); Epetra_Vector rowSums(A->RowMap()); A->InvRowSums(invRowSums); rowSums.Reciprocal(invRowSums); // Jam the row sum values into the diagonal of the Vbr matrix (to make it diag dominant) int numBlockDiagonalEntries; int * rowColDims; int * diagoffsets = map->FirstPointInElementList(); A->BeginExtractBlockDiagonalView(numBlockDiagonalEntries, rowColDims); for (i=0; i< numBlockDiagonalEntries; i++) { double * diagVals; int diagLDA; A->ExtractBlockDiagonalEntryView(diagVals, diagLDA); int rowDim = map->ElementSize(i); for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j]; } if (nrhs<=1) { b = new Epetra_Vector(*map); bt = new Epetra_Vector(*map); xexact = new Epetra_Vector(*map); } else { b = new Epetra_MultiVector(*map, nrhs); bt = new Epetra_MultiVector(*map, nrhs); xexact = new Epetra_MultiVector(*map, nrhs); } xexact->Random(); // Fill xexact with random values A->Multiply(false, *xexact, *b); A->Multiply(true, *xexact, *bt); #endif // EPETRA_NO_32BIT_GLOBAL_INDICES return; }
void Trilinos_Util_GenerateVbrProblem(int nx, int ny, int npoints, int * xoff, int * yoff, int nsizes, int * sizes, int nrhs, const Epetra_Comm &comm, Epetra_BlockMap *& map, Epetra_VbrMatrix *& A, Epetra_MultiVector *& x, Epetra_MultiVector *& b, Epetra_MultiVector *&xexact) { int i, j; // Number of global equations is nx*ny. These will be distributed in a linear fashion int numGlobalEquations = nx*ny; Epetra_Map ptMap(numGlobalEquations, 0, comm); // Create map with equal distribution of equations. int numMyElements = ptMap.NumMyElements(); Epetra_IntVector elementSizes(ptMap); // This vector will have the list of element sizes for (i=0; i<numMyElements; i++) elementSizes[i] = sizes[ptMap.GID64(i)%nsizes]; // cycle through sizes array map = new Epetra_BlockMap(-1, numMyElements, ptMap.MyGlobalElements(), elementSizes.Values(), ptMap.IndexBase(), ptMap.Comm()); A = new Epetra_VbrMatrix(Copy, *map, 0); // Construct matrix int * indices = new int[npoints]; // double * values = new double[npoints]; // double dnpoints = (double) npoints; // This section of code creates a vector of random values that will be used to create // light-weight dense matrices to pass into the VbrMatrix construction process. 
int maxElementSize = 0; for (i=0; i< nsizes; i++) maxElementSize = EPETRA_MAX(maxElementSize, sizes[i]); Epetra_LocalMap lmap(maxElementSize*maxElementSize, ptMap.IndexBase(), ptMap.Comm()); Epetra_Vector randvec(lmap); randvec.Random(); randvec.Scale(-1.0); // Make value negative for (i=0; i<numMyElements; i++) { int rowID = map->GID(i); int numIndices = 0; int rowDim = sizes[rowID%nsizes]; for (j=0; j<npoints; j++) { int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets if (colID>-1 && colID<numGlobalEquations) indices[numIndices++] = colID; } A->BeginInsertGlobalValues(rowID, numIndices, indices); for (j=0; j < numIndices; j++) { int colDim = sizes[indices[j]%nsizes]; A->SubmitBlockEntry(&(randvec[0]), rowDim, rowDim, colDim); } A->EndSubmitEntries(); } delete [] indices; A->FillComplete(); // Compute the InvRowSums of the matrix rows Epetra_Vector invRowSums(A->RowMap()); Epetra_Vector rowSums(A->RowMap()); A->InvRowSums(invRowSums); rowSums.Reciprocal(invRowSums); // Jam the row sum values into the diagonal of the Vbr matrix (to make it diag dominant) int numBlockDiagonalEntries; int * rowColDims; int * diagoffsets = map->FirstPointInElementList(); A->BeginExtractBlockDiagonalView(numBlockDiagonalEntries, rowColDims); for (i=0; i< numBlockDiagonalEntries; i++) { double * diagVals; int diagLDA; A->ExtractBlockDiagonalEntryView(diagVals, diagLDA); int rowDim = map->ElementSize(i); for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j]; } if (nrhs<=1) { x = new Epetra_Vector(*map); b = new Epetra_Vector(*map); xexact = new Epetra_Vector(*map); } else { x = new Epetra_MultiVector(*map, nrhs); b = new Epetra_MultiVector(*map, nrhs); xexact = new Epetra_MultiVector(*map, nrhs); } xexact->Random(); // Fill xexact with random values A->Multiply(false, *xexact, *b); return; }
//============================================================================= // // See also pre and post conditions in Amesos_Klu.h // Preconditions: // firsttime specifies that this is the first time that // ConertToKluCrs has been called, i.e. in symbolic factorization. // No data allocation should happen unless firsttime=true. // SerialMatrix_ points to the matrix to be factored and solved // NumGlobalElements_ has been set to the dimension of the matrix // numentries_ has been set to the number of non-zeros in the matrix // (i.e. CreateLocalMatrixAndExporters() has been callded) // // Postconditions: // Ap, VecAi, VecAval contain the matrix as Klu needs it // // int Amesos_Klu::ConvertToKluCRS(bool firsttime) { ResetTimer(0); // // Convert matrix to the form that Klu expects (Ap, VecAi, VecAval) // if (MyPID_==0) { assert( NumGlobalElements_ == SerialMatrix_->NumGlobalRows()); assert( NumGlobalElements_ == SerialMatrix_->NumGlobalCols()); if ( ! AddZeroToDiag_ ) { assert( numentries_ == SerialMatrix_->NumGlobalNonzeros()) ; } else { numentries_ = SerialMatrix_->NumGlobalNonzeros() ; } Epetra_CrsMatrix *CrsMatrix = dynamic_cast<Epetra_CrsMatrix *>(SerialMatrix_); bool StorageOptimized = ( CrsMatrix != 0 && CrsMatrix->StorageOptimized() ); if ( AddToDiag_ != 0.0 ) StorageOptimized = false ; if ( firsttime ) { Ap.resize( NumGlobalElements_+1 ); if ( ! 
StorageOptimized ) { VecAi.resize( EPETRA_MAX( NumGlobalElements_, numentries_) ) ; VecAval.resize( EPETRA_MAX( NumGlobalElements_, numentries_) ) ; Ai = &VecAi[0]; Aval = &VecAval[0]; } } double *RowValues; int *ColIndices; int NumEntriesThisRow; if( StorageOptimized ) { if ( firsttime ) { Ap[0] = 0; for ( int MyRow = 0; MyRow <NumGlobalElements_; MyRow++ ) { if( CrsMatrix-> ExtractMyRowView( MyRow, NumEntriesThisRow, RowValues, ColIndices ) != 0 ) AMESOS_CHK_ERR( -10 ); if ( MyRow == 0 ) { Ai = ColIndices ; Aval = RowValues ; } Ap[MyRow+1] = Ap[MyRow] + NumEntriesThisRow ; } } } else { int Ai_index = 0 ; int MyRow; int MaxNumEntries_ = SerialMatrix_->MaxNumEntries(); if ( firsttime && CrsMatrix == 0 ) { ColIndicesV_.resize(MaxNumEntries_); RowValuesV_.resize(MaxNumEntries_); } for ( MyRow = 0; MyRow <NumGlobalElements_; MyRow++ ) { if ( CrsMatrix != 0 ) { if( CrsMatrix-> ExtractMyRowView( MyRow, NumEntriesThisRow, RowValues, ColIndices ) != 0 ) AMESOS_CHK_ERR( -11 ); } else { if( SerialMatrix_-> ExtractMyRowCopy( MyRow, MaxNumEntries_, NumEntriesThisRow, &RowValuesV_[0], &ColIndicesV_[0] ) != 0 ) AMESOS_CHK_ERR( -12 ); RowValues = &RowValuesV_[0]; ColIndices = &ColIndicesV_[0]; } if ( firsttime ) { Ap[MyRow] = Ai_index ; for ( int j = 0; j < NumEntriesThisRow; j++ ) { VecAi[Ai_index] = ColIndices[j] ; // assert( VecAi[Ai_index] == Ai[Ai_index] ) ; VecAval[Ai_index] = RowValues[j] ; // We have to do this because of the hacks to get aroun bug #1502 if (ColIndices[j] == MyRow) { VecAval[Ai_index] += AddToDiag_; } Ai_index++; } } else { for ( int j = 0; j < NumEntriesThisRow; j++ ) { VecAval[Ai_index] = RowValues[j] ; if (ColIndices[j] == MyRow) { VecAval[Ai_index] += AddToDiag_; } Ai_index++; } } } Ap[MyRow] = Ai_index ; } } MtxConvTime_ = AddTime("Total matrix conversion time", MtxConvTime_, 0); return 0; }