int Epetra_PETScAIJMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { (void)TransA; int NumVectors = X.NumVectors(); if (NumVectors!=Y.NumVectors()) EPETRA_CHK_ERR(-1); // X and Y must have same number of vectors double ** xptrs; double ** yptrs; X.ExtractView(&xptrs); Y.ExtractView(&yptrs); if (RowMatrixImporter()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(RowMatrixColMap(),NumVectors); ImportVector_->Import(X, *RowMatrixImporter(), Insert); ImportVector_->ExtractView(&xptrs); } double *vals=0; int length; Vec petscX, petscY; int ierr; for (int i=0; i<NumVectors; i++) { # ifdef HAVE_MPI ierr=VecCreateMPIWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr); ierr=VecCreateMPIWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr); # else //FIXME untested ierr=VecCreateSeqWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr); ierr=VecCreateSeqWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr); # endif ierr = MatMult(Amat_,petscX,petscY);CHKERRQ(ierr); ierr = VecGetArray(petscY,&vals);CHKERRQ(ierr); ierr = VecGetLocalSize(petscY,&length);CHKERRQ(ierr); for (int j=0; j<length; j++) yptrs[i][j] = vals[j]; ierr = VecRestoreArray(petscY,&vals);CHKERRQ(ierr); } VecDestroy(petscX); VecDestroy(petscY); double flops = NumGlobalNonzeros(); flops *= 2.0; flops *= (double) NumVectors; UpdateFlops(flops); return(0); } //Multiply()
// Writes the entries of A to handle, column by column, from rank 0.
//
// The routine expects all of A's data to live on rank 0: every other rank
// must own zero rows, and rank 0 must own GlobalLength() rows. If that does
// not hold, nothing is written on the offending rank and -1 is returned on
// all ranks (the local status is reduced with MinAll).
//
// mmFormat == true  : one value per line.
// mmFormat == false : the values of one vector on one line, space separated.
int writeMultiVector(FILE * handle, const Epetra_MultiVector & A, bool mmFormat) {

  int ierr = 0;
  const int length = A.GlobalLength();
  const int numVectors = A.NumVectors();
  const Epetra_Comm & comm = A.Map().Comm();

  if (comm.MyPID()!=0) {
    if (A.MyLength()!=0) ierr = -1;
  }
  else if (length!=A.MyLength()) {
    // Rank 0 does not hold every row: reading `length` entries per column
    // would run past the local data, so report the error and write nothing.
    ierr = -1;
  }
  else {
    for (int j=0; j<numVectors; j++) {
      const double * column = A[j];
      for (int i=0; i<length; i++) {
        if (mmFormat)
          fprintf(handle, "%22.16e\n", column[i]);
        else
          fprintf(handle, "%22.16e ", column[i]);
      }
      if (!mmFormat) fprintf(handle, "%s", "\n");
    }
  }

  int ierrGlobal;
  comm.MinAll(&ierr, &ierrGlobal, 1); // If any processor has -1, all return -1
  return(ierrGlobal);
}
// ============================================================================= void VIO::EpetraMesh::Writer:: setValues( const Epetra_MultiVector & x, const Teuchos::Array<std::string> & scalarsNames ) { unsigned int numVecs = x.NumVectors(); unsigned int numVariables = x.GlobalLength(); unsigned int numNodes = mesh_->getNodesMap()->NumGlobalElements(); // make sure the sizes match the mesh if ( !mesh_.is_null() ) TEUCHOS_ASSERT_EQUALITY( numVariables, 2*numNodes ); // cast into a vtkUnstructuredGrid vtkSmartPointer<vtkUnstructuredGrid> vtkMesh = dynamic_cast<vtkUnstructuredGrid*> ( vtkDataSet_.GetPointer() ); TEUCHOS_ASSERT_INEQUALITY( 0, !=, vtkMesh ); // get scalarsNames, and insert default names if empty Teuchos::Array<std::string> scNames ( scalarsNames ); if ( scNames.empty() ) { scNames.resize ( numVecs ); for ( int vec=0; vec<numVecs; vec++ ) scNames[vec] = "x" + EpetraExt::toString ( vec ); } // fill the scalar field vtkSmartPointer<vtkDoubleArray> scalars = vtkSmartPointer<vtkDoubleArray>::New(); // real and imaginary part scalars->SetNumberOfComponents ( 2 ); for ( int vec=0; vec<numVecs; vec++ ) { scalars->SetName ( scNames[vec].c_str() ); for ( int k=0; k<numNodes; k++ ) { // const unsigned int dof_id = libmeshMesh_->node(k).dof_number(0,k,0); scalars->InsertNextValue ( x[vec][2*k] ); scalars->InsertNextValue ( x[vec][2*k+1] ); } vtkMesh->GetPointData()->AddArray ( scalars ); } return; }
// Convert a Epetra_MultiVector with assumed block structure dictated by the // vector space into a Thyra::MultiVectorBase object. // const Teuchos::RCP<const Thyra::MultiVectorBase<double> > blockEpetraToThyra(const Epetra_MultiVector & e,const Teuchos::RCP<const Thyra::VectorSpaceBase<double> > & vs) void blockEpetraToThyra(const Epetra_MultiVector & epetraX,const Teuchos::Ptr<Thyra::MultiVectorBase<double> > & thyraX) { TEUCHOS_ASSERT(thyraX->range()->dim()==epetraX.GlobalLength()); // extract local information from the Epetra_MultiVector int leadingDim=0,numVectors=0,localDim=0; double * epetraData=0; epetraX.ExtractView(&epetraData,&leadingDim); numVectors = epetraX.NumVectors(); blockEpetraToThyra(numVectors,epetraData,leadingDim,thyraX.ptr(),localDim); TEUCHOS_ASSERT(localDim==epetraX.MyLength()); }
// ============================================================================ void EpetraExt::XMLWriter:: Write(const std::string& Label, const Epetra_MultiVector& MultiVector) { TEUCHOS_TEST_FOR_EXCEPTION(IsOpen_ == false, std::logic_error, "No file has been opened"); int Length = MultiVector.GlobalLength(); int NumVectors = MultiVector.NumVectors(); if (Comm_.MyPID() == 0) { std::ofstream of(FileName_.c_str(), std::ios::app); of << "<MultiVector Label=\"" << Label << "\" Length=\"" << Length << '"' << " NumVectors=\"" << NumVectors << '"' << " Type=\"double\">" << std::endl; } for (int iproc = 0; iproc < Comm_.NumProc(); iproc++) { if (iproc == Comm_.MyPID()) { std::ofstream of(FileName_.c_str(), std::ios::app); of.precision(15); for (int i = 0; i < MultiVector.MyLength(); ++i) { for (int j = 0; j < NumVectors; ++j) of << std::setiosflags(std::ios::scientific) << MultiVector[j][i] << " "; of << std::endl; } of.close(); } Comm_.Barrier(); } if (Comm_.MyPID() == 0) { std::ofstream of(FileName_.c_str(), std::ios::app); of << "</MultiVector>" << std::endl; of.close(); } }
// Copies the values of A into the buffer *matlabApr on rank 0 and advances
// *matlabApr by the global length of A.
//
// The routine expects all of A's data to live on rank 0: every other rank
// must own zero rows, and rank 0 must own GlobalLength() rows. If that does
// not hold, nothing is copied on the offending rank and -1 is returned on all
// ranks (the local status is reduced with MinAll).
//
// NOTE(review): the copy takes GlobalLength()*NumVectors() contiguous doubles
// starting at A.Values(), which presumably requires A to be stored with a
// column stride equal to its length — confirm for strided/view multivectors.
// NOTE(review): the output pointer advances by `length` only, not by
// length*numVectors; left unchanged because callers may rely on it — confirm.
int DoCopyMultiVector(double** matlabApr, const Epetra_MultiVector& A) {

  int ierr = 0;
  const int length = A.GlobalLength();
  const int numVectors = A.NumVectors();
  const Epetra_Comm & comm = A.Map().Comm();

  if (comm.MyPID()!=0) {
    if (A.MyLength()!=0) ierr = -1;
  }
  else if (length!=A.MyLength()) {
    // Rank 0 does not hold every row: copying `length` rows per column would
    // read past the local data, so report the error and copy nothing.
    ierr = -1;
  }
  else {
    double* matlabAvalues = *matlabApr;
    double* Aptr = A.Values();
    memcpy((void *)matlabAvalues, (void *)Aptr, sizeof(*Aptr) * length * numVectors);
    *matlabApr += length;
  }

  int ierrGlobal;
  comm.MinAll(&ierr, &ierrGlobal, 1); // If any processor has -1, all return -1
  return(ierrGlobal);
}
// Convert a Thyra::MultiVectorBase object to a Epetra_MultiVector object with // the map defined by the Epetra_Map. // const Teuchos::RCP<const Epetra_MultiVector> // blockThyraToEpetra(const Teuchos::RCP<const Thyra::MultiVectorBase<double> > & tX,const RCP<const Epetra_Map> & map) void blockThyraToEpetra(const Teuchos::RCP<const Thyra::MultiVectorBase<double> > & thyraX,Epetra_MultiVector & epetraX) { // build an Epetra_MultiVector object int numVectors = thyraX->domain()->dim(); // make sure the number of vectors are the same TEUCHOS_ASSERT(numVectors==epetraX.NumVectors()); TEUCHOS_ASSERT(thyraX->range()->dim()==epetraX.GlobalLength()); // extract local information from the Epetra_MultiVector int leadingDim=0,localDim=0; double * epetraData=0; epetraX.ExtractView(&epetraData,&leadingDim); // perform recursive copy blockThyraToEpetra(numVectors,epetraData,leadingDim,thyraX,localDim); // sanity check TEUCHOS_ASSERT(localDim==epetraX.Map().NumMyElements()); }
int MultiVectorToMatrixMarketFile( const char *filename, const Epetra_MultiVector & A, const char * matrixName, const char *matrixDescription, bool writeHeader) { int M = A.GlobalLength(); int N = A.NumVectors(); FILE * handle = 0; if (A.Map().Comm().MyPID()==0) { // Only PE 0 does this section handle = fopen(filename,"w"); if (!handle) return(-1); MM_typecode matcode; mm_initialize_typecode(&matcode); mm_set_matrix(&matcode); mm_set_array(&matcode); mm_set_real(&matcode); if (writeHeader==true) { // Only write header if requested (true by default) if (mm_write_banner(handle, matcode)) return(-1); if (matrixName!=0) fprintf(handle, "%% \n%% %s\n", matrixName); if (matrixDescription!=0) fprintf(handle, "%% %s\n%% \n", matrixDescription); if (mm_write_mtx_array_size(handle, M, N)) return(-1); } } if (MultiVectorToMatrixMarketHandle(handle, A)) return(-1); // Everybody calls this routine if (A.Map().Comm().MyPID()==0) // Only PE 0 opened a file if (fclose(handle)) return(-1); return(0); }
int main(int argc, char** argv) { int fail = 0, dim=0; #ifdef HAVE_MPI MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &localProc); MPI_Comm_size(MPI_COMM_WORLD, &numProcs); const Epetra_MpiComm Comm(MPI_COMM_WORLD); #else const Epetra_SerialComm Comm; #endif // ============================================================= // get command line options // ============================================================= Teuchos::CommandLineProcessor clp(false,true); std::string *inputFile = new std::string("simple.coords"); bool verbose = false; clp.setOption( "f", inputFile, "Name of coordinate input file"); clp.setOption( "v", "q", &verbose, "Display coordinates and weights before and after partitioning."); Teuchos::CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv); if( parse_return == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED){ #ifdef HAVE_MPI MPI_Finalize(); #endif return 0; } if( parse_return != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { #ifdef HAVE_MPI MPI_Finalize(); #endif return 1; } // ============================================================= // Open file of coordinates and distribute them across processes // so they are unbalanced. // ============================================================= Epetra_MultiVector *mv = ispatest::file2multivector(Comm, *inputFile); if (!mv || ((dim = mv->NumVectors()) < 1)){ if (localProc == 0) std::cerr << "Invalid input file " << *inputFile << std::endl; exit(1); } if (localProc == 0){ std::cerr << "Found input file " << *inputFile << ", " ; std::cerr << dim << " dimensional coordinates" << std::endl; } delete inputFile; int base = mv->Map().IndexBase(); int globalSize = mv->GlobalLength(); int myShare = 0; int n = numProcs - 1; if (n){ if (localProc < n){ int oneShare = globalSize / n; int leftOver = globalSize - (n * oneShare); myShare = oneShare + ((localProc < leftOver) ? 
1 : 0); } } else{ myShare = globalSize; } Epetra_BlockMap unbalancedMap(globalSize, myShare, 1, base, mv->Map().Comm()); Epetra_Import importer(unbalancedMap, mv->Map()); Epetra_MultiVector umv(unbalancedMap, dim); umv.Import(*mv, importer, Insert); delete mv; Teuchos::RCP<const Epetra_MultiVector> coords = Teuchos::rcp(new const Epetra_MultiVector(umv)); // ============================================================= // Create some different coordinate weight vectors // ============================================================= Epetra_MultiVector *unitWgts = ispatest::makeWeights(coords->Map(), &ispatest::unitWeights); Epetra_MultiVector *veeWgts = ispatest::makeWeights(coords->Map(), &ispatest::veeWeights); Epetra_MultiVector *altWgts = ispatest::makeWeights(coords->Map(), &ispatest::alternateWeights); Teuchos::RCP<const Epetra_MultiVector> unit_weights_rcp = Teuchos::rcp(unitWgts); Teuchos::RCP<const Epetra_MultiVector> vee_weights_rcp = Teuchos::rcp(veeWgts); Teuchos::RCP<const Epetra_MultiVector> alt_weights_rcp = Teuchos::rcp(altWgts); if (localProc == 0){ std::cerr << "Unit weights: Each object has weight 1.0" << std::endl; std::cerr << "V weights: Low and high GIDs have high weights, center GIDs have low weights" << std::endl; std::cerr << "Alternate weights: Objects on even rank processes have one weight, on odd another weight" << std::endl; std::cerr << std::endl; } // ====================================================================== // Create a parameter list for Zoltan, and one for internal partitioning // ====================================================================== Teuchos::ParameterList internalParams; internalParams.set("PARTITIONING_METHOD", "SIMPLE_LINEAR"); Teuchos::ParameterList zoltanParams; Teuchos::ParameterList sublist = zoltanParams.sublist("ZOLTAN"); //sublist.set("DEBUG_LEVEL", "1"); // Zoltan will print out parameters //sublist.set("DEBUG_LEVEL", "5"); // proc 0 will trace Zoltan calls //sublist.set("DEBUG_MEMORY", 
"2"); // Zoltan will trace alloc & free // ============================================================= // Run some tests // ============================================================= zoltanParams.set("PARTITIONING METHOD", "RCB"); if (localProc == 0){ std::cerr << "RCB - unit weights" << std::endl; } fail = run_test(coords, unit_weights_rcp, zoltanParams); if (fail) goto failure; if (localProc == 0){ std::cerr << "PASS" << std::endl << std::endl; } // ************************************************************* if (localProc == 0){ std::cerr << "HSFC - V weights" << std::endl; } zoltanParams.set("PARTITIONING METHOD", "HSFC"); fail = run_test(coords, vee_weights_rcp, zoltanParams); if (fail) goto failure; if (localProc == 0){ std::cerr << "PASS" << std::endl << std::endl; } // ************************************************************* if (localProc == 0){ std::cerr << "RIB - alternate weights" << std::endl; } zoltanParams.set("PARTITIONING METHOD", "RIB"); fail = run_test(coords, alt_weights_rcp, zoltanParams); if (fail) goto failure; if (localProc == 0){ std::cerr << "PASS" << std::endl << std::endl; } // ************************************************************* if (localProc == 0){ std::cerr << "RIB - no weights supplied" << std::endl; } zoltanParams.set("PARTITIONING METHOD", "RIB"); fail = run_test(coords, zoltanParams); if (fail) goto failure; if (localProc == 0){ std::cerr << "PASS" << std::endl << std::endl; } // ************************************************************* goto done; failure: if (localProc == 0){ std::cerr << "FAIL: test failed" << std::endl; } done: #ifdef HAVE_MPI MPI_Finalize(); #endif return fail; }
int ARPACKm3::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) { // Computes eigenvalues and the corresponding eigenvectors // of the generalized eigenvalue problem // // K X = M X Lambda // // using ARPACK (mode 3). // // The convergence test is provided by ARPACK. // // Note that if M is not specified, then K X = X Lambda is solved. // (using the mode for generalized eigenvalue problem). // // Input variables: // // numEigen (integer) = Number of eigenmodes requested // // Q (Epetra_MultiVector) = Initial search space // The number of columns of Q defines the size of search space (=NCV). // The rows of X are distributed across processors. // As a rule of thumb in ARPACK User's guide, NCV >= 2*numEigen. // At exit, the first numEigen locations contain the eigenvectors requested. // // lambda (array of doubles) = Converged eigenvalues // The length of this array is equal to the number of columns in Q. // At exit, the first numEigen locations contain the eigenvalues requested. // // startingEV (integer) = Number of eigenmodes already stored in Q // A linear combination of these vectors is made to define the starting // vector, placed in resid. // // Return information on status of computation // // info >= 0 >> Number of converged eigenpairs at the end of computation // // // Failure due to input arguments // // info = - 1 >> The stiffness matrix K has not been specified. // info = - 2 >> The maps for the matrix K and the matrix M differ. // info = - 3 >> The maps for the matrix K and the preconditioner P differ. // info = - 4 >> The maps for the vectors and the matrix K differ. // info = - 5 >> Q is too small for the number of eigenvalues requested. // info = - 6 >> Q is too small for the computation parameters. // // info = - 8 >> numEigen must be smaller than the dimension of the matrix. 
// // info = - 30 >> MEMORY // // See ARPACK documentation for the meaning of INFO if (numEigen <= startingEV) { return numEigen; } int info = myVerify.inputArguments(numEigen, K, M, 0, Q, minimumSpaceDimension(numEigen)); if (info < 0) return info; int myPid = MyComm.MyPID(); int localSize = Q.MyLength(); int NCV = Q.NumVectors(); int knownEV = 0; if (NCV > Q.GlobalLength()) { if (numEigen >= Q.GlobalLength()) { cerr << endl; cerr << " !! The number of requested eigenvalues must be smaller than the dimension"; cerr << " of the matrix !!\n"; cerr << endl; return -8; } NCV = Q.GlobalLength(); } int localVerbose = verbose*(myPid == 0); // Define data for ARPACK highMem = (highMem > currentSize()) ? highMem : currentSize(); int ido = 0; int lwI = 22 + NCV; int *wI = new (nothrow) int[lwI]; if (wI == 0) { return -30; } memRequested += sizeof(int)*lwI/(1024.0*1024.0); int *iparam = wI; int *ipntr = wI + 11; int *select = wI + 22; int lworkl = NCV*(NCV+8); int lwD = lworkl + 4*localSize; double *wD = new (nothrow) double[lwD]; if (wD == 0) { delete[] wI; return -30; } memRequested += sizeof(double)*(4*localSize+lworkl)/(1024.0*1024.0); double *pointer = wD; double *workl = pointer; pointer = pointer + lworkl; double *resid = pointer; pointer = pointer + localSize; double *workd = pointer; double *v = Q.Values(); highMem = (highMem > currentSize()) ? highMem : currentSize(); double sigma = 0.0; if (startingEV > 0) { // Define the initial starting vector memset(resid, 0, localSize*sizeof(double)); for (int jj = 0; jj < startingEV; ++jj) for (int ii = 0; ii < localSize; ++ii) resid[ii] += v[ii + jj*localSize]; info = 1; } iparam[1-1] = 1; iparam[3-1] = maxIterEigenSolve; iparam[7-1] = 3; // The fourth parameter forces to use the convergence test provided by ARPACK. // This requires a customization of ARPACK (provided by R. Lehoucq). 
iparam[4-1] = 0; Epetra_Vector v1(View, Q.Map(), workd); Epetra_Vector v2(View, Q.Map(), workd + localSize); Epetra_Vector v3(View, Q.Map(), workd + 2*localSize); double *vTmp = new (nothrow) double[localSize]; if (vTmp == 0) { delete[] wI; delete[] wD; return -30; } memRequested += sizeof(double)*localSize/(1024.0*1024.0); highMem = (highMem > currentSize()) ? highMem : currentSize(); if (localVerbose > 0) { cout << endl; cout << " *|* Problem: "; if (M) cout << "K*Q = M*Q D "; else cout << "K*Q = Q D "; cout << endl; cout << " *|* Algorithm = ARPACK (mode 3)" << endl; cout << " *|* Number of requested eigenvalues = " << numEigen << endl; cout.precision(2); cout.setf(ios::scientific, ios::floatfield); cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl; if (startingEV > 0) cout << " *|* User-defined starting vector (Combination of " << startingEV << " vectors)\n"; cout << "\n -- Start iterations -- \n"; } #ifdef EPETRA_MPI Epetra_MpiComm *MPIComm = dynamic_cast<Epetra_MpiComm *>(const_cast<Epetra_Comm*>(&MyComm)); #endif timeOuterLoop -= MyWatch.WallTime(); while (ido != 99) { highMem = (highMem > currentSize()) ? 
highMem : currentSize(); #ifdef EPETRA_MPI if (MPIComm) callFortran.PSAUPD(MPIComm->Comm(), &ido, 'G', localSize, which, numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info, localVerbose); else callFortran.SAUPD(&ido, 'G', localSize, which, numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info, localVerbose); #else callFortran.SAUPD(&ido, 'G', localSize, which, numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info, localVerbose); #endif if (ido == -1) { // Apply the mass matrix v3.ResetView(workd + ipntr[0] - 1); v1.ResetView(vTmp); timeMassOp -= MyWatch.WallTime(); if (M) M->Apply(v3, v1); else memcpy(v1.Values(), v3.Values(), localSize*sizeof(double)); timeMassOp += MyWatch.WallTime(); massOp += 1; // Solve the stiffness problem v2.ResetView(workd + ipntr[1] - 1); timeStifOp -= MyWatch.WallTime(); K->ApplyInverse(v1, v2); timeStifOp += MyWatch.WallTime(); stifOp += 1; continue; } // if (ido == -1) if (ido == 1) { // Solve the stiffness problem v1.ResetView(workd + ipntr[2] - 1); v2.ResetView(workd + ipntr[1] - 1); timeStifOp -= MyWatch.WallTime(); K->ApplyInverse(v1, v2); timeStifOp += MyWatch.WallTime(); stifOp += 1; continue; } // if (ido == 1) if (ido == 2) { // Apply the mass matrix v1.ResetView(workd + ipntr[0] - 1); v2.ResetView(workd + ipntr[1] - 1); timeMassOp -= MyWatch.WallTime(); if (M) M->Apply(v1, v2); else memcpy(v2.Values(), v1.Values(), localSize*sizeof(double)); timeMassOp += MyWatch.WallTime(); massOp += 1; continue; } // if (ido == 2) } // while (ido != 99) timeOuterLoop += MyWatch.WallTime(); highMem = (highMem > currentSize()) ? 
highMem : currentSize(); if (info < 0) { if (myPid == 0) { cerr << endl; cerr << " Error with DSAUPD, info = " << info << endl; cerr << endl; } } else { // Compute the eigenvectors timePostProce -= MyWatch.WallTime(); #ifdef EPETRA_MPI if (MPIComm) callFortran.PSEUPD(MPIComm->Comm(), 1, 'A', select, lambda, v, localSize, sigma, 'G', localSize, which, numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info); else callFortran.SEUPD(1, 'A', select, lambda, v, localSize, sigma, 'G', localSize, which, numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info); #else callFortran.SEUPD(1, 'A', select, lambda, v, localSize, sigma, 'G', localSize, which, numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info); #endif timePostProce += MyWatch.WallTime(); highMem = (highMem > currentSize()) ? highMem : currentSize(); // Treat the error if (info != 0) { if (myPid == 0) { cerr << endl; cerr << " Error with DSEUPD, info = " << info << endl; cerr << endl; } } } // if (info < 0) if (info == 0) { outerIter = iparam[3-1]; knownEV = iparam[5-1]; orthoOp = iparam[11-1]; } delete[] wI; delete[] wD; delete[] vTmp; return (info == 0) ? knownEV : info; }
int ModifiedARPACKm3::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV, const Epetra_MultiVector *orthoVec) { // Computes the smallest eigenvalues and the corresponding eigenvectors // of the generalized eigenvalue problem // // K X = M X Lambda // // using ModifiedARPACK (mode 3). // // The convergence test is performed outisde of ARPACK // // || Kx - Mx lambda || < tol*lambda // // The norm ||.|| can be specified by the user through the array normWeight. // By default, the L2 Euclidean norm is used. // // Note that if M is not specified, then K X = X Lambda is solved. // (using the mode for generalized eigenvalue problem). // // Input variables: // // numEigen (integer) = Number of eigenmodes requested // // Q (Epetra_MultiVector) = Initial search space // The number of columns of Q defines the size of search space (=NCV). // The rows of X are distributed across processors. // As a rule of thumb in ARPACK User's guide, NCV >= 2*numEigen. // At exit, the first numEigen locations contain the eigenvectors requested. // // lambda (array of doubles) = Converged eigenvalues // The length of this array is equal to the number of columns in Q. // At exit, the first numEigen locations contain the eigenvalues requested. // // startingEV (integer) = Number of eigenmodes already stored in Q // A linear combination of these vectors is made to define the starting // vector, placed in resid. // // orthoVec (Pointer to Epetra_MultiVector) = Space to be orthogonal to // The computation is performed in the orthogonal of the space spanned // by the columns vectors in orthoVec. // // Return information on status of computation // // info >= 0 >> Number of converged eigenpairs at the end of computation // // // Failure due to input arguments // // info = - 1 >> The stiffness matrix K has not been specified. // info = - 2 >> The maps for the matrix K and the matrix M differ. // info = - 3 >> The maps for the matrix K and the preconditioner P differ. 
// info = - 4 >> The maps for the vectors and the matrix K differ. // info = - 5 >> Q is too small for the number of eigenvalues requested. // info = - 6 >> Q is too small for the computation parameters. // // info = - 8 >> numEigen must be smaller than the dimension of the matrix. // // info = - 30 >> MEMORY // // See ARPACK documentation for the meaning of INFO if (numEigen <= startingEV) { return numEigen; } int info = myVerify.inputArguments(numEigen, K, M, 0, Q, minimumSpaceDimension(numEigen)); if (info < 0) return info; int myPid = MyComm.MyPID(); int localSize = Q.MyLength(); int NCV = Q.NumVectors(); int knownEV = 0; if (NCV > Q.GlobalLength()) { if (numEigen >= Q.GlobalLength()) { cerr << endl; cerr << " !! The number of requested eigenvalues must be smaller than the dimension"; cerr << " of the matrix !!\n"; cerr << endl; return -8; } NCV = Q.GlobalLength(); } // Get the weight for approximating the M-inverse norm Epetra_Vector *vectWeight = 0; if (normWeight) { vectWeight = new Epetra_Vector(View, Q.Map(), normWeight); } int localVerbose = verbose*(myPid == 0); // Define data for ARPACK // // UH (10/17/03) Note that workl is also used // * to store the eigenvectors of the tridiagonal matrix // * as a workspace for DSTEQR // * as a workspace for recovering the global eigenvectors highMem = (highMem > currentSize()) ? 
highMem : currentSize(); int ido = 0; int lwI = 22; int *wI = new (nothrow) int[lwI]; if (wI == 0) { if (vectWeight) delete vectWeight; return -30; } memRequested += sizeof(int)*lwI/(1024.0*1024.0); int *iparam = wI; int *ipntr = wI + 11; int lworkl = NCV*(NCV+8); int lwD = lworkl + 4*localSize; double *wD = new (nothrow) double[lwD]; if (wD == 0) { if (vectWeight) delete vectWeight; delete[] wI; return -30; } memRequested += sizeof(double)*(4*localSize+lworkl)/(1024.0*1024.0); double *pointer = wD; double *workl = pointer; pointer = pointer + lworkl; double *resid = pointer; pointer = pointer + localSize; double *workd = pointer; double *v = Q.Values(); highMem = (highMem > currentSize()) ? highMem : currentSize(); if (startingEV > 0) { // Define the initial starting vector memset(resid, 0, localSize*sizeof(double)); for (int jj = 0; jj < startingEV; ++jj) for (int ii = 0; ii < localSize; ++ii) resid[ii] += v[ii + jj*localSize]; info = 1; } iparam[1-1] = 1; iparam[3-1] = maxIterEigenSolve; iparam[7-1] = 3; // The fourth parameter forces to use the convergence test provided by ARPACK. // This requires a customization of ARPACK (provided by R. Lehoucq). iparam[4-1] = 1; Epetra_Vector v1(View, Q.Map(), workd); Epetra_Vector v2(View, Q.Map(), workd + localSize); Epetra_Vector v3(View, Q.Map(), workd + 2*localSize); // Define further storage for the new residual check // Use a block of vectors to compute the residuals more quickly. // Note that workd could be used if memory becomes an issue. int loopZ = (NCV > 10) ? 10 : NCV; int lwD2 = localSize + 2*NCV-1 + NCV; lwD2 += (M) ? 3*loopZ*localSize : 2*loopZ*localSize; double *wD2 = new (nothrow) double[lwD2]; if (wD2 == 0) { if (vectWeight) delete vectWeight; delete[] wI; delete[] wD; return -30; } memRequested += sizeof(double)*lwD2/(1024.0*1024.0); pointer = wD2; // vTmp is used when ido = -1 double *vTmp = pointer; pointer = pointer + localSize; // dd and ee stores the tridiagonal matrix. 
// Note that DSTEQR destroys the contents of the input arrays. double *dd = pointer; pointer = pointer + NCV; double *ee = pointer; pointer = pointer + NCV-1; double *vz = pointer; pointer = pointer + loopZ*localSize; Epetra_MultiVector approxEV(View, Q.Map(), vz, localSize, loopZ); double *kvz = pointer; pointer = pointer + loopZ*localSize; Epetra_MultiVector KapproxEV(View, Q.Map(), kvz, localSize, loopZ); double *mvz = (M) ? pointer : vz; pointer = (M) ? pointer + loopZ*localSize : pointer; Epetra_MultiVector MapproxEV(View, Q.Map(), mvz, localSize, loopZ); double *normR = pointer; // zz contains the eigenvectors of the tridiagonal matrix. // workt is a workspace for DSTEQR. // Note that zz and workt will use parts of workl. double *zz, *workt; highMem = (highMem > currentSize()) ? highMem : currentSize(); // Define an array to store the residuals history if (localVerbose > 2) { resHistory = new (nothrow) double[maxIterEigenSolve*NCV]; if (resHistory == 0) { if (vectWeight) delete vectWeight; delete[] wI; delete[] wD; delete[] wD2; return -30; } historyCount = 0; } highMem = (highMem > currentSize()) ? 
highMem : currentSize(); if (localVerbose > 0) { cout << endl; cout << " *|* Problem: "; if (M) cout << "K*Q = M*Q D "; else cout << "K*Q = Q D "; cout << endl; cout << " *|* Algorithm = ARPACK (Mode 3, modified such that user checks convergence)" << endl; cout << " *|* Number of requested eigenvalues = " << numEigen << endl; cout.precision(2); cout.setf(ios::scientific, ios::floatfield); cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl; if (startingEV > 0) cout << " *|* User-defined starting vector (Combination of " << startingEV << " vectors)\n"; cout << " *|* Norm used for convergence: "; if (normWeight) cout << "weighted L2-norm with user-provided weights" << endl; else cout << "L^2-norm" << endl; if (orthoVec) cout << " *|* Size of orthogonal subspace = " << orthoVec->NumVectors() << endl; cout << "\n -- Start iterations -- \n"; } #ifdef EPETRA_MPI Epetra_MpiComm *MPIComm = dynamic_cast<Epetra_MpiComm *>(const_cast<Epetra_Comm*>(&MyComm)); #endif timeOuterLoop -= MyWatch.WallTime(); while (ido != 99) { highMem = (highMem > currentSize()) ? 
highMem : currentSize(); #ifdef EPETRA_MPI if (MPIComm) callFortran.PSAUPD(MPIComm->Comm(), &ido, 'G', localSize, "LM", numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info, 0); else callFortran.SAUPD(&ido, 'G', localSize, "LM", numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info, 0); #else callFortran.SAUPD(&ido, 'G', localSize, "LM", numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info, 0); #endif if (ido == -1) { // Apply the mass matrix v3.ResetView(workd + ipntr[0] - 1); v1.ResetView(vTmp); timeMassOp -= MyWatch.WallTime(); if (M) M->Apply(v3, v1); else memcpy(v1.Values(), v3.Values(), localSize*sizeof(double)); timeMassOp += MyWatch.WallTime(); massOp += 1; if ((orthoVec) && (verbose > 3)) { // Check the orthogonality double maxDot = myVerify.errorOrthogonality(orthoVec, &v1, 0); if (myPid == 0) { cout << " Maximum Euclidean dot product against orthogonal space (Before Solve) = "; cout << maxDot << endl; } } // Solve the stiffness problem v2.ResetView(workd + ipntr[1] - 1); timeStifOp -= MyWatch.WallTime(); K->ApplyInverse(v1, v2); timeStifOp += MyWatch.WallTime(); stifOp += 1; // Project the solution vector if needed // Note: Use mvz as workspace if (orthoVec) { Epetra_Vector Mv2(View, v2.Map(), mvz); if (M) M->Apply(v2, Mv2); else memcpy(Mv2.Values(), v2.Values(), localSize*sizeof(double)); modalTool.massOrthonormalize(v2, Mv2, M, *orthoVec, 1, 1); } if ((orthoVec) && (verbose > 3)) { // Check the orthogonality double maxDot = myVerify.errorOrthogonality(orthoVec, &v2, M); if (myPid == 0) { cout << " Maximum M-dot product against orthogonal space (After Solve) = "; cout << maxDot << endl; } } continue; } // if (ido == -1) if (ido == 1) { // Solve the stiffness problem v1.ResetView(workd + ipntr[2] - 1); v2.ResetView(workd + ipntr[1] - 1); if ((orthoVec) && (verbose > 3)) { // Check the orthogonality double maxDot = 
myVerify.errorOrthogonality(orthoVec, &v1, 0); if (myPid == 0) { cout << " Maximum Euclidean dot product against orthogonal space (Before Solve) = "; cout << maxDot << endl; } } timeStifOp -= MyWatch.WallTime(); K->ApplyInverse(v1, v2); timeStifOp += MyWatch.WallTime(); stifOp += 1; // Project the solution vector if needed // Note: Use mvz as workspace if (orthoVec) { Epetra_Vector Mv2(View, v2.Map(), mvz); if (M) M->Apply(v2, Mv2); else memcpy(Mv2.Values(), v2.Values(), localSize*sizeof(double)); modalTool.massOrthonormalize(v2, Mv2, M, *orthoVec, 1, 1); } if ((orthoVec) && (verbose > 3)) { // Check the orthogonality double maxDot = myVerify.errorOrthogonality(orthoVec, &v2, M); if (myPid == 0) { cout << " Maximum M-dot product against orthogonal space (After Solve) = "; cout << maxDot << endl; } } continue; } // if (ido == 1) if (ido == 2) { // Apply the mass matrix v1.ResetView(workd + ipntr[0] - 1); v2.ResetView(workd + ipntr[1] - 1); timeMassOp -= MyWatch.WallTime(); if (M) M->Apply(v1, v2); else memcpy(v2.Values(), v1.Values(), localSize*sizeof(double)); timeMassOp += MyWatch.WallTime(); massOp += 1; continue; } // if (ido == 2) if (ido == 4) { timeResidual -= MyWatch.WallTime(); // Copy the main diagonal of T memcpy(dd, workl + NCV + ipntr[4] - 1, NCV*sizeof(double)); // Copy the lower diagonal of T memcpy(ee, workl + ipntr[4], (NCV-1)*sizeof(double)); // Compute the eigenpairs of the tridiagonal matrix zz = workl + 4*NCV; workt = workl + 4*NCV + NCV*NCV; callFortran.STEQR('I', NCV, dd, ee, zz, NCV, workt, &info); if (info != 0) { if (localVerbose > 0) { cerr << endl; cerr << " Error with DSTEQR, info = " << info << endl; cerr << endl; } break; } // dd contains the eigenvalues in ascending order // Check the residual of the proposed eigenvectors of (K, M) int ii, jz; iparam[4] = 0; for (jz = 0; jz < NCV; jz += loopZ) { int colZ = (jz + loopZ < NCV) ? 
loopZ : NCV - jz; callBLAS.GEMM('N', 'N', localSize, colZ, NCV, 1.0, v, localSize, zz + jz*NCV, NCV, 0.0, vz, localSize); // Form the residuals if (M) M->Apply(approxEV, MapproxEV); K->Apply(approxEV, KapproxEV); for (ii = 0; ii < colZ; ++ii) { callBLAS.AXPY(localSize, -1.0/dd[ii+jz], MapproxEV.Values() + ii*localSize, KapproxEV.Values() + ii*localSize); } // Compute the norms of the residuals if (vectWeight) { KapproxEV.NormWeighted(*vectWeight, normR + jz); } else { KapproxEV.Norm2(normR + jz); } // Scale the norms of residuals with the eigenvalues for (ii = 0; ii < colZ; ++ii) { normR[ii+jz] = normR[ii+jz]*dd[ii+jz]; } // Put the number of converged pairs in iparam[5-1] for (ii=0; ii<colZ; ++ii) { if (normR[ii+jz] < tolEigenSolve) iparam[4] += 1; } } timeResidual += MyWatch.WallTime(); numResidual += NCV; outerIter += 1; if (localVerbose > 0) { cout << " Iteration " << outerIter; cout << " - Number of converged eigenvalues " << iparam[4] << endl; } if (localVerbose > 2) { memcpy(resHistory + historyCount, normR, NCV*sizeof(double)); historyCount += NCV; } if (localVerbose > 1) { cout.precision(2); cout.setf(ios::scientific, ios::floatfield); for (ii=0; ii < NCV; ++ii) { cout << " Iteration " << outerIter; cout << " - Scaled Norm of Residual " << ii << " = " << normR[ii] << endl; } cout << endl; cout.precision(2); for (ii = 0; ii < NCV; ++ii) { cout << " Iteration " << outerIter << " - Ritz eigenvalue " << ii; cout.setf((fabs(dd[ii]) > 100) ? ios::scientific : ios::fixed, ios::floatfield); cout << " = " << 1.0/dd[ii] << endl; } cout << endl; } } // if (ido == 4) } // while (ido != 99) timeOuterLoop += MyWatch.WallTime(); highMem = (highMem > currentSize()) ? 
highMem : currentSize(); if (info < 0) { if (myPid == 0) { cerr << endl; cerr << " Error with DSAUPD, info = " << info << endl; cerr << endl; } } else { // Get the eigenvalues timePostProce -= MyWatch.WallTime(); int ii, jj; double *pointer = workl + 4*NCV + NCV*NCV; for (ii=0; ii < localSize; ii += 3) { int nRow = (ii + 3 < localSize) ? 3 : localSize - ii; for (jj=0; jj<NCV; ++jj) memcpy(pointer + jj*nRow, v + ii + jj*localSize, nRow*sizeof(double)); callBLAS.GEMM('N', 'N', nRow, NCV, NCV, 1.0, pointer, nRow, zz, NCV, 0.0, Q.Values() + ii, localSize); } // Put the converged eigenpairs at the beginning knownEV = 0; for (ii=0; ii < NCV; ++ii) { if (normR[ii] < tolEigenSolve) { lambda[knownEV] = 1.0/dd[ii]; memcpy(Q.Values()+knownEV*localSize, Q.Values()+ii*localSize, localSize*sizeof(double)); knownEV += 1; if (knownEV == Q.NumVectors()) break; } } // Sort the eigenpairs if (knownEV > 0) { mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), localSize); } timePostProce += MyWatch.WallTime(); } // if (info < 0) if (info == 0) { orthoOp = iparam[11-1]; } delete[] wI; delete[] wD; delete[] wD2; if (vectWeight) delete vectWeight; return (info == 0) ? knownEV : info; }
int BuildMatrixTests (Epetra_MultiVector & C, const char TransA, const char TransB, const double alpha, Epetra_MultiVector& A, Epetra_MultiVector& B, const double beta, Epetra_MultiVector& C_GEMM ) { // For given values of TransA, TransB, alpha and beta, a (possibly // zero) filled Epetra_MultiVector C, and allocated // Epetra_MultiVectors A, B and C_GEMM this routine will generate values for // Epetra_MultiVectors A, B and C_GEMM such that, if A, B and (this) are // used with GEMM in this class, the results should match the results // generated by this routine. // Test for Strided multivectors (required for GEMM ops) if (!A.ConstantStride() || !B.ConstantStride() || !C_GEMM.ConstantStride() || !C.ConstantStride()) return(-1); // Error int i, j; double fi, fj; // Used for casting loop variables to floats // Get a view of the MultiVectors double *Ap = 0; double *Bp = 0; double *Cp = 0; double *C_GEMMp = 0; int A_nrows = A.MyLength(); int A_ncols = A.NumVectors(); int B_nrows = B.MyLength(); int B_ncols = B.NumVectors(); int C_nrows = C.MyLength(); int C_ncols = C.NumVectors(); int A_Stride = 0; int B_Stride = 0; int C_Stride = 0; int C_GEMM_Stride = 0; A.ExtractView(&Ap, &A_Stride); B.ExtractView(&Bp, &B_Stride); C.ExtractView(&Cp, &C_Stride); C_GEMM.ExtractView(&C_GEMMp, &C_GEMM_Stride); // Define some useful constants int opA_ncols = (TransA=='N') ? A.NumVectors() : A.MyLength(); int opB_nrows = (TransB=='N') ? B.MyLength() : B.NumVectors(); int C_global_inner_dim = (TransA=='N') ? 
A.NumVectors() : A.GlobalLength(); bool A_is_local = (!A.DistributedGlobal()); bool B_is_local = (!B.DistributedGlobal()); bool C_is_local = (!C.DistributedGlobal()); int A_IndexBase = A.Map().IndexBase(); int B_IndexBase = B.Map().IndexBase(); // Build two new maps that we can use for defining global equation indices below Epetra_Map * A_Map = new Epetra_Map(-1, A_nrows, A_IndexBase, A.Map().Comm()); Epetra_Map * B_Map = new Epetra_Map(-1, B_nrows, B_IndexBase, B.Map().Comm()); int* A_MyGlobalElements = new int[A_nrows]; A_Map->MyGlobalElements(A_MyGlobalElements); int* B_MyGlobalElements = new int[B_nrows]; B_Map->MyGlobalElements(B_MyGlobalElements); // Check for compatible dimensions if (C.MyLength() != C_nrows || opA_ncols != opB_nrows || C.NumVectors() != C_ncols || C.MyLength() != C_GEMM.MyLength() || C.NumVectors() != C_GEMM.NumVectors() ) { delete A_Map; delete B_Map; delete [] A_MyGlobalElements; delete [] B_MyGlobalElements; return(-2); // Return error } bool Case1 = ( A_is_local && B_is_local && C_is_local); // Case 1 above bool Case2 = (!A_is_local && !B_is_local && C_is_local && TransA=='T' );// Case 2 bool Case3 = (!A_is_local && B_is_local && !C_is_local && TransA=='N');// Case 3 // Test for meaningful cases if (!(Case1 || Case2 || Case3)) { delete A_Map; delete B_Map; delete [] A_MyGlobalElements; delete [] B_MyGlobalElements; return(-3); // Meaningless case } /* Fill A, B and C with values as follows: If A_is_local is false: A(i,j) = A_MyGlobalElements[i]*j, i=1,...,numLocalEquations, j=1,...,NumVectors else A(i,j) = i*j, i=1,...,numLocalEquations, j=1,...,NumVectors If B_is_local is false: B(i,j) = 1/(A_MyGlobalElements[i]*j), i=1,...,numLocalEquations, j=1,...,NumVectors else B(i,j) = 1/(i*j), i=1,...,numLocalEquations, j=1,...,NumVectors In addition, scale each entry by GlobalLength for A and 1/GlobalLength for B--keeps the magnitude of entries in check C_GEMM will depend on A_is_local and B_is_local. 
Three cases: 1) A_is_local true and B_is_local true: C_GEMM will be local replicated and equal to A*B = i*NumVectors/j 2) A_is_local false and B_is_local false C_GEMM will be local replicated = A(trans)*B(i,j) = i*numGlobalEquations/j 3) A_is_local false B_is_local true C_GEMM will distributed global and equals A*B = A_MyGlobalElements[i]*NumVectors/j */ // Define a scalar to keep magnitude of entries reasonable double sf = C_global_inner_dim; double sfinv = 1.0/sf; // Define A depending on A_is_local if (A_is_local) { for (j = 0; j <A_ncols ; j++) for (i = 0; i<A_nrows; i++) { fi = i+1; // Get float version of i and j, offset by 1. fj = j+1; Ap[i + A_Stride*j] = (fi*sfinv)*fj; } } else { for (j = 0; j <A_ncols ; j++) for (i = 0; i<A_nrows; i++) { fi = A_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1. fj = j+1; Ap[i + A_Stride*j] = (fi*sfinv)*fj; } } // Define B depending on TransB and B_is_local if (B_is_local) { for (j = 0; j <B_ncols ; j++) for (i = 0; i<B_nrows; i++) { fi = i+1; // Get float version of i and j, offset by 1. fj = j+1; Bp[i + B_Stride*j] = 1.0/((fi*sfinv)*fj); } } else { for (j = 0; j <B_ncols ; j++) for (i = 0; i<B_nrows; i++) { fi = B_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1. fj = j+1; Bp[i + B_Stride*j] = 1.0/((fi*sfinv)*fj); } } // Define C_GEMM depending on A_is_local and B_is_local. C_GEMM is also a // function of alpha, beta, TransA, TransB: // C_GEMM = alpha*A(TransA)*B(TransB) + beta*C_GEMM if (Case1) { for (j = 0; j <C_ncols ; j++) for (i = 0; i<C_nrows; i++) { // Get float version of i and j, offset by 1. fi = (i+1)*C_global_inner_dim; fj = j+1; C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj) + beta * Cp[i + C_Stride*j]; } } else if (Case2) { for (j = 0; j <C_ncols ; j++) for (i = 0; i<C_nrows; i++) { // Get float version of i and j, offset by 1. 
fi = (i+1)*C_global_inner_dim; fj = j+1; C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj) + beta * Cp[i + C_Stride*j]; } } else { for (j = 0; j <C_ncols ; j++) for (i = 0; i<C_nrows; i++) { // Get float version of i and j. fi = (A_MyGlobalElements[i]+1)*C_global_inner_dim; fj = j+1; C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj) + beta * Cp[i + C_Stride*j]; } } delete A_Map; delete B_Map; delete [] A_MyGlobalElements; delete [] B_MyGlobalElements; return(0); }
int BuildMultiVectorTests (Epetra_MultiVector & C, const double alpha, Epetra_MultiVector& A, Epetra_MultiVector& sqrtA, Epetra_MultiVector& B, Epetra_MultiVector& C_alphaA, Epetra_MultiVector& C_alphaAplusB, Epetra_MultiVector& C_plusB, double* const dotvec_AB, double* const norm1_A, double* const norm2_sqrtA, double* const norminf_A, double* const normw_A, Epetra_MultiVector& Weights, double* const minval_A, double* const maxval_A, double* const meanval_A ) { // For given values alpha and a (possibly zero) filled // Epetra_MultiVector (the this object), allocated double * arguments dotvec_AB, // norm1_A, and norm2_A, and allocated Epetra_MultiVectors A, sqrtA, // B, C_alpha, C_alphaAplusB and C_plusB, this method will generate values for // Epetra_MultiVectors A, B and all of the additional arguments on // the list above such that, if A, B and (this) are used with the methods in // this class, the results should match the results generated by this routine. // Specifically, the results in dotvec_AB should match those from a call to // A.dotProd (B,dotvec). Similarly for other routines. 
int i,j; double fi, fj; // Used for casting loop variables to floats // Define some useful constants int A_nrows = A.MyLength(); int A_ncols = A.NumVectors(); int sqrtA_nrows = sqrtA.MyLength(); int sqrtA_ncols = sqrtA.NumVectors(); int B_nrows = B.MyLength(); int B_ncols = B.NumVectors(); double **Ap = 0; double **sqrtAp = 0; double **Bp = 0; double **Cp = 0; double **C_alphaAp = 0; double **C_alphaAplusBp = 0; double **C_plusBp = 0; double **Weightsp = 0; A.ExtractView(&Ap); sqrtA.ExtractView(&sqrtAp); B.ExtractView(&Bp); C.ExtractView(&Cp); C_alphaA.ExtractView(&C_alphaAp); C_alphaAplusB.ExtractView(&C_alphaAplusBp); C_plusB.ExtractView(&C_plusBp); Weights.ExtractView(&Weightsp); bool A_is_local = (A.MyLength() == A.GlobalLength()); bool B_is_local = (B.MyLength() == B.GlobalLength()); bool C_is_local = (C.MyLength() == C.GlobalLength()); int A_IndexBase = A.Map().IndexBase(); int B_IndexBase = B.Map().IndexBase(); // Build two new maps that we can use for defining global equation indices below Epetra_Map * A_Map = new Epetra_Map(-1, A_nrows, A_IndexBase, A.Map().Comm()); Epetra_Map * B_Map = new Epetra_Map(-1, B_nrows, B_IndexBase, B.Map().Comm()); int* A_MyGlobalElements = new int[A_nrows]; A_Map->MyGlobalElements(A_MyGlobalElements); int* B_MyGlobalElements = new int[B_nrows]; B_Map->MyGlobalElements(B_MyGlobalElements); // Check for compatible dimensions if (C.MyLength() != A_nrows || A_nrows != B_nrows || C.NumVectors() != A_ncols || A_ncols != B_ncols || sqrtA_nrows != A_nrows || sqrtA_ncols != A_ncols || C.MyLength() != C_alphaA.MyLength() || C.NumVectors() != C_alphaA.NumVectors() || C.MyLength() != C_alphaAplusB.MyLength() || C.NumVectors() != C_alphaAplusB.NumVectors() || C.MyLength() != C_plusB.MyLength() || C.NumVectors() != C_plusB.NumVectors() ) return(-2); // Return error bool Case1 = ( A_is_local && B_is_local && C_is_local); // Case 1 bool Case2 = (!A_is_local && !B_is_local && !C_is_local);// Case 2 // Test for meaningful cases if (!(Case1 || 
Case2)) return(-3); // Meaningless case /* Fill A and B with values as follows: If A_is_local is false: A(i,j) = A_MyGlobalElements[i]*j, i=1,...,numLocalEquations, j=1,...,NumVectors else A(i,j) = i*j, i=1,...,numLocalEquations, j=1,...,NumVectors If B_is_local is false: B(i,j) = 1/(A_MyGlobalElements[i]*j), i=1,...,numLocalEquations, j=1,...,NumVectors else B(i,j) = 1/(i*j), i=1,...,numLocalEquations, j=1,...,NumVectors In addition, scale each entry by GlobalLength for A and 1/GlobalLength for B--keeps the magnitude of entries in check */ //Define scale factor double sf = A.GlobalLength(); double sfinv = 1.0/sf; // Define A if (A_is_local) { for (j = 0; j <A_ncols ; j++) { for (i = 0; i<A_nrows; i++) { fi = i+1; // Get float version of i and j, offset by 1. fj = j+1; Ap[j][i] = (fi*sfinv)*fj; sqrtAp[j][i] = std::sqrt(Ap[j][i]); } } } else { for (j = 0; j <A_ncols ; j++) { for (i = 0; i<A_nrows; i++) { fi = A_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1. fj = j+1; Ap[j][i] = (fi*sfinv)*fj; sqrtAp[j][i] = std::sqrt(Ap[j][i]); } } } // Define B depending on TransB and B_is_local if (B_is_local) { for (j = 0; j <B_ncols ; j++) { for (i = 0; i<B_nrows; i++) { fi = i+1; // Get float version of i and j, offset by 1. fj = j+1; Bp[j][i] = 1.0/((fi*sfinv)*fj); } } } else { for (j = 0; j <B_ncols ; j++) { for (i = 0; i<B_nrows; i++) { fi = B_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1. fj = j+1; Bp[j][i] = 1.0/((fi*sfinv)*fj); } } } // Generate C_alphaA = alpha * A for (j = 0; j <A_ncols ; j++) for (i = 0; i<A_nrows; i++) C_alphaAp[j][i] = alpha * Ap[j][i]; // Generate C_alphaA = alpha * A + B for (j = 0; j <A_ncols ; j++) for (i = 0; i<A_nrows; i++) C_alphaAplusBp[j][i] = alpha * Ap[j][i] + Bp[j][i]; // Generate C_plusB = this + B for (j = 0; j <A_ncols ; j++) for (i = 0; i<A_nrows; i++) C_plusBp[j][i] = Cp[j][i] + Bp[j][i]; // Generate dotvec_AB. 
Because B(i,j) = 1/A(i,j), dotvec[i] = C.GlobalLength() for (i=0; i< A.NumVectors(); i++) dotvec_AB[i] = C.GlobalLength(); // For the next two results we want to be careful how we do arithmetic // to avoid very large numbers. // We are computing sfinv*(C.GlobalLength()*(C.GlobalLength()+1)/2) double result = C.GlobalLength(); result *= sfinv; result /= 2.0; result *= (double)(C.GlobalLength()+1); // Generate norm1_A. Can use formula for sum of first n integers. for (i=0; i< A.NumVectors(); i++) // m1_A[i] = (i+1)*C.GlobalLength()*(C.GlobalLength()+1)/2; norm1_A[i] = result * ((double) (i+1)); // Generate norm2_sqrtA. Can use formula for sum of first n integers. for (i=0; i< A.NumVectors(); i++) // norm2_sqrtA[i] = std::sqrt((double) ((i+1)*C.GlobalLength()*(C.GlobalLength()+1)/2)); norm2_sqrtA[i] = std::sqrt(result * ((double) (i+1))); // Generate norminf_A, minval_A, maxval_A, meanval_A. for (i=0; i< A.NumVectors(); i++) { norminf_A[i] = (double) (i+1); minval_A[i] = (double) (i+1)/ (double) A.GlobalLength(); maxval_A[i] = (double) (i+1); meanval_A[i] = norm1_A[i]/((double) (A.GlobalLength())); } // Define weights and expected weighted norm for (i=0; i< A.NumVectors(); i++) { double ip1 = (double) i+1; normw_A[i] = ip1; for (j=0; j<A_nrows; j++) Weightsp[i][j] = Ap[i][j]/ip1; } delete A_Map; delete B_Map; delete [] A_MyGlobalElements; delete [] B_MyGlobalElements; return(0); }