Exemplo n.º 1
// Applies the PETSc matrix Amat_ to each column of X with MatMult and copies
// the product into the matching column of Y.
//
// TransA: not read anywhere in the visible lines.
// X:      input multivector; must have as many columns as Y.
// Y:      output multivector.
//
// NOTE(review): this listing looks truncated. The opening brace of the body,
// the closing braces of the `if` blocks and of the column loop, any code that
// points xptrs/yptrs at the vector data, and a return statement are not
// visible here. Confirm against the upstream source before relying on it.
int Epetra_PETScAIJMatrix::Multiply(bool TransA,
                               const Epetra_MultiVector& X,
                               Epetra_MultiVector& Y) const
  int NumVectors = X.NumVectors();
  if (NumVectors!=Y.NumVectors()) EPETRA_CHK_ERR(-1);  // X and Y must have same number of vectors

  // Per-column raw data pointers.
  // NOTE(review): nothing visible assigns these before the loop indexes them.
  double ** xptrs;
  double ** yptrs;
  // When a row-matrix importer exists, X is imported into ImportVector_. The
  // cached ImportVector_ is discarded and reallocated whenever its column
  // count differs from NumVectors.
  if (RowMatrixImporter()!=0) {
    if (ImportVector_!=0) {
      if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;}
    if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(RowMatrixColMap(),NumVectors);
    ImportVector_->Import(X, *RowMatrixImporter(), Insert);

  double *vals=0;
  int length;
  Vec petscX, petscY;
  int ierr;
  for (int i=0; i<NumVectors; i++) {
    // Build PETSc vectors for column i from the xptrs[i] / yptrs[i] arrays.
#   ifdef HAVE_MPI
    ierr=VecCreateMPIWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr);
    ierr=VecCreateMPIWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr);
#   else //FIXME  untested
    ierr=VecCreateSeqWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr);
    ierr=VecCreateSeqWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr);
#   endif

    ierr = MatMult(Amat_,petscX,petscY);CHKERRQ(ierr);

    // Copy the locally owned entries of the product into column i of Y.
    ierr = VecGetArray(petscY,&vals);CHKERRQ(ierr);
    ierr = VecGetLocalSize(petscY,&length);CHKERRQ(ierr);
    for (int j=0; j<length; j++) yptrs[i][j] = vals[j];
    ierr = VecRestoreArray(petscY,&vals);CHKERRQ(ierr);

  // NOTE(review): by indentation this sits outside the column loop, while the
  // vectors are created on every iteration — check for a leak when
  // NumVectors > 1.
  VecDestroy(petscX); VecDestroy(petscY);
  // Operation count: 2 * NumGlobalNonzeros() * NumVectors. The value is not
  // used in the visible lines.
  double flops = NumGlobalNonzeros();
  flops *= 2.0;
  flops *= (double) NumVectors;
} //Multiply()
// Writes every value of A to an already-open C stream.
//
// Only the process with rank 0 writes. A is expected to live entirely on
// rank 0: every other rank must own zero rows, and on rank 0 the local length
// must equal the global length.
//
// handle:   destination stream; only used on rank 0.
// A:        multivector to write.
// mmFormat: when true, one value per line, column after column; when false,
//           each column is written as one line of space-separated values.
//
// Returns the minimum of the per-rank status over the communicator of A:
// 0 when every rank passed its length check, -1 otherwise.
int writeMultiVector(FILE * handle, const Epetra_MultiVector & A, bool mmFormat) {

  int ierr = 0;
  const int length = A.GlobalLength();
  const int numVectors = A.NumVectors();
  const Epetra_Comm & comm = A.Map().Comm();
  if (comm.MyPID()!=0) {
    // Non-root ranks must not own any part of A.
    if (A.MyLength()!=0) ierr = -1;
  }
  else {
    if (length!=A.MyLength()) ierr = -1;
    // Only read the data when the length check passed; otherwise the loop
    // below would index past the locally owned entries.
    if (ierr==0) {
      for (int j=0; j<numVectors; j++) {
        for (int i=0; i<length; i++) {
          const double val = A[j][i];
          if (mmFormat)
            fprintf(handle, "%22.16e\n", val);
          else
            fprintf(handle, "%22.16e ", val);
        }
        // In the row-per-column layout, terminate the line after each column.
        if (!mmFormat) fprintf(handle, "%s", "\n");
      }
    }
  }
  int ierrGlobal;
  comm.MinAll(&ierr, &ierrGlobal, 1); // If any processor has -1, all return -1
  return(ierrGlobal);
}
Exemplo n.º 3
// =============================================================================
// Copies the columns of x into two-component VTK point-data arrays and
// attaches them to the VTK mesh held in vtkDataSet_.
//
// x:            multivector whose entries 2*k and 2*k+1 of each column are
//               stored as the two components for node k.
// scalarsNames: one array name per column of x; when empty, the names
//               "x" + column index are generated.
//
// NOTE(review): this listing looks truncated. The return type / class
// qualifier, the end of the parameter list, all braces and the initializer of
// `scalars` are not visible. Confirm against the upstream source.
setValues( const Epetra_MultiVector          & x,
           const Teuchos::Array<std::string> & scalarsNames
  unsigned int numVecs = x.NumVectors();
  unsigned int numVariables = x.GlobalLength();

  // NOTE(review): mesh_ is dereferenced here, before the is_null() check
  // below — verify whether mesh_ can be null at this point.
  unsigned int numNodes = mesh_->getNodesMap()->NumGlobalElements();

  // make sure the sizes match the mesh
  if ( !mesh_.is_null() )
      TEUCHOS_ASSERT_EQUALITY( numVariables, 2*numNodes );

  // cast into a vtkUnstructuredGrid
  // NOTE(review): the result of the dynamic_cast is not checked for null in
  // the visible lines.
  vtkSmartPointer<vtkUnstructuredGrid> vtkMesh =
      dynamic_cast<vtkUnstructuredGrid*> ( vtkDataSet_.GetPointer() );

  // get scalarsNames, and insert default names if empty
  Teuchos::Array<std::string> scNames ( scalarsNames );
  if ( scNames.empty() )
      scNames.resize ( numVecs );
      // The loops below compare a signed index against unsigned bounds.
      for ( int vec=0; vec<numVecs; vec++ )
          scNames[vec] = "x" + EpetraExt::toString ( vec );

  // fill the scalar field
  // NOTE(review): the right-hand side of this initialization is missing from
  // the listing.
  vtkSmartPointer<vtkDoubleArray> scalars =

  // real and imaginary part
  scalars->SetNumberOfComponents ( 2 );

  // For each column: name the array, append the two values of every node,
  // then add the array to the mesh's point data.
  for ( int vec=0; vec<numVecs; vec++ )
      scalars->SetName ( scNames[vec].c_str() );
      for ( int k=0; k<numNodes; k++ )
//           const unsigned int dof_id = libmeshMesh_->node(k).dof_number(0,k,0);
          scalars->InsertNextValue ( x[vec][2*k] );
          scalars->InsertNextValue ( x[vec][2*k+1] );
      vtkMesh->GetPointData()->AddArray ( scalars );

// Convert an Epetra_MultiVector with assumed block structure dictated by the
// vector space into a Thyra::MultiVectorBase object.
//
// epetraX: source multivector.
// thyraX:  Thyra multivector; presumably the destination of the copy —
//          TODO confirm, the copy itself is not visible in this listing.
//
// Earlier signature, kept for reference:
// const Teuchos::RCP<const Thyra::MultiVectorBase<double> > blockEpetraToThyra(const Epetra_MultiVector & e,const Teuchos::RCP<const Thyra::VectorSpaceBase<double> > & vs)
//
// NOTE(review): only the header and the first local declarations survive
// here; the braces and the rest of the body are missing.
void blockEpetraToThyra(const Epetra_MultiVector & epetraX,const Teuchos::Ptr<Thyra::MultiVectorBase<double> > & thyraX) 

   // extract local information from the Epetra_MultiVector
   // (only numVectors is filled in within the visible lines)
   int leadingDim=0,numVectors=0,localDim=0;
   double * epetraData=0;

   numVectors = epetraX.NumVectors();


// ============================================================================
void EpetraExt::XMLWriter::
Write(const std::string& Label, const Epetra_MultiVector& MultiVector)
  TEUCHOS_TEST_FOR_EXCEPTION(IsOpen_ == false, std::logic_error,
                     "No file has been opened");

  int Length = MultiVector.GlobalLength();
  int NumVectors = MultiVector.NumVectors();

  if (Comm_.MyPID() == 0)
    std::ofstream of(FileName_.c_str(), std::ios::app);

    of << "<MultiVector Label=\"" << Label 
      << "\" Length=\"" << Length << '"'
      << " NumVectors=\"" << NumVectors << '"'
      << " Type=\"double\">" << std::endl;

  for (int iproc = 0; iproc < Comm_.NumProc(); iproc++)
    if (iproc == Comm_.MyPID())
      std::ofstream of(FileName_.c_str(), std::ios::app);

      for (int i = 0; i < MultiVector.MyLength(); ++i)
        for (int j = 0; j < NumVectors; ++j)
          of << std::setiosflags(std::ios::scientific) << MultiVector[j][i] << " ";
        of << std::endl;

  if (Comm_.MyPID() == 0)
    std::ofstream of(FileName_.c_str(), std::ios::app);
    of << "</MultiVector>" << std::endl;
// Copies the values of A into the buffer that *matlabApr points to and
// advances *matlabApr.
//
// Only the process with rank 0 copies. A is expected to live entirely on
// rank 0: every other rank must own zero rows, and on rank 0 the local length
// must equal the global length.
//
// matlabApr: in/out cursor into the destination buffer. On rank 0 it is
//            advanced by the global length of A (not by the number of values
//            copied).
// A:         source multivector. length * numVectors doubles are copied
//            starting at A.Values() — this presumably relies on the columns
//            being stored contiguously; confirm against the callers.
//
// Returns the minimum of the per-rank status over the communicator of A:
// 0 when every rank passed its length check, -1 otherwise.
int DoCopyMultiVector(double** matlabApr, const Epetra_MultiVector& A) {

  int ierr = 0;
  const int length = A.GlobalLength();
  const int numVectors = A.NumVectors();
  const Epetra_Comm & comm = A.Map().Comm();
  if (comm.MyPID()!=0) {
    // Non-root ranks must not own any part of A.
    if (A.MyLength()!=0) ierr = -1;
  }
  else {
    if (length!=A.MyLength()) ierr = -1;
    double* matlabAvalues = *matlabApr;
    double* Aptr = A.Values();
    // Skip the copy when the length check failed; copying anyway would read
    // past the locally owned values.
    if (ierr==0)
      memcpy(matlabAvalues, Aptr, sizeof(*Aptr) * length * numVectors);
    *matlabApr += length;
  }
  int ierrGlobal;
  comm.MinAll(&ierr, &ierrGlobal, 1); // If any processor has -1, all return -1
  return(ierrGlobal);
}
// Convert a Thyra::MultiVectorBase object to an Epetra_MultiVector object with
// the map defined by the Epetra_Map.
//
// thyraX:  source Thyra multivector.
// epetraX: Epetra multivector; presumably the destination of the copy —
//          TODO confirm, the copy itself is not visible in this listing.
//
// Earlier signature, kept for reference:
// const Teuchos::RCP<const Epetra_MultiVector> 
// blockThyraToEpetra(const Teuchos::RCP<const Thyra::MultiVectorBase<double> > & tX,const RCP<const Epetra_Map> & map)
//
// NOTE(review): the braces and the statements behind the step comments below
// are missing from this listing; only the declarations survive.
void blockThyraToEpetra(const Teuchos::RCP<const Thyra::MultiVectorBase<double> > & thyraX,Epetra_MultiVector & epetraX)
   // build an Epetra_MultiVector object
   // (the column count is taken from the dimension of thyraX's domain space)
   int numVectors = thyraX->domain()->dim();

   // make sure the number of vectors are the same

   // extract local information from the Epetra_MultiVector
   int leadingDim=0,localDim=0;
   double * epetraData=0;

   // perform recursive copy

   // sanity check
int MultiVectorToMatrixMarketFile( const char *filename, const Epetra_MultiVector & A, 
				 const char * matrixName,
				 const char *matrixDescription, 
				 bool writeHeader) {
  int M = A.GlobalLength();
  int N = A.NumVectors();

  FILE * handle = 0;

  if (A.Map().Comm().MyPID()==0) { // Only PE 0 does this section

    handle = fopen(filename,"w");
    if (!handle) return(-1);
    MM_typecode matcode;

    if (writeHeader==true) { // Only write header if requested (true by default)
      if (mm_write_banner(handle, matcode)) return(-1);
      if (matrixName!=0) fprintf(handle, "%% \n%% %s\n", matrixName);
      if (matrixDescription!=0) fprintf(handle, "%% %s\n%% \n", matrixDescription);
      if (mm_write_mtx_array_size(handle, M, N)) return(-1);
  if (MultiVectorToMatrixMarketHandle(handle, A)) return(-1); // Everybody calls this routine

  if (A.Map().Comm().MyPID()==0) // Only PE 0 opened a file
    if (fclose(handle)) return(-1);
Exemplo n.º 9
// Test driver: reads coordinates from a file, redistributes them into an
// intentionally unbalanced layout, builds three weight vectors, and calls
// run_test with the RCB, HSFC and RIB partitioning methods.
//
// Command-line options:
//   f    name of the coordinate input file (default "simple.coords")
//   v/q  verbose on/off
//
// Returns `fail`, the result of the last run_test call that was made.
//
// NOTE(review): this listing looks truncated. The declarations of localProc
// and numProcs, the #else/#endif lines, the call that produces parse_return,
// most closing braces, the `failure:` and `done:` labels and the MPI shutdown
// calls are not visible. Confirm against the upstream source.
int main(int argc, char** argv) {

  int fail = 0, dim=0;

  // NOTE(review): both Comm declarations below are visible inside the same
  // #ifdef; an #else/#endif pair appears to have been lost.
#ifdef HAVE_MPI
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &localProc);
  MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
  const Epetra_MpiComm Comm(MPI_COMM_WORLD);
  const Epetra_SerialComm Comm;

  // =============================================================
  // get command line options
  // =============================================================

  Teuchos::CommandLineProcessor clp(false,true);

  // Heap-allocated so that it can be handed to setOption as a pointer; it is
  // deleted once the file has been read.
  std::string *inputFile = new std::string("simple.coords");
  bool verbose = false;

  clp.setOption( "f", inputFile, "Name of coordinate input file");

  clp.setOption( "v", "q", &verbose,
		"Display coordinates and weights before and after partitioning.");

  // NOTE(review): the right-hand side of this initialization is missing.
  Teuchos::CommandLineProcessor::EParseCommandLineReturn parse_return =

  // Help requested: exit with status 0. Any other parse failure: exit with 1.
  if( parse_return == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED){
#ifdef HAVE_MPI
    return 0;
  if( parse_return != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) {
#ifdef HAVE_MPI
    return 1;

  // =============================================================
  // Open file of coordinates and distribute them across processes
  // so they are unbalanced.
  // =============================================================

  Epetra_MultiVector *mv = ispatest::file2multivector(Comm, *inputFile);

  // dim is the number of columns of mv, i.e. the coordinate dimension.
  // NOTE(review): no exit is visible after this error message, and mv is
  // dereferenced further down — verify the missing lines.
  if (!mv || ((dim = mv->NumVectors()) < 1)){
    if (localProc == 0)
      std::cerr << "Invalid input file " << *inputFile << std::endl;

  if (localProc == 0){
    std::cerr << "Found input file " << *inputFile << ", " ;
    std::cerr << dim << " dimensional coordinates" << std::endl;

  delete inputFile;

  // Choose how many entries this rank owns in the unbalanced map. With more
  // than one rank, the entries are divided among ranks 0..n-1 only, where
  // n = numProcs - 1.
  int base = mv->Map().IndexBase();
  int globalSize = mv->GlobalLength();
  int myShare = 0;
  int n = numProcs - 1;

  if (n){
    if (localProc < n){
      int oneShare = globalSize / n;
      int leftOver = globalSize - (n * oneShare);
      myShare = oneShare + ((localProc < leftOver) ? 1 : 0);
    // NOTE(review): presumably the else branch for the single-rank case
    // (n == 0) — the `else` line is not visible.
    myShare = globalSize;

  // Move the coordinates into the unbalanced layout.
  Epetra_BlockMap unbalancedMap(globalSize, myShare, 1, base, mv->Map().Comm());
  Epetra_Import importer(unbalancedMap, mv->Map());
  Epetra_MultiVector umv(unbalancedMap, dim);
  umv.Import(*mv, importer, Insert);

  delete mv;

  // coords owns a copy of umv.
  Teuchos::RCP<const Epetra_MultiVector> coords =
    Teuchos::rcp(new const Epetra_MultiVector(umv));

  // =============================================================
  // Create some different coordinate weight vectors
  // =============================================================

  Epetra_MultiVector *unitWgts = ispatest::makeWeights(coords->Map(), &ispatest::unitWeights);
  Epetra_MultiVector *veeWgts = ispatest::makeWeights(coords->Map(), &ispatest::veeWeights);
  Epetra_MultiVector *altWgts = ispatest::makeWeights(coords->Map(), &ispatest::alternateWeights);

  // Wrap the raw pointers in RCPs for the run_test calls below.
  Teuchos::RCP<const Epetra_MultiVector> unit_weights_rcp = Teuchos::rcp(unitWgts);
  Teuchos::RCP<const Epetra_MultiVector> vee_weights_rcp = Teuchos::rcp(veeWgts);
  Teuchos::RCP<const Epetra_MultiVector> alt_weights_rcp = Teuchos::rcp(altWgts);

  if (localProc == 0){
    std::cerr << "Unit weights: Each object has weight 1.0" << std::endl;
    std::cerr << "V weights: Low and high GIDs have high weights, center GIDs have low weights" << std::endl;
    std::cerr << "Alternate weights: Objects on even rank processes have one weight, on odd another weight" << std::endl;
    std::cerr << std::endl;

  // ======================================================================
  //  Create a parameter list for Zoltan, and one for internal partitioning
  // ======================================================================

  // internalParams is not used in the visible lines.
  Teuchos::ParameterList internalParams;


  Teuchos::ParameterList zoltanParams;

  // NOTE(review): sublist is declared by value here, so it looks like a copy
  // of the "ZOLTAN" sublist — confirm that settings made on it are meant to
  // reach zoltanParams.
  Teuchos::ParameterList sublist = zoltanParams.sublist("ZOLTAN");

  //sublist.set("DEBUG_LEVEL", "1"); // Zoltan will print out parameters
  //sublist.set("DEBUG_LEVEL", "5");   // proc 0 will trace Zoltan calls
  //sublist.set("DEBUG_MEMORY", "2");  // Zoltan will trace alloc & free

  // =============================================================
  // Run some tests
  // =============================================================
  // Each test sets the partitioning method, calls run_test and jumps to the
  // failure handling when run_test returns non-zero.
  zoltanParams.set("PARTITIONING METHOD", "RCB");

  if (localProc == 0){
    std::cerr << "RCB - unit weights" << std::endl;

  fail = run_test(coords, unit_weights_rcp, zoltanParams);

  if (fail) goto failure;

  if (localProc == 0){
    std::cerr << "PASS" << std::endl << std::endl;
  // *************************************************************

  if (localProc == 0){
    std::cerr << "HSFC - V weights" << std::endl;

  zoltanParams.set("PARTITIONING METHOD", "HSFC");
  fail = run_test(coords, vee_weights_rcp, zoltanParams);

  if (fail) goto failure;

  if (localProc == 0){
    std::cerr << "PASS" << std::endl << std::endl;

  // *************************************************************

  if (localProc == 0){
    std::cerr << "RIB - alternate weights" << std::endl;
  zoltanParams.set("PARTITIONING METHOD", "RIB");

  fail = run_test(coords, alt_weights_rcp, zoltanParams);

  if (fail) goto failure;

  if (localProc == 0){
    std::cerr << "PASS" << std::endl << std::endl;

  // *************************************************************

  if (localProc == 0){
    std::cerr << "RIB - no weights supplied" << std::endl;
  zoltanParams.set("PARTITIONING METHOD", "RIB");

  // This call uses the run_test form that takes no weight vector.
  fail = run_test(coords, zoltanParams);

  if (fail) goto failure;

  if (localProc == 0){
    std::cerr << "PASS" << std::endl << std::endl;

  // *************************************************************

  goto done;


  // NOTE(review): the `failure:` and `done:` labels targeted by the gotos
  // above are not visible in this listing.
  if (localProc == 0){
    std::cerr << "FAIL: test failed" << std::endl;


#ifdef HAVE_MPI

  return fail;
Exemplo n.º 10
// Drives ARPACK's SAUPD/SEUPD routines (mode 3) through the callFortran
// wrapper to compute eigenpairs of K X = M X Lambda, applying M and the
// inverse of K whenever ARPACK asks for them. See the parameter and
// return-code description at the top of the body.
//
// NOTE(review): this listing looks truncated. Most closing braces, the `else`
// lines after `if (M)` / `if (MPIComm)` and what were presumably
// #ifdef/#else/#endif lines around the callFortran calls are not visible.
// Confirm against the upstream source.
int ARPACKm3::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {

  // Computes eigenvalues and the corresponding eigenvectors
  // of the generalized eigenvalue problem
  //      K X = M X Lambda
  // using ARPACK (mode 3).
  // The convergence test is provided by ARPACK.
  // Note that if M is not specified, then  K X = X Lambda is solved.
  // (using the mode for generalized eigenvalue problem).
  // Input variables:
  // numEigen  (integer) = Number of eigenmodes requested
  // Q (Epetra_MultiVector) = Initial search space
  //                   The number of columns of Q defines the size of search space (=NCV).
  //                   The rows of X are distributed across processors.
  //                   As a rule of thumb in ARPACK User's guide, NCV >= 2*numEigen.
  //                   At exit, the first numEigen locations contain the eigenvectors requested.
  // lambda (array of doubles) = Converged eigenvalues
  //                   The length of this array is equal to the number of columns in Q.
  //                   At exit, the first numEigen locations contain the eigenvalues requested.
  // startingEV (integer) = Number of eigenmodes already stored in Q
  //                   A linear combination of these vectors is made to define the starting
  //                   vector, placed in resid.
  // Return information on status of computation
  // info >=   0 >> Number of converged eigenpairs at the end of computation
  // // Failure due to input arguments
  // info = -  1 >> The stiffness matrix K has not been specified.
  // info = -  2 >> The maps for the matrix K and the matrix M differ.
  // info = -  3 >> The maps for the matrix K and the preconditioner P differ.
  // info = -  4 >> The maps for the vectors and the matrix K differ.
  // info = -  5 >> Q is too small for the number of eigenvalues requested.
  // info = -  6 >> Q is too small for the computation parameters.
  // info = -  8 >> numEigen must be smaller than the dimension of the matrix.
  // info = - 30 >> MEMORY
  // See ARPACK documentation for the meaning of INFO

  // Nothing to compute when Q already holds at least numEigen modes.
  if (numEigen <= startingEV) {
    return numEigen;

  int info = myVerify.inputArguments(numEigen, K, M, 0, Q, minimumSpaceDimension(numEigen));
  if (info < 0)
    return info;

  int myPid = MyComm.MyPID();

  int localSize = Q.MyLength();
  int NCV = Q.NumVectors();
  int knownEV = 0;

  // The search space cannot be larger than the problem itself: reject the
  // request when numEigen is too large, otherwise cap NCV at the global size.
  if (NCV > Q.GlobalLength()) {
    if (numEigen >= Q.GlobalLength()) {
      cerr << endl;
      cerr << " !! The number of requested eigenvalues must be smaller than the dimension";
      cerr << " of the matrix !!\n";
      cerr << endl;
      return -8;
    NCV = Q.GlobalLength();

  // Verbosity is zeroed on every rank except 0.
  int localVerbose = verbose*(myPid == 0);

  // Define data for ARPACK
  highMem = (highMem > currentSize()) ? highMem : currentSize();

  int ido = 0;

  // Integer workspace, split below into iparam (11 entries), ipntr
  // (11 entries) and select (NCV entries).
  int lwI = 22 + NCV;
  int *wI = new (nothrow) int[lwI];
  if (wI == 0) {
    return -30;
  memRequested += sizeof(int)*lwI/(1024.0*1024.0);

  int *iparam = wI;
  int *ipntr = wI + 11;
  int *select = wI + 22;

  // Double workspace, split below into workl (lworkl entries), resid
  // (localSize entries) and workd (the remaining 3*localSize entries).
  int lworkl = NCV*(NCV+8);
  int lwD = lworkl + 4*localSize;
  double *wD = new (nothrow) double[lwD];
  if (wD == 0) {
    delete[] wI;
    return -30;
  memRequested += sizeof(double)*(4*localSize+lworkl)/(1024.0*1024.0);

  double *pointer = wD;

  double *workl = pointer;
  pointer = pointer + lworkl;

  double *resid = pointer;
  pointer = pointer + localSize;

  double *workd = pointer;

  // ARPACK works directly on the storage of Q.
  double *v = Q.Values();

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  double sigma = 0.0;

  if (startingEV > 0) {
    // Define the initial starting vector
    // (the sum of the first startingEV columns of v, using localSize as the
    // column stride)
    memset(resid, 0, localSize*sizeof(double));
    for (int jj = 0; jj < startingEV; ++jj)
      for (int ii = 0; ii < localSize; ++ii)
         resid[ii] += v[ii + jj*localSize];
    // Presumably tells ARPACK that resid holds a user-supplied starting
    // vector — confirm against the SAUPD documentation.
    info = 1;

  // iparam uses ARPACK's 1-based numbering, hence the "k-1" subscripts.
  iparam[1-1] = 1;
  iparam[3-1] = maxIterEigenSolve;
  iparam[7-1] = 3;

  // The fourth parameter forces to use the convergence test provided by ARPACK.
  // This requires a customization of ARPACK (provided by R. Lehoucq).

  iparam[4-1] = 0;

  // Vector views over the three localSize-long segments of workd; they are
  // re-pointed with ResetView inside the loop below.
  Epetra_Vector v1(View, Q.Map(), workd);
  Epetra_Vector v2(View, Q.Map(), workd + localSize);
  Epetra_Vector v3(View, Q.Map(), workd + 2*localSize);

  // NOTE(review): vTmp is allocated and freed but not otherwise referenced in
  // the visible lines.
  double *vTmp = new (nothrow) double[localSize];
  if (vTmp == 0) {
    delete[] wI;
    delete[] wD;
    return -30;
  memRequested += sizeof(double)*localSize/(1024.0*1024.0);

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  if (localVerbose > 0) {
    cout << endl;
    cout << " *|* Problem: ";
    if (M) 
      cout << "K*Q = M*Q D ";
      cout << "K*Q = Q D ";
    cout << endl;
    cout << " *|* Algorithm = ARPACK (mode 3)" << endl;
    cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
    cout.setf(ios::scientific, ios::floatfield);
    cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
    if (startingEV > 0)
      cout << " *|* User-defined starting vector (Combination of " << startingEV << " vectors)\n";
    cout << "\n -- Start iterations -- \n";

  // MPIComm is null when MyComm is not an Epetra_MpiComm.
  Epetra_MpiComm *MPIComm = dynamic_cast<Epetra_MpiComm *>(const_cast<Epetra_Comm*>(&MyComm));

  // Main loop: each SAUPD call sets ido to the operation it needs next, and
  // the loop ends when ido becomes 99.
  timeOuterLoop -= MyWatch.WallTime();
  while (ido != 99) {

    highMem = (highMem > currentSize()) ? highMem : currentSize();

    // NOTE(review): the SAUPD call appears twice below; presumably an `else`
    // and #ifdef/#else/#endif lines were lost — confirm.
    if (MPIComm)
      callFortran.PSAUPD(MPIComm->Comm(), &ido, 'G', localSize, which, numEigen, tolEigenSolve,
             resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info, localVerbose);
      callFortran.SAUPD(&ido, 'G', localSize, which, numEigen, tolEigenSolve, resid, NCV, v, 
             localSize, iparam, ipntr, workd, workl, lworkl, &info, localVerbose);
    callFortran.SAUPD(&ido, 'G', localSize, which, numEigen, tolEigenSolve, resid, NCV, v,
             localSize, iparam, ipntr, workd, workl, lworkl, &info, localVerbose);

    // In the branches below, ipntr[k] - 1 is used as an offset into workd;
    // the "- 1" presumably converts ARPACK's 1-based positions to C indexing.
    if (ido == -1) {
      // Apply the mass matrix
      v3.ResetView(workd + ipntr[0] - 1);
      timeMassOp -= MyWatch.WallTime();
      // NOTE(review): the memcpy presumably belongs to a lost `else` (no mass
      // matrix: plain copy).
      if (M)
        M->Apply(v3, v1);
        memcpy(v1.Values(), v3.Values(), localSize*sizeof(double));
      timeMassOp += MyWatch.WallTime();
      massOp += 1;
      // Solve the stiffness problem
      v2.ResetView(workd + ipntr[1] - 1);
      timeStifOp -= MyWatch.WallTime();
      K->ApplyInverse(v1, v2);
      timeStifOp += MyWatch.WallTime();
      stifOp += 1;
    } // if (ido == -1)

    if (ido == 1) {
      // Solve the stiffness problem
      v1.ResetView(workd + ipntr[2] - 1);
      v2.ResetView(workd + ipntr[1] - 1);
      timeStifOp -= MyWatch.WallTime();
      K->ApplyInverse(v1, v2);
      timeStifOp += MyWatch.WallTime();
      stifOp += 1;
    } // if (ido == 1)

    if (ido == 2) {
      // Apply the mass matrix
      v1.ResetView(workd + ipntr[0] - 1);
      v2.ResetView(workd + ipntr[1] - 1);
      timeMassOp -= MyWatch.WallTime();
      // NOTE(review): as above, the memcpy presumably belongs to a lost `else`.
      if (M)
        M->Apply(v1, v2);
        memcpy(v2.Values(), v1.Values(), localSize*sizeof(double));
      timeMassOp += MyWatch.WallTime();
      massOp += 1;
    } // if (ido == 2)

  } // while (ido != 99)
  timeOuterLoop += MyWatch.WallTime();
  highMem = (highMem > currentSize()) ? highMem : currentSize();

  if (info < 0) {
    if (myPid == 0) {
      cerr << endl;
      cerr << " Error with DSAUPD, info = " << info << endl;
      cerr << endl;
  else {

    // Compute the eigenvectors
    // NOTE(review): the SEUPD call appears twice below, like SAUPD above.
    timePostProce -= MyWatch.WallTime();
    if (MPIComm)
      callFortran.PSEUPD(MPIComm->Comm(), 1, 'A', select, lambda, v, localSize, sigma, 'G',
            localSize, which, numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, 
            workd, workl, lworkl, &info);
      callFortran.SEUPD(1, 'A', select, lambda, v, localSize, sigma, 'G', localSize, which,
            numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, workd, workl,
            lworkl, &info);
    callFortran.SEUPD(1, 'A', select, lambda, v, localSize, sigma, 'G', localSize, which,
          numEigen, tolEigenSolve, resid, NCV, v, localSize, iparam, ipntr, workd, workl,
          lworkl, &info);
    timePostProce += MyWatch.WallTime();
    highMem = (highMem > currentSize()) ? highMem : currentSize();

    // Treat the error
    if (info != 0) {
      if (myPid == 0) {
        cerr << endl;
        cerr << " Error with DSEUPD, info = " << info << endl;
        cerr << endl;

  } // if (info < 0)

  // On success, read the statistics back from iparam.
  if (info == 0) {
    outerIter = iparam[3-1];
    knownEV = iparam[5-1];
    orthoOp = iparam[11-1];

  delete[] wI;
  delete[] wD;
  delete[] vTmp;

  // Number of converged eigenpairs on success, otherwise the error code.
  return (info == 0) ? knownEV : info;

Exemplo n.º 11
int ModifiedARPACKm3::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, 
                              int startingEV, const Epetra_MultiVector *orthoVec) {

  // Computes the smallest eigenvalues and the corresponding eigenvectors
  // of the generalized eigenvalue problem
  //      K X = M X Lambda
  // using ModifiedARPACK (mode 3).
  // The convergence test is performed outside of ARPACK
  //                      || Kx - Mx lambda || < tol*lambda
  // The norm ||.|| can be specified by the user through the array normWeight.
  // By default, the L2 Euclidean norm is used.
  // Note that if M is not specified, then  K X = X Lambda is solved.
  // (using the mode for generalized eigenvalue problem).
  // Input variables:
  // numEigen  (integer) = Number of eigenmodes requested
  // Q (Epetra_MultiVector) = Initial search space
  //                   The number of columns of Q defines the size of search space (=NCV).
  //                   The rows of X are distributed across processors.
  //                   As a rule of thumb in ARPACK User's guide, NCV >= 2*numEigen.
  //                   At exit, the first numEigen locations contain the eigenvectors requested.
  // lambda (array of doubles) = Converged eigenvalues
  //                   The length of this array is equal to the number of columns in Q.
  //                   At exit, the first numEigen locations contain the eigenvalues requested.
  // startingEV (integer) = Number of eigenmodes already stored in Q
  //                   A linear combination of these vectors is made to define the starting
  //                   vector, placed in resid.
  // orthoVec (Pointer to Epetra_MultiVector) = Space to be orthogonal to
  //                   The computation is performed in the orthogonal of the space spanned
  //                   by the columns vectors in orthoVec.
  // Return information on status of computation
  // info >=   0 >> Number of converged eigenpairs at the end of computation
  // // Failure due to input arguments
  // info = -  1 >> The stiffness matrix K has not been specified.
  // info = -  2 >> The maps for the matrix K and the matrix M differ.
  // info = -  3 >> The maps for the matrix K and the preconditioner P differ.
  // info = -  4 >> The maps for the vectors and the matrix K differ.
  // info = -  5 >> Q is too small for the number of eigenvalues requested.
  // info = -  6 >> Q is too small for the computation parameters.
  // info = -  8 >> numEigen must be smaller than the dimension of the matrix.
  // info = - 30 >> MEMORY
  // See ARPACK documentation for the meaning of INFO

  if (numEigen <= startingEV) {
    return numEigen;

  int info = myVerify.inputArguments(numEigen, K, M, 0, Q, minimumSpaceDimension(numEigen));
  if (info < 0)
    return info;

  int myPid = MyComm.MyPID();

  int localSize = Q.MyLength();
  int NCV = Q.NumVectors();
  int knownEV = 0;

  if (NCV > Q.GlobalLength()) {
    if (numEigen >= Q.GlobalLength()) {
      cerr << endl;
      cerr << " !! The number of requested eigenvalues must be smaller than the dimension";
      cerr << " of the matrix !!\n";
      cerr << endl;
      return -8;
    NCV = Q.GlobalLength();

  // Get the weight for approximating the M-inverse norm
  Epetra_Vector *vectWeight = 0;
  if (normWeight) {
    vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);

  int localVerbose = verbose*(myPid == 0);

  // Define data for ARPACK
  // UH (10/17/03) Note that workl is also used 
  //               * to store the eigenvectors of the tridiagonal matrix
  //               * as a workspace for DSTEQR
  //               * as a workspace for recovering the global eigenvectors

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  int ido = 0;

  int lwI = 22;
  int *wI = new (nothrow) int[lwI];
  if (wI == 0) {
    if (vectWeight)
      delete vectWeight;
    return -30;
  memRequested += sizeof(int)*lwI/(1024.0*1024.0);

  int *iparam = wI;
  int *ipntr = wI + 11;

  int lworkl = NCV*(NCV+8);
  int lwD = lworkl + 4*localSize;
  double *wD = new (nothrow) double[lwD];
  if (wD == 0) {
    if (vectWeight)
      delete vectWeight;
    delete[] wI;
    return -30;
  memRequested += sizeof(double)*(4*localSize+lworkl)/(1024.0*1024.0);

  double *pointer = wD;

  double *workl = pointer;
  pointer = pointer + lworkl;

  double *resid = pointer;
  pointer = pointer + localSize;

  double *workd = pointer;

  double *v = Q.Values();

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  if (startingEV > 0) {
    // Define the initial starting vector
    memset(resid, 0, localSize*sizeof(double));
    for (int jj = 0; jj < startingEV; ++jj)
      for (int ii = 0; ii < localSize; ++ii)
         resid[ii] += v[ii + jj*localSize];
    info = 1;

  iparam[1-1] = 1;
  iparam[3-1] = maxIterEigenSolve;
  iparam[7-1] = 3;

  // The fourth parameter forces to use the convergence test provided by ARPACK.
  // This requires a customization of ARPACK (provided by R. Lehoucq).

  iparam[4-1] = 1;

  Epetra_Vector v1(View, Q.Map(), workd);
  Epetra_Vector v2(View, Q.Map(), workd + localSize);
  Epetra_Vector v3(View, Q.Map(), workd + 2*localSize);

  // Define further storage for the new residual check
  // Use a block of vectors to compute the residuals more quickly.
  // Note that workd could be used if memory becomes an issue.
  int loopZ = (NCV > 10) ? 10 : NCV;

  int lwD2 = localSize + 2*NCV-1 + NCV;
  lwD2 += (M) ? 3*loopZ*localSize : 2*loopZ*localSize;
  double *wD2 = new (nothrow) double[lwD2];
  if (wD2 == 0) {
    if (vectWeight)
      delete vectWeight;
    delete[] wI;
    delete[] wD;
    return -30;
  memRequested += sizeof(double)*lwD2/(1024.0*1024.0);

  pointer = wD2;
  // vTmp is used when ido = -1
  double *vTmp = pointer;
  pointer = pointer + localSize;

  // dd and ee stores the tridiagonal matrix.
  // Note that DSTEQR destroys the contents of the input arrays.
  double *dd = pointer;
  pointer = pointer + NCV;

  double *ee = pointer;
  pointer = pointer + NCV-1;

  double *vz = pointer;
  pointer = pointer + loopZ*localSize;
  Epetra_MultiVector approxEV(View, Q.Map(), vz, localSize, loopZ);

  double *kvz = pointer;
  pointer = pointer + loopZ*localSize;
  Epetra_MultiVector KapproxEV(View, Q.Map(), kvz, localSize, loopZ);

  double *mvz = (M) ? pointer : vz;
  pointer = (M) ? pointer + loopZ*localSize : pointer;
  Epetra_MultiVector MapproxEV(View, Q.Map(), mvz, localSize, loopZ);

  double *normR = pointer;

  // zz contains the eigenvectors of the tridiagonal matrix.
  // workt is a workspace for DSTEQR.
  // Note that zz and workt will use parts of workl.
  double *zz, *workt;

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  // Define an array to store the residuals history
  if (localVerbose > 2) {
    resHistory = new (nothrow) double[maxIterEigenSolve*NCV];
    if (resHistory == 0) {
      if (vectWeight)
        delete vectWeight;
      delete[] wI;
      delete[] wD;
      delete[] wD2;
      return -30;
    historyCount = 0;

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  if (localVerbose > 0) {
    cout << endl;
    cout << " *|* Problem: ";
    if (M) 
      cout << "K*Q = M*Q D ";
      cout << "K*Q = Q D ";
    cout << endl;
    cout << " *|* Algorithm = ARPACK (Mode 3, modified such that user checks convergence)" << endl;
    cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
    cout.setf(ios::scientific, ios::floatfield);
    cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
    if (startingEV > 0)
      cout << " *|* User-defined starting vector (Combination of " << startingEV << " vectors)\n";
    cout << " *|* Norm used for convergence: ";
    if (normWeight)
      cout << "weighted L2-norm with user-provided weights" << endl;
      cout << "L^2-norm" << endl;
    if (orthoVec)
      cout << " *|* Size of orthogonal subspace = " << orthoVec->NumVectors() << endl;
    cout << "\n -- Start iterations -- \n";

  Epetra_MpiComm *MPIComm = dynamic_cast<Epetra_MpiComm *>(const_cast<Epetra_Comm*>(&MyComm));

  timeOuterLoop -= MyWatch.WallTime();
  while (ido != 99) {

    highMem = (highMem > currentSize()) ? highMem : currentSize();

    if (MPIComm)
      callFortran.PSAUPD(MPIComm->Comm(), &ido, 'G', localSize, "LM", numEigen, tolEigenSolve,
             resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info, 0);
      callFortran.SAUPD(&ido, 'G', localSize, "LM", numEigen, tolEigenSolve, resid, NCV, v, 
             localSize, iparam, ipntr, workd, workl, lworkl, &info, 0);
    callFortran.SAUPD(&ido, 'G', localSize, "LM", numEigen, tolEigenSolve, resid, NCV, v,
             localSize, iparam, ipntr, workd, workl, lworkl, &info, 0);

    if (ido == -1) {
      // Apply the mass matrix      
      v3.ResetView(workd + ipntr[0] - 1);
      timeMassOp -= MyWatch.WallTime();
      if (M)
        M->Apply(v3, v1);
        memcpy(v1.Values(), v3.Values(), localSize*sizeof(double));
      timeMassOp += MyWatch.WallTime();
      massOp += 1;
      if ((orthoVec) && (verbose > 3)) {
        // Check the orthogonality
        double maxDot = myVerify.errorOrthogonality(orthoVec, &v1, 0);
        if (myPid == 0) {
          cout << " Maximum Euclidean dot product against orthogonal space (Before Solve) = ";
          cout << maxDot << endl;
      // Solve the stiffness problem
      v2.ResetView(workd + ipntr[1] - 1);
      timeStifOp -= MyWatch.WallTime();
      K->ApplyInverse(v1, v2);
      timeStifOp += MyWatch.WallTime();
      stifOp += 1;
      // Project the solution vector if needed
      // Note: Use mvz as workspace
      if (orthoVec) {
        Epetra_Vector Mv2(View, v2.Map(), mvz);
        if (M)
          M->Apply(v2, Mv2);
          memcpy(Mv2.Values(), v2.Values(), localSize*sizeof(double));
        modalTool.massOrthonormalize(v2, Mv2, M, *orthoVec, 1, 1);
      if ((orthoVec) && (verbose > 3)) {
        // Check the orthogonality
        double maxDot = myVerify.errorOrthogonality(orthoVec, &v2, M);
        if (myPid == 0) {
          cout << " Maximum M-dot product against orthogonal space (After Solve) = ";
          cout << maxDot << endl;
    } // if (ido == -1)

    if (ido == 1) {
      // Solve the stiffness problem
      v1.ResetView(workd + ipntr[2] - 1);
      v2.ResetView(workd + ipntr[1] - 1);
      if ((orthoVec) && (verbose > 3)) {
        // Check the orthogonality
        double maxDot = myVerify.errorOrthogonality(orthoVec, &v1, 0);
        if (myPid == 0) {
          cout << " Maximum Euclidean dot product against orthogonal space (Before Solve) = ";
          cout << maxDot << endl;
      timeStifOp -= MyWatch.WallTime();
      K->ApplyInverse(v1, v2);
      timeStifOp += MyWatch.WallTime();
      stifOp += 1;
      // Project the solution vector if needed
      // Note: Use mvz as workspace
      if (orthoVec) {
        Epetra_Vector Mv2(View, v2.Map(), mvz);
        if (M)
          M->Apply(v2, Mv2);
          memcpy(Mv2.Values(), v2.Values(), localSize*sizeof(double));
        modalTool.massOrthonormalize(v2, Mv2, M, *orthoVec, 1, 1);
      if ((orthoVec) && (verbose > 3)) {
        // Check the orthogonality
        double maxDot = myVerify.errorOrthogonality(orthoVec, &v2, M);
        if (myPid == 0) {
          cout << " Maximum M-dot product against orthogonal space (After Solve) = ";
          cout << maxDot << endl;
    } // if (ido == 1)

    if (ido == 2) {
      // Apply the mass matrix      
      v1.ResetView(workd + ipntr[0] - 1);
      v2.ResetView(workd + ipntr[1] - 1);
      timeMassOp -= MyWatch.WallTime();
      if (M)
        M->Apply(v1, v2);
        memcpy(v2.Values(), v1.Values(), localSize*sizeof(double));
      timeMassOp += MyWatch.WallTime();
      massOp += 1;
    } // if (ido == 2)

    if (ido == 4) {
      timeResidual -= MyWatch.WallTime();
      // Copy the main diagonal of T
      memcpy(dd, workl + NCV + ipntr[4] - 1, NCV*sizeof(double));
      // Copy the lower diagonal of T
      memcpy(ee, workl + ipntr[4], (NCV-1)*sizeof(double));
      // Compute the eigenpairs of the tridiagonal matrix
      zz = workl + 4*NCV;
      workt = workl + 4*NCV + NCV*NCV;
      callFortran.STEQR('I', NCV, dd, ee, zz, NCV, workt, &info);
      if (info != 0) {
        if (localVerbose > 0) {
          cerr << endl;
          cerr << " Error with DSTEQR, info = " << info << endl;
          cerr << endl;
      // dd contains the eigenvalues in ascending order 
      // Check the residual of the proposed eigenvectors of (K, M)
      int ii, jz;
      iparam[4] = 0;
      for (jz = 0; jz < NCV; jz += loopZ) {
        int colZ = (jz + loopZ < NCV) ? loopZ : NCV - jz;
        callBLAS.GEMM('N', 'N', localSize, colZ, NCV, 1.0, v, localSize,
                      zz + jz*NCV, NCV, 0.0, vz, localSize);
        // Form the residuals
        if (M)
          M->Apply(approxEV, MapproxEV); 
        K->Apply(approxEV, KapproxEV); 
        for (ii = 0; ii < colZ; ++ii) {
          callBLAS.AXPY(localSize, -1.0/dd[ii+jz], MapproxEV.Values() + ii*localSize, 
                        KapproxEV.Values() + ii*localSize);
        // Compute the norms of the residuals
        if (vectWeight) {
          KapproxEV.NormWeighted(*vectWeight, normR + jz);
        else {
          KapproxEV.Norm2(normR + jz);
        // Scale the norms of residuals with the eigenvalues
        for (ii = 0; ii < colZ; ++ii) {
          normR[ii+jz] = normR[ii+jz]*dd[ii+jz];
        // Put the number of converged pairs in iparam[5-1]
        for (ii=0; ii<colZ; ++ii) {
          if (normR[ii+jz] < tolEigenSolve)
            iparam[4] += 1;
      timeResidual += MyWatch.WallTime();
      numResidual += NCV;
      outerIter += 1;
      if (localVerbose > 0) {
        cout << " Iteration " << outerIter;
        cout << " - Number of converged eigenvalues " << iparam[4] << endl;
      if (localVerbose > 2) {
        memcpy(resHistory + historyCount, normR, NCV*sizeof(double));
        historyCount += NCV;
      if (localVerbose > 1) {
        cout.setf(ios::scientific, ios::floatfield);
        for (ii=0; ii < NCV; ++ii) {
          cout << " Iteration " << outerIter;
          cout << " - Scaled Norm of Residual " << ii << " = " << normR[ii] << endl;
        cout << endl;
        for (ii = 0; ii < NCV; ++ii) {
          cout << " Iteration " << outerIter << " - Ritz eigenvalue " << ii;
          cout.setf((fabs(dd[ii]) > 100) ? ios::scientific : ios::fixed, ios::floatfield);
          cout << " = " << 1.0/dd[ii] << endl;
        cout << endl;
    } // if (ido == 4)

  } // while (ido != 99)
  timeOuterLoop += MyWatch.WallTime();
  highMem = (highMem > currentSize()) ? highMem : currentSize();

  if (info < 0) {
    if (myPid == 0) {
      cerr << endl;
      cerr << " Error with DSAUPD, info = " << info << endl;
      cerr << endl;
  else {
    // Get the eigenvalues
    timePostProce -= MyWatch.WallTime();
    int ii, jj;
    double *pointer = workl + 4*NCV + NCV*NCV;
    for (ii=0; ii < localSize; ii += 3) {
      int nRow = (ii + 3 < localSize) ? 3 : localSize - ii;
      for (jj=0; jj<NCV; ++jj)
        memcpy(pointer + jj*nRow, v + ii + jj*localSize, nRow*sizeof(double));
      callBLAS.GEMM('N', 'N', nRow, NCV, NCV, 1.0, pointer, nRow, zz, NCV,
                    0.0, Q.Values() + ii, localSize);
    // Put the converged eigenpairs at the beginning
    knownEV = 0;
    for (ii=0; ii < NCV; ++ii) {
      if (normR[ii] < tolEigenSolve) {
        lambda[knownEV] = 1.0/dd[ii];
        memcpy(Q.Values()+knownEV*localSize, Q.Values()+ii*localSize, localSize*sizeof(double));
        knownEV += 1;
        if (knownEV == Q.NumVectors())
    // Sort the eigenpairs
    if (knownEV > 0) {
      mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), localSize);
    timePostProce += MyWatch.WallTime();
  } // if (info < 0)

  if (info == 0) {
    orthoOp = iparam[11-1];

  delete[] wI;
  delete[] wD;
  delete[] wD2;
  if (vectWeight)
    delete vectWeight;

  return (info == 0) ? knownEV : info;

Exemplo n.º 12
  // Fills A, B and C_GEMM with closed-form reference data for the GEMM tests.
  //
  // For the given TransA, TransB, alpha and beta, and an already filled
  // (possibly zero) multivector C, this routine writes
  //     A(i,j)      = (r_A(i) + 1) * (j + 1) / sf
  //     B(i,j)      = 1 / ((r_B(i) + 1) * (j + 1) / sf)
  //     C_GEMM(i,j) = alpha * (r_C(i) + 1) * sf / (j + 1) + beta * C(i,j)
  // where sf is the global inner dimension of the product, and r_X(i) is the
  // local row index i when X is locally replicated or the global row index of
  // local row i (taken from a linear map) when X is distributed.
  //
  // Three operand layouts are accepted:
  //   Case 1: A, B and C all locally replicated.
  //   Case 2: A and B distributed, C locally replicated, TransA == 'T'.
  //   Case 3: A distributed, B locally replicated, C distributed, TransA == 'N'.
  //
  // Returns  0 on success,
  //         -1 if any of A, B, C, C_GEMM does not have constant stride,
  //         -2 if the operand dimensions are incompatible,
  //         -3 if the layout is none of the three cases above.
  int  BuildMatrixTests (Epetra_MultiVector & C,
                         const char TransA, const char TransB,
                         const double alpha,
                         Epetra_MultiVector& A,
                         Epetra_MultiVector& B,
                         const double beta,
                         Epetra_MultiVector& C_GEMM ) {

    // The fills below index raw storage as ptr[i + stride*j], which is only
    // valid for constant-stride multivectors.
    if (!A.ConstantStride()      ||
        !B.ConstantStride()      ||
        !C_GEMM.ConstantStride() ||
        !C.ConstantStride()) return(-1); // Error

    // Get a view of the MultiVectors
    double *Ap      = 0;
    double *Bp      = 0;
    double *Cp      = 0;
    double *C_GEMMp = 0;
    int A_Stride      = 0;
    int B_Stride      = 0;
    int C_Stride      = 0;
    int C_GEMM_Stride = 0;

    A.ExtractView(&Ap, &A_Stride);
    B.ExtractView(&Bp, &B_Stride);
    C.ExtractView(&Cp, &C_Stride);
    C_GEMM.ExtractView(&C_GEMMp, &C_GEMM_Stride);

    // Define some useful constants
    const int A_nrows = A.MyLength();
    const int A_ncols = A.NumVectors();
    const int B_nrows = B.MyLength();
    const int B_ncols = B.NumVectors();
    const int C_nrows = C.MyLength();
    const int C_ncols = C.NumVectors();

    const int opA_ncols = (TransA=='N') ? A.NumVectors() : A.MyLength();
    const int opB_nrows = (TransB=='N') ? B.MyLength() : B.NumVectors();
    const int C_global_inner_dim = (TransA=='N') ? A.NumVectors() : A.GlobalLength();

    const bool A_is_local = (!A.DistributedGlobal());
    const bool B_is_local = (!B.DistributedGlobal());
    const bool C_is_local = (!C.DistributedGlobal());

    // Build two linear maps used to look up global row indices below.  They
    // live on the stack so that every return path releases them, and they are
    // constructed before any early return so that all processes take part in
    // the construction exactly as before.
    const Epetra_Map A_Map(-1, A_nrows, A.Map().IndexBase(), A.Map().Comm());
    const Epetra_Map B_Map(-1, B_nrows, B.Map().IndexBase(), B.Map().Comm());

    // Check for compatible dimensions.
    // NOTE(review): the earlier comparisons of C.MyLength()/C.NumVectors()
    // against C_nrows/C_ncols were always false by construction and have been
    // dropped; the rows of C are still not checked against the rows of op(A).
    if (opA_ncols      != opB_nrows         ||
        C.MyLength()   != C_GEMM.MyLength() ||
        C.NumVectors() != C_GEMM.NumVectors()) {
      return(-2); // Return error
    }

    const bool Case1 = ( A_is_local &&  B_is_local &&  C_is_local);                 // Case 1 above
    const bool Case2 = (!A_is_local && !B_is_local &&  C_is_local && TransA=='T');  // Case 2
    const bool Case3 = (!A_is_local &&  B_is_local && !C_is_local && TransA=='N');  // Case 3

    // Test for meaningful cases
    if (!(Case1 || Case2 || Case3)) {
      return(-3); // Meaningless case
    }

    // Scale factor that keeps the magnitude of the entries reasonable:
    // A is scaled by 1/sf and B by sf.
    const double sf = C_global_inner_dim;
    const double sfinv = 1.0/sf;

    // Define A: local row index when replicated, global row index otherwise.
    for (int j = 0; j < A_ncols; j++) {
      const double fj = j+1; // column index, offset by 1
      for (int i = 0; i < A_nrows; i++) {
        const double fi = (A_is_local ? i : A_Map.GID(i)) + 1; // row index, offset by 1
        Ap[i + A_Stride*j] = (fi*sfinv)*fj;
      }
    }

    // Define B the same way, as the entrywise reciprocal of the A pattern.
    for (int j = 0; j < B_ncols; j++) {
      const double fj = j+1;
      for (int i = 0; i < B_nrows; i++) {
        const double fi = (B_is_local ? i : B_Map.GID(i)) + 1;
        Bp[i + B_Stride*j] = 1.0/((fi*sfinv)*fj);
      }
    }

    // Define the expected result
    //       C_GEMM = alpha*A(TransA)*B(TransB) + beta*C
    // Cases 1 and 2 produce a replicated result indexed by the local row;
    // Case 3 produces a distributed result indexed by A's global row.
    const bool use_global_row = !(Case1 || Case2);
    for (int j = 0; j < C_ncols; j++) {
      const double fj = j+1;
      for (int i = 0; i < C_nrows; i++) {
        const int row = use_global_row ? A_Map.GID(i) : i;
        // Multiply in floating point so that large index * dimension
        // products cannot overflow int.
        const double fi = static_cast<double>(row + 1) * C_global_inner_dim;
        C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj)
          + beta * Cp[i + C_Stride*j];
      }
    }

    return(0);
  }

Exemplo n.º 13
// Fills A, sqrtA, B and the expected-result arguments for the multivector
// arithmetic and norm tests.
//
// With sf = A.GlobalLength() and r_X(i) the local row index i (X locally
// replicated) or the global row index of local row i (X distributed), this
// routine writes
//     A(i,j)             = (r_A(i) + 1) * (j + 1) / sf
//     sqrtA(i,j)         = sqrt(A(i,j))
//     B(i,j)             = 1 / ((r_B(i) + 1) * (j + 1) / sf)
//     C_alphaA(i,j)      = alpha * A(i,j)
//     C_alphaAplusB(i,j) = alpha * A(i,j) + B(i,j)
//     C_plusB(i,j)       = C(i,j) + B(i,j)
//     Weights(i,j)       = A(i,j) / (j + 1)
// and, for each vector j, the closed-form values written to dotvec_AB,
// norm1_A, norm2_sqrtA, norminf_A, normw_A, minval_A, maxval_A and meanval_A.
// Each output array must hold at least A.NumVectors() doubles.
//
// Returns  0 on success,
//         -2 if the multivector dimensions are incompatible,
//         -3 if A, B and C are not either all locally replicated or all
//            distributed.
int  BuildMultiVectorTests (Epetra_MultiVector & C, const double alpha,
                            Epetra_MultiVector& A,
                            Epetra_MultiVector& sqrtA,
                            Epetra_MultiVector& B,
                            Epetra_MultiVector& C_alphaA,
                            Epetra_MultiVector& C_alphaAplusB,
                            Epetra_MultiVector& C_plusB,
                            double* const dotvec_AB,
                            double* const norm1_A,
                            double* const norm2_sqrtA,
                            double* const norminf_A,
                            double* const normw_A,
                            Epetra_MultiVector& Weights,
                            double* const minval_A,
                            double* const maxval_A,
                            double* const meanval_A ) {

  // Define some useful constants
  const int A_nrows = A.MyLength();
  const int A_ncols = A.NumVectors();
  const int sqrtA_nrows = sqrtA.MyLength();
  const int sqrtA_ncols = sqrtA.NumVectors();
  const int B_nrows = B.MyLength();
  const int B_ncols = B.NumVectors();

  const bool A_is_local = (A.MyLength() == A.GlobalLength());
  const bool B_is_local = (B.MyLength() == B.GlobalLength());
  const bool C_is_local = (C.MyLength() == C.GlobalLength());

  // Build two linear maps used to look up global row indices below.  They
  // live on the stack so that the early error returns no longer leak them,
  // and they are constructed before any early return so that all processes
  // take part in the construction exactly as before.
  const Epetra_Map A_Map(-1, A_nrows, A.Map().IndexBase(), A.Map().Comm());
  const Epetra_Map B_Map(-1, B_nrows, B.Map().IndexBase(), B.Map().Comm());

  // Check for compatible dimensions
  if (C.MyLength()   != A_nrows                    ||
      A_nrows        != B_nrows                    ||
      C.NumVectors() != A_ncols                    ||
      A_ncols        != B_ncols                    ||
      sqrtA_nrows    != A_nrows                    ||
      sqrtA_ncols    != A_ncols                    ||
      C.MyLength()   != C_alphaA.MyLength()        ||
      C.NumVectors() != C_alphaA.NumVectors()      ||
      C.MyLength()   != C_alphaAplusB.MyLength()   ||
      C.NumVectors() != C_alphaAplusB.NumVectors() ||
      C.MyLength()   != C_plusB.MyLength()         ||
      C.NumVectors() != C_plusB.NumVectors()       ||
      // Weights is written with A's shape below, so it must match as well.
      Weights.MyLength()   != A_nrows              ||
      Weights.NumVectors() != A_ncols) return(-2); // Return error

  const bool Case1 = ( A_is_local &&  B_is_local &&  C_is_local); // Case 1
  const bool Case2 = (!A_is_local && !B_is_local && !C_is_local); // Case 2

  // Test for meaningful cases
  if (!(Case1 || Case2)) return(-3); // Meaningless case

  // Get per-vector views of the MultiVectors.  Without these calls the
  // pointer arrays stay null and every fill below would dereference null.
  double **Ap = 0;
  double **sqrtAp = 0;
  double **Bp = 0;
  double **Cp = 0;
  double **C_alphaAp = 0;
  double **C_alphaAplusBp = 0;
  double **C_plusBp = 0;
  double **Weightsp = 0;

  A.ExtractView(&Ap);
  sqrtA.ExtractView(&sqrtAp);
  B.ExtractView(&Bp);
  C.ExtractView(&Cp);
  C_alphaA.ExtractView(&C_alphaAp);
  C_alphaAplusB.ExtractView(&C_alphaAplusBp);
  C_plusB.ExtractView(&C_plusBp);
  Weights.ExtractView(&Weightsp);

  // Scale factor: A is scaled by 1/GlobalLength and B by GlobalLength, which
  // keeps the magnitude of the entries in check.
  const double sf = A.GlobalLength();
  const double sfinv = 1.0/sf;

  // Define A and sqrtA: local row index when replicated, global otherwise.
  for (int j = 0; j < A_ncols; j++) {
    const double fj = j+1; // column index, offset by 1
    for (int i = 0; i < A_nrows; i++) {
      const double fi = (A_is_local ? i : A_Map.GID(i)) + 1; // row index, offset by 1
      Ap[j][i] = (fi*sfinv)*fj;
      sqrtAp[j][i] = std::sqrt(Ap[j][i]);
    }
  }

  // Define B as the entrywise reciprocal of the A pattern.
  for (int j = 0; j < B_ncols; j++) {
    const double fj = j+1;
    for (int i = 0; i < B_nrows; i++) {
      const double fi = (B_is_local ? i : B_Map.GID(i)) + 1;
      Bp[j][i] = 1.0/((fi*sfinv)*fj);
    }
  }

  // Generate C_alphaA = alpha * A, C_alphaAplusB = alpha * A + B and
  // C_plusB = C + B.
  for (int j = 0; j < A_ncols; j++) {
    for (int i = 0; i < A_nrows; i++) {
      C_alphaAp[j][i]      = alpha * Ap[j][i];
      C_alphaAplusBp[j][i] = alpha * Ap[j][i] + Bp[j][i];
      C_plusBp[j][i]       = Cp[j][i] + Bp[j][i];
    }
  }

  // For norm1_A and norm2_sqrtA we want to be careful how we do arithmetic
  // to avoid very large numbers.
  // We are computing sfinv*(C.GlobalLength()*(C.GlobalLength()+1)/2),
  // i.e. the scaled sum of the first GlobalLength integers.
  double result = C.GlobalLength();
  result *= sfinv;
  result /= 2.0;
  result *= (double)(C.GlobalLength()+1);

  const int numVectors = A.NumVectors();
  for (int j = 0; j < numVectors; j++) {
    const double jp1 = (double) (j+1);

    // Because B(i,j) = 1/A(i,j), each dot product equals C.GlobalLength().
    dotvec_AB[j] = C.GlobalLength();

    // norm1_A[j] = (j+1)*C.GlobalLength()*(C.GlobalLength()+1)/2, scaled by sfinv
    norm1_A[j] = result * jp1;
    norm2_sqrtA[j] = std::sqrt(result * jp1);

    // Extremes and mean of column j of A.
    norminf_A[j] = jp1;
    minval_A[j]  = jp1 / (double) A.GlobalLength();
    maxval_A[j]  = jp1;
    meanval_A[j] = norm1_A[j]/((double) (A.GlobalLength()));

    // Define weights and expected weighted norm
    normw_A[j] = jp1;
    for (int i = 0; i < A_nrows; i++) Weightsp[j][i] = Ap[j][i]/jp1;
  }

  return(0);
}