C++ (Cpp) KP Exemples

Exemple #1

0

Afficher le fichier

Fichier : q_nonrigid_registration.cpp Projet : Vaa3D/v3d_external

bool q_ini_bsplinebasis_3D(const long n,Matrix &BxBxB)
{
	if(n<=0)
	{
		printf("ERROR: n should > 0!\n");
		return false;
	}

	Matrix B(4,4);
	B.row(1) << -1 << 3 <<-3 << 1;
	B.row(2) <<  3 <<-6 << 3 << 0;
	B.row(3) << -3 << 0 << 3 << 0;
	B.row(4) <<  1 << 4 << 1 << 0;
	B/=6.0;

	Matrix T(n,4);
	double t_step=1.0/n;
	for(long i=0;i<n;i++)
	{
		double t=t_step*i;
		for(long j=0;j<=3;j++)
			T(i+1,j+1)=pow(t,3-j);
	}

	Matrix TB=T*B;
	Matrix BxB=KP(TB,TB);
	BxBxB=KP(BxB,TB);

	return true;
}

Exemple #2

0

Afficher le fichier

Fichier : FB_FT_PIDWL.cpp Projet : EstebanQuerol/Black_FORTE

void FORTE_FB_FT_PIDWL::setInitialValues(){
  KP() = 1.0;
  TN() = 1.0;
  TV() = 1.0;
  LIM_L() = -1.0E38;
  LIM_H() = 1.0E38;
}

Exemple #3

0

Afficher le fichier

Fichier : BlockDACG.cpp Projet : Tech-XCorp/Trilinos

int BlockDACG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {

  // Computes the smallest eigenvalues and the corresponding eigenvectors
  // of the generalized eigenvalue problem
  // 
  //      K X = M X Lambda
  // 
  // using a Block Deflation Accelerated Conjugate Gradient algorithm.
  //
  // Note that if M is not specified, then  K X = X Lambda is solved.
  //
  // Ref: P. Arbenz & R. Lehoucq, "A comparison of algorithms for modal analysis in the
  // absence of a sparse direct method", SNL, Technical Report SAND2003-1028J
  // With the notations of this report, the coefficient beta is defined as 
  //                 diag( H^T_{k} G_{k} ) / diag( H^T_{k-1} G_{k-1} )
  // 
  // Input variables:
  // 
  // numEigen  (integer) = Number of eigenmodes requested
  // 
  // Q (Epetra_MultiVector) = Converged eigenvectors
  //                   The number of columns of Q must be equal to numEigen + blockSize.
  //                   The rows of Q are distributed across processors.
  //                   At exit, the first numEigen columns contain the eigenvectors requested.
  // 
  // lambda (array of doubles) = Converged eigenvalues
  //                   At input, it must be of size numEigen + blockSize.
  //                   At exit, the first numEigen locations contain the eigenvalues requested.
  //
  // startingEV (integer) = Number of existing converged eigenmodes
  //
  // Return information on status of computation
  // 
  // info >=   0 >> Number of converged eigenpairs at the end of computation
  // 
  // // Failure due to input arguments
  // 
  // info = -  1 >> The stiffness matrix K has not been specified.
  // info = -  2 >> The maps for the matrix K and the matrix M differ.
  // info = -  3 >> The maps for the matrix K and the preconditioner P differ.
  // info = -  4 >> The maps for the vectors and the matrix K differ.
  // info = -  5 >> Q is too small for the number of eigenvalues requested.
  // info = -  6 >> Q is too small for the computation parameters.
  //
  // info = - 10 >> Failure during the mass orthonormalization
  // 
  // info = - 20 >> Error in LAPACK during the local eigensolve
  //
  // info = - 30 >> MEMORY
  //

  // Check the input parameters
  
  if (numEigen <= startingEV) {
    return startingEV;
  }

  int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen + blockSize);
  if (info < 0)
    return info;

  int myPid = MyComm.MyPID();

  // Get the weight for approximating the M-inverse norm
  Epetra_Vector *vectWeight = 0;
  if (normWeight) {
    vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
  }

  int knownEV = startingEV;
  int localVerbose = verbose*(myPid==0);

  // Define local block vectors
  //
  // MX = Working vectors (storing M*X if M is specified, else pointing to X)
  // KX = Working vectors (storing K*X)
  //
  // R = Residuals
  //
  // H = Preconditioned residuals
  //
  // P = Search directions
  // MP = Working vectors (storing M*P if M is specified, else pointing to P)
  // KP = Working vectors (storing K*P)

  int xr = Q.MyLength();
  Epetra_MultiVector X(View, Q, numEigen, blockSize);
  X.Random();

  int tmp;
  tmp = (M == 0) ? 5*blockSize*xr : 7*blockSize*xr;

  double *work1 = new (nothrow) double[tmp]; 
  if (work1 == 0) {
    if (vectWeight)
      delete vectWeight;
    info = -30;
    return info;
  }
  memRequested += sizeof(double)*tmp/(1024.0*1024.0);

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  double *tmpD = work1;

  Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
  tmpD = (M) ? tmpD + xr*blockSize : tmpD;

  Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, blockSize);

  // Define arrays
  //
  // theta = Store the local eigenvalues (size: 2*blockSize)
  // normR = Store the norm of residuals (size: blockSize)
  //
  // oldHtR = Store the previous H_i^T*R_i    (size: blockSize)
  // currentHtR = Store the current H_i^T*R_i (size: blockSize)
  //
  // MM = Local mass matrix              (size: 2*blockSize x 2*blockSize)
  // KK = Local stiffness matrix         (size: 2*blockSize x 2*blockSize)
  //
  // S = Local eigenvectors              (size: 2*blockSize x 2*blockSize)

  int lwork2;
  lwork2 = 5*blockSize + 12*blockSize*blockSize;
  double *work2 = new (nothrow) double[lwork2];
  if (work2 == 0) {
    if (vectWeight)
      delete vectWeight;
    delete[] work1;
    info = -30;
    return info;
  }

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  tmpD = work2;

  double *theta = tmpD;
  tmpD = tmpD + 2*blockSize;

  double *normR = tmpD;
  tmpD = tmpD + blockSize;

  double *oldHtR = tmpD;
  tmpD = tmpD + blockSize;
  
  double *currentHtR = tmpD;
  tmpD = tmpD + blockSize;
  memset(currentHtR, 0, blockSize*sizeof(double));
  
  double *MM = tmpD;
  tmpD = tmpD + 4*blockSize*blockSize;

  double *KK = tmpD;
  tmpD = tmpD + 4*blockSize*blockSize;

  double *S = tmpD;

  memRequested += sizeof(double)*lwork2/(1024.0*1024.0);

  // Define an array to store the residuals history
  if (localVerbose > 2) {
    resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
    if (resHistory == 0) {
      if (vectWeight)
        delete vectWeight;
      delete[] work1;
      delete[] work2;
      info = -30;
      return info;
    }
    historyCount = 0;
  }

  // Miscellaneous definitions

  bool reStart = false;
  numRestart = 0;

  int localSize;
  int twoBlocks = 2*blockSize;
  int nFound = blockSize;
  int i, j;

  if (localVerbose > 0) {
    cout << endl;
    cout << " *|* Problem: ";
    if (M) 
      cout << "K*Q = M*Q D ";
    else
      cout << "K*Q = Q D ";
    if (Prec)
      cout << " with preconditioner";
    cout << endl;
    cout << " *|* Algorithm = DACG (block version)" << endl;
    cout << " *|* Size of blocks = " << blockSize << endl;
    cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
    cout.precision(2);
    cout.setf(ios::scientific, ios::floatfield);
    cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
    cout << " *|* Norm used for convergence: ";
    if (normWeight)
      cout << "weighted L2-norm with user-provided weights" << endl;
    else
      cout << "L^2-norm" << endl;
    if (startingEV > 0)
      cout << " *|* Input converged eigenvectors = " << startingEV << endl;
    cout << "\n -- Start iterations -- \n";
  }

  timeOuterLoop -= MyWatch.WallTime();
  for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {

    highMem = (highMem > currentSize()) ? highMem : currentSize();

    if ((outerIter == 1) || (reStart == true)) {

      reStart = false;
      localSize = blockSize;

      if (nFound > 0) {

        Epetra_MultiVector X2(View, X, blockSize-nFound, nFound);
        Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound);
        Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound);

        // Apply the mass matrix to X
        timeMassOp -= MyWatch.WallTime();
        if (M)
          M->Apply(X2, MX2);
        timeMassOp += MyWatch.WallTime();
        massOp += nFound;

        if (knownEV > 0) {
          // Orthonormalize X against the known eigenvectors with Gram-Schmidt
          // Note: Use R as a temporary work space
          Epetra_MultiVector copyQ(View, Q, 0, knownEV);
          timeOrtho -= MyWatch.WallTime();
          info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 0, R.Values());
          timeOrtho += MyWatch.WallTime();
          // Exit the code if the orthogonalization did not succeed
          if (info < 0) {
            info = -10;
            delete[] work1;
            delete[] work2;
            if (vectWeight)
              delete vectWeight;
            return info;
          }
        }

        // Apply the stiffness matrix to X
        timeStifOp -= MyWatch.WallTime();
        K->Apply(X2, KX2);
        timeStifOp += MyWatch.WallTime();
        stifOp += nFound;

      } // if (nFound > 0)

    } // if ((outerIter == 1) || (reStart == true))
    else {

      // Apply the preconditioner on the residuals
      if (Prec != 0) {
        timePrecOp -= MyWatch.WallTime();
        Prec->ApplyInverse(R, H);
        timePrecOp += MyWatch.WallTime();
        precOp += blockSize;
      }
      else {
        memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
      }

      // Compute the product H^T*R
      timeSearchP -= MyWatch.WallTime();      
      memcpy(oldHtR, currentHtR, blockSize*sizeof(double));
      H.Dot(R, currentHtR);
      // Define the new search directions
      if (localSize == blockSize) {
        P.Scale(-1.0, H);
        localSize = twoBlocks;
      } // if (localSize == blockSize)
      else {
        bool hasZeroDot = false;
        for (j = 0; j < blockSize; ++j) {
          if (oldHtR[j] == 0.0) {
            hasZeroDot = true;
            break; 
          }
          callBLAS.SCAL(xr, currentHtR[j]/oldHtR[j], P.Values() + j*xr);
        }
        if (hasZeroDot == true) {
          // Restart the computation when there is a null dot product
          if (localVerbose > 0) {
            cout << endl;
            cout << " !! Null dot product -- Restart the search space !!\n";
            cout << endl;
          }
          if (blockSize == 1) {
            X.Random();
            nFound = blockSize;
          }
          else {
            Epetra_MultiVector Xinit(View, X, j, blockSize-j);
            Xinit.Random();
            nFound = blockSize - j;
          } // if (blockSize == 1)
          reStart = true;
          numRestart += 1;
          info = 0;
          continue;
        }
        callBLAS.AXPY(xr*blockSize, -1.0, H.Values(), P.Values());
      } // if (localSize == blockSize)
      timeSearchP += MyWatch.WallTime();

      // Apply the mass matrix on P
      timeMassOp -= MyWatch.WallTime();
      if (M)
        M->Apply(P, MP);
      timeMassOp += MyWatch.WallTime();
      massOp += blockSize;

      if (knownEV > 0) {
        // Orthogonalize P against the known eigenvectors
        // Note: Use R as a temporary work space
        Epetra_MultiVector copyQ(View, Q, 0, knownEV);
        timeOrtho -= MyWatch.WallTime();
        modalTool.massOrthonormalize(P, MP, M, copyQ, blockSize, 1, R.Values());
        timeOrtho += MyWatch.WallTime();
      }

      // Apply the stiffness matrix to P
      timeStifOp -= MyWatch.WallTime();
      K->Apply(P, KP);
      timeStifOp += MyWatch.WallTime();
      stifOp += blockSize;

    } // if ((outerIter == 1) || (reStart == true))

    // Form "local" mass and stiffness matrices
    // Note: Use S as a temporary workspace
    timeLocalProj -= MyWatch.WallTime();
    modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
                    KK, localSize, S);
    modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
                    MM, localSize, S);
    if (localSize > blockSize) {
      modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
                      KK + blockSize*localSize, localSize, S);
      modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
                      KK + blockSize*localSize + blockSize, localSize, S);
      modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
                      MM + blockSize*localSize, localSize, S);
      modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
                      MM + blockSize*localSize + blockSize, localSize, S);
    } // if (localSize > blockSize)
    timeLocalProj += MyWatch.WallTime();

    // Perform a spectral decomposition
    timeLocalSolve -= MyWatch.WallTime();
    int nevLocal = localSize;
    info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
                                  S, localSize, theta, localVerbose,
                                  (blockSize == 1) ? 1: 0);
    timeLocalSolve += MyWatch.WallTime();

    if (info < 0) {
      // Stop when spectral decomposition has a critical failure
      break;
    }

    // Check for restarting
    if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
      if (localVerbose > 0) {
        cout << " Iteration " << outerIter;
        cout << "- Failure for spectral decomposition - RESTART with new random search\n";
      }
      if (blockSize == 1) {
        X.Random();
        nFound = blockSize;
      }
      else {
        Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
        Xinit.Random();
        nFound = blockSize - 1;
      } // if (blockSize == 1)
      reStart = true;
      numRestart += 1;
      info = 0;
      continue;
    } // if ((theta[0] < 0.0) || (nevLocal < blockSize))

    if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
      for (j = 0; j < nevLocal; ++j)
        memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double));
      localSize = blockSize;
    }

    // Check the direction of eigenvectors
    // Note: This sign check is important for convergence
    for (j = 0; j < nevLocal; ++j) {
      double coeff = S[j + j*localSize];
      if (coeff < 0.0)
        callBLAS.SCAL(localSize, -1.0, S + j*localSize);
    }

    // Compute the residuals
    timeResidual -= MyWatch.WallTime();
    callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
                  S, localSize, 0.0, R.Values(), xr);
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
                    S + blockSize, localSize, 1.0, R.Values(), xr);
    }
    for (j = 0; j < blockSize; ++j)
      callBLAS.SCAL(localSize, theta[j], S + j*localSize);
    callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
                  S, localSize, 1.0, R.Values(), xr);
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
                  S + blockSize, localSize, 1.0, R.Values(), xr);
    }
    for (j = 0; j < blockSize; ++j)
      callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
    timeResidual += MyWatch.WallTime();
    
    // Compute the norms of the residuals
    timeNorm -= MyWatch.WallTime();
    if (vectWeight)
      R.NormWeighted(*vectWeight, normR);
    else
      R.Norm2(normR);
    // Scale the norms of residuals with the eigenvalues
    // Count the converged eigenvectors
    nFound = 0;
    for (j = 0; j < blockSize; ++j) {
      normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
      if (normR[j] < tolEigenSolve) 
        nFound += 1;
    }
    timeNorm += MyWatch.WallTime();

    // Store the residual history
    if (localVerbose > 2) {
      memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
      historyCount += 1;
    }

    // Print information on current iteration
    if (localVerbose > 0) {
      cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
      cout << knownEV + nFound << endl;
    }

    if (localVerbose > 1) {
      cout << endl;
      cout.precision(2);
      cout.setf(ios::scientific, ios::floatfield);
      for (i=0; i<blockSize; ++i) {
        cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
        cout << " = " << normR[i] << endl;
      }
      cout << endl;
      cout.precision(2);
      for (i=0; i<blockSize; ++i) {
        cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
        cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
        cout << " = " << theta[i] << endl;
      }
      cout << endl;
    }

    if (nFound == 0) {
      // Update the spaces
      // Note: Use H as a temporary work space
      timeLocalUpdate -= MyWatch.WallTime();
      memcpy(H.Values(), X.Values(), xr*blockSize*sizeof(double));
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
                    0.0, X.Values(), xr);
      memcpy(H.Values(), KX.Values(), xr*blockSize*sizeof(double));
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
                    0.0, KX.Values(), xr);
      if (M) {
        memcpy(H.Values(), MX.Values(), xr*blockSize*sizeof(double));
        callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
                      0.0, MX.Values(), xr);
      }
      if (localSize == twoBlocks) {
        callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, P.Values(), xr,
                      S + blockSize, localSize, 1.0, X.Values(), xr);
        callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
                      S + blockSize, localSize, 1.0, KX.Values(), xr);
        if (M) {
          callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MP.Values(), xr,
                      S + blockSize, localSize, 1.0, MX.Values(), xr);
        }
      } // if (localSize == twoBlocks)
      timeLocalUpdate += MyWatch.WallTime();
      // When required, monitor some orthogonalities
      if (verbose > 2) {
        if (knownEV == 0) {
          accuracyCheck(&X, &MX, &R, 0, (localSize>blockSize) ? &P : 0);
        }
        else {
          Epetra_MultiVector copyQ(View, Q, 0, knownEV);
          accuracyCheck(&X, &MX, &R, &copyQ, (localSize>blockSize) ? &P : 0);
        }
      } // if (verbose > 2)
      continue;
    } // if (nFound == 0)

    // Order the Ritz eigenvectors by putting the converged vectors at the beginning
    int firstIndex = blockSize;
    for (j = 0; j < blockSize; ++j) {
      if (normR[j] >= tolEigenSolve) {
        firstIndex = j;
        break;
      }
    } // for (j = 0; j < blockSize; ++j)
    while (firstIndex < nFound) {
      for (j = firstIndex; j < blockSize; ++j) {
        if (normR[j] < tolEigenSolve) {
          // Swap the j-th and firstIndex-th position
          callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
          callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
          callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
          break;
        }
      } // for (j = firstIndex; j < blockSize; ++j)
      for (j = 0; j < blockSize; ++j) {
        if (normR[j] >= tolEigenSolve) {
          firstIndex = j;
          break;
        }
      } // for (j = 0; j < blockSize; ++j)
    } // while (firstIndex < nFound)

    // Copy the converged eigenvalues
    memcpy(lambda + knownEV, theta, nFound*sizeof(double));

    // Convergence test
    if (knownEV + nFound >= numEigen) {
      callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
                    S, localSize, 0.0, R.Values(), xr);
      if (localSize > blockSize) {
        callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
                      S + blockSize, localSize, 1.0, R.Values(), xr);
      }
      memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double));
      knownEV += nFound;
      if (localVerbose == 1) {
        cout << endl;
        cout.precision(2);
        cout.setf(ios::scientific, ios::floatfield);
        for (i=0; i<blockSize; ++i) {
          cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
          cout << " = " << normR[i] << endl;
        }
        cout << endl;
      }
      break;
    }

    // Store the converged eigenvalues and eigenvectors
    callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
                  S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
                    S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
    }
    knownEV += nFound;

    // Define the restarting vectors
    timeRestart -= MyWatch.WallTime();
    int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
    double *Snew = S + nFound*localSize;
    memcpy(H.Values(), X.Values(), blockSize*xr*sizeof(double));
    callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
                  Snew, localSize, 0.0, X.Values(), xr);
    memcpy(H.Values(), KX.Values(), blockSize*xr*sizeof(double));
    callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
                  Snew, localSize, 0.0, KX.Values(), xr);
    if (M) {
      memcpy(H.Values(), MX.Values(), blockSize*xr*sizeof(double));
      callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
                    Snew, localSize, 0.0, MX.Values(), xr);
    }
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
                    Snew+blockSize, localSize, 1.0, X.Values(), xr);
      callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
                    Snew+blockSize, localSize, 1.0, KX.Values(), xr);
      if (M) {
        callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
                      Snew+blockSize, localSize, 1.0, MX.Values(), xr);
      }
    } // if (localSize == twoBlocks)
    if (nevLocal < blockSize + nFound) {
      // Put new random vectors at the end of the block
      Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver);
      Xtmp.Random();
    }
    else {
      nFound = 0;
    } // if (nevLocal < blockSize + nFound)
    reStart = true;
    timeRestart += MyWatch.WallTime();

  } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
  timeOuterLoop += MyWatch.WallTime();
  highMem = (highMem > currentSize()) ? highMem : currentSize();

  // Clean memory
  delete[] work1;
  delete[] work2;
  if (vectWeight)
    delete vectWeight;

  // Sort the eigenpairs
  timePostProce -= MyWatch.WallTime();
  if ((info == 0) && (knownEV > 0)) {
    mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
  }
  timePostProce += MyWatch.WallTime();

  return (info == 0) ? knownEV : info;

}

Exemple #4

0

Afficher le fichier

Fichier : BlockPCGSolver.cpp Projet : ChiahungTai/Trilinos

int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y, int blkSize) const {

  int xrow = X.MyLength();
  int xcol = X.NumVectors();
  int ycol = Y.NumVectors();

  int info = 0;
  int localVerbose = verbose*(MyComm.MyPID() == 0);
  double *valX = X.Values();
  int NB = 3 + callLAPACK.ILAENV(1, "hetrd", "u", blkSize);
  int lworkD = (blkSize > NB) ? blkSize*blkSize : NB*blkSize;
  int wSize = 4*blkSize*xrow + 3*blkSize + 2*blkSize*blkSize + lworkD;

  bool useY = true;
  if (ycol % blkSize != 0) {
    // Allocate an extra block to store the solutions
    wSize += blkSize*xrow;
    useY = false;
  }

  if (lWorkSpace < wSize) {
    delete[] workSpace;
    workSpace = new (std::nothrow) double[wSize];
    if (workSpace == 0) {
      info = -1;
      return info;
    }
    lWorkSpace = wSize;
  } // if (lWorkSpace < wSize)

  double *pointer = workSpace;

  // Array to store the matrix PtKP
  double *PtKP = pointer;
  pointer = pointer + blkSize*blkSize;

  // Array to store coefficient matrices
  double *coeff = pointer;
  pointer = pointer + blkSize*blkSize;

  // Workspace array
  double *workD = pointer;
  pointer = pointer + lworkD;

  // Array to store the eigenvalues of P^t K P
  double *da = pointer;
  pointer = pointer + blkSize;

  // Array to store the norms of right hand sides
  double *initNorm = pointer;
  pointer = pointer + blkSize;

  // Array to store the norms of residuals
  double *resNorm = pointer;
  pointer = pointer + blkSize;

  // Array to store the residuals
  double *valR = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector R(View, X.Map(), valR, xrow, blkSize);

  // Array to store the preconditioned residuals
  double *valZ = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector Z(View, X.Map(), valZ, xrow, blkSize);

  // Array to store the search directions
  double *valP = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector P(View, X.Map(), valP, xrow, blkSize);

  // Array to store the image of the search directions
  double *valKP = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector KP(View, X.Map(), valKP, xrow, blkSize);

  // Pointer to store the solutions
  double *valSOL = (useY == true) ? Y.Values() : pointer;

  int iRHS;
  for (iRHS = 0; iRHS < xcol; iRHS += blkSize) {

    int numVec = (iRHS + blkSize < xcol) ? blkSize : xcol - iRHS;

    // Set the initial residuals to the right hand sides
    if (numVec < blkSize) {
      R.Random();
    }
    memcpy(valR, valX + iRHS*xrow, numVec*xrow*sizeof(double));

    // Set the initial guess to zero
    valSOL = (useY == true) ? Y.Values() + iRHS*xrow : valSOL;
    Epetra_MultiVector SOL(View, X.Map(), valSOL, xrow, blkSize);
    SOL.PutScalar(0.0);

    int ii = 0;
    int iter = 0;
    int nFound = 0;

    R.Norm2(initNorm);

    if (localVerbose > 1) {
      std::cout << std::endl;
      std::cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1 << std::endl;
      if (localVerbose > 2) {
        std::fprintf(stderr,"\n");
        for (ii = 0; ii < numVec; ++ii) {
          std::cout << " ... Initial Residual Norm " << ii << " = " << initNorm[ii] << std::endl;
        }
        std::cout << std::endl;
      }
    }

    // Iteration loop
    for (iter = 1; iter <= iterMax; ++iter) {

      // Apply the preconditioner
      if (Prec)
        Prec->ApplyInverse(R, Z);
      else
        Z = R;

      // Define the new search directions
      if (iter == 1) {
        P = Z;
      }
      else {
        // Compute P^t K Z
        callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, KP.Values(), xrow, Z.Values(), xrow,
                      0.0, workD, blkSize);
        MyComm.SumAll(workD, coeff, blkSize*blkSize);

        // Compute the coefficient (P^t K P)^{-1} P^t K Z
        callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
                      0.0, workD, blkSize);
        for (ii = 0; ii < blkSize; ++ii)
          callBLAS.SCAL(blkSize, da[ii], workD + ii, blkSize);
        callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
                      0.0, coeff, blkSize);

        // Update the search directions
        // Note: Use KP as a workspace
        memcpy(KP.Values(), P.Values(), xrow*blkSize*sizeof(double));
        callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, 1.0, KP.Values(), xrow, coeff, blkSize,
                      0.0, P.Values(), xrow);

        P.Update(1.0, Z, -1.0);

      } // if (iter == 1)

      K->Apply(P, KP);

      // Compute P^t K P
      callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, P.Values(), xrow, KP.Values(), xrow,
                    0.0, workD, blkSize);
      MyComm.SumAll(workD, PtKP, blkSize*blkSize);

      // Eigenvalue decomposition of P^t K P
      callLAPACK.SYEV('V', 'U', blkSize, PtKP, blkSize, da, workD, lworkD, &info);
      if (info) {
        // Break the loop as spectral decomposition failed
        break;
      } // if (info)

      // Compute the pseudo-inverse of the eigenvalues
      for (ii = 0; ii < blkSize; ++ii) {
        TEUCHOS_TEST_FOR_EXCEPTION(da[ii] < 0.0, std::runtime_error, "Negative "
                           "eigenvalue for P^T K P: da[" << ii << "] = "
                           << da[ii] << ".");
        da[ii] = (da[ii] == 0.0) ? 0.0 : 1.0/da[ii];
      } // for (ii = 0; ii < blkSize; ++ii)

      // Compute P^t R
      callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, P.Values(), xrow, R.Values(), xrow,
                    0.0, workD, blkSize);
      MyComm.SumAll(workD, coeff, blkSize*blkSize);

      // Compute the coefficient (P^t K P)^{-1} P^t R
      callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
                    0.0, workD, blkSize);
      for (ii = 0; ii < blkSize; ++ii)
        callBLAS.SCAL(blkSize, da[ii], workD + ii, blkSize);
      callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
                    0.0, coeff, blkSize);

      // Update the solutions
      callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, 1.0, P.Values(), xrow, coeff, blkSize,
                    1.0, valSOL, xrow);

      // Update the residuals
      callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, -1.0, KP.Values(), xrow, coeff, blkSize,
                    1.0, R.Values(), xrow);

      // Check convergence
      R.Norm2(resNorm);
      nFound = 0;
      for (ii = 0; ii < numVec; ++ii) {
        if (resNorm[ii] <= tolCG*initNorm[ii])
          nFound += 1;
      }

      if (localVerbose > 1) {
        std::cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1;
        std::cout << " -- Iteration " << iter << " -- " << nFound << " converged vectors\n";
        if (localVerbose > 2) {
          std::cout << std::endl;
          for (ii = 0; ii < numVec; ++ii) {
            std::cout << " ... ";
            std::cout.width(5);
            std::cout << ii << " ... Residual = ";
            std::cout.precision(2);
            std::cout.setf(std::ios::scientific, std::ios::floatfield);
            std::cout << resNorm[ii] << " ... Right Hand Side = " << initNorm[ii] << std::endl;
          }
          std::cout << std::endl;
        }
      }

      if (nFound == numVec) {
        break;
      }

    }  // for (iter = 1; iter <= maxIter; ++iter)

    if (useY == false) {
      // Copy the solutions back into Y
      memcpy(Y.Values() + xrow*iRHS, valSOL, numVec*xrow*sizeof(double));
    }

    numSolve += nFound;

    if (nFound == numVec) {
      minIter = (iter < minIter) ? iter : minIter;
      maxIter = (iter > maxIter) ? iter : maxIter;
      sumIter += iter;
    }

  } // for (iRHS = 0; iRHS < xcol; iRHS += blkSize)

  return info;
}

Exemple #5

0

Afficher le fichier

Fichier : mc_irf_var.cpp Projet : cran/MSBVAR

//extern "C"
SEXP mc_irf_var(SEXP varobj, SEXP nsteps, SEXP draws)
{
  int m, p, dr=INTEGER(draws)[0], ns=INTEGER(nsteps)[0], T, df, i;
  SEXP AR, Y, Bhat, XR, prior, hstar, meanS, output;

  // Get # vars/lags/steps/draws/T/df
  PROTECT(AR = listElt(varobj, "ar.coefs"));
  PROTECT(Y = listElt(varobj, "Y"));
  m = INTEGER(getAttrib(AR, R_DimSymbol))[0]; //#vars
  p = INTEGER(getAttrib(AR, R_DimSymbol))[2]; //#lags
  T = nrows(Y); df = T - m*p - m - 1;
  UNPROTECT(2);

  // Put coefficients from varobj$Bhat in Bcoefs vector (m^2*p, 1)
  PROTECT(Bhat = coerceVector(listElt(varobj, "Bhat"), REALSXP));
  Matrix bcoefs = R2Cmat(Bhat, m*p, m);
  bcoefs = bcoefs.AsColumn();
  UNPROTECT(1);

  // Define X(T x m*p) subset of varobj$X and XXinv as solve(X'X)
  PROTECT(XR = coerceVector(listElt(varobj,"X"),REALSXP));
  Matrix X = R2Cmat(XR, T, m*p), XXinv;
  UNPROTECT(1);

  // Get the correct moment matrix
  PROTECT(prior = listElt(varobj,"prior"));
  if(!isNull(prior)){
    PROTECT(hstar = coerceVector(listElt(varobj,"hstar"),REALSXP));
    XXinv = R2Cmat(hstar, m*p, m*p).i();
    UNPROTECT(1);
  }
  else { XXinv = (X.t()*X).i(); }
  UNPROTECT(1);

  // Get the transpose of the Cholesky decomp of XXinv
  SymmetricMatrix XXinvSym; XXinvSym << XXinv;
  XXinv = Cholesky(XXinvSym);

  // Cholesky of covariance
  PROTECT(meanS = coerceVector(listElt(varobj,"mean.S"),REALSXP));
  SymmetricMatrix meanSSym; meanSSym << R2Cmat(meanS, m, m);
  Matrix Sigmat = Cholesky(meanSSym);
  UNPROTECT(1);

  // Matricies needed for the loop
  ColumnVector bvec; bvec=0.0;
  Matrix sqrtwish, impulse(dr,m*m*ns); impulse = 0.0;
  SymmetricMatrix sigmadraw; sigmadraw = 0.0;
  IdentityMatrix I(m);

  GetRNGstate();
  // Main Loop
  for (i=1; i<=dr; i++){
    // Wishart/Beta draws
    sigmadraw << Sigmat*(T*rwish(I,df).i())*Sigmat.t();
    sqrtwish = Cholesky(sigmadraw);
    bvec = bcoefs+KP(sqrtwish, XXinv)*rnorms(m*m*p);

    // IRF computation
    impulse.Row(i) = irf_var_from_beta(sqrtwish, bvec, ns).t();
    if (!(i%1000)){ Rprintf("Monte Carlo IRF Iteration = %d\n",i); }
  } // end main loop
  PutRNGstate();

  int dims[]={dr,ns,m*m};
  PROTECT(output = C2R3D(impulse,dims));
  setclass(output,"mc.irf.VAR");
  UNPROTECT(1);
  return output;
}